コード例 #1
0
ファイル: test_util.py プロジェクト: fakedrake/WikipediaBase
    def test_interval(self):
        """Check ``util.time_interval("now")`` against a hand-driven fake clock.

        ``datetime.datetime`` is temporarily replaced by a subclass whose
        ``now()`` returns a plain counter. The counter is advanced by one
        before every call, so each call is expected to report an interval
        of exactly 1.
        """
        class DateTime(datetime.datetime):
            # Fake "current time" returned by now(); advanced by the test.
            x = 0

            @classmethod
            def now(cls):
                return cls.x

        # Keep a handle on the real class so the patch cannot leak into
        # other tests, even when an assertion below fails.
        real_datetime = datetime.datetime
        datetime.datetime = DateTime
        try:
            # The first result is ignored; presumably this call only records
            # the baseline for the "now" key -- confirm against util.
            util.time_interval("now")
            for _ in range(20):
                # Increment time between calls
                DateTime.x += 1
                self.assertEqual(util.time_interval("now"), 1)
        finally:
            datetime.datetime = real_datetime
コード例 #2
0
ファイル: dbfetcher.py プロジェクト: fakedrake/WikipediaBase
def _main():
    """
    dbfetcher.py {category_map, category_fix, dump_categories} [in file] [out file]

    Create a category map, fix the names of the categories replacing
    spaces with _, or dump the categories. Input and output files
    default to stdin and stdout.

    Note that there are about 1M categories and 11M (namespace 0)
    articles.

    Examples:

    dbfetcher.py category_map > category-map.txt
    """
    # NOTE: the docstring above doubles as the usage message printed to
    # stderr, so it is kept user-facing; implementation notes live in
    # comments instead.
    #
    # Arguments are read from sys.argv:
    #   argv[1]  command (one of the names handled below)
    #   argv[2]  input path, '-' or missing for stdin
    #   argv[3]  output path, '-' or missing for stdout
    args = sys.argv[1:]
    command = args[0] if args else None

    # Show usage for a missing or unknown command before touching any
    # file, so a typo in the command cannot truncate the output file.
    if command not in ("category_map", "category_fix", "dump_categories"):
        sys.stderr.write(_main.__doc__.strip() + "\n")
        return

    in_path = args[1] if len(args) > 1 else '-'
    out_path = args[2] if len(args) > 2 else '-'

    err = sys.stderr
    fi = fo = None

    try:
        fi = sys.stdin if in_path == '-' else open(in_path)
        # The output stream is selected by the *output* argument alone.
        fo = sys.stdout if out_path == '-' else open(out_path, 'w')

        if command == "category_map":
            fetcher = get_dbfetcher()
            gen = fetcher.all_article_categories()

            err.write("SQL: %s\n" % fetcher.cmd)
            for counter, (ident, cat) in enumerate(gen):
                if counter == 0:
                    err.write("[ <time interval> ] Start...\n")

                fo.write("%s %s\n" % (ident, cat))

                # Progress report every OUTPUT_SPARSITY rows.
                if counter % OUTPUT_SPARSITY == 0:
                    err.write("[ %s ] %d categories parsed...\n" %
                              (time_interval(), counter))

        elif command == "category_fix":
            for line in fi:
                stripped = line.strip()

                # Blank lines are dropped from the output.
                if not stripped:
                    continue

                # First token is kept verbatim; spaces in the remainder
                # become underscores.
                car, cdr = stripped.split(" ", 1)
                fo.write("%s %s\n" % (car, cdr.replace(" ", "_")))

        else:  # dump_categories
            fetcher = get_dbfetcher()

            for i, (ident, cat) in enumerate(fetcher.categories()):
                fo.write("%s %s\n" % (ident, cat))

                if i == 0:
                    err.write("SQL: %s\n" % fetcher.cmd)

                # Progress report every OUTPUT_SPARSITY rows.
                if i % OUTPUT_SPARSITY == 0:
                    err.write("[ %s ] %d categories dumped (current: %s)\n" %
                              (time_interval(), i, cat))
    finally:
        # Close only the files opened here; never the process-wide streams.
        if fi is not None and fi is not sys.stdin:
            fi.close()
        if fo is not None and fo is not sys.stdout:
            fo.close()