Ejemplo n.º 1
0
def main(args):
    """Load every file in ``args.files`` using the loader for its extension.

    The loader is looked up in ``EXT_FORMAT`` by the text after the last
    ``.`` in the path. A file whose extension has no registered loader is
    reported and skipped; the remaining files are still processed. For each
    loaded file, the value returned by the loader is printed as the number
    of documents loaded.

    Args:
        args: object with a ``files`` attribute, an iterable of path strings.
    """
    for filepath in args.files:
        extension = filepath.rpartition('.')[-1]
        try:
            load_func = EXT_FORMAT[extension]
        except KeyError:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("Unrecognized format extension: {}".format(filepath))
            continue

        with open(filepath, 'rb') as fp:
            # Pass the handle managed by ``with`` to the loader. Opening the
            # file a second time here would leave that extra handle unclosed.
            count = load_func(report_progress(fp))
        print("Loaded {} documents from {}".format(count, filepath))
Ejemplo n.º 2
0
def main(args):
    """Add a ``category`` term to the indexed documents listed in ``args.file``.

    The index is first created via ``search.create_index``. Then every line
    of the input file is split on a tab into a document key and a category
    expression; the expression is evaluated, and when it yields a list only
    its first element is used. The document is fetched by key, given the
    ``category`` term and written back. Finally the number of processed
    lines is printed.

    Args:
        args: object with a ``file`` attribute holding the input file path.

    Raises:
        ValueError: if a stripped line does not split into exactly two
            tab-separated fields.
    """
    indexer = search.IndexerContext(settings.XAPIAN_DB)
    with indexer as conn:
        search.create_index(conn)

    count = 0
    with indexer as conn, open(args.file) as fp:
        # Number lines from 1 so that ``count`` ends up as the total number
        # of processed lines rather than the index of the last one.
        for count, line in enumerate(report_progress(fp), 1):
            key, cat = line.strip().split('\t')
            # SECURITY: eval() executes arbitrary expressions read from the
            # input file. Only run this on trusted files; consider
            # ast.literal_eval if the categories are always plain literals.
            cat = eval(cat)
            if isinstance(cat, list):
                cat = cat[0]
            doc = conn.get_document(key)
            doc.add_term('category', cat)
            # NOTE(review): the write goes through ``indexer.conn`` while the
            # read uses ``conn``; presumably the same object - confirm.
            indexer.conn.replace(doc)

    print("Updated {} documents".format(count))
Ejemplo n.º 3
0
def main(args):
    """Tag indexed documents with the categories listed in ``args.file``.

    After creating the index with ``search.create_index``, each input line
    is split on a tab into a document key and a category expression. The
    expression is evaluated; if the result is a list, its first element is
    taken. The matching document receives a ``category`` term and is
    written back. The total number of processed lines is printed at the end.

    Args:
        args: object with a ``file`` attribute holding the input file path.

    Raises:
        ValueError: if a stripped line does not split into exactly two
            tab-separated fields.
    """
    indexer = search.IndexerContext(settings.XAPIAN_DB)
    with indexer as conn:
        search.create_index(conn)

    count = 0
    with indexer as conn, open(args.file) as fp:
        # Start counting at 1: with the default start of 0 the final message
        # would report one document fewer than was actually processed.
        for count, line in enumerate(report_progress(fp), 1):
            key, cat = line.strip().split('\t')
            # SECURITY: eval() runs whatever expression the file contains.
            # Use only with trusted input; ast.literal_eval is a safer
            # choice if the values are always plain literals.
            cat = eval(cat)
            if isinstance(cat, list):
                cat = cat[0]
            doc = conn.get_document(key)
            doc.add_term('category', cat)
            # NOTE(review): ``indexer.conn`` is used for the write although
            # ``conn`` is in scope; presumably they are the same - confirm.
            indexer.conn.replace(doc)

    print("Updated {} documents".format(count))