def parse(engine):
    """Parse every tender yielded by ``traverse_local()`` sequentially.

    Each yielded item is passed, together with ``engine``, to
    ``parse_tender``. Nothing is returned.
    """
    local_tenders = traverse_local()
    for tender_paths in local_tenders:
        parse_tender(engine, tender_paths)


def parse_threaded(engine):
    """Parse every tender yielded by ``traverse_local()`` via ``threaded``.

    The items are handed to the ``threaded`` helper with
    ``num_threads=10``; each item is processed by calling
    ``parse_tender(engine, item)``. Nothing is returned.
    """
    def parse_one(tender_paths):
        # Bind ``engine`` so the helper only has to supply the item.
        parse_tender(engine, tender_paths)

    threaded(traverse_local(), parse_one, num_threads=10)


if __name__ == '__main__':
    option_parser = OptionParser()
    # Both options are optional integers that default to None.
    for flag in ("year", "num"):
        option_parser.add_option("--" + flag, dest=flag, type=int,
                                 default=None)
    options, args = option_parser.parse_args()

    engine = get_engine()
    if not (options.year and options.num):
        # No single tender was selected: process everything found locally.
        # ``parse(engine)`` is the non-threaded equivalent.
        parse_threaded(engine)
    else:
        tender_paths = generate_paths(options.year, options.num)
        # generate_paths may yield None, in which case nothing is parsed.
        if tender_paths is not None:
            parse_tender(engine, tender_paths)
'title': cpv_title }, ['document_uri', 'code']) engine['document'].upsert(data, ['uri']) extract_plain(engine, data['uri'], lang_doc) #print data['uri'] def parse(engine): for paths in traverse_local(): parse_tender(engine, paths) if __name__ == '__main__': if "DATABASE" in os.environ: db_addr = os.environ['DATABASE'] else: db_addr = 'postgresql://localhost/opented' p = OptionParser() p.add_option("--db", dest="database", default=db_addr) p.add_option("--year", dest="year", type=int, default=None) p.add_option("--num", dest="num", type=int, default=None) options, args = p.parse_args() engine = dataset.connect(options.database) if options.year and options.num: parse_tender(engine, generate_paths(options.year, options.num)) else: parse(engine)