import logging
from optparse import OptionParser

import yaml

from easiparse import importer, extractor, cooccurrences

logging.basicConfig(level=logging.DEBUG, format="%(levelname)-8s %(message)s")


def get_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``OptionParser`` with a single ``-e`` / ``--execute`` option whose
        value is stored as ``options.execute``.
    """
    parser = OptionParser()
    parser.add_option("-e", "--execute", dest="execute", help="execution action")
    return parser


if __name__ == "__main__":
    parser = get_parser()
    (options, args) = parser.parse_args()
    # Emits the same text as the Python 2 statement ``print options, args``
    # while also being valid syntax on Python 3.
    print("%s %s" % (options, args))

    # NOTE(review): yaml.load can instantiate arbitrary Python objects; if
    # config.yaml may ever come from an untrusted source, switch to
    # yaml.safe_load.
    with open("config.yaml", 'rU') as config_file:
        config = yaml.load(config_file)

    # Exactly one action runs per invocation; each receives the parsed config.
    action = options.execute
    if action == 'import':
        importer.main(config)
    elif action == 'extract':
        extractor.main(config)
    elif action == 'cooccurrences':
        cooccurrences.main(config)
    elif action == 'exportcooc':
        cooccurrences.exportcooc(config)
    else:
        # Previously an unknown or missing action fell through silently.
        logging.error(
            "unknown or missing --execute action %r (expected one of: "
            "import, extract, cooccurrences, exportcooc)", action)
def worker(config, input_path, mongodb, limit=None):
    """Run the importer on one input file and write its output file.

    The input is opened as ASCII with undecodable bytes replaced. The output
    file is created in ``config['output_path']`` under the same base name as
    ``input_path``. Both files are closed once ``importer.main`` returns.

    Args:
        config: mapping providing at least ``'output_path'``; it is also
            passed through to ``importer.main``.
        input_path: path of the file to read.
        mongodb: database handle passed through to ``importer.main``.
        limit: passed through to ``importer.main`` as ``limit``.

    Returns:
        None. If the input file cannot be opened, the error is logged and the
        function returns without calling the importer.
    """
    try:
        isi_file = codecs.open(input_path, "rU", encoding="ascii",
                               errors="replace")
    except Exception as exc:
        # Thread entry point: report the failure (with its cause) and give up
        # on this file instead of letting the exception escape the thread.
        logging.error("Error reading file %s: %s", input_path, exc)
        return

    # NOTE(review): assumes importer.main is done with both files by the time
    # it returns its count -- confirm before relying on the close below.
    with isi_file:
        output_path = join(config['output_path'], split(input_path)[1])
        with codecs.open(output_path, "w+", encoding="ascii",
                         errors="replace") as output_file:
            subtotal = importer.main(
                isi_file, config, output_file, mongodb, limit=limit)

    # Log the path, not the file object's repr.
    logging.debug("extracted %d matching notices in %s", subtotal, input_path)


if __name__ == "__main__":
    # NOTE(review): yaml.load can instantiate arbitrary Python objects; if
    # config.yaml may ever come from an untrusted source, switch to
    # yaml.safe_load.
    with open("config.yaml", 'rU') as config_file:
        config = yaml.load(config_file)

    glob_list = glob(config['input_path'])
    if not glob_list:
        logging.warning("no input file matches %s", config['input_path'])

    mongodb = pymongo.Connection(
        config['mongo_host'], config['mongo_port'])[config['mongo_db_name']]

    # NOTE(review): presumably `reactor` is the Twisted reactor, whose thread
    # pool only runs queued calls once the reactor is started; no
    # reactor.run() is visible in this part of the file -- confirm it exists.
    for input_path in glob_list:
        reactor.callInThread(worker, config, input_path, mongodb, limit=None)
        #asyncparser = AsyncParse(config, input_path, mongodb, None)