def _run_indexer(options):
    logger.info("Starting indexer %s:%s ..." % (options.host, options.port))
    # initialize crawler
    service = WaveformIndexer((options.host, options.port), MyHandler)
    service.log = logger
    try:
        # prepare paths
        if ',' in options.data:
            paths = options.data.split(',')
        else:
            paths = [options.data]
        paths = service._prepare_paths(paths)
        if not paths:
            return
        # prepare map file
        if options.mapping_file:
            with open(options.mapping_file, 'r') as f:
                data = f.readlines()
            mappings = parse_mapping_data(data)
            logger.info("Parsed %d lines from mapping file %s" %
                        (len(data), options.mapping_file))
        else:
            mappings = {}
        # create file queue and worker processes
        manager = multiprocessing.Manager()
        in_queue = manager.dict()
        work_queue = manager.list()
        out_queue = manager.list()
        log_queue = manager.list()
        # spawn processes
        for i in range(options.number_of_cpus):
            args = (i, in_queue, work_queue, out_queue, log_queue, mappings)
            p = multiprocessing.Process(target=worker, args=args)
            p.daemon = True
            p.start()
        # connect to database
        engine = create_engine(options.db_uri, encoding='utf-8',
                               convert_unicode=True)
        metadata = Base.metadata
        # recreate database
        if options.drop_database:
            metadata.drop_all(engine, checkfirst=True)
        metadata.create_all(engine, checkfirst=True)
        # initialize database + options
        _session = sessionmaker(bind=engine)
        service.session = _session
        service.options = options
        service.mappings = mappings
        # set queues
        service.input_queue = in_queue
        service.work_queue = work_queue
        service.output_queue = out_queue
        service.log_queue = log_queue
        service.paths = paths
        service._reset_walker()
        service._step_walker()
        service.serve_forever(options.poll_interval)
    except KeyboardInterrupt:
        quit()
    logger.info("Indexer stopped.")
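
# Illustrative usage sketch (hypothetical helper, not part of the module):
# the real script builds ``options`` via its command-line parser. Only the
# attribute names below are taken from _run_indexer itself; every value is
# a placeholder assumption.
def _demo_run_indexer():
    from argparse import Namespace
    options = Namespace(
        host='localhost',                    # assumed bind address
        port=8001,                           # assumed port
        data='/data/archive,/data/scratch',  # comma-separated crawl paths
        mapping_file=None,                   # optional SEED id mapping file
        number_of_cpus=2,                    # worker processes to spawn
        db_uri='sqlite:///indexer.sqlite',   # SQLAlchemy database URI
        drop_database=False,                 # drop + recreate tables if True
        poll_interval=0.1,                   # crawler poll interval [s]
    )
    _run_indexer(options)
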
def test_parseMappingData(self):
    """
    Tests for function parse_mapping_data.
    """
    # 1 - valid mappings; blank lines and comment lines are skipped
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 1970-01-01 2007-12-31",
            "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008-01-01",
            " ",
            ".MANZ.00.EHE GE.ROTZ..EHZ",
            "# comment",
            "BW...EHE GE.ROTZ..EHZ"]
    results = parse_mapping_data(data)
    self.assertEqual(len(results['.MANZ.00.EHE']), 1)
    self.assertEqual(results['.MANZ.00.EHE'][0]['network'], 'GE')
    self.assertEqual(results['.MANZ.00.EHE'][0]['station'], 'ROTZ')
    self.assertEqual(results['.MANZ.00.EHE'][0]['location'], '')
    self.assertEqual(results['.MANZ.00.EHE'][0]['channel'], 'EHZ')
    self.assertEqual(results['.MANZ.00.EHE'][0]['starttime'], None)
    self.assertEqual(results['.MANZ.00.EHE'][0]['endtime'], None)
    self.assertEqual(len(results['BW.MANZ.00.EHE']), 2)
    self.assertEqual(len(results['BW...EHE']), 1)
    # 2 - invalid ids
    data = ["BWMANZ00EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.MANZ.00EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.MANZ.00.EHE. GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["XXX.MANZ.00.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.XXXXXX.00.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.MANZ.XXX.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.MANZ.00.XXXX GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parse_mapping_data, data)
    # 3 - invalid date/times
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 2008 2009"]
    self.assertRaises(Exception, parse_mapping_data, data)
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 2009-01-01 2008-01-01"]
    self.assertRaises(Exception, parse_mapping_data, data)
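
# Illustrative sketch (hypothetical helper, not part of the test suite):
# the mapping lines above follow "<old SEED id> <new SEED id> [start [end]]".
# Assumption: date strings parse into timestamp objects; the tests above
# only pin down None for lines without dates.
def _demo_parse_mapping_data():
    results = parse_mapping_data(["BW.MANZ.00.EHE GE.ROTZ..EHZ 2008-01-01"])
    entry = results['BW.MANZ.00.EHE'][0]
    assert entry['network'] == 'GE'
    assert entry['station'] == 'ROTZ'
    assert entry['location'] == ''
    assert entry['channel'] == 'EHZ'
    assert entry['starttime'] is not None  # parsed from '2008-01-01'
    assert entry['endtime'] is None        # no end time given on the line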