def _runIndexer(options):
    """
    Start the waveform indexer service and block until it stops.

    :param options: parsed command-line options object; must provide
        ``host``, ``port``, ``data`` (comma-separated path list),
        ``mapping_file``, ``number_of_cpus``, ``db_uri``,
        ``drop_database`` and ``poll_interval``.
    :return: ``None``. Returns early if no valid paths were given.
    """
    logging.info("Starting indexer %s:%s ..." % (options.host, options.port))
    # initialize crawler
    service = WaveformIndexer((options.host, options.port), MyHandler)
    service.log = logging
    try:
        # prepare paths
        if ',' in options.data:
            paths = options.data.split(',')
        else:
            paths = [options.data]
        paths = service._preparePaths(paths)
        if not paths:
            # nothing to index
            return
        # prepare map file
        if options.mapping_file:
            # use a context manager so the file handle is always closed
            # (the original left the file open)
            with open(options.mapping_file, 'r') as fh:
                data = fh.readlines()
            mappings = parseMappingData(data)
            logging.info("Parsed %d lines from mapping file %s" %
                         (len(data), options.mapping_file))
        else:
            mappings = {}
        # create file queue and worker processes; manager proxies are
        # shared between this process and the workers
        manager = multiprocessing.Manager()
        in_queue = manager.dict()
        work_queue = manager.list()
        out_queue = manager.list()
        log_queue = manager.list()
        # spawn daemonized worker processes (die with the parent)
        for i in range(options.number_of_cpus):
            args = (i, in_queue, work_queue, out_queue, log_queue, mappings)
            p = multiprocessing.Process(target=worker, args=args)
            p.daemon = True
            p.start()
        # connect to database
        engine = create_engine(options.db_uri, encoding='utf-8',
                               convert_unicode=True)
        metadata = Base.metadata
        # optionally drop and always (re)create schema, skipping
        # tables that already exist
        if options.drop_database:
            metadata.drop_all(engine, checkfirst=True)
        metadata.create_all(engine, checkfirst=True)
        # initialize database + options
        Session = sessionmaker(bind=engine)
        service.session = Session
        service.options = options
        service.mappings = mappings
        # hand the shared queues over to the service
        service.input_queue = in_queue
        service.work_queue = work_queue
        service.output_queue = out_queue
        service.log_queue = log_queue
        service.paths = paths
        service._resetWalker()
        service._stepWalker()
        # blocks until interrupted
        service.serve_forever(options.poll_interval)
    except KeyboardInterrupt:
        quit()
    logging.info("Indexer stopped.")
def _runIndexer(options):
    """
    Start the waveform indexer service and block until it stops.

    :param options: parsed command-line options object; must provide
        ``host``, ``port``, ``data`` (comma-separated path list),
        ``map_file``, ``number_of_cpus``, ``db_uri``,
        ``drop_database`` and ``poll_interval``.
    :return: ``None``. Returns early if no valid paths were given.
    """
    logging.info("Starting indexer %s:%s ..." % (options.host, options.port))
    # initialize crawler
    service = WaveformIndexer((options.host, options.port), MyHandler)
    service.log = logging
    try:
        # prepare paths
        if ',' in options.data:
            paths = options.data.split(',')
        else:
            paths = [options.data]
        paths = service._preparePaths(paths)
        if not paths:
            # nothing to index
            return
        # prepare map file
        if options.map_file:
            # use a context manager so the file handle is always closed
            # (the original left the file open)
            with open(options.map_file, 'r') as fh:
                data = fh.readlines()
            mappings = parseMappingData(data)
            logging.info("Parsed %d lines from mapping file %s" %
                         (len(data), options.map_file))
        else:
            mappings = {}
        # create file queue and worker processes; manager proxies are
        # shared between this process and the workers
        manager = multiprocessing.Manager()
        in_queue = manager.dict()
        work_queue = manager.list()
        out_queue = manager.list()
        log_queue = manager.list()
        # spawn daemonized worker processes (die with the parent)
        for i in range(options.number_of_cpus):
            args = (i, in_queue, work_queue, out_queue, log_queue, mappings)
            p = multiprocessing.Process(target=worker, args=args)
            p.daemon = True
            p.start()
        # connect to database
        engine = create_engine(options.db_uri, encoding='utf-8',
                               convert_unicode=True)
        metadata = Base.metadata
        # optionally drop and always (re)create schema, skipping
        # tables that already exist
        if options.drop_database:
            metadata.drop_all(engine, checkfirst=True)
        metadata.create_all(engine, checkfirst=True)
        # initialize database + options
        Session = sessionmaker(bind=engine)
        service.session = Session
        service.options = options
        service.mappings = mappings
        # hand the shared queues over to the service
        service.input_queue = in_queue
        service.work_queue = work_queue
        service.output_queue = out_queue
        service.log_queue = log_queue
        service.paths = paths
        service._resetWalker()
        service._stepWalker()
        # blocks until interrupted
        service.serve_forever(options.poll_interval)
    except KeyboardInterrupt:
        quit()
    logging.info("Indexer stopped.")
def test_parseMappingData(self):
    """
    Tests for function parseMappingData.
    """
    # 1 - a valid mix of mappings, blank lines, comments and
    # entries with empty network/location fields
    lines = [
        "BW.MANZ.00.EHE GE.ROTZ..EHZ 1970-01-01 2007-12-31",
        "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008-01-01",
        " ",
        ".MANZ.00.EHE GE.ROTZ..EHZ",
        "# comment",
        "BW...EHE GE.ROTZ..EHZ",
    ]
    results = parseMappingData(lines)
    # the open-ended mapping with empty network field
    entries = results[".MANZ.00.EHE"]
    self.assertEqual(len(entries), 1)
    entry = entries[0]
    self.assertEqual(entry["network"], "GE")
    self.assertEqual(entry["station"], "ROTZ")
    self.assertEqual(entry["location"], "")
    self.assertEqual(entry["channel"], "EHZ")
    self.assertEqual(entry["starttime"], None)
    self.assertEqual(entry["endtime"], None)
    # duplicate id collects both time windows
    self.assertEqual(len(results["BW.MANZ.00.EHE"]), 2)
    self.assertEqual(len(results["BW...EHE"]), 1)
    # 2 invalid ids - each malformed source id must raise
    for bad_id in [
        "BWMANZ00EHE GE.ROTZ..EHZ",
        "BW.MANZ.00EHE GE.ROTZ..EHZ",
        "BW.MANZ.00.EHE. GE.ROTZ..EHZ",
        "XXX.MANZ.00.EHE GE.ROTZ..EHZ",
        "BW.XXXXXX.00.EHE GE.ROTZ..EHZ",
        "BW.MANZ.XXX.EHE GE.ROTZ..EHZ",
        "BW.MANZ.00.XXXX GE.ROTZ..EHZ",
    ]:
        self.assertRaises(Exception, parseMappingData, [bad_id])
    # 3 invalid date/times - malformed dates and a start time
    # after the end time must raise
    for bad_line in [
        "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008 2009",
        "BW.MANZ.00.EHE GE.ROTZ..EHZ 2009-01-01 2008-01-01",
    ]:
        self.assertRaises(Exception, parseMappingData, [bad_line])
def test_parseMappingData(self):
    """
    Tests for function parseMappingData.

    Uses ``assertEqual`` instead of the deprecated ``assertEquals``
    alias, which was removed in Python 3.12.
    """
    # 1 - valid mix of mappings, blank lines, comments and entries
    # with empty network/location fields
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 1970-01-01 2007-12-31",
            "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008-01-01",
            " ",
            ".MANZ.00.EHE GE.ROTZ..EHZ",
            "# comment",
            "BW...EHE GE.ROTZ..EHZ"]
    results = parseMappingData(data)
    self.assertEqual(len(results['.MANZ.00.EHE']), 1)
    self.assertEqual(results['.MANZ.00.EHE'][0]['network'], 'GE')
    self.assertEqual(results['.MANZ.00.EHE'][0]['station'], 'ROTZ')
    self.assertEqual(results['.MANZ.00.EHE'][0]['location'], '')
    self.assertEqual(results['.MANZ.00.EHE'][0]['channel'], 'EHZ')
    self.assertEqual(results['.MANZ.00.EHE'][0]['starttime'], None)
    self.assertEqual(results['.MANZ.00.EHE'][0]['endtime'], None)
    # duplicate id collects both time windows
    self.assertEqual(len(results['BW.MANZ.00.EHE']), 2)
    self.assertEqual(len(results['BW...EHE']), 1)
    # 2 invalid ids - each malformed source id must raise
    data = ["BWMANZ00EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.MANZ.00EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.MANZ.00.EHE. GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["XXX.MANZ.00.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.XXXXXX.00.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.MANZ.XXX.EHE GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.MANZ.00.XXXX GE.ROTZ..EHZ"]
    self.assertRaises(Exception, parseMappingData, data)
    # 3 invalid date/times - malformed dates and start after end
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 2008 2009"]
    self.assertRaises(Exception, parseMappingData, data)
    data = ["BW.MANZ.00.EHE GE.ROTZ..EHZ 2009-01-01 2008-01-01"]
    self.assertRaises(Exception, parseMappingData, data)