Beispiel #1
0
def _run_indexer(options):
    """
    Set up the waveform indexer service and run it until it is stopped.

    Resolves the data paths, optionally loads the mapping file, starts the
    worker processes, prepares the database and finally enters the serving
    loop of the service.

    :param options: Settings object. The attributes read here are ``host``,
        ``port``, ``data`` (one path, or several separated by commas),
        ``mapping_file``, ``number_of_cpus``, ``db_uri``, ``drop_database``
        and ``poll_interval``.
    :return: ``None``. Returns early, before any worker process is started
        or the database is touched, if ``service._prepare_paths`` yields
        nothing.
    :raises SystemExit: If a ``KeyboardInterrupt`` is received.
    """
    # Local import: ``sys.exit`` replaces the ``quit`` helper, which is only
    # injected by the ``site`` module and is intended for interactive use.
    import sys

    logger.info("Starting indexer %s:%s ..." % (options.host, options.port))
    # initialize crawler
    service = WaveformIndexer((options.host, options.port), MyHandler)
    service.log = logger
    try:
        # prepare paths: a comma separates multiple entries
        if ',' in options.data:
            raw_paths = options.data.split(',')
        else:
            raw_paths = [options.data]
        paths = service._prepare_paths(raw_paths)
        if not paths:
            # nothing to index
            return
        # prepare map file
        mappings = {}
        if options.mapping_file:
            with open(options.mapping_file, 'r') as f:
                data = f.readlines()
            mappings = parse_mapping_data(data)
            # the reported count is the number of lines read from the file
            logger.info("Parsed %d lines from mapping file %s" %
                        (len(data), options.mapping_file))
        # create file queue and worker processes; the containers come from
        # a multiprocessing manager so they can be handed to the workers
        manager = multiprocessing.Manager()
        in_queue = manager.dict()
        work_queue = manager.list()
        out_queue = manager.list()
        log_queue = manager.list()
        # spawn one daemonized worker process per requested CPU
        for index in range(options.number_of_cpus):
            worker_args = (index, in_queue, work_queue, out_queue, log_queue,
                           mappings)
            process = multiprocessing.Process(target=worker,
                                              args=worker_args)
            process.daemon = True
            process.start()
        # connect to database
        # NOTE(review): ``encoding`` and ``convert_unicode`` are legacy
        # engine options - confirm the installed SQLAlchemy still accepts
        # them.
        engine = create_engine(options.db_uri, encoding='utf-8',
                               convert_unicode=True)
        metadata = Base.metadata
        # recreate database
        if options.drop_database:
            metadata.drop_all(engine, checkfirst=True)
        metadata.create_all(engine, checkfirst=True)
        # initialize database + options; the sessionmaker factory itself is
        # stored on the service (it is not called here)
        service.session = sessionmaker(bind=engine)
        service.options = options
        service.mappings = mappings
        # set queues
        service.input_queue = in_queue
        service.work_queue = work_queue
        service.output_queue = out_queue
        service.log_queue = log_queue
        service.paths = paths
        service._reset_walker()
        service._step_walker()
        service.serve_forever(options.poll_interval)
    except KeyboardInterrupt:
        # Terminate via SystemExit without depending on the ``site`` module.
        sys.exit()
    # Only reached when serve_forever() returns normally; after Ctrl-C the
    # SystemExit raised above propagates first.
    logger.info("Indexer stopped.")
Beispiel #2
0
def _run_indexer(options):
    """
    Set up the waveform indexer service and run it until it is stopped.

    Resolves the data paths, optionally loads the mapping file, starts the
    worker processes, prepares the database and finally enters the serving
    loop of the service. Messages are emitted through the ``logging`` name
    in scope, which is also handed to the service as its ``log`` attribute.

    :param options: Settings object. The attributes read here are ``host``,
        ``port``, ``data`` (one path, or several separated by commas),
        ``mapping_file``, ``number_of_cpus``, ``db_uri``, ``drop_database``
        and ``poll_interval``.
    :return: ``None``. Returns early, before any worker process is started
        or the database is touched, if ``service._prepare_paths`` yields
        nothing.
    :raises SystemExit: If a ``KeyboardInterrupt`` is received.
    """
    # Local import: ``sys.exit`` replaces the ``quit`` helper, which is only
    # injected by the ``site`` module and is intended for interactive use.
    import sys

    logging.info("Starting indexer %s:%s ..." % (options.host, options.port))
    # initialize crawler
    service = WaveformIndexer((options.host, options.port), MyHandler)
    service.log = logging
    try:
        # prepare paths: a comma separates multiple entries
        if ',' in options.data:
            raw_paths = options.data.split(',')
        else:
            raw_paths = [options.data]
        paths = service._prepare_paths(raw_paths)
        if not paths:
            # nothing to index
            return
        # prepare map file
        mappings = {}
        if options.mapping_file:
            with open(options.mapping_file, 'r') as f:
                data = f.readlines()
            mappings = parse_mapping_data(data)
            # the reported count is the number of lines read from the file
            logging.info("Parsed %d lines from mapping file %s" %
                         (len(data), options.mapping_file))
        # create file queue and worker processes; the containers come from
        # a multiprocessing manager so they can be handed to the workers
        manager = multiprocessing.Manager()
        in_queue = manager.dict()
        work_queue = manager.list()
        out_queue = manager.list()
        log_queue = manager.list()
        # spawn one daemonized worker process per requested CPU
        for index in range(options.number_of_cpus):
            worker_args = (index, in_queue, work_queue, out_queue, log_queue,
                           mappings)
            process = multiprocessing.Process(target=worker,
                                              args=worker_args)
            process.daemon = True
            process.start()
        # connect to database
        # NOTE(review): ``encoding`` and ``convert_unicode`` are legacy
        # engine options - confirm the installed SQLAlchemy still accepts
        # them.
        engine = create_engine(options.db_uri, encoding=native_str('utf-8'),
                               convert_unicode=True)
        metadata = Base.metadata
        # recreate database
        if options.drop_database:
            metadata.drop_all(engine, checkfirst=True)
        metadata.create_all(engine, checkfirst=True)
        # initialize database + options; the sessionmaker factory itself is
        # stored on the service (it is not called here)
        service.session = sessionmaker(bind=engine)
        service.options = options
        service.mappings = mappings
        # set queues
        service.input_queue = in_queue
        service.work_queue = work_queue
        service.output_queue = out_queue
        service.log_queue = log_queue
        service.paths = paths
        service._reset_walker()
        service._step_walker()
        service.serve_forever(options.poll_interval)
    except KeyboardInterrupt:
        # Terminate via SystemExit without depending on the ``site`` module.
        sys.exit()
    # Only reached when serve_forever() returns normally; after Ctrl-C the
    # SystemExit raised above propagates first.
    logging.info("Indexer stopped.")
Beispiel #3
0
 def test_parseMappingData(self):
     """
     Check parse_mapping_data with valid lines, bad ids and bad dates.
     """
     # 1 - valid input mixing dated entries, a blank line, a comment and
     # ids with empty components
     lines = [
         "BW.MANZ.00.EHE GE.ROTZ..EHZ 1970-01-01 2007-12-31",
         "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008-01-01",
         " ",
         ".MANZ.00.EHE GE.ROTZ..EHZ",
         "# comment",
         "BW...EHE GE.ROTZ..EHZ",
     ]
     parsed = parse_mapping_data(lines)
     no_network = parsed['.MANZ.00.EHE']
     self.assertEqual(len(no_network), 1)
     # the single entry without dates maps onto GE.ROTZ..EHZ
     entry = no_network[0]
     expected_fields = (
         ('network', 'GE'),
         ('station', 'ROTZ'),
         ('location', ''),
         ('channel', 'EHZ'),
         ('starttime', None),
         ('endtime', None),
     )
     for field, expected in expected_fields:
         self.assertEqual(entry[field], expected)
     self.assertEqual(len(parsed['BW.MANZ.00.EHE']), 2)
     self.assertEqual(len(parsed['BW...EHE']), 1)
     # 2 invalid ids
     bad_ids = (
         "BWMANZ00EHE GE.ROTZ..EHZ",
         "BW.MANZ.00EHE GE.ROTZ..EHZ",
         "BW.MANZ.00.EHE. GE.ROTZ..EHZ",
         "XXX.MANZ.00.EHE GE.ROTZ..EHZ",
         "BW.XXXXXX.00.EHE GE.ROTZ..EHZ",
         "BW.MANZ.XXX.EHE GE.ROTZ..EHZ",
         "BW.MANZ.00.XXXX GE.ROTZ..EHZ",
     )
     for line in bad_ids:
         self.assertRaises(Exception, parse_mapping_data, [line])
     # 3 invalid date/times
     bad_dates = (
         "BW.MANZ.00.EHE GE.ROTZ..EHZ 2008 2009",
         "BW.MANZ.00.EHE GE.ROTZ..EHZ 2009-01-01 2008-01-01",
     )
     for line in bad_dates:
         self.assertRaises(Exception, parse_mapping_data, [line])