def test_index_by_fk_1(self):
    columns = {'id': '1', 'area': '2', 'type': '3'}
    parsed_message = Message(1, 'area_alias', columns, 'delete')
    handler.SCHEMA = SCHEMA
    self.handler = handler.Handler(SCHEMA.keys())
    for entity_type, entity in SCHEMA.items():
        self.handler.cores[entity_type] = mock.Mock()
    self.handler._index_by_fk(parsed_message)
    calls = self.handler.db_session().execute.call_args_list
    self.assertEqual(len(calls), 6)
    actual_queries = [str(call[0][0]) for call in calls]
    expected_queries = [
        'SELECT place_1.id AS place_1_id \n'
        'FROM musicbrainz.place AS place_1 JOIN musicbrainz.area ON musicbrainz.area.id = place_1.area \n'
        'WHERE musicbrainz.area.id = :id_1',
        'SELECT label_1.id AS label_1_id \n'
        'FROM musicbrainz.label AS label_1 JOIN musicbrainz.area ON musicbrainz.area.id = label_1.area \n'
        'WHERE musicbrainz.area.id = :id_1',
        'SELECT artist_1.id AS artist_1_id \n'
        'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.end_area \n'
        'WHERE musicbrainz.area.id = :id_1',
        'SELECT artist_1.id AS artist_1_id \n'
        'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.area \n'
        'WHERE musicbrainz.area.id = :id_1',
        'SELECT artist_1.id AS artist_1_id \n'
        'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.begin_area \n'
        'WHERE musicbrainz.area.id = :id_1',
        'SELECT musicbrainz.area.id AS musicbrainz_area_id \n'
        'FROM musicbrainz.area \n'
        'WHERE musicbrainz.area.id = :id_1'
    ]
    self.assertEqual(expected_queries, actual_queries)

def __init__(self):
    self.cores = {}
    for core_name in SCHEMA.keys():
        self.cores[core_name] = solr_connection(core_name)
        solr_version_check(core_name)

    # Batch size of the pending messages list
    try:
        self.batch_size = config.CFG.getint("sir", "live_index_batch_size")
    except (NoOptionError, AttributeError):
        self.batch_size = 1

    # Defines how long the handler should wait before processing messages.
    # Used to trigger the process_messages callback so that pending_messages
    # is not starved in case it never fills up to batch_size.
    try:
        self.process_delay = config.CFG.getint("sir", "process_delay")
    except (NoOptionError, AttributeError):
        self.process_delay = 120

    logger.info("Batch size is set to %s", self.batch_size)
    logger.info("Process delay is set to %s seconds", self.process_delay)
    self.db_session = db_session()
    self.pending_messages = []
    self.pending_entities = defaultdict(set)
    self.process_timer = ReusableTimer(self.process_delay,
                                       self.process_messages)
    self.queue_lock = Lock()

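# A minimal sketch of the (assumed) [sir] section of the configuration file
# that the getint() calls above read from; the option names come straight
# from those calls, and the in-code defaults (1 and 120) apply when an
# option is missing:
#
#     [sir]
#     live_index_batch_size = 20
#     process_delay = 120
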
def __init__(self):
    self.cores = {}  #: Maps entity type names to Solr cores
    for corename in SCHEMA.keys():
        self.cores[corename] = solr_connection(corename)
        solr_version_check(corename)
    self.session = db_session()  #: The database session used by the handler

def generate_func(args):
    """
    Entry point for the trigger_generation module. This function gets
    called from :func:`~sir.__main__.main`.
    """
    generate(trigger_filename=args["trigger_file"],
             function_filename=args["function_file"],
             broker_id=args["broker_id"],
             entities=args["entity_type"] or SCHEMA.keys())

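# A minimal usage sketch: the keys of this args dict mirror the argparse
# dests set up by the "triggers" subparser in main() (trigger_file,
# function_file, broker_id); "entity_type" is also read here, and None
# falls back to all entity types in SCHEMA.
generate_func({
    "trigger_file": "sql/CreateTriggers.sql",
    "function_file": "sql/CreateFunctions.sql",
    "broker_id": "1",
    "entity_type": None,  # None selects SCHEMA.keys()
})
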
def test_index_by_fk_3(self):
    columns = {'release_group': 1}
    parsed_message = Message(1, 'release', columns, 'delete')
    handler.SCHEMA = SCHEMA
    self.handler = handler.Handler(SCHEMA.keys())
    for entity_type, entity in SCHEMA.items():
        self.handler.cores[entity_type] = mock.Mock()
    self.handler._index_by_fk(parsed_message)
    calls = self.handler.db_session().execute.call_args_list
    self.assertEqual(len(calls), 1)
    actual_queries = [str(call[0][0]) for call in calls]
    expected_queries = [
        'SELECT musicbrainz.release_group.id AS musicbrainz_release_group_id \n'
        'FROM musicbrainz.release_group \n'
        'WHERE musicbrainz.release_group.id = :id_1'
    ]
    self.assertEqual(expected_queries, actual_queries)

def watch(args):
    """
    Watch AMQP queues for messages.

    :param dict args: Parsed arguments; ``args["entity_type"]`` is the list
                      of entity types to watch, or None for all of them.
    """
    try:
        create_amqp_connection()
    except socket_error as e:
        logger.error("Couldn't connect to RabbitMQ, check your settings. %s",
                     e)
        exit(1)
    try:
        entities = args["entity_type"] or SCHEMA.keys()
        _watch_impl(entities)
    except URLError as e:
        logger.error("Connecting to Solr failed: %s", e)
        exit(1)

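# A minimal usage sketch: watch() expects the vars(...) dict produced by
# argparse in main(). The entity type names are assumptions; any keys of
# SCHEMA work, and None watches every entity type.
watch({"entity_type": ["artist", "label"]})
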
@classmethod
def from_amqp_message(cls, queue, amqp_message):
    """
    Parses an AMQP message.

    :param str queue: The queue name
    :param amqp.basic_message.Message amqp_message:
    :rtype: :class:`sir.amqp.message.Message`
    :raises sir.amqp.message.InvalidMessageContentException:
        If the message content could not be parsed
    :raises ValueError:
        If the entity type in the message was invalid or any of the IDs
        was not numeric (in case ``type`` is not
        :data:`MESSAGE_TYPES.delete`) or the queue is unknown
    """
    if queue not in QUEUE_TO_TYPE.keys():
        raise ValueError("%s is not a valid queue name" % queue)
    message_type = QUEUE_TO_TYPE[queue]

    dbg_msg = amqp_message.body
    if len(dbg_msg) > 20:
        dbg_msg = dbg_msg[:20] + "..."
    logger.debug("Received message from queue %s: %s", queue, dbg_msg)

    split_message = amqp_message.body.split(" ")
    if len(split_message) < 2:
        raise InvalidMessageContentException("AMQP messages must at least "
                                             "contain 2 entries separated"
                                             " by spaces")
    entity_type = split_message[0].replace("_", "-")
    if entity_type == "release-raw":
        # See https://git.io/vDcdo
        entity_type = "cdstub"
    if entity_type not in SCHEMA.keys():
        raise ValueError("Received a message with the invalid entity type "
                         "%s" % entity_type)
    ids = split_message[1:]
    return cls(message_type, entity_type, ids)

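# A minimal usage sketch. FakeAmqpMessage is a hypothetical stand-in for
# amqp.basic_message.Message, assumed here only to carry a ``body``; the
# call assumes "search.index" is a key in QUEUE_TO_TYPE and "artist" is an
# entity type in SCHEMA.
class FakeAmqpMessage(object):
    def __init__(self, body):
        self.body = body

# Body format is "<entity_type> <id> [<id> ...]"; underscores in the entity
# type become hyphens, and "release_raw" is rewritten to "cdstub" per the
# special case above.
msg = Message.from_amqp_message("search.index",
                                FakeAmqpMessage("artist 3 5 8"))
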
def __init__(self):
    self.cores = {}
    for core_name in SCHEMA.keys():
        self.cores[core_name] = solr_connection(core_name)
        solr_version_check(core_name)

    # Batch size of the pending messages list
    try:
        self.batch_size = config.CFG.getint("sir", "live_index_batch_size")
    except (NoOptionError, AttributeError):
        self.batch_size = 1

    # Defines how long the handler should wait before processing messages.
    # Used to trigger the process_messages callback so that pending_messages
    # is not starved in case it never fills up to batch_size.
    try:
        self.process_delay = config.CFG.getint("sir", "process_delay")
    except (NoOptionError, AttributeError):
        self.process_delay = 120

    # Limits the number of rows queried from PostgreSQL. Anything above
    # this limit raises an INDEX_LIMIT_EXCEEDED error.
    try:
        self.index_limit = config.CFG.getint("sir", "index_limit")
    except (NoOptionError, AttributeError):
        self.index_limit = 40000

    logger.info("Batch size is set to %s", self.batch_size)
    logger.info("Process delay is set to %s seconds", self.process_delay)
    logger.info("Index limit is set to %s rows", self.index_limit)
    self.db_session = db_session()
    self.pending_messages = []
    self.pending_entities = defaultdict(set)
    self.processing = False
    self.channel = None
    self.connection = None
    self.last_message = time.time()

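# As above, a minimal sketch of the (assumed) [sir] config section, now
# including the extra index_limit option read by this version of the
# handler (default 40000 when absent):
#
#     [sir]
#     live_index_batch_size = 20
#     process_delay = 120
#     index_limit = 40000
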
def setUp(self):
    super(CallbackWrapperTest, self).setUp()
    self.handler = handler.Handler(SCHEMA.keys())
    self.channel = self.handler.channel = mock.MagicMock()
    self.handler.connection = mock.MagicMock()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true")
    parser.add_argument("--sqltimings", action="store_true")
    subparsers = parser.add_subparsers()

    reindex_parser = subparsers.add_parser("reindex",
                                           help="Reindexes all or a single "
                                                "entity type")
    reindex_parser.set_defaults(func=reindex)
    reindex_parser.add_argument('--entity-type', action='append',
                                help="Which entity types to index.",
                                choices=SCHEMA.keys())

    generate_trigger_parser = subparsers.add_parser("triggers",
                                                    help="Generate triggers")
    generate_trigger_parser.set_defaults(func=generate_triggers)
    generate_trigger_parser.add_argument('-t', '--trigger-file',
                                         action="store",
                                         default="sql/CreateTriggers.sql",
                                         help="The filename to save the "
                                              "triggers into")
    generate_trigger_parser.add_argument('-f', '--function-file',
                                         action="store",
                                         default="sql/CreateFunctions.sql",
                                         help="The filename to save the "
                                              "functions into")
    generate_trigger_parser.add_argument('-bid', '--broker-id',
                                         action="store", default="1",
                                         help="ID of the AMQP broker row "
                                              "in the database.")

    amqp_setup_parser = subparsers.add_parser("amqp_setup",
                                              help="Set up AMQP exchanges "
                                                   "and queues")
    amqp_setup_parser.set_defaults(func=setup_rabbitmq)

    amqp_watch_parser = subparsers.add_parser("amqp_watch",
                                              help="Watch AMQP queues for "
                                                   "changes")
    amqp_watch_parser.set_defaults(func=watch)

    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    loghandler = logging.StreamHandler()
    if args.debug:
        formatter = logging.Formatter(fmt="%(processName)s %(asctime)s "
                                          "%(levelname)s: %(message)s")
    else:
        formatter = logging.Formatter(fmt="%(asctime)s: %(message)s")
    loghandler.setFormatter(formatter)
    logger.addHandler(loghandler)

    mplogger = multiprocessing.get_logger()
    mplogger.setLevel(logging.ERROR)
    mplogger.addHandler(loghandler)

    if args.sqltimings:
        from sqlalchemy import event
        from sqlalchemy.engine import Engine
        import time

        sqltimelogger = logging.getLogger("sqltimer")
        sqltimelogger.setLevel(logging.DEBUG)
        sqltimelogger.addHandler(loghandler)

        @event.listens_for(Engine, "before_cursor_execute")
        def before_cursor_execute(conn, cursor, statement, parameters,
                                  context, executemany):
            conn.info.setdefault('query_start_time', []).append(time.time())
            sqltimelogger.debug("Start Query: %s", statement)

        @event.listens_for(Engine, "after_cursor_execute")
        def after_cursor_execute(conn, cursor, statement, parameters,
                                 context, executemany):
            total = time.time() - conn.info['query_start_time'].pop(-1)
            sqltimelogger.debug("Query Complete!")
            sqltimelogger.debug("Total Time: %f", total)

    config.read_config()
    try:
        init_raven_client(config.CFG.get("sentry", "dsn"))
    except ConfigParser.Error as e:
        logger.info("Skipping Raven client initialization. "
                    "Configuration issue: %s", e)

    func = args.func
    args = vars(args)
    func(args)
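
# Example invocations, assuming the usual ``python -m sir`` entry point;
# all flags come from the argparse setup above:
#
#     python -m sir reindex --entity-type artist --entity-type release
#     python -m sir --debug --sqltimings amqp_watch
#     python -m sir triggers -bid 1 -t sql/CreateTriggers.sql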