Example #1
    def test_index_by_fk_1(self):
        columns = {'id': '1', 'area': '2', 'type': '3'}
        parsed_message = Message(1, 'area_alias', columns, 'delete')
        handler.SCHEMA = SCHEMA
        self.handler = handler.Handler(SCHEMA.keys())
        for entity_type, entity in SCHEMA.items():
            self.handler.cores[entity_type] = mock.Mock()

        self.handler._index_by_fk(parsed_message)
        calls = self.handler.db_session().execute.call_args_list
        self.assertEqual(len(calls), 6)
        actual_queries = [str(call[0][0]) for call in calls]
        expected_queries = [
            'SELECT place_1.id AS place_1_id \n'
            'FROM musicbrainz.place AS place_1 JOIN musicbrainz.area ON musicbrainz.area.id = place_1.area \n'
            'WHERE musicbrainz.area.id = :id_1',
            'SELECT label_1.id AS label_1_id \n'
            'FROM musicbrainz.label AS label_1 JOIN musicbrainz.area ON musicbrainz.area.id = label_1.area \n'
            'WHERE musicbrainz.area.id = :id_1',
            'SELECT artist_1.id AS artist_1_id \n'
            'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.end_area \n'
            'WHERE musicbrainz.area.id = :id_1',
            'SELECT artist_1.id AS artist_1_id \n'
            'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.area \n'
            'WHERE musicbrainz.area.id = :id_1',
            'SELECT artist_1.id AS artist_1_id \n'
            'FROM musicbrainz.artist AS artist_1 JOIN musicbrainz.area ON musicbrainz.area.id = artist_1.begin_area \n'
            'WHERE musicbrainz.area.id = :id_1',
            'SELECT musicbrainz.area.id AS musicbrainz_area_id \n'
            'FROM musicbrainz.area \n'
            'WHERE musicbrainz.area.id = :id_1'
        ]

        self.assertEqual(expected_queries, actual_queries)
Example #2
    def __init__(self):
        self.cores = {}
        for core_name in SCHEMA.keys():
            self.cores[core_name] = solr_connection(core_name)
            solr_version_check(core_name)

        # Defines the batch size of the pending messages list
        try:
            self.batch_size = config.CFG.getint("sir", "live_index_batch_size")
        except (NoOptionError, AttributeError):
            self.batch_size = 1
        # Defines how long the handler should wait before processing messages.
        # Used to trigger the process_messages callback to prevent starvation
        # when pending_messages doesn't fill up to batch_size.
        try:
            self.process_delay = config.CFG.getint("sir", "process_delay")
        except (NoOptionError, AttributeError):
            self.process_delay = 120

        logger.info("Batch size is set to %s", self.batch_size)
        logger.info("Process delay is set to %s seconds", self.process_delay)

        self.db_session = db_session()
        self.pending_messages = []
        self.pending_entities = defaultdict(set)
        self.process_timer = ReusableTimer(self.process_delay,
                                           self.process_messages)
        self.queue_lock = Lock()
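The two getint lookups above imply a [sir] section in the application's INI
configuration. A minimal sketch of that section, assuming the option names
read by the code (the values are illustrative; the except branches above show
the fallback defaults):

    [sir]
    live_index_batch_size = 20
    process_delay = 120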
Example #3
    def __init__(self):
        self.cores = {}  #: Maps entity type names to Solr cores
        for corename in SCHEMA.keys():
            self.cores[corename] = solr_connection(corename)
            solr_version_check(corename)

        self.session = db_session()  #: The database session used by the handler
Example #4
def generate_func(args):
    """
    This is the entry point for this trigger_generation module. This function
    gets called from :func:`~sir.__main__.main`.
    """
    generate(trigger_filename=args["trigger_file"],
             function_filename=args["function_file"],
             broker_id=args["broker_id"],
             entities=args["entity_type"] or SCHEMA.keys())
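Because generate_func only reads four keys from its args mapping, it can be
exercised directly with a plain dict. A hedged sketch (the file names mirror
the argparse defaults in Example #10; calling it outside the CLI being
supported is an assumption):

    generate_func({
        "trigger_file": "sql/CreateTriggers.sql",
        "function_file": "sql/CreateFunctions.sql",
        "broker_id": "1",
        "entity_type": None,  # falsy, so it falls back to SCHEMA.keys()
    })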
Example #5
    def test_index_by_fk_3(self):
        columns = {'release_group': 1}
        parsed_message = Message(1, 'release', columns, 'delete')
        handler.SCHEMA = SCHEMA
        self.handler = handler.Handler(SCHEMA.keys())
        for entity_type, entity in SCHEMA.items():
            self.handler.cores[entity_type] = mock.Mock()

        self.handler._index_by_fk(parsed_message)
        calls = self.handler.db_session().execute.call_args_list
        self.assertEqual(len(calls), 1)
        actual_queries = [str(call[0][0]) for call in calls]
        expected_queries = [
            'SELECT musicbrainz.release_group.id AS musicbrainz_release_group_id \n'
            'FROM musicbrainz.release_group \n'
            'WHERE musicbrainz.release_group.id = :id_1'
        ]
        self.assertEqual(expected_queries, actual_queries)
Example #6
def watch(args):
    """
    Watch AMQP queues for messages.

    :param dict args: Parsed arguments. ``args["entity_type"]`` is the list
        of entity types to watch (all types in SCHEMA if empty).
    """
    try:
        create_amqp_connection()
    except socket_error as e:
        logger.error("Couldn't connect to RabbitMQ, check your settings. %s", e)
        exit(1)

    try:
        entities = args["entity_type"] or SCHEMA.keys()
        _watch_impl(entities)
    except URLError as e:
        logger.error("Connecting to Solr failed: %s", e)
        exit(1)
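watch follows the same args-dict convention as the other entry points; a
minimal usage sketch (the entity name is illustrative):

    # Watch only artist messages; an empty or None "entity_type" would
    # fall back to every type in SCHEMA.
    watch({"entity_type": ["artist"]})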
Example #7
    def from_amqp_message(cls, queue, amqp_message):
        """
        Parses an AMQP message.

        :param str queue: The queue name
        :param amqp.basic_message.Message amqp_message:
        :rtype: :class:`sir.amqp.message.Message`
        :raises sir.amqp.message.InvalidMessageContentException: If the message
                content could not be parsed
        :raises ValueError: If the entity type in the message was invalid or
                any of the IDs was not numeric (in case ``type`` is not
                :data:`MESSAGE_TYPES.delete`) or the queue is unknown
        """
        if queue not in QUEUE_TO_TYPE:
            raise ValueError("%s is not a valid queue name" % queue)
        else:
            message_type = QUEUE_TO_TYPE[queue]

        dbg_msg = amqp_message.body
        if len(dbg_msg) > 20:
            dbg_msg = dbg_msg[:20] + "..."
        logger.debug("Recieved message from queue %s: %s" % (queue, dbg_msg))

        split_message = amqp_message.body.split(" ")
        if len(split_message) < 2:
            raise InvalidMessageContentException("AMQP messages must at least "
                                                 "contain 2 entries separated"
                                                 " by spaces")

        entity_type = split_message[0].replace("_", "-")
        if entity_type == "release-raw":  # See https://git.io/vDcdo
            entity_type = "cdstub"
        if entity_type not in SCHEMA:
            raise ValueError("Received a message with the invalid entity type "
                             "%s"
                             % entity_type)

        ids = split_message[1:]

        return cls(message_type, entity_type, ids)
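The body handling above boils down to a space-separated wire format: the
first token is the entity type (with underscores normalised to hyphens) and
every following token is a row ID. A standalone sketch of that step, with an
illustrative message body:

    body = "release_group 42 43"
    tokens = body.split(" ")
    entity_type = tokens[0].replace("_", "-")  # "release-group"
    ids = tokens[1:]                           # ["42", "43"]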
Example #8
    def __init__(self):
        self.cores = {}
        for core_name in SCHEMA.keys():
            self.cores[core_name] = solr_connection(core_name)
            solr_version_check(core_name)

        # Defines the batch size of the pending messages list
        try:
            self.batch_size = config.CFG.getint("sir", "live_index_batch_size")
        except (NoOptionError, AttributeError):
            self.batch_size = 1
        # Defines how long the handler should wait before processing messages.
        # Used to trigger the process_messages callback to prevent starvation
        # when pending_messages doesn't fill up to batch_size.
        try:
            self.process_delay = config.CFG.getint("sir", "process_delay")
        except (NoOptionError, AttributeError):
            self.process_delay = 120
        # Limits the number of rows queried from PostgreSQL. Anything above
        # this limit raises an INDEX_LIMIT_EXCEEDED error.
        try:
            self.index_limit = config.CFG.getint("sir", "index_limit")
        except (NoOptionError, AttributeError):
            self.index_limit = 40000

        logger.info("Batch size is set to %s", self.batch_size)
        logger.info("Process delay is set to %s seconds", self.process_delay)
        logger.info("Index limit is set to %s rows", self.index_limit)

        self.db_session = db_session()
        self.pending_messages = []
        self.pending_entities = defaultdict(set)
        self.processing = False
        self.channel = None
        self.connection = None
        self.last_message = time.time()
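Compared to Example #2, this constructor reads one more option from the same
hypothetical [sir] section sketched there:

    [sir]
    live_index_batch_size = 20
    process_delay = 120
    index_limit = 40000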
Example #9
    def setUp(self):
        super(CallbackWrapperTest, self).setUp()
        self.handler = handler.Handler(SCHEMA.keys())
        self.channel = self.handler.channel = mock.MagicMock()
        self.handler.connection = mock.MagicMock()
Example #10
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true")
    parser.add_argument("--sqltimings", action="store_true")
    subparsers = parser.add_subparsers()

    reindex_parser = subparsers.add_parser("reindex",
                                           help="Reindexes all or a single "
                                           "entity type")
    reindex_parser.set_defaults(func=reindex)
    reindex_parser.add_argument('--entity-type', action='append',
                                help="Which entity types to index.",
                                choices=SCHEMA.keys())

    generate_trigger_parser = subparsers.add_parser("triggers",
                                                    help="Generate triggers")
    generate_trigger_parser.set_defaults(func=generate_triggers)
    generate_trigger_parser.add_argument('-t', '--trigger-file',
                                         action="store",
                                         default="sql/CreateTriggers.sql",
                                         help="The filename to save the "
                                         "triggers into")
    generate_trigger_parser.add_argument('-f', '--function-file',
                                         action="store",
                                         default="sql/CreateFunctions.sql",
                                         help="The filename to save the "
                                         "functions into")
    generate_trigger_parser.add_argument('-bid', '--broker-id',
                                         action="store",
                                         default="1",
                                         help="ID of the AMQP broker row "
                                         "in the database.")

    amqp_setup_parser = subparsers.add_parser("amqp_setup",
                                              help="Set up AMQP exchanges and "
                                              "queues")
    amqp_setup_parser.set_defaults(func=setup_rabbitmq)

    amqp_watch_parser = subparsers.add_parser("amqp_watch",
                                              help="Watch AMQP queues for "
                                              "changes")
    amqp_watch_parser.set_defaults(func=watch)

    args = parser.parse_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    loghandler = logging.StreamHandler()
    if args.debug:
        formatter = logging.Formatter(fmt="%(processName)s %(asctime)s  "
                                      "%(levelname)s: %(message)s")
    else:
        formatter = logging.Formatter(fmt="%(asctime)s: %(message)s")
    loghandler.setFormatter(formatter)
    logger.addHandler(loghandler)

    mplogger = multiprocessing.get_logger()
    mplogger.setLevel(logging.ERROR)
    mplogger.addHandler(loghandler)

    if args.sqltimings:
        from sqlalchemy import event
        from sqlalchemy.engine import Engine
        import time

        sqltimelogger = logging.getLogger("sqltimer")
        sqltimelogger.setLevel(logging.DEBUG)
        sqltimelogger.addHandler(loghandler)

        @event.listens_for(Engine, "before_cursor_execute")
        def before_cursor_execute(conn, cursor, statement,
                                  parameters, context, executemany):
            conn.info.setdefault('query_start_time', []).append(time.time())
            sqltimelogger.debug("Start Query: %s", statement)

        @event.listens_for(Engine, "after_cursor_execute")
        def after_cursor_execute(conn, cursor, statement,
                                 parameters, context, executemany):
            total = time.time() - conn.info['query_start_time'].pop(-1)
            sqltimelogger.debug("Query Complete!")
            sqltimelogger.debug("Total Time: %f", total)

    config.read_config()
    try:
        init_raven_client(config.CFG.get("sentry", "dsn"))
    except ConfigParser.Error as e:
        logger.info("Skipping Raven client initialization. Configuration issue: %s", e)
    func = args.func
    args = vars(args)
    func(args)
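Given the subcommands registered above, and assuming the module is exposed as
sir.__main__ (as the docstring in Example #4 suggests), typical invocations
would look like:

    # python -m sir --debug reindex --entity-type artist
    # python -m sir triggers -t sql/CreateTriggers.sql -f sql/CreateFunctions.sql
    # python -m sir --sqltimings amqp_watch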