Esempio n. 1
0
 def _handle_internal(self, params):
     """Store all submissions from ``params`` in a single transaction.

     One row is inserted per (submission, MBID) pair; a submission that
     carries no MBIDs is still stored once, with ``mbid`` set to ``None``.

     As a side effect, every submission dict with a truthy ``'format'`` gets
     a ``'format_id'`` key added to it.

     :param params: parsed request parameters; this method reads
         ``application_id``, ``account_id``, ``application_version`` and
         ``submissions`` (an iterable of dicts).
     :returns: an empty dict.
     """
     with self.conn.begin():
         source_id = find_or_insert_source(self.conn, params.application_id, params.account_id, params.application_version)
         # Look every distinct format name up only once, then annotate the
         # submissions with the resulting id.
         format_ids = {}
         for p in params.submissions:
             if p['format']:
                 if p['format'] not in format_ids:
                     format_ids[p['format']] = find_or_insert_format(self.conn, p['format'])
                 p['format_id'] = format_ids[p['format']]
         for p in params.submissions:
             # Fall back to a single "no MBID" entry so the submission is
             # not dropped when the client sent no MBIDs.
             mbids = p['mbids'] or [None]
             for mbid in mbids:
                 values = {
                     'mbid': mbid or None,
                     'puid': p['puid'] or None,
                     'bitrate': p['bitrate'] or None,
                     'fingerprint': p['fingerprint'],
                     'length': p['duration'],
                     'format_id': p.get('format_id'),
                     'source_id': source_id
                 }
                 # Normalise falsy metadata values to None; only create a
                 # meta record when at least one field is actually set.
                 meta_values = {n: p[n] or None for n in self.meta_fields}
                 # .values() instead of the Python 2-only .itervalues(), so
                 # this also runs on Python 3.
                 if any(meta_values.values()):
                     values['meta_id'] = insert_meta(self.conn, meta_values)
                 if p['foreignid']:
                     values['foreignid_id'] = find_or_insert_foreignid(self.conn, p['foreignid'])
                 insert_submission(self.conn, values)
     return {}
Esempio n. 2
0
 def _handle_internal(self, params):
     """Store all submissions from ``params`` in a single transaction.

     One row is inserted per (submission, MBID) pair; a submission that
     carries no MBIDs is still stored once, with ``mbid`` set to ``None``.

     As a side effect, every submission dict with a truthy ``"format"`` gets
     a ``"format_id"`` key added to it.

     :param params: parsed request parameters; this method reads
         ``application_id``, ``account_id``, ``application_version`` and
         ``submissions`` (an iterable of dicts).
     :returns: an empty dict.
     """
     with self.conn.begin():
         source_id = find_or_insert_source(
             self.conn, params.application_id, params.account_id, params.application_version
         )
         # Look every distinct format name up only once, then annotate the
         # submissions with the resulting id.
         format_ids = {}
         for p in params.submissions:
             if p["format"]:
                 if p["format"] not in format_ids:
                     format_ids[p["format"]] = find_or_insert_format(self.conn, p["format"])
                 p["format_id"] = format_ids[p["format"]]
         for p in params.submissions:
             # Fall back to a single "no MBID" entry so the submission is
             # not dropped when the client sent no MBIDs.
             mbids = p["mbids"] or [None]
             for mbid in mbids:
                 values = {
                     "mbid": mbid or None,
                     "puid": p["puid"] or None,
                     "bitrate": p["bitrate"] or None,
                     "fingerprint": p["fingerprint"],
                     "length": p["duration"],
                     "format_id": p.get("format_id"),
                     "source_id": source_id,
                 }
                 # Normalise falsy metadata values to None; only create a
                 # meta record when at least one field is actually set.
                 meta_values = {n: p[n] or None for n in self.meta_fields}
                 # .values() instead of the Python 2-only .itervalues(), so
                 # this also runs on Python 3.
                 if any(meta_values.values()):
                     values["meta_id"] = insert_meta(self.conn, meta_values)
                 if p["foreignid"]:
                     values["foreignid_id"] = find_or_insert_foreignid(self.conn, p["foreignid"])
                 insert_submission(self.conn, values)
     return {}
def test_find_or_insert_source(conn):
    """Existing sources are found by id; an unknown pair gets a new row."""
    query = "SELECT id, account_id, application_id FROM source ORDER BY id"

    # Rows present before any call is made.
    initial_rows = [
        (1, 1, 1),
        (2, 2, 2),
    ]
    assert_equals(initial_rows, conn.execute(query).fetchall())

    # ((application_id, account_id), expected source id); the last pair is
    # not in the table yet and therefore gets the next id.
    cases = (
        ((1, 1), 1),
        ((2, 2), 2),
        ((1, 2), 3),
    )
    for call_args, expected_id in cases:
        assert_equals(expected_id, find_or_insert_source(conn, *call_args))

    # Only the unknown pair may have added a row.
    assert_equals(initial_rows + [(3, 2, 1)], conn.execute(query).fetchall())
def test_find_or_insert_source(conn):
    """Check lookup of existing sources and insertion of a missing one."""

    def fetch_sources():
        # Snapshot of the source table in id order.
        return conn.execute(
            "SELECT id, account_id, application_id FROM source ORDER BY id"
        ).fetchall()

    before = fetch_sources()
    assert_equals([(1, 1, 1), (2, 2, 2)], before)

    # Both pre-existing (application_id, account_id) pairs resolve to
    # their current ids.
    found_first = find_or_insert_source(conn, 1, 1)
    assert_equals(1, found_first)
    found_second = find_or_insert_source(conn, 2, 2)
    assert_equals(2, found_second)

    # application 1 + account 2 is not stored yet, so a new row is created.
    created = find_or_insert_source(conn, 1, 2)
    assert_equals(3, created)

    after = fetch_sources()
    assert_equals([(1, 1, 1), (2, 2, 2), (3, 2, 1)], after)
Esempio n. 5
0
 def _handle_internal(self, params):
     """Store all submissions from ``params`` in a single transaction.

     One row is inserted per (submission, MBID) pair. A submission with an
     empty MBID list is stored once with ``mbid`` set to ``None`` instead of
     being silently dropped.

     :param params: parsed request parameters; this method reads
         ``application_id``, ``account_id`` and ``submissions`` (an iterable
         of dicts).
     :returns: an empty dict.
     """
     with self.conn.begin():
         source_id = find_or_insert_source(self.conn, params.application_id, params.account_id)
         # Look every distinct format name up only once.
         format_ids = {}
         for p in params.submissions:
             if p['format'] and p['format'] not in format_ids:
                 format_ids[p['format']] = find_or_insert_format(self.conn, p['format'])
         for p in params.submissions:
             format_id = format_ids[p['format']] if p['format'] else None
             # Iterating p['mbids'] directly would insert nothing for a
             # submission without MBIDs (e.g. one that only has a PUID).
             for mbid in p['mbids'] or [None]:
                 insert_submission(self.conn, {
                     'mbid': mbid or None,
                     'puid': p['puid'] or None,
                     'bitrate': p['bitrate'] or None,
                     'fingerprint': p['fingerprint'],
                     'length': p['duration'],
                     'format_id': format_id,
                     'source_id': source_id
                 })
     return {}
Esempio n. 6
0
def import_submission(ingest_db, app_db, fingerprint_db, index_pool,
                      submission):
    # type: (IngestDB, AppDB, FingerprintDB, IndexClientPool, Dict[str, Any]) -> Optional[Dict[str, Any]]
    """
    Import the given submission into the main fingerprint database

    The submission row is flagged as handled (with a timestamp) before any
    validation, so submissions that end up being skipped are marked handled
    as well.

    Returns ``None`` when the submission is skipped: the fingerprint has
    fewer than ``const.FINGERPRINT_MIN_UNIQUE_ITEMS`` unique items, it
    yields no query items, or ``submission['source_id']`` does not resolve
    to a source. Otherwise returns the fingerprint dict (keys ``id``,
    ``track_id``, ``fingerprint``, ``length``, ``bitrate``, ``format_id``)
    after the fingerprint, track and any MBID/PUID/meta/foreign-id links
    have been stored and a submission result has been recorded.
    """

    handled_at = datetime.datetime.now(pytz.utc)

    # A single UPDATE sets both the flag and the timestamp.
    update_stmt = schema.submission.update().where(
        schema.submission.c.id == submission['id'])
    ingest_db.execute(update_stmt.values(handled=True, handled_at=handled_at))
    logger.info("Importing submission %d with MBIDs %s", submission['id'],
                submission['mbid'])

    num_unique_items = len(set(submission['fingerprint']))
    if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
        logger.info("Skipping, has only %d unique items", num_unique_items)
        return None

    # execute() returns a result object, which is truthy no matter what the
    # query produced; read the single count value so the check below can
    # actually fire.
    # NOTE(review): assumes fingerprint_db.execute() returns a SQLAlchemy
    # result object exposing .scalar() - confirm against FingerprintDB.
    num_query_items = fingerprint_db.execute(
        "SELECT icount(acoustid_extract_query(%(fp)s))",
        dict(fp=submission['fingerprint'])).scalar()
    if not num_query_items:
        logger.info("Skipping, no data to index")
        return None

    # Either the submission references an existing source row, or it
    # carries the application/account details needed to find or create one.
    source_id = submission['source_id']
    if source_id is not None:
        source = get_source(app_db, source_id)
        if source is None:
            logger.error("Source not found")
            return None
    else:
        source = {
            'application_id': submission['application_id'],
            'version': submission['application_version'],
            'account_id': submission['account_id'],
        }
        source_id = find_or_insert_source(app_db, source['application_id'],
                                          source['account_id'],
                                          source['version'])

    submission_result = {
        'submission_id': submission['id'],
        'created': submission['created'],
        'handled_at': handled_at,
        'account_id': source['account_id'],
        'application_id': source['application_id'],
        'application_version': source['version'],
    }

    format_id = submission['format_id']
    if format_id is None and submission['format'] is not None:
        format_id = find_or_insert_format(app_db, submission['format'])

    fingerprint = {
        'id': None,
        'track_id': None,
        'fingerprint': submission['fingerprint'],
        'length': submission['length'],
        'bitrate': submission['bitrate'],
        'format_id': format_id,
    }

    searcher = FingerprintSearcher(fingerprint_db, index_pool, fast=False)
    searcher.min_score = const.TRACK_MERGE_THRESHOLD
    matches = searcher.search(submission['fingerprint'], submission['length'])
    if matches:
        all_track_ids = set()  # type: Set[int]
        possible_track_ids = set()  # type: Set[int]
        for m in matches:
            # Only the first match of every track is considered.
            if m.track_id in all_track_ids:
                continue
            all_track_ids.add(m.track_id)
            logger.debug("Fingerprint %d with track %d is %d%% similar",
                         m.fingerprint_id, m.track_id, m.score * 100)
            if can_add_fp_to_track(fingerprint_db, m.track_id,
                                   submission['fingerprint'],
                                   submission['length']):
                possible_track_ids.add(m.track_id)
                # The first acceptable track wins; the matched fingerprint
                # itself is reused only when that match scores above the
                # fingerprint merge threshold.
                if not fingerprint['track_id']:
                    fingerprint['track_id'] = m.track_id
                    if m.score > const.FINGERPRINT_MERGE_THRESHOLD:
                        fingerprint['id'] = m.fingerprint_id
        if len(possible_track_ids) > 1:
            # Merge the group that contains the chosen track into its
            # lowest track id and continue with that id.
            for group in can_merge_tracks(fingerprint_db, possible_track_ids):
                if fingerprint['track_id'] in group and len(group) > 1:
                    fingerprint['track_id'] = min(group)
                    group.remove(fingerprint['track_id'])
                    merge_tracks(fingerprint_db, ingest_db,
                                 fingerprint['track_id'], list(group))
                    break

    if not fingerprint['track_id']:
        fingerprint['track_id'] = insert_track(fingerprint_db)

    assert isinstance(fingerprint['track_id'], int)
    submission_result['track_id'] = fingerprint['track_id']

    if not fingerprint['id']:
        fingerprint['id'] = insert_fingerprint(fingerprint_db, ingest_db,
                                               fingerprint, submission['id'],
                                               source_id)
    else:
        assert isinstance(fingerprint['id'], int)
        inc_fingerprint_submission_count(fingerprint_db, ingest_db,
                                         fingerprint['id'], submission['id'],
                                         source_id)

    submission_result['fingerprint_id'] = fingerprint['id']

    # The all-zero UUID is treated the same as a missing MBID/PUID.
    if submission['mbid'] and submission[
            'mbid'] != '00000000-0000-0000-0000-000000000000':
        insert_mbid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['mbid'], submission['id'], source_id)
        submission_result['mbid'] = submission['mbid']

    if submission['puid'] and submission[
            'puid'] != '00000000-0000-0000-0000-000000000000':
        insert_puid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['puid'], submission['id'], source_id)
        submission_result['puid'] = submission['puid']

    if submission['meta_id'] or submission['meta']:
        meta_id = submission['meta_id']  # type: Optional[int]
        meta_gid = None  # type: Optional[uuid.UUID]
        if meta_id is None:
            meta_id, meta_gid = find_or_insert_meta(fingerprint_db,
                                                    submission['meta'])
        else:
            # A referenced meta row that cannot be found is logged and the
            # meta link is skipped; the rest of the import continues.
            found, meta_gid = check_meta_id(fingerprint_db, meta_id)
            if not found:
                logger.error("Meta not found")
                meta_id = None
        if meta_id is not None:
            insert_track_meta(fingerprint_db, ingest_db,
                              fingerprint['track_id'], meta_id,
                              submission['id'], source_id)
            submission_result['meta_id'] = meta_id
            submission_result['meta_gid'] = meta_gid

    if submission['foreignid_id'] or submission['foreignid']:
        foreignid_id = submission['foreignid_id']
        if foreignid_id is None:
            foreignid = submission['foreignid']
            foreignid_id = find_or_insert_foreignid(fingerprint_db, foreignid)
        else:
            foreignid = get_foreignid(fingerprint_db, foreignid_id)
        insert_track_foreignid(fingerprint_db, ingest_db,
                               fingerprint['track_id'], foreignid_id,
                               submission['id'], source_id)
        submission_result['foreignid'] = foreignid

    insert_submission_result(ingest_db, submission_result)

    return fingerprint
Esempio n. 7
0
    def _handle_internal(self, params):
        """Store the submissions, then briefly wait for them to be imported.

        All submissions are inserted in one transaction, one row per
        (submission, MBID) pair; a submission without MBIDs is stored once
        with ``mbid`` set to ``None``. When a redis client is configured,
        the new ids are published on ``channel.submissions``. The method
        then polls the submission status every half second for up to
        ``params.wait`` seconds; with redis configured, that limit shrinks
        as more requests are already waiting.

        As a side effect, every submission dict with a truthy ``'format'``
        gets a ``'format_id'`` key added to it.

        :param params: parsed request parameters; this method reads
            ``application_id``, ``account_id``, ``application_version``,
            ``submissions`` and ``wait``.
        :returns: ``{'submissions': [...]}`` with one entry per inserted
            row, holding ``id``, ``status`` (``'pending'`` or
            ``'imported'``), optionally ``index`` and, once imported,
            ``result``.
        """
        response = {'submissions': []}
        ids = set()
        with self.conn.begin():
            source_id = find_or_insert_source(self.conn, params.application_id,
                                              params.account_id,
                                              params.application_version)
            # Look every distinct format name up only once, then annotate
            # the submissions with the resulting id.
            format_ids = {}
            for p in params.submissions:
                if p['format']:
                    if p['format'] not in format_ids:
                        format_ids[p['format']] = find_or_insert_format(
                            self.conn, p['format'])
                    p['format_id'] = format_ids[p['format']]
            for p in params.submissions:
                mbids = p['mbids'] or [None]
                for mbid in mbids:
                    values = {
                        'mbid': mbid or None,
                        'puid': p['puid'] or None,
                        'bitrate': p['bitrate'] or None,
                        'fingerprint': p['fingerprint'],
                        'length': p['duration'],
                        'format_id': p.get('format_id'),
                        'source_id': source_id
                    }
                    meta_values = {
                        n: p[n] or None for n in self.meta_fields}
                    # .values() instead of the Python 2-only .itervalues().
                    if any(meta_values.values()):
                        values['meta_id'] = insert_meta(self.conn, meta_values)
                    if p['foreignid']:
                        values['foreignid_id'] = find_or_insert_foreignid(
                            self.conn, p['foreignid'])
                    submission_id = insert_submission(self.conn, values)
                    ids.add(submission_id)
                    submission = {'id': submission_id, 'status': 'pending'}
                    if p['index']:
                        submission['index'] = p['index']
                    response['submissions'].append(submission)

        # Redis is optional: without it there is no notification and no
        # shared counter of waiting clients, so treat this request as the
        # only one waiting instead of failing on a None client.
        clients_waiting_key = 'submission.waiting'
        if self.redis is not None:
            self.redis.publish('channel.submissions', json.dumps(list(ids)))
            clients_waiting = self.redis.incr(clients_waiting_key) - 1
        else:
            clients_waiting = 0
        try:
            max_wait = 10
            if self.redis is not None:
                self.redis.expire(clients_waiting_key, max_wait)
            # The allowed wait drops exponentially with the number of
            # requests already waiting, and never exceeds what was asked.
            remaining = min(max(0, max_wait - 2**clients_waiting), params.wait)
            logger.debug('starting to wait at %f %d', remaining,
                         clients_waiting)
            while remaining > 0 and ids:
                logger.debug('waiting %f seconds', remaining)
                time.sleep(0.5)  # XXX replace with LISTEN/NOTIFY
                remaining -= 0.5
                tracks = lookup_submission_status(self.conn, ids)
                if not tracks:
                    continue
                for submission in response['submissions']:
                    track_gid = tracks.get(submission['id'])
                    if track_gid is not None:
                        submission['status'] = 'imported'
                        submission['result'] = {'id': track_gid}
                        # discard(): tolerate a status that is reported
                        # again for an id that was already resolved.
                        ids.discard(submission['id'])
        finally:
            if self.redis is not None:
                self.redis.decr(clients_waiting_key)

        return response
Esempio n. 8
0
    def _handle_internal(self, params):
        """Store the submissions, then briefly wait for them to be imported.

        All submissions are inserted in one transaction, one row per
        (submission, MBID) pair; a submission without MBIDs is stored once
        with ``mbid`` set to ``None``. When a redis client is configured,
        the new ids are published on ``channel.submissions``. The method
        then polls the submission status every half second for up to
        ``params.wait`` seconds; with redis configured, that limit shrinks
        as more requests are already waiting.

        As a side effect, every submission dict with a truthy ``'format'``
        gets a ``'format_id'`` key added to it.

        :param params: parsed request parameters; this method reads
            ``application_id``, ``account_id``, ``application_version``,
            ``submissions`` and ``wait``.
        :returns: ``{'submissions': [...]}`` with one entry per inserted
            row, holding ``id``, ``status`` (``'pending'`` or
            ``'imported'``), optionally ``index`` and, once imported,
            ``result``.
        """
        response = {'submissions': []}
        ids = set()
        with self.conn.begin():
            source_id = find_or_insert_source(self.conn, params.application_id, params.account_id, params.application_version)
            # Look every distinct format name up only once, then annotate
            # the submissions with the resulting id.
            format_ids = {}
            for p in params.submissions:
                if p['format']:
                    if p['format'] not in format_ids:
                        format_ids[p['format']] = find_or_insert_format(self.conn, p['format'])
                    p['format_id'] = format_ids[p['format']]
            for p in params.submissions:
                mbids = p['mbids'] or [None]
                for mbid in mbids:
                    values = {
                        'mbid': mbid or None,
                        'puid': p['puid'] or None,
                        'bitrate': p['bitrate'] or None,
                        'fingerprint': p['fingerprint'],
                        'length': p['duration'],
                        'format_id': p.get('format_id'),
                        'source_id': source_id
                    }
                    meta_values = {n: p[n] or None for n in self.meta_fields}
                    # .values() instead of the Python 2-only .itervalues().
                    if any(meta_values.values()):
                        values['meta_id'] = insert_meta(self.conn, meta_values)
                    if p['foreignid']:
                        values['foreignid_id'] = find_or_insert_foreignid(self.conn, p['foreignid'])
                    submission_id = insert_submission(self.conn, values)
                    ids.add(submission_id)
                    submission = {'id': submission_id, 'status': 'pending'}
                    if p['index']:
                        submission['index'] = p['index']
                    response['submissions'].append(submission)

        # Redis is optional: without it there is no notification and no
        # shared counter of waiting clients, so treat this request as the
        # only one waiting instead of failing on a None client.
        clients_waiting_key = 'submission.waiting'
        if self.redis is not None:
            self.redis.publish('channel.submissions', json.dumps(list(ids)))
            clients_waiting = self.redis.incr(clients_waiting_key) - 1
        else:
            clients_waiting = 0
        try:
            max_wait = 10
            if self.redis is not None:
                self.redis.expire(clients_waiting_key, max_wait)
            # The allowed wait drops exponentially with the number of
            # requests already waiting, and never exceeds what was asked.
            remaining = min(max(0, max_wait - 2 ** clients_waiting), params.wait)
            logger.debug('starting to wait at %f %d', remaining, clients_waiting)
            while remaining > 0 and ids:
                logger.debug('waiting %f seconds', remaining)
                time.sleep(0.5) # XXX replace with LISTEN/NOTIFY
                remaining -= 0.5
                tracks = lookup_submission_status(self.conn, ids)
                if not tracks:
                    continue
                for submission in response['submissions']:
                    track_gid = tracks.get(submission['id'])
                    if track_gid is not None:
                        submission['status'] = 'imported'
                        submission['result'] = {'id': track_gid}
                        # discard(): tolerate a status that is reported
                        # again for an id that was already resolved.
                        ids.discard(submission['id'])
        finally:
            if self.redis is not None:
                self.redis.decr(clients_waiting_key)

        return response