def test_merge_tracks_disabled_both(conn):
    # Tracks 1 and 2 both carry the same MBID, and both links are disabled.
    setup_sql = (
        " TRUNCATE track_mbid CASCADE;"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (1, '97edb73c-4dac-11e0-9096-0025225356f3', 9, true);"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (2, '97edb73c-4dac-11e0-9096-0025225356f3', 11, true);"
        " "
    )
    prepare_database(conn, setup_sql)

    merge_tracks(conn, 1, [2])

    # Expect a single row on track 1: counts summed (9 + 11), still disabled.
    query = "SELECT track_id, mbid, submission_count, disabled FROM track_mbid ORDER BY track_id, mbid"
    actual_rows = conn.execute(query).fetchall()
    wanted_rows = [(1, '97edb73c-4dac-11e0-9096-0025225356f3', 20, True)]
    assert_equals(wanted_rows, actual_rows)
def test_merge_tracks_disabled_both(ctx):
    # type: (ScriptContext) -> None
    # Tracks 1 and 2 both carry the same MBID, and both links are disabled.
    setup_sql = (
        " TRUNCATE track_mbid CASCADE;"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (1, '97edb73c-4dac-11e0-9096-0025225356f3', 9, true);"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (2, '97edb73c-4dac-11e0-9096-0025225356f3', 11, true);"
        " "
    )
    prepare_database(ctx.db.get_fingerprint_db(), setup_sql)

    merge_tracks(
        ctx.db.get_fingerprint_db(),
        ctx.db.get_ingest_db(),
        1,
        [2],
    )

    # Expect a single row on track 1: counts summed (9 + 11), still disabled.
    query = "SELECT track_id, mbid, submission_count, disabled FROM track_mbid ORDER BY track_id, mbid"
    actual_rows = ctx.db.get_fingerprint_db().execute(query).fetchall()
    wanted_rows = [(1, UUID('97edb73c-4dac-11e0-9096-0025225356f3'), 20, True)]
    assert_equals(wanted_rows, actual_rows)
def test_merge_tracks(conn):
    # Fixture: two fingerprints (tracks 1 and 2), MBID and PUID links spread
    # over tracks 1-4, and change-log rows pointing at track_mbid ids 2-5.
    setup_sql = (
        " TRUNCATE track_mbid CASCADE;"
        " INSERT INTO fingerprint (fingerprint, length, track_id, submission_count)"
        " VALUES (%(fp1)s, %(len1)s, 1, 1), (%(fp2)s, %(len2)s, 2, 1);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (1, 1, '97edb73c-4dac-11e0-9096-0025225356f3', 10);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (2, 1, 'd575d506-4da4-11e0-b951-0025225356f3', 15);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (3, 2, 'd575d506-4da4-11e0-b951-0025225356f3', 50);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (4, 3, '97edb73c-4dac-11e0-9096-0025225356f3', 25);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (5, 4, '5d0290a6-4dad-11e0-a47a-0025225356f3', 30);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (1, '97edb73c-4dac-11e0-9096-0025225356f4', 10);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (1, 'd575d506-4da4-11e0-b951-0025225356f4', 15);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (2, 'd575d506-4da4-11e0-b951-0025225356f4', 50);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (3, '97edb73c-4dac-11e0-9096-0025225356f4', 25);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (4, '5d0290a6-4dad-11e0-a47a-0025225356f4', 30);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (2, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (3, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (4, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (5, 1, true);"
        " "
    )
    setup_params = {
        'fp1': TEST_1A_FP_RAW,
        'len1': TEST_1A_LENGTH,
        'fp2': TEST_1B_FP_RAW,
        'len2': TEST_1B_LENGTH,
    }
    prepare_database(conn, setup_sql, setup_params)

    merge_tracks(conn, 3, [1, 2, 4])

    def fetch_all(query):
        return conn.execute(query).fetchall()

    # Both fingerprints now belong to track 3.
    fingerprint_rows = fetch_all("SELECT id, track_id FROM fingerprint ORDER BY id")
    assert_equals([(1, 3), (2, 3)], fingerprint_rows)

    # One row per MBID on track 3, with the submission counts added up.
    mbid_rows = fetch_all(
        "SELECT id, track_id, mbid, submission_count FROM track_mbid ORDER BY track_id, mbid")
    wanted_mbids = [
        (5, 3, UUID('5d0290a6-4dad-11e0-a47a-0025225356f3'), 30),
        (1, 3, UUID('97edb73c-4dac-11e0-9096-0025225356f3'), 35),
        (2, 3, UUID('d575d506-4da4-11e0-b951-0025225356f3'), 65),
    ]
    assert_equals(wanted_mbids, mbid_rows)

    # Same consolidation for PUIDs.
    puid_rows = fetch_all(
        "SELECT track_id, puid, submission_count FROM track_puid ORDER BY track_id, puid")
    wanted_puids = [
        (3, UUID('5d0290a6-4dad-11e0-a47a-0025225356f4'), 30),
        (3, UUID('97edb73c-4dac-11e0-9096-0025225356f4'), 35),
        (3, UUID('d575d506-4da4-11e0-b951-0025225356f4'), 65),
    ]
    assert_equals(wanted_puids, puid_rows)

    # Change-log rows now reference the surviving track_mbid ids.
    change_rows = fetch_all(
        "SELECT track_mbid_id, account_id FROM track_mbid_change ORDER BY track_mbid_id, account_id")
    assert_equals([(1, 1), (2, 1), (2, 1), (5, 1)], change_rows)

    # Merged tracks point at track 3 through new_id; track 3 has none.
    track_rows = fetch_all("SELECT id, new_id FROM track ORDER BY id, new_id")
    assert_equals([(1, 3), (2, 3), (3, None), (4, 3)], track_rows)
def test_merge_tracks_disabled_both(conn):
    # Tracks 1 and 2 both carry the same MBID, and both links are disabled.
    setup_sql = (
        " TRUNCATE track_mbid CASCADE;"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (1, '97edb73c-4dac-11e0-9096-0025225356f3', 9, true);"
        " INSERT INTO track_mbid (track_id, mbid, submission_count, disabled)"
        " VALUES (2, '97edb73c-4dac-11e0-9096-0025225356f3', 11, true);"
        " "
    )
    prepare_database(conn, setup_sql)

    merge_tracks(conn, 1, [2])

    # Expect a single row on track 1: counts summed (9 + 11), still disabled.
    query = "SELECT track_id, mbid, submission_count, disabled FROM track_mbid ORDER BY track_id, mbid"
    actual_rows = conn.execute(query).fetchall()
    wanted_rows = [(1, UUID('97edb73c-4dac-11e0-9096-0025225356f3'), 20, True)]
    assert_equals(wanted_rows, actual_rows)
def test_merge_tracks(conn):
    # Fixture: two fingerprints (tracks 1 and 2), MBID and PUID links spread
    # over tracks 1-4, and change-log rows pointing at track_mbid ids 2-5.
    setup_sql = (
        " TRUNCATE track_mbid CASCADE;"
        " INSERT INTO fingerprint (fingerprint, length, track_id, submission_count)"
        " VALUES (%(fp1)s, %(len1)s, 1, 1), (%(fp2)s, %(len2)s, 2, 1);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (1, 1, '97edb73c-4dac-11e0-9096-0025225356f3', 10);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (2, 1, 'd575d506-4da4-11e0-b951-0025225356f3', 15);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (3, 2, 'd575d506-4da4-11e0-b951-0025225356f3', 50);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (4, 3, '97edb73c-4dac-11e0-9096-0025225356f3', 25);"
        " INSERT INTO track_mbid (id, track_id, mbid, submission_count)"
        " VALUES (5, 4, '5d0290a6-4dad-11e0-a47a-0025225356f3', 30);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (1, '97edb73c-4dac-11e0-9096-0025225356f4', 10);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (1, 'd575d506-4da4-11e0-b951-0025225356f4', 15);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (2, 'd575d506-4da4-11e0-b951-0025225356f4', 50);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (3, '97edb73c-4dac-11e0-9096-0025225356f4', 25);"
        " INSERT INTO track_puid (track_id, puid, submission_count)"
        " VALUES (4, '5d0290a6-4dad-11e0-a47a-0025225356f4', 30);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (2, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (3, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (4, 1, true);"
        " INSERT INTO track_mbid_change (track_mbid_id, account_id, disabled)"
        " VALUES (5, 1, true);"
        " "
    )
    setup_params = {
        'fp1': TEST_1A_FP_RAW,
        'len1': TEST_1A_LENGTH,
        'fp2': TEST_1B_FP_RAW,
        'len2': TEST_1B_LENGTH,
    }
    prepare_database(conn, setup_sql, setup_params)

    merge_tracks(conn, 3, [1, 2, 4])

    def fetch_all(query):
        return conn.execute(query).fetchall()

    # Both fingerprints now belong to track 3.
    fingerprint_rows = fetch_all("SELECT id, track_id FROM fingerprint ORDER BY id")
    assert_equals([(1, 3), (2, 3)], fingerprint_rows)

    # One row per MBID on track 3, with the submission counts added up.
    mbid_rows = fetch_all(
        "SELECT id, track_id, mbid, submission_count FROM track_mbid ORDER BY track_id, mbid")
    wanted_mbids = [
        (5, 3, '5d0290a6-4dad-11e0-a47a-0025225356f3', 30),
        (1, 3, '97edb73c-4dac-11e0-9096-0025225356f3', 35),
        (2, 3, 'd575d506-4da4-11e0-b951-0025225356f3', 65),
    ]
    assert_equals(wanted_mbids, mbid_rows)

    # Same consolidation for PUIDs.
    puid_rows = fetch_all(
        "SELECT track_id, puid, submission_count FROM track_puid ORDER BY track_id, puid")
    wanted_puids = [
        (3, '5d0290a6-4dad-11e0-a47a-0025225356f4', 30),
        (3, '97edb73c-4dac-11e0-9096-0025225356f4', 35),
        (3, 'd575d506-4da4-11e0-b951-0025225356f4', 65),
    ]
    assert_equals(wanted_puids, puid_rows)

    # Change-log rows now reference the surviving track_mbid ids.
    change_rows = fetch_all(
        "SELECT track_mbid_id, account_id FROM track_mbid_change ORDER BY track_mbid_id, account_id")
    assert_equals([(1, 1), (2, 1), (2, 1), (5, 1)], change_rows)

    # Merged tracks point at track 3 through new_id; track 3 has none.
    track_rows = fetch_all("SELECT id, new_id FROM track ORDER BY id, new_id")
    assert_equals([(1, 3), (2, 3), (3, None), (4, 3)], track_rows)
def import_submission(ingest_db, app_db, fingerprint_db, index_pool, submission):
    # type: (IngestDB, AppDB, FingerprintDB, IndexClientPool, Dict[str, Any]) -> Optional[Dict[str, Any]]
    """
    Import the given submission into the main fingerprint database

    The submission row is flagged as handled (with the handling timestamp)
    before any validation takes place.

    The fingerprint is attached to the first matching track that accepts it;
    when several matching tracks accept it and can be merged, they are merged
    into the lowest track ID of the group. If nothing matches, a new track is
    inserted. The MBID, PUID, meta and foreign ID carried by the submission
    are then linked to that track, and a submission result row is written to
    the ingest database.

    Returns the fingerprint dict (keys ``id``, ``track_id``, ``fingerprint``,
    ``length``, ``bitrate``, ``format_id``), or None when the submission is
    skipped: too few unique fingerprint items, nothing to index, or a
    ``source_id`` that cannot be found.
    """
    null_uuid = '00000000-0000-0000-0000-000000000000'

    handled_at = datetime.datetime.now(pytz.utc)

    # One UPDATE sets both columns; a preceding handled-only UPDATE on the
    # same row would be fully superseded by it.
    update_stmt = schema.submission.update().where(
        schema.submission.c.id == submission['id'])
    ingest_db.execute(update_stmt.values(handled=True, handled_at=handled_at))

    logger.info("Importing submission %d with MBIDs %s",
                submission['id'], submission['mbid'])

    num_unique_items = len(set(submission['fingerprint']))
    if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
        logger.info("Skipping, has only %d unique items", num_unique_items)
        return None

    # Read the count itself; the truthiness of the result object returned by
    # execute() says nothing about the value it holds.
    num_query_items = fingerprint_db.execute(
        "SELECT icount(acoustid_extract_query(%(fp)s))",
        dict(fp=submission['fingerprint'])).scalar()
    if not num_query_items:
        logger.info("Skipping, no data to index")
        return None

    source_id = submission['source_id']
    if source_id is not None:
        source = get_source(app_db, source_id)
        if source is None:
            logger.error("Source not found")
            return None
    else:
        # No source row referenced: build it from the submission itself.
        source = {
            'application_id': submission['application_id'],
            'version': submission['application_version'],
            'account_id': submission['account_id'],
        }
        source_id = find_or_insert_source(app_db, source['application_id'],
                                          source['account_id'], source['version'])

    submission_result = {
        'submission_id': submission['id'],
        'created': submission['created'],
        'handled_at': handled_at,
        'account_id': source['account_id'],
        'application_id': source['application_id'],
        'application_version': source['version'],
    }

    format_id = submission['format_id']
    if format_id is None and submission['format'] is not None:
        format_id = find_or_insert_format(app_db, submission['format'])

    fingerprint = {
        'id': None,
        'track_id': None,
        'fingerprint': submission['fingerprint'],
        'length': submission['length'],
        'bitrate': submission['bitrate'],
        'format_id': format_id,
    }

    searcher = FingerprintSearcher(fingerprint_db, index_pool, fast=False)
    searcher.min_score = const.TRACK_MERGE_THRESHOLD
    matches = searcher.search(submission['fingerprint'], submission['length'])
    if matches:
        all_track_ids = set()  # type: Set[int]
        possible_track_ids = set()  # type: Set[int]
        for m in matches:
            # Several matched fingerprints may share a track; look at each
            # track only once.
            if m.track_id in all_track_ids:
                continue
            all_track_ids.add(m.track_id)
            logger.debug("Fingerprint %d with track %d is %d%% similar",
                         m.fingerprint_id, m.track_id, m.score * 100)
            if can_add_fp_to_track(fingerprint_db, m.track_id,
                                   submission['fingerprint'], submission['length']):
                possible_track_ids.add(m.track_id)
                if not fingerprint['track_id']:
                    # The first accepting track wins; its fingerprint is
                    # reused only above the fingerprint merge threshold.
                    fingerprint['track_id'] = m.track_id
                    if m.score > const.FINGERPRINT_MERGE_THRESHOLD:
                        fingerprint['id'] = m.fingerprint_id
        if len(possible_track_ids) > 1:
            for group in can_merge_tracks(fingerprint_db, possible_track_ids):
                if fingerprint['track_id'] in group and len(group) > 1:
                    # Keep the lowest track ID and merge the rest into it.
                    fingerprint['track_id'] = min(group)
                    group.remove(fingerprint['track_id'])
                    merge_tracks(fingerprint_db, ingest_db,
                                 fingerprint['track_id'], list(group))
                    break

    if not fingerprint['track_id']:
        fingerprint['track_id'] = insert_track(fingerprint_db)

    assert isinstance(fingerprint['track_id'], int)
    submission_result['track_id'] = fingerprint['track_id']

    if not fingerprint['id']:
        fingerprint['id'] = insert_fingerprint(fingerprint_db, ingest_db, fingerprint,
                                               submission['id'], source_id)
    else:
        assert isinstance(fingerprint['id'], int)
        inc_fingerprint_submission_count(fingerprint_db, ingest_db, fingerprint['id'],
                                         submission['id'], source_id)

    submission_result['fingerprint_id'] = fingerprint['id']

    # The all-zero UUID is treated the same as a missing MBID/PUID.
    if submission['mbid'] and submission['mbid'] != null_uuid:
        insert_mbid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['mbid'], submission['id'], source_id)
        submission_result['mbid'] = submission['mbid']

    if submission['puid'] and submission['puid'] != null_uuid:
        insert_puid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['puid'], submission['id'], source_id)
        submission_result['puid'] = submission['puid']

    if submission['meta_id'] or submission['meta']:
        meta_id = submission['meta_id']  # type: Optional[int]
        meta_gid = None  # type: Optional[uuid.UUID]
        if meta_id is None:
            meta_id, meta_gid = find_or_insert_meta(fingerprint_db, submission['meta'])
        else:
            found, meta_gid = check_meta_id(fingerprint_db, meta_id)
            if not found:
                logger.error("Meta not found")
                meta_id = None
        if meta_id is not None:
            insert_track_meta(fingerprint_db, ingest_db, fingerprint['track_id'],
                              meta_id, submission['id'], source_id)
            submission_result['meta_id'] = meta_id
            submission_result['meta_gid'] = meta_gid

    if submission['foreignid_id'] or submission['foreignid']:
        foreignid_id = submission['foreignid_id']
        if foreignid_id is None:
            foreignid = submission['foreignid']
            foreignid_id = find_or_insert_foreignid(fingerprint_db, foreignid)
        else:
            foreignid = get_foreignid(fingerprint_db, foreignid_id)
        insert_track_foreignid(fingerprint_db, ingest_db, fingerprint['track_id'],
                               foreignid_id, submission['id'], source_id)
        submission_result['foreignid'] = foreignid

    insert_submission_result(ingest_db, submission_result)

    return fingerprint
def import_submission(conn, submission, index=None):
    """
    Import the given submission into the main fingerprint database

    Everything runs inside a single ``conn.begin()`` block. The submission is
    flagged as handled first. MBIDs to attach are the submitted MBID (after
    redirect resolution) plus any MBIDs found for the submitted PUID with a
    duration within 15 of the submission length.

    The fingerprint is attached to the first matching track that accepts it;
    when several matching tracks accept it and can be merged, they are merged
    into the lowest track ID of the group. If nothing matches, a new track is
    inserted. MBIDs, PUID, meta and foreign ID are then linked to that track.

    Returns the fingerprint dict (keys ``id``, ``track_id``, ``fingerprint``,
    ``length``, ``bitrate``, ``format_id``), or None when the submission is
    skipped because it has too few unique items or nothing to index.
    """
    with conn.begin():
        update_stmt = schema.submission.update().where(
            schema.submission.c.id == submission['id'])
        conn.execute(update_stmt.values(handled=True))

        mbids = []
        if submission['mbid']:
            mbids.append(resolve_mbid_redirect(conn, submission['mbid']))
        if submission['puid']:
            min_duration = submission['length'] - 15
            max_duration = submission['length'] + 15
            mbids.extend(find_puid_mbids(conn, submission['puid'],
                                         min_duration, max_duration))
        logger.info("Importing submission %d with MBIDs %s",
                    submission['id'], ', '.join(mbids))

        num_unique_items = len(set(submission['fingerprint']))
        if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
            logger.info("Skipping, has only %d unique items", num_unique_items)
            return

        # Read the count itself; the truthiness of the result object returned
        # by execute() says nothing about the value it holds.
        num_query_items = conn.execute(
            "SELECT icount(acoustid_extract_query(%(fp)s))",
            dict(fp=submission['fingerprint'])).scalar()
        if not num_query_items:
            logger.info("Skipping, no data to index")
            return

        searcher = FingerprintSearcher(conn, index, fast=False)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(submission['fingerprint'], submission['length'])

        fingerprint = {
            'id': None,
            'track_id': None,
            'fingerprint': submission['fingerprint'],
            'length': submission['length'],
            'bitrate': submission['bitrate'],
            'format_id': submission['format_id'],
        }

        if matches:
            all_track_ids = set()
            possible_track_ids = set()
            for m in matches:
                # Several matched fingerprints may share a track; look at
                # each track only once.
                if m['track_id'] in all_track_ids:
                    continue
                all_track_ids.add(m['track_id'])
                logger.debug("Fingerprint %d with track %d is %d%% similar",
                             m['id'], m['track_id'], m['score'] * 100)
                if can_add_fp_to_track(conn, m['track_id'],
                                       submission['fingerprint'], submission['length']):
                    possible_track_ids.add(m['track_id'])
                    if not fingerprint['track_id']:
                        # The first accepting track wins; its fingerprint is
                        # reused only above the fingerprint merge threshold.
                        fingerprint['track_id'] = m['track_id']
                        if m['score'] > const.FINGERPRINT_MERGE_THRESHOLD:
                            fingerprint['id'] = m['id']
            if len(possible_track_ids) > 1:
                for group in can_merge_tracks(conn, possible_track_ids):
                    if fingerprint['track_id'] in group and len(group) > 1:
                        # Keep the lowest track ID and merge the rest into it.
                        fingerprint['track_id'] = min(group)
                        group.remove(fingerprint['track_id'])
                        merge_tracks(conn, fingerprint['track_id'], list(group))
                        break

        if not fingerprint['track_id']:
            fingerprint['track_id'] = insert_track(conn)

        if not fingerprint['id']:
            fingerprint['id'] = insert_fingerprint(conn, fingerprint, submission['id'],
                                                   submission['source_id'])
        else:
            inc_fingerprint_submission_count(conn, fingerprint['id'], submission['id'],
                                             submission['source_id'])

        for mbid in mbids:
            insert_mbid(conn, fingerprint['track_id'], mbid, submission['id'],
                        submission['source_id'])

        # The all-zero UUID is treated the same as a missing PUID.
        if submission['puid'] and submission['puid'] != '00000000-0000-0000-0000-000000000000':
            insert_puid(conn, fingerprint['track_id'], submission['puid'],
                        submission['id'], submission['source_id'])

        if submission['meta_id']:
            insert_track_meta(conn, fingerprint['track_id'], submission['meta_id'],
                              submission['id'], submission['source_id'])

        if submission['foreignid_id']:
            insert_track_foreignid(conn, fingerprint['track_id'],
                                   submission['foreignid_id'], submission['id'],
                                   submission['source_id'])

        return fingerprint
def import_submission(conn, submission, index=None):
    """
    Import the given submission into the main fingerprint database

    Everything runs inside a single ``conn.begin()`` block. The submission is
    flagged as handled first; the submitted MBID, if any, is passed through
    redirect resolution before being attached.

    The fingerprint is attached to the first matching track that accepts it;
    when several matching tracks accept it and can be merged, they are merged
    into the lowest track ID of the group. If nothing matches, a new track is
    inserted. MBID, PUID, meta and foreign ID are then linked to that track.

    Returns the fingerprint dict (keys ``id``, ``track_id``, ``fingerprint``,
    ``length``, ``bitrate``, ``format_id``), or None when the submission is
    skipped because it has too few unique items or nothing to index.
    """
    with conn.begin():
        update_stmt = schema.submission.update().where(
            schema.submission.c.id == submission['id'])
        conn.execute(update_stmt.values(handled=True))

        mbids = []
        if submission['mbid']:
            mbids.append(resolve_mbid_redirect(conn, submission['mbid']))
        logger.info("Importing submission %d with MBIDs %s",
                    submission['id'], ', '.join(mbids))

        num_unique_items = len(set(submission['fingerprint']))
        if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
            logger.info("Skipping, has only %d unique items", num_unique_items)
            return

        # Read the count itself; the truthiness of the result object returned
        # by execute() says nothing about the value it holds.
        num_query_items = conn.execute(
            "SELECT icount(acoustid_extract_query(%(fp)s))",
            dict(fp=submission['fingerprint'])).scalar()
        if not num_query_items:
            logger.info("Skipping, no data to index")
            return

        searcher = FingerprintSearcher(conn, index, fast=False)
        searcher.min_score = const.TRACK_MERGE_THRESHOLD
        matches = searcher.search(submission['fingerprint'], submission['length'])

        fingerprint = {
            'id': None,
            'track_id': None,
            'fingerprint': submission['fingerprint'],
            'length': submission['length'],
            'bitrate': submission['bitrate'],
            'format_id': submission['format_id'],
        }

        if matches:
            all_track_ids = set()
            possible_track_ids = set()
            for m in matches:
                # Several matched fingerprints may share a track; look at
                # each track only once.
                if m['track_id'] in all_track_ids:
                    continue
                all_track_ids.add(m['track_id'])
                logger.debug("Fingerprint %d with track %d is %d%% similar",
                             m['id'], m['track_id'], m['score'] * 100)
                if can_add_fp_to_track(conn, m['track_id'],
                                       submission['fingerprint'], submission['length']):
                    possible_track_ids.add(m['track_id'])
                    if not fingerprint['track_id']:
                        # The first accepting track wins; its fingerprint is
                        # reused only above the fingerprint merge threshold.
                        fingerprint['track_id'] = m['track_id']
                        if m['score'] > const.FINGERPRINT_MERGE_THRESHOLD:
                            fingerprint['id'] = m['id']
            if len(possible_track_ids) > 1:
                for group in can_merge_tracks(conn, possible_track_ids):
                    if fingerprint['track_id'] in group and len(group) > 1:
                        # Keep the lowest track ID and merge the rest into it.
                        fingerprint['track_id'] = min(group)
                        group.remove(fingerprint['track_id'])
                        merge_tracks(conn, fingerprint['track_id'], list(group))
                        break

        if not fingerprint['track_id']:
            fingerprint['track_id'] = insert_track(conn)

        if not fingerprint['id']:
            fingerprint['id'] = insert_fingerprint(conn, fingerprint, submission['id'],
                                                   submission['source_id'])
        else:
            inc_fingerprint_submission_count(conn, fingerprint['id'], submission['id'],
                                             submission['source_id'])

        for mbid in mbids:
            insert_mbid(conn, fingerprint['track_id'], mbid, submission['id'],
                        submission['source_id'])

        # The all-zero UUID is treated the same as a missing PUID.
        if submission['puid'] and submission['puid'] != '00000000-0000-0000-0000-000000000000':
            insert_puid(conn, fingerprint['track_id'], submission['puid'],
                        submission['id'], submission['source_id'])

        if submission['meta_id']:
            insert_track_meta(conn, fingerprint['track_id'], submission['meta_id'],
                              submission['id'], submission['source_id'])

        if submission['foreignid_id']:
            insert_track_foreignid(conn, fingerprint['track_id'],
                                   submission['foreignid_id'], submission['id'],
                                   submission['source_id'])

        return fingerprint