def _handle_internal(self, params):
    """Store every submission in ``params`` as submission rows.

    One row is inserted per (submission, MBID) pair; a submission without
    MBIDs still produces a single row whose ``mbid`` is ``None``. All
    database work happens inside one ``self.conn.begin()`` block.

    :param params: parsed request parameters. This method reads
        ``application_id``, ``account_id``, ``application_version`` and
        ``submissions`` (an iterable of dicts) from it, and stores the
        resolved ``format_id`` back on each submission dict.
    :returns: an empty dict.
    """
    with self.conn.begin():
        source_id = find_or_insert_source(
            self.conn, params.application_id, params.account_id,
            params.application_version)

        # Resolve each distinct format name once and remember the id on the
        # submission itself so the insert loop below can read it back.
        format_ids = {}
        for p in params.submissions:
            fmt = p['format']
            if not fmt:
                continue
            if fmt not in format_ids:
                format_ids[fmt] = find_or_insert_format(self.conn, fmt)
            p['format_id'] = format_ids[fmt]

        for p in params.submissions:
            # The metadata depends only on the submission, not on the MBID,
            # so it is computed once per submission.
            meta_values = {n: p[n] or None for n in self.meta_fields}
            # dict.values() works on Python 2 and 3; itervalues() is
            # Python 2 only.
            has_meta = any(meta_values.values())
            for mbid in p['mbids'] or [None]:
                values = {
                    'mbid': mbid or None,
                    'puid': p['puid'] or None,
                    'bitrate': p['bitrate'] or None,
                    'fingerprint': p['fingerprint'],
                    'length': p['duration'],
                    'format_id': p.get('format_id'),
                    'source_id': source_id,
                }
                if has_meta:
                    # A fresh copy per call, as before the hoist.
                    values['meta_id'] = insert_meta(
                        self.conn, dict(meta_values))
                if p['foreignid']:
                    values['foreignid_id'] = find_or_insert_foreignid(
                        self.conn, p['foreignid'])
                insert_submission(self.conn, values)
    return {}
def _handle_internal(self, params):
    """Insert one submission row per (submission, MBID) pair.

    A submission that carries no MBIDs is still stored once, with ``mbid``
    set to ``None``. Everything runs inside a single
    ``self.conn.begin()`` block.

    :param params: parsed request parameters; ``application_id``,
        ``account_id``, ``application_version`` and ``submissions`` are
        read. Each submission dict that has a format gets a ``format_id``
        key written back to it.
    :returns: an empty dict.
    """
    with self.conn.begin():
        source_id = find_or_insert_source(
            self.conn,
            params.application_id,
            params.account_id,
            params.application_version,
        )

        # Look up every distinct format only once.
        format_ids = {}
        for p in params.submissions:
            format_name = p["format"]
            if format_name:
                if format_name not in format_ids:
                    format_ids[format_name] = find_or_insert_format(
                        self.conn, format_name
                    )
                p["format_id"] = format_ids[format_name]

        for p in params.submissions:
            # Metadata is the same for every MBID of this submission.
            meta_values = {n: p[n] or None for n in self.meta_fields}
            # .values() instead of the Python 2-only .itervalues().
            has_meta = any(meta_values.values())
            mbids = p["mbids"] or [None]
            for mbid in mbids:
                values = {
                    "mbid": mbid or None,
                    "puid": p["puid"] or None,
                    "bitrate": p["bitrate"] or None,
                    "fingerprint": p["fingerprint"],
                    "length": p["duration"],
                    "format_id": p.get("format_id"),
                    "source_id": source_id,
                }
                if has_meta:
                    # Hand insert_meta its own dict on every call.
                    values["meta_id"] = insert_meta(self.conn, dict(meta_values))
                if p["foreignid"]:
                    values["foreignid_id"] = find_or_insert_foreignid(
                        self.conn, p["foreignid"]
                    )
                insert_submission(self.conn, values)
    return {}
def test_find_or_insert_source(conn):
    # Checks that find_or_insert_source returns the id of a matching source
    # row when one exists and inserts a new row otherwise.
    query = "SELECT id, account_id, application_id FROM source ORDER BY id"

    # Starting state: two source rows.
    assert_equals([(1, 1, 1), (2, 2, 2)], conn.execute(query).fetchall())

    # ((application_id, account_id), expected source id). The first two
    # pairs match existing rows; the last one does not and yields id 3.
    lookups = (
        ((1, 1), 1),
        ((2, 2), 2),
        ((1, 2), 3),
    )
    for (application_id, account_id), expected_id in lookups:
        assert_equals(
            expected_id,
            find_or_insert_source(conn, application_id, account_id))

    # Exactly one new row (account 2, application 1) was added.
    assert_equals(
        [(1, 1, 1), (2, 2, 2), (3, 2, 1)],
        conn.execute(query).fetchall())
def _handle_internal(self, params):
    """Insert one submission row per (submission, MBID) pair.

    A submission without any MBIDs (for example one identified only by a
    PUID) is stored once with ``mbid`` set to ``None`` instead of being
    silently skipped. All inserts run inside one ``self.conn.begin()``
    block.

    :param params: parsed request parameters; ``application_id``,
        ``account_id`` and ``submissions`` (an iterable of dicts) are read.
    :returns: an empty dict.
    """
    with self.conn.begin():
        source_id = find_or_insert_source(
            self.conn, params.application_id, params.account_id)

        # Resolve each distinct format name to its id only once.
        format_ids = {}
        for p in params.submissions:
            fmt = p['format']
            if fmt and fmt not in format_ids:
                format_ids[fmt] = find_or_insert_format(self.conn, fmt)

        for p in params.submissions:
            format_id = format_ids[p['format']] if p['format'] else None
            # Fall back to a single None MBID so a submission with an empty
            # MBID list still produces a row.
            for mbid in p['mbids'] or [None]:
                insert_submission(self.conn, {
                    'mbid': mbid or None,
                    'puid': p['puid'] or None,
                    'bitrate': p['bitrate'] or None,
                    'fingerprint': p['fingerprint'],
                    'length': p['duration'],
                    'format_id': format_id,
                    'source_id': source_id,
                })
    return {}
def import_submission(ingest_db, app_db, fingerprint_db, index_pool, submission):
    # type: (IngestDB, AppDB, FingerprintDB, IndexClientPool, Dict[str, Any]) -> Optional[Dict[str, Any]]
    """
    Import the given submission into the main fingerprint database

    The submission row is marked as handled (with a timestamp) first, so it
    counts as processed even when the import is skipped below.

    Steps, in order:

    1. Skip when the fingerprint has too few unique items, yields no query
       items, or references a source id that cannot be found.
    2. Resolve the source and the audio format, inserting them if needed.
    3. Search for similar fingerprints and pick (or create) the track to
       attach to, merging tracks where ``can_merge_tracks`` allows it.
    4. Insert a new fingerprint, or bump the submission count of a
       sufficiently similar existing one.
    5. Attach the MBID, PUID, metadata and foreign id, when present, to
       the track.
    6. Record a submission result row.

    :returns: the ``fingerprint`` dict (``id``, ``track_id``,
        ``fingerprint``, ``length``, ``bitrate``, ``format_id``), or
        ``None`` when the submission was skipped.
    """
    handled_at = datetime.datetime.now(pytz.utc)

    # A single UPDATE sets both columns; a separate handled=True update
    # right before it would be redundant.
    update_stmt = schema.submission.update().where(
        schema.submission.c.id == submission['id'])
    ingest_db.execute(update_stmt.values(handled=True, handled_at=handled_at))

    logger.info("Importing submission %d with MBIDs %s",
                submission['id'], submission['mbid'])

    num_unique_items = len(set(submission['fingerprint']))
    if num_unique_items < const.FINGERPRINT_MIN_UNIQUE_ITEMS:
        logger.info("Skipping, has only %d unique items", num_unique_items)
        return None

    # .scalar() extracts the count; the result object returned by execute()
    # is truthy even when the count is 0, so testing it directly would
    # never skip anything.
    num_query_items = fingerprint_db.execute(
        "SELECT icount(acoustid_extract_query(%(fp)s))",
        dict(fp=submission['fingerprint'])).scalar()
    if not num_query_items:
        logger.info("Skipping, no data to index")
        return None

    source_id = submission['source_id']
    if source_id is not None:
        source = get_source(app_db, source_id)
        if source is None:
            logger.error("Source not found")
            return None
    else:
        # No source id on the submission: build the source from the
        # submission's own fields and look it up / create it.
        source = {
            'application_id': submission['application_id'],
            'version': submission['application_version'],
            'account_id': submission['account_id'],
        }
        source_id = find_or_insert_source(
            app_db, source['application_id'], source['account_id'],
            source['version'])

    submission_result = {
        'submission_id': submission['id'],
        'created': submission['created'],
        'handled_at': handled_at,
        'account_id': source['account_id'],
        'application_id': source['application_id'],
        'application_version': source['version'],
    }

    format_id = submission['format_id']
    if format_id is None and submission['format'] is not None:
        format_id = find_or_insert_format(app_db, submission['format'])

    fingerprint = {
        'id': None,
        'track_id': None,
        'fingerprint': submission['fingerprint'],
        'length': submission['length'],
        'bitrate': submission['bitrate'],
        'format_id': format_id,
    }

    searcher = FingerprintSearcher(fingerprint_db, index_pool, fast=False)
    searcher.min_score = const.TRACK_MERGE_THRESHOLD
    matches = searcher.search(submission['fingerprint'], submission['length'])
    if matches:
        all_track_ids = set()  # type: Set[int]
        possible_track_ids = set()  # type: Set[int]
        for m in matches:
            # Consider each track only once, via its first match.
            if m.track_id in all_track_ids:
                continue
            all_track_ids.add(m.track_id)
            logger.debug("Fingerprint %d with track %d is %d%% similar",
                         m.fingerprint_id, m.track_id, m.score * 100)
            if can_add_fp_to_track(fingerprint_db, m.track_id,
                                   submission['fingerprint'],
                                   submission['length']):
                possible_track_ids.add(m.track_id)
                # The first acceptable track wins; an existing fingerprint
                # is only reused when it belongs to that same track.
                if not fingerprint['track_id']:
                    fingerprint['track_id'] = m.track_id
                    if m.score > const.FINGERPRINT_MERGE_THRESHOLD:
                        fingerprint['id'] = m.fingerprint_id
        if len(possible_track_ids) > 1:
            for group in can_merge_tracks(fingerprint_db, possible_track_ids):
                if fingerprint['track_id'] in group and len(group) > 1:
                    # Merge the group into its lowest track id.
                    fingerprint['track_id'] = min(group)
                    group.remove(fingerprint['track_id'])
                    merge_tracks(fingerprint_db, ingest_db,
                                 fingerprint['track_id'], list(group))
                    break

    if not fingerprint['track_id']:
        fingerprint['track_id'] = insert_track(fingerprint_db)

    assert isinstance(fingerprint['track_id'], int)
    submission_result['track_id'] = fingerprint['track_id']

    if not fingerprint['id']:
        fingerprint['id'] = insert_fingerprint(
            fingerprint_db, ingest_db, fingerprint, submission['id'],
            source_id)
    else:
        assert isinstance(fingerprint['id'], int)
        inc_fingerprint_submission_count(
            fingerprint_db, ingest_db, fingerprint['id'], submission['id'],
            source_id)

    submission_result['fingerprint_id'] = fingerprint['id']

    # The all-zero UUID is treated the same as a missing MBID / PUID.
    nil_uuid = '00000000-0000-0000-0000-000000000000'

    if submission['mbid'] and submission['mbid'] != nil_uuid:
        insert_mbid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['mbid'], submission['id'], source_id)
        submission_result['mbid'] = submission['mbid']

    if submission['puid'] and submission['puid'] != nil_uuid:
        insert_puid(fingerprint_db, ingest_db, fingerprint['track_id'],
                    submission['puid'], submission['id'], source_id)
        submission_result['puid'] = submission['puid']

    if submission['meta_id'] or submission['meta']:
        meta_id = submission['meta_id']  # type: Optional[int]
        meta_gid = None  # type: Optional[uuid.UUID]
        if meta_id is None:
            meta_id, meta_gid = find_or_insert_meta(
                fingerprint_db, submission['meta'])
        else:
            found, meta_gid = check_meta_id(fingerprint_db, meta_id)
            if not found:
                logger.error("Meta not found")
                meta_id = None
        if meta_id is not None:
            insert_track_meta(fingerprint_db, ingest_db,
                              fingerprint['track_id'], meta_id,
                              submission['id'], source_id)
            submission_result['meta_id'] = meta_id
            submission_result['meta_gid'] = meta_gid

    if submission['foreignid_id'] or submission['foreignid']:
        foreignid_id = submission['foreignid_id']
        if foreignid_id is None:
            foreignid = submission['foreignid']
            foreignid_id = find_or_insert_foreignid(fingerprint_db, foreignid)
        else:
            foreignid = get_foreignid(fingerprint_db, foreignid_id)
        insert_track_foreignid(fingerprint_db, ingest_db,
                               fingerprint['track_id'], foreignid_id,
                               submission['id'], source_id)
        submission_result['foreignid'] = foreignid

    insert_submission_result(ingest_db, submission_result)

    return fingerprint
def _handle_internal(self, params):
    """Store the submissions and optionally wait for them to be imported.

    One submission row is inserted per (submission, MBID) pair inside a
    single ``self.conn.begin()`` block; a submission without MBIDs is
    stored once with ``mbid`` set to ``None``. Every inserted row is
    reported in the response with status ``'pending'``.

    When ``self.redis`` is available, the new ids are published on
    ``channel.submissions`` and the method polls
    ``lookup_submission_status`` every 0.5 seconds, flipping entries to
    ``'imported'`` as results show up. The wait budget is at most
    ``params.wait`` and shrinks as the ``submission.waiting`` counter
    grows. When ``self.redis`` is ``None`` the method returns right after
    the inserts.

    :param params: parsed request parameters; ``application_id``,
        ``account_id``, ``application_version``, ``submissions`` and
        ``wait`` are read.
    :returns: ``{'submissions': [...]}`` with one dict per inserted row
        containing ``id``, ``status``, optionally ``index`` and, once
        imported, ``result``.
    """
    response = {'submissions': []}
    pending_ids = set()

    with self.conn.begin():
        source_id = find_or_insert_source(
            self.conn, params.application_id, params.account_id,
            params.application_version)

        # Resolve each distinct format name once and remember the id on the
        # submission itself.
        format_ids = {}
        for p in params.submissions:
            fmt = p['format']
            if not fmt:
                continue
            if fmt not in format_ids:
                format_ids[fmt] = find_or_insert_format(self.conn, fmt)
            p['format_id'] = format_ids[fmt]

        for p in params.submissions:
            # Metadata is identical for every MBID of this submission.
            meta_values = {n: p[n] or None for n in self.meta_fields}
            # .values() instead of the Python 2-only .itervalues().
            has_meta = any(meta_values.values())
            for mbid in p['mbids'] or [None]:
                values = {
                    'mbid': mbid or None,
                    'puid': p['puid'] or None,
                    'bitrate': p['bitrate'] or None,
                    'fingerprint': p['fingerprint'],
                    'length': p['duration'],
                    'format_id': p.get('format_id'),
                    'source_id': source_id,
                }
                if has_meta:
                    values['meta_id'] = insert_meta(
                        self.conn, dict(meta_values))
                if p['foreignid']:
                    values['foreignid_id'] = find_or_insert_foreignid(
                        self.conn, p['foreignid'])
                submission_id = insert_submission(self.conn, values)
                pending_ids.add(submission_id)
                submission = {'id': submission_id, 'status': 'pending'}
                if p['index']:
                    submission['index'] = p['index']
                response['submissions'].append(submission)

    # Publishing and the waiter counter both need redis; without it there
    # is nothing to wait on, and calling into None would raise.
    if self.redis is None:
        return response

    self.redis.publish('channel.submissions', json.dumps(list(pending_ids)))

    clients_waiting_key = 'submission.waiting'
    clients_waiting = self.redis.incr(clients_waiting_key) - 1
    try:
        max_wait = 10
        self.redis.expire(clients_waiting_key, max_wait)
        # The more clients are already waiting, the less time this one gets.
        remaining = min(max(0, max_wait - 2 ** clients_waiting), params.wait)
        logger.debug('starting to wait at %f %d', remaining, clients_waiting)
        while remaining > 0 and pending_ids:
            logger.debug('waiting %f seconds', remaining)
            time.sleep(0.5)  # XXX replace with LISTEN/NOTIFY
            remaining -= 0.5
            tracks = lookup_submission_status(self.conn, pending_ids)
            if not tracks:
                continue
            for submission in response['submissions']:
                submission_id = submission['id']
                track_gid = tracks.get(submission_id)
                if track_gid is not None:
                    submission['status'] = 'imported'
                    submission['result'] = {'id': track_gid}
                    pending_ids.discard(submission_id)
    finally:
        # Always give the waiter slot back, even if polling failed.
        self.redis.decr(clients_waiting_key)

    return response
def _handle_internal(self, params):
    """Insert the submissions, then optionally wait for their import.

    Inside one ``self.conn.begin()`` block a row is inserted for every
    (submission, MBID) pair; submissions without MBIDs get a single row
    with ``mbid`` set to ``None``. Each inserted row appears in the
    response with status ``'pending'``.

    If ``self.redis`` is set, the ids are published on
    ``channel.submissions`` and ``lookup_submission_status`` is polled in
    0.5 second steps; rows that have a result are switched to
    ``'imported'``. The wait is capped by ``params.wait`` and reduced
    according to the ``submission.waiting`` counter. If ``self.redis`` is
    ``None`` the whole publish/wait phase is skipped.

    :param params: parsed request parameters; ``application_id``,
        ``account_id``, ``application_version``, ``submissions`` and
        ``wait`` are read.
    :returns: ``{'submissions': [...]}``, one dict per inserted row with
        ``id``, ``status``, optionally ``index`` and, once imported,
        ``result``.
    """
    response = {'submissions': []}
    waiting_for = set()

    with self.conn.begin():
        source_id = find_or_insert_source(
            self.conn, params.application_id, params.account_id,
            params.application_version)

        # Each distinct format name is looked up only once.
        format_ids = {}
        for p in params.submissions:
            if p['format']:
                if p['format'] not in format_ids:
                    format_ids[p['format']] = find_or_insert_format(
                        self.conn, p['format'])
                p['format_id'] = format_ids[p['format']]

        for p in params.submissions:
            # Built once per submission; it does not depend on the MBID.
            meta_values = {n: p[n] or None for n in self.meta_fields}
            # dict.values() is available on both Python 2 and 3.
            has_meta = any(meta_values.values())
            mbids = p['mbids'] or [None]
            for mbid in mbids:
                values = {
                    'mbid': mbid or None,
                    'puid': p['puid'] or None,
                    'bitrate': p['bitrate'] or None,
                    'fingerprint': p['fingerprint'],
                    'length': p['duration'],
                    'format_id': p.get('format_id'),
                    'source_id': source_id,
                }
                if has_meta:
                    values['meta_id'] = insert_meta(
                        self.conn, dict(meta_values))
                if p['foreignid']:
                    values['foreignid_id'] = find_or_insert_foreignid(
                        self.conn, p['foreignid'])
                new_id = insert_submission(self.conn, values)
                waiting_for.add(new_id)
                entry = {'id': new_id, 'status': 'pending'}
                if p['index']:
                    entry['index'] = p['index']
                response['submissions'].append(entry)

    # Everything below talks to redis, so it is guarded as a whole; a
    # missing redis client must not raise AttributeError.
    if self.redis is not None:
        self.redis.publish(
            'channel.submissions', json.dumps(list(waiting_for)))

        clients_waiting_key = 'submission.waiting'
        clients_waiting = self.redis.incr(clients_waiting_key) - 1
        try:
            max_wait = 10
            self.redis.expire(clients_waiting_key, max_wait)
            # More concurrent waiters means a shorter wait for this one.
            remaining = min(
                max(0, max_wait - 2 ** clients_waiting), params.wait)
            logger.debug(
                'starting to wait at %f %d', remaining, clients_waiting)
            while remaining > 0 and waiting_for:
                logger.debug('waiting %f seconds', remaining)
                time.sleep(0.5)  # XXX replace with LISTEN/NOTIFY
                remaining -= 0.5
                tracks = lookup_submission_status(self.conn, waiting_for)
                if not tracks:
                    continue
                for entry in response['submissions']:
                    track_gid = tracks.get(entry['id'])
                    if track_gid is not None:
                        entry['status'] = 'imported'
                        entry['result'] = {'id': track_gid}
                        waiting_for.discard(entry['id'])
        finally:
            # Release the waiter slot no matter how the wait ended.
            self.redis.decr(clients_waiting_key)

    return response