def add_catalog(catalog: str, entity: str) -> int:
    """Add or update a catalog.

    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param entity: ``{'actor', 'band', 'director', 'musician', 'producer',
      'writer', 'audiovisual_work', 'musical_work'}``.
      A supported entity
    :return: the catalog *id* field of the *catalog* table
      in the *s51434__mixnmatch_p* Toolforge database,
      or ``None`` when the database operation fails
    """
    # Human-readable catalog name, e.g. 'Discogs musician'
    display_name = f'{catalog.title()} {entity}'
    db_session = DBManager(MNM_DB).new_session()

    try:
        record = (
            db_session.query(mix_n_match.MnMCatalog)
            .filter_by(name=display_name)
            .first()
        )
        if record is None:
            # No catalog row with this name yet: insert a fresh one
            LOGGER.info(
                "Adding %s %s catalog to the mix'n'match DB ... ",
                catalog,
                entity,
            )
            record = mix_n_match.MnMCatalog()
            _set_catalog_fields(record, display_name, catalog, entity)
            db_session.add(record)
            db_session.commit()
            # Primary key is assigned by the DB on commit
            db_id = record.id
        else:
            LOGGER.info('Updating %s %s catalog ... ', catalog, entity)
            db_id = record.id
            _set_catalog_fields(record, display_name, catalog, entity)
            db_session.add(record)
            db_session.commit()
    except SQLAlchemyError as error:
        LOGGER.error(
            "Failed catalog addition/update due to %s. "
            "You can enable the debug log with the CLI option "
            "'-l soweego.ingester DEBUG' for more details",
            error.__class__.__name__,
        )
        LOGGER.debug(error)
        db_session.rollback()
        return None
    finally:
        db_session.close()

    LOGGER.info(
        'Catalog addition/update went fine. Internal ID: %d', db_id
    )
    return db_id
def add_matches(
    file_path: str,
    catalog_id: int,
    catalog: str,
    entity: str,
    confidence_range: Tuple[float, float],
) -> None:
    """Add or update matches to an existing catalog.
    Curated matches found in the catalog are kept as is.

    :param file_path: path to a file with matches
    :param catalog_id: the catalog *id* field of the *catalog* table
      in the *s51434__mixnmatch_p* Toolforge database
    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param entity: ``{'actor', 'band', 'director', 'musician', 'producer',
      'writer', 'audiovisual_work', 'musical_work'}``.
      A supported entity
    :param confidence_range: a pair of floats indicating the minimum and
      maximum confidence scores of matches that will be added/updated.
    """
    ok = True  # Flag to log that everything went fine

    class_qid, url_prefix = _handle_metadata(catalog, entity)
    candidates = _handle_matches(file_path, confidence_range)

    LOGGER.info(
        "Starting import of %s %s matches (catalog ID: %d) into the mix'n'match DB ...",
        catalog,
        entity,
        catalog_id,
    )
    started = datetime.now()
    db_session = DBManager(MNM_DB).new_session()

    # The session deliberately stays open here: the import below reuses it
    curated, ok = _sync_matches(db_session, catalog_id, ok)

    # Keep only rows whose tids are NOT (~) among the curated ones
    candidates = candidates[~candidates[keys.TID].isin(curated)]
    total = len(candidates)
    rows = candidates.itertuples(index=False, name=None)

    buffer = []
    try:
        _import_matches(
            buffer,
            catalog,
            catalog_id,
            class_qid,
            entity,
            rows,
            total,
            db_session,
            url_prefix,
        )
        LOGGER.info(
            'Adding last batch of %d %s %s matches, this may take a while ...',
            len(buffer),
            catalog,
            entity,
        )
        # Commit remaining entities
        db_session.bulk_save_objects(buffer)
        db_session.commit()
    except SQLAlchemyError as error:
        LOGGER.error(
            "Failed addition/update due to %s. "
            "You can enable the debug log with the CLI option "
            "'-l soweego.ingester DEBUG' for more details",
            error.__class__.__name__,
        )
        LOGGER.debug(error)
        db_session.rollback()
        ok = False
    finally:
        db_session.close()

    if ok:
        finished = datetime.now()
        LOGGER.info(
            'Import of %s %s matches (catalog ID: %d) completed in %s. '
            'Total matches: %d',
            catalog,
            entity,
            catalog_id,
            finished - started,
            total,
        )