def add_catalog(catalog: str, entity: str) -> int:
    """Insert a catalog into the mix'n'match DB, or refresh it if present.

    The catalog row is looked up by name, which is built from the
    title-cased *catalog* followed by *entity*. When no such row exists,
    a new one is created; otherwise the existing row has its fields set
    again. Either way the change is committed.

    :param catalog: ``{'discogs', 'imdb', 'musicbrainz', 'twitter'}``.
      A supported catalog
    :param entity: ``{'actor', 'band', 'director', 'musician', 'producer',
      'writer', 'audiovisual_work', 'musical_work'}``.
      A supported entity
    :return: the catalog *id* field of the *catalog* table
      in the *s51434__mixnmatch_p* Toolforge database,
      or ``None`` when a database error occurred (the transaction is
      rolled back in that case)
    """
    catalog_name = f'{catalog.title()} {entity}'
    db_session = DBManager(MNM_DB).new_session()

    try:
        lookup = db_session.query(mix_n_match.MnMCatalog)
        row = lookup.filter_by(name=catalog_name).first()
        is_new = row is None

        if is_new:
            LOGGER.info(
                "Adding %s %s catalog to the mix'n'match DB ... ",
                catalog,
                entity,
            )
            row = mix_n_match.MnMCatalog()
        else:
            LOGGER.info('Updating %s %s catalog ... ', catalog, entity)
            # An existing row already carries its ID: read it up front
            catalog_id = row.id

        _set_catalog_fields(row, catalog_name, catalog, entity)
        db_session.add(row)
        db_session.commit()

        if is_new:
            # A freshly created row is asked for its ID only after the commit
            catalog_id = row.id
    except SQLAlchemyError as error:
        LOGGER.error(
            "Failed catalog addition/update due to %s. "
            "You can enable the debug log with the CLI option "
            "'-l soweego.ingester DEBUG' for more details",
            error.__class__.__name__,
        )
        LOGGER.debug(error)
        db_session.rollback()
        return None
    finally:
        # Runs on both the success and the failure path
        db_session.close()

    LOGGER.info(
        'Catalog addition/update went fine. Internal ID: %d', catalog_id
    )
    return catalog_id


# Example #2
# 0


def _gather_target_data(
    catalog,
    entity,
    total_queries,
    works_buckets,
    works_inverted,
    people_buckets,
    people_inverted,
):
    """Yield works-people relationships stored in the target database.

    Every combination of a works bucket with a people bucket is turned
    into one query against the relationship entity of the given
    *catalog* and *entity*: it selects the rows whose ``from_catalog_id``
    is in the works bucket and whose ``to_catalog_id`` is in the people
    bucket. Progress over the combinations is shown with ``tqdm``.

    :param catalog: key into ``vocabulary.WORKS_BY_PEOPLE_MAPPING``,
      also used to pick the relationship entity
    :param entity: second-level key into the same mapping,
      also used to pick the relationship entity
    :param total_queries: total passed to the progress bar
    :param works_buckets: iterable of collections of work catalog IDs
    :param works_inverted: mapping looked up by ``from_catalog_id``
    :param people_buckets: iterable of collections of people catalog IDs
    :param people_inverted: mapping looked up by ``to_catalog_id``
    :return: a generator of 4-tuples: the *works_inverted* value for the
      row's ``from_catalog_id``, the claim PID, the *people_inverted*
      value for the row's ``to_catalog_id``, and that ``to_catalog_id``.
      On a database error the failure is logged, the session is rolled
      back and the generator simply ends
    """
    claim_pid = vocabulary.WORKS_BY_PEOPLE_MAPPING[catalog][entity]
    relationship = target_database.get_relationship_entity(catalog, entity)
    db_session = DBManager().connect_to_db()

    # Leverage works-people relationships
    try:
        bucket_pairs = tqdm(
            product(works_buckets, people_buckets), total=total_queries
        )
        for works_ids, people_ids in bucket_pairs:
            base_query = db_session.query(relationship)
            from_these_works = relationship.from_catalog_id.in_(works_ids)
            to_these_people = relationship.to_catalog_id.in_(people_ids)
            matches = base_query.filter(
                and_(from_these_works, to_these_people)
            )

            for match in matches:
                work = works_inverted[match.from_catalog_id]
                person = people_inverted[match.to_catalog_id]
                yield work, claim_pid, person, match.to_catalog_id
    except SQLAlchemyError as error:
        LOGGER.error(
            "Failed query of works-people relationships due to %s. "
            "You can enable the debug log with the CLI option "
            "'-l soweego.validator DEBUG' for more details",
            error.__class__.__name__,
        )
        LOGGER.debug(error)

        db_session.rollback()
        return
    finally:
        # Also runs when the consumer stops iterating early
        db_session.close()