Beispiel #1
0
def r_by_album(cat_attrs, res_tresh, skip=False):
    """Look for a Discogs release by querying the album title alone.

    Args:
        cat_attrs: Mapping of catalogue attributes; ``'album'`` is read as
            the query and ``'comment'`` is appended to when skipping.
        res_tresh: Maximum number of search results to examine.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs`` as last handed back by
        ``discogs_tools.final_comparison`` (the input object when the method
        is skipped, interrupted, or no result was examined).
    """
    method_name = 'Album'
    log.info('Release: Album\n')
    log.info('Connecting Discogs\tQuery: {0}'.format(cat_attrs['album']))
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(method_name)
        return cat_attrs

    found = d.search(cat_attrs['album'], type='release')
    log.info('FOUND {0} RESULTS'.format(len(found)))
    candidates = itertools.islice(found, res_tresh)
    for rank, release in enumerate(candidates, start=1):
        # An interruption leaves immediately, without the summary log.
        if config.is_interrupted():
            return cat_attrs
        if len(release.artists) > 1:
            log.debug('there is more than one artist in current release')
        # Only the first credited artist takes part in the comparison.
        artist_name = text_tools.rm_artist_num(release.artists[0].name)
        cat_attrs, matched = discogs_tools.final_comparison(
            cat_attrs, (artist_name, release.title), release.id,
            method_name, rank)
        if matched:
            break
        # Pause between candidates; ratelimit is a module-level setting.
        time.sleep(0.5 * ratelimit)
    discogs_tools.log_summary(cat_attrs, method_name)
    time.sleep(ratelimit)
    return cat_attrs
Beispiel #2
0
def r_by_token(cat_attrs, res_tresh, skip=False):
    """Look for a Discogs release by querying ``"<artist> - <album>"``.

    Args:
        cat_attrs: Mapping of catalogue attributes; ``'artist'`` and
            ``'album'`` form the query, ``'comment'`` is appended to when
            skipping.
        res_tresh: Maximum number of search results to examine.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs`` as last handed back by
        ``discogs_tools.final_comparison`` (the input object when the method
        is skipped, interrupted, or no result was examined).
    """
    method_name = 'Token'
    log.info('Release: Token\n')
    query = cat_attrs['artist'] + ' - ' + cat_attrs['album']
    log.info('Connecting Discogs\tQuery: {0}'.format(query))
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(method_name)
        return cat_attrs

    found = d.search(query, type='release')
    log.info('FOUND {0} RESULTS'.format(len(found)))
    candidates = itertools.islice(found, res_tresh)
    for rank, release in enumerate(candidates, start=1):
        # An interruption leaves immediately, without the summary log.
        if config.is_interrupted():
            return cat_attrs
        artist_name = text_tools.rm_artist_num(release.artists[0].name)
        # The title is run through the module-level punctuationremover
        # translation table before it is compared.
        clean_title = str(release.title).translate(punctuationremover)
        cat_attrs, matched = discogs_tools.final_comparison(
            cat_attrs, (artist_name, clean_title), release.id,
            method_name, rank)
        if matched:
            break
        time.sleep(ratelimit / 2)
    discogs_tools.log_summary(cat_attrs, method_name)
    time.sleep(ratelimit)
    return cat_attrs
Beispiel #3
0
def m_by_album_fuzz(cat_attrs, skip=False):
    """Look for a master/release by album title with fuzzy filtering.

    Searches Discogs releases by ``cat_attrs['album']``, narrows them with
    ``discogs_tools.filter_releases``, collapses the survivors with
    ``discogs_tools.prefer_masters`` and saves the result only when exactly
    one candidate remains.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'album'`` and
            appends to ``'comment'``.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs``, as returned by ``discogs_tools.save_m_or_r`` when a
        single candidate was found, otherwise the input object.
    """
    log.info('Master: Fuzz Album\n')
    log.info('***Connecting Discogs\tQuery: {0}'.format(cat_attrs['album']))
    cur_method = 'm_by_album_fuzz'
    if cat_attrs['album'] == 'Unknown Album':
        log.info('Method inappropriate for this album\n')
        return cat_attrs
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(cur_method)
        return cat_attrs
    releases_unf = d.search(cat_attrs['album'], type='release')
    log.info('FOUND {0} RESULTS'.format(len(releases_unf)))
    if len(releases_unf) > 2000:
        log.info('Too many releases, method skipped\n')
        return cat_attrs
    filtered_releases = discogs_tools.filter_releases(cat_attrs, releases_unf)
    if config.is_interrupted():
        return cat_attrs
    final_list = discogs_tools.prefer_masters(filtered_releases)
    # prefer_masters hands back an empty list when interrupted part-way;
    # leave without recording a misleading comment in that case.
    if config.is_interrupted():
        return cat_attrs
    # Decide on the list that is actually indexed below: it can be shorter
    # than filtered_releases when several releases share one master.
    if len(final_list) == 1:
        cat_attrs = discogs_tools.save_m_or_r(
            cat_attrs, final_list[0], cur_method)
    else:
        log.info('Found {} matches - need develop this method\n'.format(
            len(final_list)))
        cat_attrs['comment'] += 'AlbumFuzz-Too many matches'
    return cat_attrs
Beispiel #4
0
def m_by_token_cut(cat_attrs, res_tresh, skip=False):
    """Look for a master using the artist plus a suffix-stripped album title.

    The album title is passed through ``text_tools.fm_album_suffixes`` and
    the query ``"<artist> - <album>"`` is searched among Discogs masters.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'artist'`` and
            ``'album'`` and appends to ``'comment'`` when skipping.
        res_tresh: Maximum number of search results to examine.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs`` as last handed back by
        ``discogs_tools.final_comparison`` (the input object when the method
        is skipped, interrupted, or no result was examined).
    """
    log.info('Master: Token Cut\n')
    log.info('***Connecting Discogs\tQuery: {0}'.format(
        cat_attrs['album']))
    cur_method = 'Token Cuted'
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(cur_method)
        return cat_attrs
    album = text_tools.fm_album_suffixes(cat_attrs['album'])
    token = cat_attrs['artist'] + ' - ' + album
    outcome = d.search(token, type='master')
    log.info('FOUND {0} RESULTS'.format(len(outcome)))
    candidates = itertools.islice(outcome, 0, res_tresh)
    for counter, i in enumerate(candidates, start=1):
        if config.is_interrupted():
            return cat_attrs
        # Split on the first ' - ' only, so an album title that itself
        # contains the separator is kept whole.
        m_artist, separator, m_album = i.title.partition(' - ')
        if not separator:
            # Nothing to compare when the title is not "Artist - Title".
            log.debug('Result without artist separator skipped: {}'.format(
                i.title))
            continue
        result = text_tools.rm_artist_num(m_artist), m_album
        cat_attrs, flag = discogs_tools.final_comparison(
            cat_attrs, result, i.id, cur_method, counter)
        if flag:
            break
    discogs_tools.log_summary(cat_attrs, cur_method)
    time.sleep(ratelimit)
    return cat_attrs
Beispiel #5
0
def insert_ids(cat_attrs, f_attrs_list):
    """Fill in Discogs master/release ids for one catalogue entry.

    Runs ``find_a_master`` and then ``find_a_release``, each only when
    ``discogs_tools.is_found`` reports the corresponding id(s) as missing,
    and finally makes sure every key named in the module-level ``checklist``
    exists in ``cat_attrs`` (defaulting to an empty string).

    Args:
        cat_attrs: Mapping of catalogue attributes; must already contain
            ``'path'`` and ``'comment'``.
        f_attrs_list: Passed through unchanged to the finder functions.

    Returns:
        The (possibly replaced) ``cat_attrs``; returned early, before the
        checklist defaults are applied, when an interruption is signalled.
    """
    log.info('Query Discogs Func started')
    log.info('Working on {}\n'.format(cat_attrs['path']))
    # Appending an empty string changes nothing but fails fast when the
    # 'comment' key is missing -- presumably intentional; TODO confirm.
    cat_attrs['comment'] += ''

    master_missing = not discogs_tools.is_found(cat_attrs, 'd_master')
    if master_missing:
        cat_attrs = find_a_master(cat_attrs, f_attrs_list)
    if config.is_interrupted():
        return cat_attrs

    release_missing = not discogs_tools.is_found(
        cat_attrs, 'd_release', 'd_master')
    if release_missing:
        cat_attrs = find_a_release(cat_attrs, f_attrs_list)
    if config.is_interrupted():
        return cat_attrs

    for required_key in checklist:
        cat_attrs[required_key] = cat_attrs.get(required_key, '')

    reported_keys = (
        'd_master', 'metoda_master', 'd_release', 'metoda_release')
    summary = (
        'Finshed querying Discogs, results are: {}, {}, {}, {}\n\n'.format(
            *[cat_attrs[key] for key in reported_keys]))
    log.info(summary)
    return cat_attrs
def prefer_masters(matches_list):
    """Collapse matched releases into a duplicate-free list of candidates.

    A release that has a master contributes that master; a release without
    one contributes itself. Each candidate is added at most once, in the
    order it is first encountered.

    Args:
        matches_list: Iterable of release objects exposing ``master``,
            ``id``, ``title`` and ``artists``.

    Returns:
        The list of masters/releases, or an empty list when an interruption
        is signalled while iterating.
    """
    preferred = []
    for candidate in matches_list:
        if config.is_interrupted():
            return []
        master = candidate.master
        if master is None:
            # No master available: fall back to the release itself.
            if candidate not in preferred:
                log.info('appended release: {0} - {1}'.format(
                    candidate.artists, candidate.title))
                preferred.append(candidate)
            continue
        if master in preferred:
            continue
        log.info('for {0} id - {1} found master: {2}'.format(
            candidate.id, candidate.title, master.id))
        preferred.append(master)
    return preferred
Beispiel #7
0
def m_by_variations(cat_attrs, res_tresh, skip=False):
    """Look for a master using the name variations of the first artist hit.

    Searches Discogs for ``cat_attrs['artist']``, takes ``name_variations``
    of the first result and, for each variation, searches masters with the
    query ``"<variation> - <album>"``. The search stops at the first result
    that ``discogs_tools.final_comparison`` flags.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'artist'`` and
            ``'album'`` and appends to ``'comment'`` when skipping.
        res_tresh: Maximum number of master results examined per variation.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs`` as last handed back by
        ``discogs_tools.final_comparison`` (the input object when the method
        is skipped, interrupted, or nothing was examined).
    """
    log.info('Master: Variations\n')
    log.info('***Connecting Discogs\tQuery: {0}'.format(
        cat_attrs['artist']))
    cur_method = 'Variations'
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(cur_method)
        return cat_attrs
    artist_hits = d.search(cat_attrs['artist'], type='artist')
    log.info('FOUND {0} RESULTS'.format(len(artist_hits)))
    try:
        variations = artist_hits[0].name_variations
        log.info('Found {0} aliases : {1}'.format(artist_hits[0], variations))
        for k in variations:
            if config.is_interrupted():
                return cat_attrs
            variations_query = k + ' - ' + cat_attrs['album']
            log.info('***Connecting Discogs\tQuery: {0}'.format(
                variations_query))
            masters = d.search(variations_query, type='master')
            log.info('FOUND {0} RESULTS'.format(len(masters)))
            flag = False
            candidates = itertools.islice(masters, 0, res_tresh)
            # counter is the 1-based rank of the result within this query.
            for counter, i in enumerate(candidates, start=1):
                if config.is_interrupted():
                    return cat_attrs
                # Split on the first ' - ' only, so an album title that
                # itself contains the separator is kept whole.
                m_artist, separator, m_album = i.title.partition(' - ')
                if not separator:
                    log.debug(
                        'Result without artist separator skipped: {}'.format(
                            i.title))
                    continue
                result = m_artist, m_album
                cat_attrs, flag = discogs_tools.final_comparison(
                    cat_attrs, result, i.id, cur_method, counter)
                if flag:
                    break
            if flag:
                break
            time.sleep(ratelimit)
    except (TypeError, IndexError) as e:
        # TypeError: name_variations was not iterable (e.g. None).
        # IndexError: the artist search gave no first result to inspect.
        log.warning('{} No variations of {} found'.format(
            e, cat_attrs['artist']))
    discogs_tools.log_summary(cat_attrs, cur_method)
    time.sleep(ratelimit)
    return cat_attrs
def filter_releases(cat_attrs, releases_unf):
    """Keep only the releases whose title and sole artist match the tags.

    A release is kept when ``text_tools.check_album_similarity`` accepts its
    title, it credits exactly one artist that is not ``"Unknown Artist"``,
    and ``text_tools.check_artist_similarity`` accepts that artist.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'album'`` and
            ``'artist'``.
        releases_unf: Sized iterable of release objects to filter.

    Returns:
        List of accepted releases; the partial list collected so far when an
        interruption is signalled.
    """
    accepted = []
    log.info('Starting filtering founded {0} releases'.format(
        len(releases_unf)))
    for position, release in enumerate(releases_unf, start=1):
        if config.is_interrupted():
            return accepted

        title = get_title_m_or_r(release)
        title_matches = text_tools.check_album_similarity(
            cat_attrs['album'], title)
        if not title_matches:
            log.info('{} Release <{}> isn\'t matching to <{}>'.format(
                position, title, cat_attrs['album']))
            continue

        if len(release.artists) > 1:
            # Multi-artist releases are only listed in the log, not checked.
            log.warning('{} Too many artist in {}'.format(
                position, release.title))
            for artist_no, artist in enumerate(release.artists, start=1):
                log.info('{} Artist: {}'.format(artist_no, artist.name))
                time.sleep(0.5 * ratelimit)
            log.info('Checking skipped, need rework\n')
            continue

        artist_name = release.artists[0].name
        if artist_name == "Unknown Artist":
            log.warning('{} Artist Unknown'.format(position))
            log.warning('Artist: <{}>, Release: <{}>\n'.format(
                artist_name, release.title))
            continue

        log.info('{} Checking similarity for {} and (from tags): {}'.format(
            position, artist_name, cat_attrs['artist']))
        if text_tools.check_artist_similarity(
                cat_attrs['artist'], artist_name):
            accepted.append(release)
            log.info('')
            log.info('Release {0} appended, that was {1} result\n'.format(
                release, position))
        # Pause only after a full artist check; skipped releases above
        # continue straight to the next one.
        time.sleep(0.7 * ratelimit)

    log.info('filtered releases: {1} - {0}\n'.format(
        accepted, len(accepted)))
    return accepted
Beispiel #9
0
def find_a_master(cat_attrs, f_attrs_list, skip=False):
    """Try the master lookup methods in order until one of them succeeds.

    Each method from ``discogs_meths`` is called with ``cat_attrs`` (its
    return value is not used); after each call the search ends as soon as
    ``discogs_tools.is_found`` reports the wanted key(s) or an interruption
    is signalled. The manual method only runs when ``config.manual_mode`` is
    set, and the artist-based method runs last.

    Args:
        cat_attrs: Mapping of catalogue attributes handed to every method.
        f_attrs_list: Not used by this function.
        skip: Not used by this function.

    Returns:
        The same ``cat_attrs`` object that was passed in.
    """
    log.info('Find a master Func started\n')

    # (method, extra positional arguments, keys whose presence ends the run)
    automatic_steps = (
        (discogs_meths.m_by_token, (10,), ('d_master',)),
        (discogs_meths.m_by_album, (50,), ('d_master',)),
        (discogs_meths.m_by_variations, (100,), ('d_master',)),
        (discogs_meths.m_by_token_cut, (10,), ('d_master',)),
        (discogs_meths.m_by_album_fuzz, (), ('d_master', 'd_release')),
        (discogs_meths.m_by_album_fuzz_excl, (), ('d_master', 'd_release')),
    )
    for method, extra_args, wanted_keys in automatic_steps:
        method(cat_attrs, *extra_args)
        if discogs_tools.is_found(cat_attrs, *wanted_keys):
            return cat_attrs
        if config.is_interrupted():
            return cat_attrs

    if config.manual_mode:
        discogs_meths.m_by_manual(cat_attrs)
    if discogs_tools.is_found(cat_attrs, 'd_master'):
        return cat_attrs

    discogs_meths.m_by_artist(cat_attrs)
    # The closing log line is only written when the run was not interrupted.
    if not config.is_interrupted():
        log.info('Find a master Func ended\n\n')
    return cat_attrs
Beispiel #10
0
def m_by_album_fuzz_excl(cat_attrs, skip=False):
    """Fuzzy album lookup after removing a known substring from the title.

    If the album title contains any entry of
    ``text_tools.substr_to_exclude``, the title with that entry removed is
    searched among Discogs releases. The hits are narrowed with
    ``discogs_tools.filter_releases``, collapsed with
    ``discogs_tools.prefer_masters`` and saved only when exactly one
    candidate remains.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'album'`` and
            appends to ``'comment'``.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs``, as returned by ``discogs_tools.save_m_or_r`` when a
        single candidate was found, otherwise the input object.
    """
    log.info('Master: Fuzz Album with Replace\n')
    log.info('***Connecting Discogs\tQuery: {0}'.format(cat_attrs['album']))
    log.info('Looking for substring to exlude in : {0}\n'.format(
        cat_attrs['album']))
    cur_method = 'm_by_album_fuzz_excl'
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(cur_method)
        return cat_attrs
    album_token = None
    for substr in text_tools.substr_to_exclude:
        if substr in cat_attrs['album']:
            log.info('Found substring to exclude: {0}\n'.format(substr))
            # NOTE(review): each hit is removed from the ORIGINAL title, so
            # when several entries match only the last one is excluded --
            # confirm whether exclusions should accumulate instead.
            album_token = cat_attrs['album'].replace(substr, '')
    if album_token is None:
        log.info('Found nothing to exclude\n')
        return cat_attrs
    releases_unf = d.search(album_token, type='release')
    log.info('FOUND {0} RESULTS'.format(len(releases_unf)))
    if len(releases_unf) > 5000:
        log.info('Too many releases, method skipped\n')
        return cat_attrs
    filtered_releases = discogs_tools.filter_releases(cat_attrs, releases_unf)
    if config.is_interrupted():
        return cat_attrs
    final_list = discogs_tools.prefer_masters(filtered_releases)
    # prefer_masters hands back an empty list when interrupted part-way;
    # leave without recording a misleading comment in that case.
    if config.is_interrupted():
        return cat_attrs
    # Decide on the list that is actually indexed below: it can be shorter
    # than filtered_releases when several releases share one master.
    if len(final_list) == 1:
        cat_attrs = discogs_tools.save_m_or_r(
            cat_attrs, final_list[0], cur_method)
    else:
        log.info('Found {} matches - need develop this method\n'.format(
            len(final_list)))
        cat_attrs['comment'] += 'AlbumFuzz-Too many matches'
    return cat_attrs
Beispiel #11
0
def m_by_artist(cat_attrs, skip=False):
    """Look for the album among the releases of matching Discogs artists.

    Searches Discogs artists by ``cat_attrs['artist']``, reduces the hits to
    ids with ``text_tools.find_match_artists``, walks every release of each
    artist and keeps those whose title is fuzzily close to
    ``cat_attrs['album']`` (``fuzz.ratio`` > 85 or ``fuzz.partial_ratio``
    > 98). The result is saved only when exactly one release/master matched.

    Args:
        cat_attrs: Mapping of catalogue attributes; reads ``'artist'`` and
            ``'album'`` and appends to ``'comment'``.
        skip: When true, only record the skip in ``'comment'`` and return.

    Returns:
        ``cat_attrs``, as returned by ``discogs_tools.save_m_or_r`` when a
        single match was found, otherwise the input object.

    Raises:
        discogs_client.exceptions.HTTPError: when reading an artist's
            releases fails with a status other than 404.
    """
    log.info('Master: Artist\n')
    log.info('***Connecting Discogs\tQuery: {0}'.format(cat_attrs['artist']))
    if cat_attrs['artist'] == 'Various Artist':
        log.info('Method inappropriate for this album\n')
        return cat_attrs
    cur_method = 'by artist releases'
    if skip:
        cat_attrs['comment'] += 'skipped {}, '.format(cur_method)
        return cat_attrs
    artists_unf = d.search(cat_attrs['artist'], type='artist')
    log.info('FOUND {0} RESULTS'.format(len(artists_unf)))
    if len(artists_unf) > 2000:
        log.info('Too many artist, method skipped\n')
        return cat_attrs
    artists_ids = text_tools.find_match_artists(
        artists_unf, cat_attrs['artist'])
    if len(artists_ids) > 20:
        log.info('Too many artist, skipped method\n')
        return cat_attrs
    log.info('Getting artist ids list')
    releases_list = []
    for ids in artists_ids:
        log.info('***Connecting Discogs\tQuery: {0} as an artist id'.format(
            ids))
        artist_ = d.artist(ids)
        try:
            artist_releases = artist_.releases
        except discogs_client.exceptions.HTTPError as e:
            if e.status_code != 404:
                # Any other HTTP failure must not fall through: the name
                # artist_releases would be unbound or still hold the
                # previous artist's releases.
                raise
            log.warning('\n\n\n\n\n\n\n\n\n')
            log.warning('{} - skipped artist'.format(e))
            log.warning('\n\n\n\n\n\n\n\n\n')
            continue
        log.info('FOUND {0} RESULTS'.format(len(artist_releases)))
        if len(artist_releases) > 8000:
            log.info('Too many releases, method skipped\n')
            cat_attrs['comment'] += 'too many releases of artist founded'
            return cat_attrs
        time.sleep(ratelimit)
        simple_releases = {}
        # Index access (instead of plain iteration) lets a single item be
        # retried when fetching it raises a JSON decoding error.
        for index in range(len(artist_releases)):
            skip_flag = False
            if config.is_interrupted():
                return cat_attrs
            break_counter = 0
            log.debug('Starting step')
            while break_counter < 10:
                try:
                    release = artist_releases[index]
                    log.info('{} Binded {} successfully'.format(
                        index, release))
                except decoder.JSONDecodeError as e:
                    break_counter += 1
                    log.warning('{} nr {}\n'.format(e, break_counter))
                    time.sleep(ratelimit)
                    continue
                except IndexError:
                    skip_flag = True
                    log.warning('\n\n\n')
                    log.warning('Index Error forced skip')
                    log.warning('On {} item'.format(index))
                    log.warning('\n\n\n')
                    break
                else:
                    break
            if break_counter == 10:
                # All ten attempts failed to decode: give up on this item.
                log.warning('\n\n\n\n\n\n\n\n\n')
                log.warning('JSON Error forced skip')
                log.warning('On {} item'.format(index))
                log.warning('\n\n\n\n\n\n\n\n\n')
                cat_attrs['comment'] += ' JSON Error'
                continue
            if skip_flag:
                log.warning('skipping step')
                continue
            if config.is_interrupted():
                return cat_attrs
            # 1-based position, used for logging and for pacing below.
            ordinal = index + 1
            log.debug('{} Id: {}, Title: {}'.format(
                ordinal, release.id, release.title))
            # Keep only title and object type; the full object is fetched
            # again by id for the titles that match.
            simple_releases[str(release.id)] = [release.title, type(release)]
            log.debug('Added {}'.format(simple_releases[str(release.id)][0]))
            if ordinal % 20 == 0:
                time.sleep(ratelimit)
            log.debug('Finishing step\n')
        log.info('releases list : {0}'.format(artist_releases))
        log.info('releases list length : {0}'.format(len(artist_releases)))
        log.info('releases list : {0}'.format(simple_releases))
        log.info('releases list length : {0}'.format(len(simple_releases)))
        counter = 0
        for rel_id, rel_data in simple_releases.items():
            counter += 1
            if config.is_interrupted():
                return cat_attrs
            log.info('{} checking {}'.format(counter, rel_data[0]))
            ratio = fuzz.ratio(rel_data[0], cat_attrs['album'])
            ratio_partial = fuzz.partial_ratio(rel_data[0], cat_attrs['album'])
            log.debug('Ratio : {0}'.format(ratio))
            log.debug('Ratio Partial : {0}'.format(ratio_partial))
            if ratio > 85 or ratio_partial > 98:
                if rel_data[1] == discogs_client.models.Master:
                    releases_list.append(d.master(rel_id))
                elif rel_data[1] == discogs_client.models.Release:
                    releases_list.append(d.release(rel_id))
                log.info('release {0} appended'.format(rel_data[0]))
            time.sleep(ratelimit)
    if config.is_interrupted():
        return cat_attrs
    for counter, release in enumerate(releases_list):
        log.info('type of {0}, {1} release is {2}'.format(
            counter, release.title, type(release)))
    if len(releases_list) == 1:
        cat_attrs = discogs_tools.save_m_or_r(
            cat_attrs, releases_list[0], cur_method)
    else:
        log.info('Too many matches, This case should be improved')
    return cat_attrs