Ejemplo n.º 1
0
    def map(self, data):
        """Map a Discogs release document onto the common release structure.

        Every field is extracted on a best-effort basis: when the source data
        for a field is missing or malformed the error is swallowed and the
        field keeps whatever value ``base_mapping('release')`` gave it.

        :param data: document whose ``data['discogs']['release']`` entry holds
            the release to map.
        :returns: the structure created by ``base_mapping('release')`` with
            ``version`` set to 11 and its ``release`` entry populated.
        """
        target = base_mapping('release')
        target['version'] = 11
        release = target['release']

        # ``except Exception`` keeps the best-effort behaviour without also
        # swallowing KeyboardInterrupt / SystemExit the way a bare ``except``
        # would.
        try:
            release['title'] = collect_text(data['discogs']['release']['title'])
        except Exception:
            pass

        try:
            release['date'] = collect_text(data['discogs']['release']['released'])
        except Exception:
            pass

        try:
            # Only keep countries that have a truthy entry in ``countries``.
            release['country'] = [
                countries.get(country)
                for country in collect_text(data['discogs']['release']['country'])
                if countries.get(country, False)
            ]
        except Exception:
            pass

        try:
            release['artist'] = [
                {'name': artist['name']['text'],
                 'subitem': 'artist-{0}'.format(int(artist['id']['text']))}
                for artist in collect_obj(data['discogs']['release']['artists']['artist'])
            ]
            release['combined_artist'] = comma_list(
                [artist['name'] for artist in release['artist']])
        except Exception:
            pass

        try:
            release['urls'] = [
                {'url': image['_uri'], 'type': 'cover art'}
                for image in collect_obj(data['discogs']['release']['images']['image'])
            ]
        except Exception:
            pass

        try:
            labels = [
                {'name': label['_name'], 'catalog_number': label['_catno']}
                for label in collect_obj(data['discogs']['release']['labels']['label'])
            ]
            subitem_labels = self.extract_linked(data)['label']
            for label in labels:
                # Attach the subitem of the first linked label with the same
                # name, if there is one; the label is appended either way.
                for candidate in subitem_labels:
                    if candidate['name'] == label['name']:
                        label['subitem'] = 'label-' + candidate['label_id']
                        break
                release['label'].append(label)
        except Exception:
            pass

        try:
            # Barcodes are stored with all spaces removed.
            release['barcode'] = [
                re.sub(' ', '', barcode_obj['_value'])
                for barcode_obj in collect_obj(
                    data['discogs']['release']['identifiers']['identifier'],
                    {'_type': '^Barcode$'})
            ]
        except Exception:
            pass

        try:
            tracks = collect_obj(data['discogs']['release']['tracklist']['track'])
            # A failure on one track stops the loop; tracks appended before
            # the failure are kept.
            for track in tracks:
                obj = {'title': collect_text(track['title'])}
                if 'artists' in track:
                    obj['artist'] = [
                        {'name': artist['name']['text'],
                         'subitem': 'artist-{0}'.format(int(artist['id']['text']))}
                        for artist in collect_obj(track['artists']['artist'])
                    ]
                else:
                    # Tracks without their own credits use the release artists.
                    obj['artist'] = release['artist']
                durations = collect_text(track['duration'])
                obj['length_formatted'] = durations
                obj['length'] = [unformat_track_length(a) for a in durations]
                obj['number'] = collect_text(track['position'])
                release['tracks'].append(obj)
        except Exception:
            pass

        return target
Ejemplo n.º 2
0
Archivo: wcd.py Proyecto: warpr/geordi
    def _extract_track(self, track, links):
        """Build the common track structure for one file entry.

        Optional tags (title, artist, length, track number, album) are read on
        a best-effort basis; a missing or malformed tag leaves the matching
        field as ``base_mapping('track')`` initialised it.  The lookups of
        ``track['sha1']['text']`` and ``track['_name']`` are not guarded, so
        errors from those propagate to the caller.

        :param track: mapping describing one file; reads the ``sha1``,
            ``title``, ``artist``, ``length``, ``track``, ``_name``, ``album``
            and ``external-identifier`` entries.
        :param links: linked-entity data; ``links['artist_id']`` entries are
            matched by ``name`` to attach an ``artist_id-<wcd_artist_id>``
            subitem to the track artist.
        :returns: the populated track structure.
        """
        def normalise_number(num):
            # "03" -> "3"; non-numeric values (e.g. "A1") are kept verbatim.
            try:
                return str(int(num))
            except ValueError:
                return num

        f = base_mapping('track')
        f['subitem'] = 'file-{}'.format(track['sha1']['text'])

        # ``except Exception`` keeps the best-effort behaviour without also
        # swallowing KeyboardInterrupt / SystemExit.
        try:
            f['title'] = [track['title']['text']]
        except Exception:
            pass
        try:
            f['artist'] = [{'name': track['artist']['text']}]
            for artist in links['artist_id']:
                if artist['name'] == f['artist'][0]['name']:
                    f['artist'][0]['subitem'] = 'artist_id-{}'.format(artist['wcd_artist_id'])
        except Exception:
            pass
        try:
            # presumably the tag is in seconds and is stored as milliseconds
            # -- TODO confirm
            f['length'] = [int(float(track['length']['text']) * 1000)]
            f['length_formatted'] = [format_track_length(length) for length in f['length']]
        except Exception:
            pass
        try:
            # The tag may be "number" or "number/total".
            parts = re.split('/', track['track']['text'])
            f['number'].append(normalise_number(parts[0]))
            if len(parts) > 1:
                f['totaltracks'].append(normalise_number(parts[1]))
        except Exception:
            pass

        # Medium number candidates come from "cd 1" / "disc2" / "disk 3" style
        # markers in the file name and in the album tag.
        disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
        medium_candidates = []
        name_match = disk_re.search(track['_name'])
        if name_match:
            medium_candidates.append(name_match.group(2))

        # The album tag is optional like the other tags above; a file without
        # one must not abort extraction of the whole track.
        try:
            album_match = disk_re.search(track['album']['text'])
        except (KeyError, TypeError):
            album_match = None
        if album_match:
            medium_candidates.append(album_match.group(2))
        f['medium'] = uniq(medium_candidates)

        if 'external-identifier' in track:
            # Keep the acoustid URNs that are not "unknown", without the prefix.
            f[u'acoustid'] = [
                re.sub('^urn:acoustid:', '', acoustid)
                for acoustid in collect_text(track['external-identifier'],
                                             'urn:acoustid(?!:unknown)')
            ]
        else:
            f[u'acoustid'] = []

        return f
Ejemplo n.º 3
0
Archivo: wcd.py Proyecto: imclab/geordi
    def map(self, data):
        """Map a what.cd document onto the common release structure.

        Values are gathered from ``data['what_cd_json']``, ``data['meta_xml']``
        and ``data['files_xml']``.  Extraction is best-effort: a missing or
        malformed source value leaves the corresponding field at its previous
        value (or at an empty candidate list) instead of failing the mapping.

        :param data: document with the sources named above.
        :returns: the structure created by ``base_mapping('release')`` with
            ``version`` set to 12 and its ``release`` entry populated.
        """
        target = base_mapping('release')
        target['version'] = 12
        release = target['release']

        # ``except Exception`` is used throughout so that the best-effort
        # handling does not also swallow KeyboardInterrupt / SystemExit.

        # Release Title
        try:
            title_candidates = [
                htmlunescape(data['what_cd_json']['response']['group']['name'])
            ]
        except Exception:
            title_candidates = []
        try:
            title_candidates.extend(
                collect_text(data['meta_xml']['metadata']['album']))
        except Exception:
            pass
        try:
            title_list = re.split(
                ' / ',
                data['meta_xml']['metadata']['title']['text'],
                maxsplit=2)
            # Use the first " / "-separated piece unless it is the
            # 'Various Artists' placeholder, in which case use the second.
            if title_list[0] != 'Various Artists':
                title_candidates.append(title_list[0])
            else:
                title_candidates.append(title_list[1])
        except Exception:
            pass
        release['title'] = uniq(title_candidates)

        # Release Date
        try:
            release['date'] = collect_text(
                data['meta_xml']['metadata']['year'])
        except Exception:
            pass

        # Release Artists
        if 'what_cd_json' in data:
            try:
                release['artist'] = [{
                    'name': artist['name'],
                    'subitem': "artist_id-{}".format(int(artist['id']))
                } for artist in data['what_cd_json']['response']['group']
                                     ['musicInfo']['artists']]
            except (KeyError, TypeError):
                pass
            try:
                other_artists = []
                # ``.items()`` works on Python 2 and 3; ``.iteritems()`` fails
                # on Python 3 and the failure used to be silently swallowed.
                music_info = data['what_cd_json']['response']['group'][
                    'musicInfo']
                for role, role_artists in music_info.items():
                    if role != 'artists':
                        other_artists.extend([{
                            'name': artist['name'],
                            'subitem': 'artist_id-{0}'.format(int(artist['id']))
                        } for artist in role_artists])
                release['other_artist'] = uniq(other_artists)
            except Exception:
                pass
        if 'artist' not in release or len(release['artist']) < 1:
            # Fall back to the meta_xml 'artist' entry, then to 'creator'.
            try:
                release['artist'] = [{
                    'name': name
                } for name in collect_text(data['meta_xml']['metadata']
                                           ['artist'])]
            except KeyError:
                try:
                    release['artist'] = [{
                        'name': name
                    } for name in collect_text(data['meta_xml']['metadata']
                                               ['creator'])]
                except Exception:
                    release['artist'] = []
        release['combined_artist'] = comma_list(
            [artist['name'] for artist in release['artist']])

        # Release Label
        label_candidates = []
        catno_candidates = []
        try:
            if data['what_cd_json']['response']['group']['recordLabel']:
                label_candidates.append(
                    data['what_cd_json']['response']['group']['recordLabel'])
        except Exception:
            pass
        try:
            # The torrent id is the last '_'-separated piece of the
            # identifier; a non-numeric id is treated as "no matching torrent"
            # instead of aborting the whole mapping with ValueError.
            tor_id = int(re.split(
                '_', data['meta_xml']['metadata']['identifier']['text'])[-1])
            for torrent in data['what_cd_json']['response']['torrents']:
                if int(torrent['id']) == tor_id:
                    try:
                        if torrent['remasterRecordLabel']:
                            label_candidates.append(
                                torrent['remasterRecordLabel'])
                    except KeyError:
                        pass
                    try:
                        if torrent['remasterCatalogueNumber']:
                            catno_candidates.append(
                                torrent['remasterCatalogueNumber'])
                    except KeyError:
                        pass
                    break
        except (KeyError, TypeError, ValueError):
            pass
        try:
            label_candidates.extend(
                collect_text(data['meta_xml']['metadata']['publisher']))
        except KeyError:
            pass

        release['label'] = [{'name': name} for name in uniq(label_candidates)]

        # Release Catalog Number
        try:
            if data['what_cd_json']['response']['group']['catalogueNumber']:
                catno_candidates.append(data['what_cd_json']['response']
                                        ['group']['catalogueNumber'])
        except Exception:
            pass
        release['catalog_number'] = uniq(catno_candidates)

        # Tracks
        links = self.extract_linked(data)
        try:
            # Only original files in an acceptable format become tracks; a
            # failure on any file leaves release['tracks'] untouched.
            tracks = [
                self._extract_track(x, links)
                for x in data['files_xml']['files']['file']
                if (x['_source'] == 'original'
                    and x['format']['text'] in self._acceptable_formats())
            ]
            release['tracks'] = sorted(tracks, key=self._track_sorter)
        except Exception:
            pass

        # URLs
        try:
            release['urls'].append({
                "url": data['what_cd_json']['response']['group']['wikiImage'],
                "type": "cover art"
            })
        except Exception:
            pass

        return target
Ejemplo n.º 4
0
Archivo: wcd.py Proyecto: imclab/geordi
    def _extract_track(self, track, links):
        """Build the common track structure for one file entry.

        Optional tags (title, artist, length, track number, album) are read on
        a best-effort basis; a missing or malformed tag leaves the matching
        field as ``base_mapping('track')`` initialised it.  The lookups of
        ``track['sha1']['text']`` and ``track['_name']`` are not guarded, so
        errors from those propagate to the caller.

        :param track: mapping describing one file; reads the ``sha1``,
            ``title``, ``artist``, ``length``, ``track``, ``_name``, ``album``
            and ``external-identifier`` entries.
        :param links: linked-entity data; ``links['artist_id']`` entries are
            matched by ``name`` to attach an ``artist_id-<wcd_artist_id>``
            subitem to the track artist.
        :returns: the populated track structure.
        """
        def normalise_number(num):
            # "03" -> "3"; non-numeric values (e.g. "A1") are kept verbatim.
            try:
                return str(int(num))
            except ValueError:
                return num

        f = base_mapping('track')
        f['subitem'] = 'file-{}'.format(track['sha1']['text'])

        # ``except Exception`` keeps the best-effort behaviour without also
        # swallowing KeyboardInterrupt / SystemExit.
        try:
            f['title'] = [track['title']['text']]
        except Exception:
            pass
        try:
            f['artist'] = [{'name': track['artist']['text']}]
            for artist in links['artist_id']:
                if artist['name'] == f['artist'][0]['name']:
                    f['artist'][0]['subitem'] = 'artist_id-{}'.format(
                        artist['wcd_artist_id'])
        except Exception:
            pass
        try:
            # presumably the tag is in seconds and is stored as milliseconds
            # -- TODO confirm
            f['length'] = [int(float(track['length']['text']) * 1000)]
            f['length_formatted'] = [
                format_track_length(length) for length in f['length']
            ]
        except Exception:
            pass
        try:
            # The tag may be "number" or "number/total".
            parts = re.split('/', track['track']['text'])
            f['number'].append(normalise_number(parts[0]))
            if len(parts) > 1:
                f['totaltracks'].append(normalise_number(parts[1]))
        except Exception:
            pass

        # Medium number candidates come from "cd 1" / "disc2" / "disk 3" style
        # markers in the file name and in the album tag.
        disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
        medium_candidates = []
        name_match = disk_re.search(track['_name'])
        if name_match:
            medium_candidates.append(name_match.group(2))

        # The album tag is optional like the other tags above; a file without
        # one must not abort extraction of the whole track.
        try:
            album_match = disk_re.search(track['album']['text'])
        except (KeyError, TypeError):
            album_match = None
        if album_match:
            medium_candidates.append(album_match.group(2))
        f['medium'] = uniq(medium_candidates)

        if 'external-identifier' in track:
            # Keep the acoustid URNs that are not "unknown", without the prefix.
            f[u'acoustid'] = [
                re.sub('^urn:acoustid:', '', acoustid)
                for acoustid in collect_text(track['external-identifier'],
                                             'urn:acoustid(?!:unknown)')
            ]
        else:
            f[u'acoustid'] = []

        return f
Ejemplo n.º 5
0
    def map(self, data):
        """Map a what.cd document onto the common release structure.

        Values are gathered from ``data['what_cd_json']``, ``data['meta_xml']``
        and ``data['files_xml']``.  Extraction is best-effort: a missing or
        malformed source value leaves the corresponding field at its previous
        value (or at an empty candidate list) instead of failing the mapping.

        :param data: document with the sources named above.
        :returns: the structure created by ``base_mapping('release')`` with
            ``version`` set to 12 and its ``release`` entry populated.
        """
        target = base_mapping('release')
        target['version'] = 12
        release = target['release']

        # ``except Exception`` is used throughout so that the best-effort
        # handling does not also swallow KeyboardInterrupt / SystemExit.

        # Release Title
        try:
            title_candidates = [htmlunescape(data['what_cd_json']['response']['group']['name'])]
        except Exception:
            title_candidates = []
        try:
            title_candidates.extend(collect_text(data['meta_xml']['metadata']['album']))
        except Exception:
            pass
        try:
            title_list = re.split(' / ', data['meta_xml']['metadata']['title']['text'], maxsplit=2)
            # Use the first " / "-separated piece unless it is the
            # 'Various Artists' placeholder, in which case use the second.
            if title_list[0] != 'Various Artists':
                title_candidates.append(title_list[0])
            else:
                title_candidates.append(title_list[1])
        except Exception:
            pass
        release['title'] = uniq(title_candidates)

        # Release Date
        try:
            release['date'] = collect_text(data['meta_xml']['metadata']['year'])
        except Exception:
            pass

        # Release Artists
        if 'what_cd_json' in data:
            try:
                release['artist'] = [
                    {'name': artist['name'],
                     'subitem': "artist_id-{}".format(int(artist['id']))}
                    for artist
                    in data['what_cd_json']['response']['group']['musicInfo']['artists']
                ]
            except (KeyError, TypeError):
                pass
            try:
                other_artists = []
                # ``.items()`` works on Python 2 and 3; ``.iteritems()`` fails
                # on Python 3 and the failure used to be silently swallowed.
                music_info = data['what_cd_json']['response']['group']['musicInfo']
                for role, role_artists in music_info.items():
                    if role != 'artists':
                        other_artists.extend([
                            {'name': artist['name'],
                             'subitem': 'artist_id-{0}'.format(int(artist['id']))}
                            for artist in role_artists
                        ])
                release['other_artist'] = uniq(other_artists)
            except Exception:
                pass
        if 'artist' not in release or len(release['artist']) < 1:
            # Fall back to the meta_xml 'artist' entry, then to 'creator'.
            try:
                release['artist'] = [{'name': name} for name in collect_text(data['meta_xml']['metadata']['artist'])]
            except KeyError:
                try:
                    release['artist'] = [{'name': name} for name in collect_text(data['meta_xml']['metadata']['creator'])]
                except Exception:
                    release['artist'] = []
        release['combined_artist'] = comma_list([artist['name'] for artist in release['artist']])

        # Release Label
        label_candidates = []
        catno_candidates = []
        try:
            if data['what_cd_json']['response']['group']['recordLabel']:
                label_candidates.append(data['what_cd_json']['response']['group']['recordLabel'])
        except Exception:
            pass
        try:
            # The torrent id is the last '_'-separated piece of the
            # identifier; a non-numeric id is treated as "no matching torrent"
            # instead of aborting the whole mapping with ValueError.
            tor_id = int(re.split('_', data['meta_xml']['metadata']['identifier']['text'])[-1])
            for torrent in data['what_cd_json']['response']['torrents']:
                if int(torrent['id']) == tor_id:
                    try:
                        if torrent['remasterRecordLabel']:
                            label_candidates.append(torrent['remasterRecordLabel'])
                    except KeyError:
                        pass
                    try:
                        if torrent['remasterCatalogueNumber']:
                            catno_candidates.append(torrent['remasterCatalogueNumber'])
                    except KeyError:
                        pass
                    break
        except (KeyError, TypeError, ValueError):
            pass
        try:
            label_candidates.extend(collect_text(data['meta_xml']['metadata']['publisher']))
        except KeyError:
            pass

        release['label'] = [{'name': name} for name in uniq(label_candidates)]

        # Release Catalog Number
        try:
            if data['what_cd_json']['response']['group']['catalogueNumber']:
                catno_candidates.append(data['what_cd_json']['response']['group']['catalogueNumber'])
        except Exception:
            pass
        release['catalog_number'] = uniq(catno_candidates)

        # Tracks
        links = self.extract_linked(data)
        try:
            # Only original files in an acceptable format become tracks; a
            # failure on any file leaves release['tracks'] untouched.
            tracks = [self._extract_track(x, links)
                      for x
                      in data['files_xml']['files']['file']
                      if (x['_source'] == 'original' and
                          x['format']['text'] in self._acceptable_formats())]
            release['tracks'] = sorted(tracks, key=self._track_sorter)
        except Exception:
            pass

        # URLs
        try:
            release['urls'].append(
                {"url": data['what_cd_json']['response']['group']['wikiImage'],
                 "type": "cover art"}
            )
        except Exception:
            pass

        return target
Ejemplo n.º 6
0
    def map(self, data):
        """Map a Discogs release document onto the common release structure.

        Every field is extracted on a best-effort basis: when the source data
        for a field is missing or malformed the error is swallowed and the
        field keeps whatever value it had before that step.

        :param data: document whose ``data['discogs']['release']`` entry holds
            the release to map.
        :returns: the structure created by ``base_mapping('release')`` with
            ``version`` set to 13 and its ``release`` entry populated.
        """
        target = base_mapping('release')
        target['version'] = 13
        release = target['release']

        # ``except Exception`` keeps the best-effort behaviour without also
        # swallowing KeyboardInterrupt / SystemExit the way a bare ``except``
        # would.
        try:
            release['title'] = collect_text(
                data['discogs']['release']['title'])
        except Exception:
            pass

        try:
            release['date'] = collect_text(
                data['discogs']['release']['released'])
        except Exception:
            pass

        try:
            # Only keep countries that have a truthy entry in ``countries``.
            release['country'] = [
                countries.get(country) for country in collect_text(
                    data['discogs']['release']['country'])
                if countries.get(country, False)
            ]
        except Exception:
            pass

        try:
            release['artist'] = [{
                'name': artist['name']['text'],
                'subitem': 'artist-{0}'.format(int(artist['id']['text']))
            } for artist in collect_obj(data['discogs']['release']['artists']
                                        ['artist'])]
            release['combined_artist'] = comma_list(
                [artist['name'] for artist in release['artist']])
        except Exception:
            pass

        try:
            release['urls'] = [{
                'url': image['_uri'],
                'type': 'cover art'
            } for image in collect_obj(data['discogs']['release']['images']
                                       ['image'])]
        except Exception:
            pass

        try:
            # Link back to the Discogs page of the release itself.
            release['urls'].append({
                'url': ('http://www.discogs.com/release/' +
                        data['discogs']['release']['_id']),
                'type': 'link type',
                'link_type': 'discogs'
            })
        except Exception:
            pass

        try:
            labels = [{
                'name': label['_name'],
                'catalog_number': label['_catno']
            } for label in collect_obj(data['discogs']['release']['labels']
                                       ['label'])]
            subitem_labels = self.extract_linked(data)['label']
            for label in labels:
                # Attach the subitem of the first linked label with the same
                # name, if there is one; the label is appended either way.
                for candidate in subitem_labels:
                    if candidate['name'] == label['name']:
                        label['subitem'] = 'label-' + candidate['label_id']
                        break
                release['label'].append(label)
        except Exception:
            pass

        try:
            # Barcodes are stored with all spaces removed.
            release['barcode'] = [
                re.sub(' ', '', barcode_obj['_value'])
                for barcode_obj in collect_obj(
                    data['discogs']['release']['identifiers']['identifier'],
                    {'_type': '^Barcode$'})
            ]
        except Exception:
            pass

        try:
            tracks = collect_obj(
                data['discogs']['release']['tracklist']['track'])
            # A failure on one track stops the loop; tracks appended before
            # the failure are kept.
            for track in tracks:
                obj = {'title': collect_text(track['title'])}
                if 'artists' in track:
                    obj['artist'] = [{
                        'name': artist['name']['text'],
                        'subitem': 'artist-{0}'.format(int(artist['id']['text']))
                    } for artist in collect_obj(track['artists']['artist'])]
                else:
                    # Tracks without their own credits use the release artists.
                    obj['artist'] = release['artist']
                durations = collect_text(track['duration'])
                obj['length_formatted'] = durations
                obj['length'] = [unformat_track_length(a) for a in durations]
                obj['number'] = collect_text(track['position'])
                release['tracks'].append(obj)
        except Exception:
            pass

        return target