Ejemplo n.º 1
0
def _download_to_file(url, path):
    """Stream the content at ``url`` into the local file ``path``.

    Raises StorageException if an I/O error occurs while the file is being
    written. An HTTP error status raises the requests exception produced by
    ``raise_for_status()``.
    """
    response = requests.get(url, stream=True)
    try:
        # Without this check an HTTP error page would be saved as audio
        # and reported as a successful download.
        response.raise_for_status()
        try:
            with open(path, 'wb') as fd:
                for chunk in response.iter_content(1000):
                    fd.write(chunk)
        except (IOError, OSError):
            raise StorageException('Some problem writing file %s' % (path,))
    finally:
        # Always release the streamed connection.
        response.close()


def scrape_track(name, folder):
    """Find a track on YouTube and download its m4a audio stream.

    ``name`` must consist of exactly four '-'-separated parts; the third
    and fourth parts (artist and track) form the YouTube search query.
    The audio is written to ``/<folder>/<display_id>.mp3``.

    Returns ``(name, path)`` when a file was downloaded, and
    ``(False, False)`` when the name is invalid, the search or metadata
    extraction fails, a Track with the same ``youtube_code`` already exists,
    or no m4a format could be downloaded.
    """
    # TODO DETAIL in logger
    name_parts = name.split('-')
    if len(name_parts) != 4:
        print("Invalid name: %s" % (name,))
        return False, False
    artist, track = name_parts[2], name_parts[3]

    track_name = '%s - %s' % (artist, track)
    try:
        link = YouTubeExtractor.search_youtube_links(track_name)
    except Exception:
        return False, False

    try:
        # Search in YouTube based on "artist - track"; metadata only,
        # the audio itself is fetched below with requests.
        ydl = youtube_dl.YoutubeDL({'outtmpl': '%(id)s%(ext)s', 'noplaylist': True})
        # Add all the available extractors
        ydl.add_default_info_extractors()
        result = ydl.extract_info(link, download=False)

        Track.sync()
        display_id = result['display_id']
        # Skip videos whose code is already stored as Track.youtube_code.
        if Track.objects.filter(youtube_code=display_id).count() > 0:
            return False, False

        path = '/%s/%s' % (folder, display_id + '.mp3')
        for fmt in result['formats']:
            if fmt.get('ext') != 'm4a':
                continue
            try:
                _download_to_file(fmt['url'], path)
            except Exception:
                # Best effort: a failing format is skipped so that the
                # next m4a candidate can still be tried.
                continue
            return name, path
        return False, False
    except Exception:
        # Any extraction/lookup failure is reported as "nothing scraped";
        # unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False, False
Ejemplo n.º 2
0
    def generate_fingerprint_from_list(results, file_list):
        """Fingerprint the files named in ``file_list`` and ingest the new ones.

        ``file_list`` is fed on stdin to the echoprint-codegen binary, whose
        JSON output is read back. For every fingerprint that has a code and
        is not matched by ``fp.best_match_for_query``, a Track is created and
        the fingerprint metadata is completed with artist, title and
        track_id. Matched (duplicate) fingerprints are dropped. The resulting
        list is written back, parsed with ``parse_json_dump`` and ingested.

        ``results`` is a sequence of items whose element 0 is a
        '-'-separated label (year-release-artist-title) and whose element 1
        is the file name reported in the fingerprint metadata.

        Returns True. Raises IndexError if a fingerprint's file name has no
        entry in ``results`` or its label has fewer than four parts.
        """
        # Local import so the block does not depend on the module's import
        # section, which is outside this view.
        import tempfile

        # TODO: os.system is thread safe??
        # TODO: How to test this?
        # mkstemp guarantees a unique file; the previous random 1..10000
        # suffix could collide between concurrent runs.
        handle, codes_file = tempfile.mkstemp(prefix='allcodes_', suffix='.json', dir='/tmp')
        os.close(handle)
        try:
            # NOTE(review): file_list is interpolated into a shell command
            # unquoted -- confirm it never comes from untrusted input.
            command = '/home/vagrant/echoprint-codegen/echoprint-codegen -s 10 30 < %s > %s' % (file_list, codes_file)
            os.system(command)

            with open(codes_file, 'r') as data_file:
                data = json.load(data_file)

            # Build a new list instead of calling data.remove() inside the
            # loop: removing while iterating skipped the entry that followed
            # every duplicate.
            kept = []
            for fingerprint in data:
                code_string = fingerprint.get('code')
                if not code_string:
                    # Entries without a code are passed through untouched.
                    kept.append(fingerprint)
                    continue

                # Check the fingerprint doesn't exist in the database yet.
                response = fp.best_match_for_query(code_string)
                if response.match():
                    print("This file is duplicated")
                    continue

                metadata = fingerprint['metadata']
                filename = metadata['filename']
                label = [v for v in results if v[1] == filename][0][0]
                youtube_code = filename.replace('.mp3', '').replace('/tmp/', '')
                fields = [part.strip() for part in label.split('-')]
                year, release, artist, title = fields[0], fields[1], fields[2], fields[3]
                metadata['artist'] = artist
                metadata['title'] = title

                # Track creation
                Track.sync()
                track = Track(band=artist, release=release,
                              name=title,
                              year=year,
                              youtube_code=youtube_code)
                track.save()
                # NOTE(review): the original comment mentioned removing '-'
                # because of a fingerprint-server track_id limitation, but no
                # stripping happens here -- confirm echoprint_id is already
                # dash-free.
                metadata['track_id'] = track.echoprint_id
                kept.append(fingerprint)

            # Overwrite with artist and title
            with open(codes_file, 'w') as data_file:
                data_file.write(json.dumps(kept))

            # Fastingest invoke => post all into echo-fingerprint
            codes, _ = parse_json_dump(codes_file)
            fp.ingest(codes)
        finally:
            # Remove the temporary codes file even when a step above fails.
            FileHandler.delete_file(codes_file)

        return True