Example #1
def get_artists():
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)

    # GET request to the followed-artists endpoint
    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
    r = requests.get(MY_FOLLOWED_ARTISTS_URL, headers=headers)
    response = r.json()

    artist_ids = []
    artists = response['artists']['items']
    for artist in artists:
        artist_ids.append(artist['id'])

    # While next results page exists, get it and its artist_ids
    while response['artists']['next']:
        next_page_uri = response['artists']['next']
        r = requests.get(next_page_uri, headers=headers)
        response = r.json()
        for artist in response['artists']['items']:
            artist_ids.append(artist['id'])

    print('Retrieved artist IDs!')
    session['artist_ids'] = artist_ids

    return redirect('/get_albums')
Example #2
def add_to_playlist():
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    playlist_id = session['playlist_id']
    track_uris = hp.get_track_uris()

    # split up the request if the track count is too big; the Spotify API accepts at most 100 tracks per request
    tracks_list = track_uris['uris']
    number_of_tracks = len(tracks_list)

    # split track_uris list into 3 sub lists
    if number_of_tracks > 200:
        three_split = np.array_split(tracks_list, 3)
        for lst in three_split:
            hp.add_tracks(tokens, playlist_id, list(lst))

    # split track_uris list into 2 sub lists
    elif number_of_tracks > 100:
        two_split = np.array_split(tracks_list, 2)
        for lst in two_split:
            hp.add_tracks(tokens, playlist_id, list(lst))

    else:
        hp.add_tracks(tokens, playlist_id, tracks_list)

    print('Added tracks to playlist!')

    # redirect to playlist page & shut down flask server
    hp.shutdown_server(request.environ)
    return redirect(session['playlist_url'])
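Note that the hard-coded two- and three-way splits above only stay under the 100-track limit for up to 300 tracks. A more general sketch, reusing this project's hp.add_tracks helper on slices of at most 100 URIs:

# send the URIs in chunks of at most 100 (the Spotify API's per-request maximum)
CHUNK_SIZE = 100
for start in range(0, number_of_tracks, CHUNK_SIZE):
    hp.add_tracks(tokens, playlist_id, tracks_list[start:start + CHUNK_SIZE])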
Example #3
def refresh_tokens():
    tokens = hp.get_tokens()
    payload = {
        'grant_type': 'refresh_token',
        'refresh_token': tokens['refresh_token']
    }
    base64encoded = str(
        base64.b64encode(f'{CLIENT_ID}:{CLIENT_SECRET}'.encode('ascii')),
        'ascii')
    headers = {'Authorization': f'Basic {base64encoded}'}

    # post request for new tokens
    r = requests.post(SPOTIFY_TOKEN_URL, data=payload, headers=headers)
    response = r.json()
    hp.refresh_tokens(response['access_token'], tokens['refresh_token'],
                      response['expires_in'])

    print('Tokens refreshed!')
    return redirect('/get_artists')
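As an aside, requests can build the Basic authorization header itself from a (user, password) tuple, so the manual base64 step above could be dropped:

# equivalent call; requests base64-encodes CLIENT_ID:CLIENT_SECRET into a Basic auth header
r = requests.post(SPOTIFY_TOKEN_URL, data=payload, auth=(CLIENT_ID, CLIENT_SECRET))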
Example #4
def get_tracks():
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    album_ids = session['album_ids']
    track_uris = []

    for album_id in album_ids:  # avoid shadowing the built-in id()
        uri = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
        headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
        r = requests.get(uri, headers=headers)
        response = r.json()

        for track in response['items']:
            track_uris.append(track['uri'])

    hp.store_track_uris(track_uris)
    print('Retrieved tracks!')

    return redirect('/create_playlist')
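Note that the loop above only reads the first page of each album's track listing. A minimal sketch of following the paging cursor, assuming the album-tracks response carries the same 'next' field used in Example #1:

for album_id in album_ids:
    uri = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
    headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
    response = requests.get(uri, headers=headers).json()
    track_uris.extend(track['uri'] for track in response['items'])

    # keep requesting pages until 'next' is null
    while response.get('next'):
        response = requests.get(response['next'], headers=headers).json()
        track_uris.extend(track['uri'] for track in response['items'])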
Example #5
def get_albums():
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    artist_ids = session['artist_ids']
    album_ids = []
    album_names = {}  # maps album name -> artist; used to catch duplicate albums published under different IDs

    # set time frame for new releases (4 weeks)
    today = datetime.now()
    number_weeks = timedelta(weeks=4)
    time_frame = (today - number_weeks).date()

    for artist_id in artist_ids:  # avoid shadowing the built-in id()
        uri = f'https://api.spotify.com/v1/artists/{artist_id}/albums?include_groups=album,single&country=US'
        headers = {'Authorization': f'Bearer {tokens["access_token"]}'}
        r = requests.get(uri, headers=headers)
        response = r.json()

        albums = response['items']
        for album in albums:
            # keep only albums released within the time frame (last 4 weeks)
            try:
                # convert the release_date string to a datetime
                release_date = datetime.strptime(album['release_date'], '%Y-%m-%d')
                album_name = album['name']
                artist_name = album['artists'][0]['name']
                if release_date.date() > time_frame:
                    # a duplicate album name is kept only if it belongs to a different artist
                    if album_name not in album_names or artist_name != album_names[album_name]:
                        album_ids.append(album['id'])
                        album_names[album_name] = artist_name
            except ValueError:
                # some older release dates contain only a year (e.g. '2007'); those fall outside the 4-week window anyway
                print(f'Release date found with format: {album["release_date"]}')

    session['album_ids'] = album_ids
    print('Retrieved album IDs!')
    return redirect('/get_tracks')
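The except branch above simply discards albums whose release_date carries only a year. Spotify also returns a release_date_precision field ('year', 'month', or 'day'); assuming that field is present on each album object, a small helper could parse all three precisions instead:

from datetime import datetime

DATE_FORMATS = {'year': '%Y', 'month': '%Y-%m', 'day': '%Y-%m-%d'}

def parse_release_date(album):
    # pick the strptime format that matches the precision Spotify reports
    fmt = DATE_FORMATS[album['release_date_precision']]
    return datetime.strptime(album['release_date'], fmt)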
Example #6
def create_playlist():
    tokens = hp.get_tokens()
    hp.check_expiration(tokens)
    current_date = (date.today()).strftime('%m-%d-%Y')
    playlist_name = f'New Monthly Releases - {current_date}'

    # make request to create_playlist endpoint
    uri = f'https://api.spotify.com/v1/users/{USER_ID}/playlists'
    headers = {
        'Authorization': f'Bearer {tokens["access_token"]}',
        'Content-Type': 'application/json'
    }
    payload = {'name': playlist_name}
    r = requests.post(uri, headers=headers, data=json.dumps(payload))
    response = r.json()

    session['playlist_id'] = response['id']  # store the new playlist's id
    session['playlist_url'] = response['external_urls']['spotify']  # store the new playlist's url

    print(f'{r.status_code} - Created playlist!')
    return redirect('/add_to_playlist')
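Example #7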
        train_data = train_data1 + train_data2
        valid_data = valid_data1 + valid_data2
        print(len(train_data), len(valid_data))
    else:
        raise Exception("invalid TRAIN_NOISE_TYPE")

    if START_EPOCH != 1:  # if not training from scratch or for inference
        print(f"loading vocab from {VOCAB_PATH}")
        vocab = load_vocab_dict(VOCAB_PATH)
    else:
        print(f"loading vocab from train data itself and saving it at {VOCAB_PATH}")
        vocab = get_tokens([i[0] for i in train_data],
                           keep_simple=True,
                           min_max_freq=(2, float("inf")),
                           topk=100000,
                           intersect=vocab_ref,
                           load_char_tokens=True)
        save_vocab_dict(VOCAB_PATH, vocab)
    print("")
    #print(vocab["token_freq"])
    print([*vocab.keys()])
    #print([(idx,vocab["idx2token"][idx]) for idx in range(100)])
    print("")
    # see how many tokens in labels are going to be UNK
    # print ( num_unk_tokens([i[0] for i in train_data], vocab) )
    # print ( num_unk_tokens([i[0] for i in valid_data], vocab) )

    #############################################
    # load ElmoSCTransformer
    #############################################
Example #8
import random
import math
import helpers
import spacy

# extract each token's text, dropping duplicates while preserving order
original_tokens = [token.text for token in helpers.get_tokens()]
original_tokens = list(dict.fromkeys(original_tokens))

# scramble a copy so the original token list stays intact
tokens = list(original_tokens)

tokens_count = len(tokens)
twenty_percent = math.ceil(tokens_count * 0.2)

# pick a random 20% of the token indices to scramble (note: index 0 is never chosen)
token_indices_to_scramble = random.sample(range(1, tokens_count), twenty_percent)

for index in token_indices_to_scramble:
    operations = [
        helpers.ScrambleOperations.add_letter,
        helpers.ScrambleOperations.remove_letter,
        helpers.ScrambleOperations.swap_letter
    ]
    tokens[index] = random.choice(operations)(tokens[index])

helpers.write_tokens(tokens)

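The helpers.ScrambleOperations functions are not shown on this page; each appears to take a token string and return a scrambled variant. A hypothetical sketch of swap_letter under that assumption:

def swap_letter(token):
    # hypothetical: swap two adjacent characters at a random position
    if len(token) < 2:
        return token
    i = random.randrange(len(token) - 1)
    return token[:i] + token[i + 1] + token[i] + token[i + 2:]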
Example #9
images_val_all = \
open('data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines()
images_val_path = \
open('data/preprocessed/images_val2014_path.txt', 'r').read().decode('utf8').splitlines()
answers_val_all = \
open('data/preprocessed/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines()

print ('ques_val, size = {}, sample = {}'.format(len(questions_val), questions_val[0]))
print ('ques_lengths_val, size = {}, sample = {}'.format(len(questions_lengths_val), questions_lengths_val[0]))
print ('ans_val, size = {}, sample = {}'.format(len(answers_val), answers_val[0]))
print ('imag_val, size = {}, sample = {}'.format(len(images_val), images_val[0]))
print ('imag_val_path, size = {}, sample = {}'.format(len(images_val_path), images_val_path[0]))
print ('ans_val_all, size = {}, sample = {}'.format(len(answers_val_all), answers_val_all[0]))
ques_tokens_train = get_tokens(questions_train)
ques_tokens_val = get_tokens(questions_val)
'''
counts = {}
count_thr = 5
for i, tokens in enumerate(ques_tokens_train):#change to train
    for token in tokens:
        counts[token] = counts.get(token, 0) + 1

cw = sorted([(count,w) for w,count in counts.iteritems()], reverse=True)
print('top words and their counts:')
print('\n'.join(map(str,cw[:20])))
# print some stats
total_words = sum(counts.itervalues())
print('total words:', total_words)
bad_words = [w for w,n in counts.iteritems() if n <= count_thr]