Exemple #1
0
def printUsage():
	"""Write the one-line command-line usage hint for this script to stdout."""
	usage_text = "Usage: python songsPerYear.py {user_id}"
	print(usage_text)
	
	
# The user id is the single required positional argument. When it is
# missing, show the usage hint and stop with exit status 1.
if len(sys.argv) < 2:
	printUsage()
	sys.exit(1)
user = sys.argv[1]
	
# All input files are read from a folder named after the user id,
# relative to the current working directory.
user_folder = './' + user

# Set up data: load the mapped data, build the metadata frame from it and
# join the two on their shared 'track_id' column.
allData = importMappedData(user_folder + '/mapped_data.tsv')
data = mbzMeta(user_folder + '/meta.tsv', allData).merge(allData, on='track_id')

# Select the attribute we want to visualise: occurrences per 'year' value.
attribute = data['year'].value_counts()

# Axis labels are the distinct values; an empty string is shown as "Unknown".
labels = ["Unknown" if value == '' else value for value in attribute.index.values]

# Counts, in the same order as the labels.
frequency = attribute.tolist()

# One y position per label. Later, replace them with labels.
y_pos = range(len(labels))

fig, ax = plt.subplots()
                         ])
# Nothing to report when no rows were found for the requested user.
if userPlays.empty:
    print(user + " does not exist! ")
    sys.exit(1)

# Order the rows by the 'normalised_pc' column, highest value first.
sortedPlays = userPlays.sort_values(by='normalised_pc', ascending=False)

print(user + "'s listened to tracks in order of preference desc:")
num = 1  # 1-based rank printed in front of every track
for index, row in sortedPlays.iterrows():
    track_name, artist_name = trackMeta(sortedPlays, row['track_id'])
    print(" ".join([str(num), track_name, "by", artist_name]))
    num += 1

print(sortedPlays)

songData = mbzMeta(sortedPlays)

# Clean-up genres column: delete the list punctuation ([, ] and ') from each
# entry in a single pass, leaving a plain ", "-separated string.
genre_markup = str.maketrans("", "", "[]'")
songData['genres'] = songData['genres'].apply(
    lambda raw: raw.translate(genre_markup))

# One list of genre names per row.
genres = songData["genres"].tolist()
genreList = [entry.split(", ") for entry in genres]

# Intended as a per-genre occurrence tally over every list in genreList.
# NOTE(review): as shown here `d` starts empty and only keys that are already
# present get incremented, so no key is ever inserted. The branch that records
# a genre's first occurrence appears to be missing from this excerpt - confirm
# against the complete file.
d = {}
for item in genreList:
    for genre in item:
        if genre in d:
            d[genre] = d[genre] + 1
Exemple #3
0
from operator import itemgetter
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from functions import importMappedData, mbzMeta

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) as a percentage.

    Each absolute error ``|y_true - y_pred|`` is divided by ``|y_true|``,
    the ratios are averaged, and the mean is multiplied by 100.

    A zero in ``y_true`` would make the plain ratio a division by zero
    (``inf``, or ``nan`` when the zero is predicted exactly). To keep the
    result finite the denominator is clamped to machine epsilon, the same
    convention scikit-learn's MAPE uses. Targets whose magnitude is at
    least epsilon are unaffected.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The MAPE as a float, e.g. ``10.0`` for an average error of 10 %.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Clamp so that zero targets produce a finite ratio instead of inf/nan.
    denominator = np.maximum(np.abs(y_true), np.finfo(float).eps)
    return np.mean(np.abs(y_true - y_pred) / denominator) * 100

# SET UP DATA

# Build the metadata frame from the mapped data (metadata for every song).
mapped = importMappedData()
data = mbzMeta(mapped)

# Clean-up genres column: delete the list punctuation ([, ] and ') from each
# entry in a single pass.
genre_markup = str.maketrans("", "", "[]'")
data['genres'] = data['genres'].apply(lambda raw: raw.translate(genre_markup))

# Keep the frame's index; track_id is the key for joining two dataframes.
trackids = data.index

# Convert artist_id to numerical
artistids = data["artist_id"].tolist()
artistle = LabelEncoder()
# fit_transform() learns the classes and returns the integer codes in one
# pass, so there is no need to discard its result and call transform() again
# on the same list.
encoded_artists = pd.Series(artistle.fit_transform(artistids))
# Learned classes, kept so the integer codes can be mapped back to artist ids.
artist_classes = artistle.classes_