import numpy as np


def printUsage():
    """Print the command-line synopsis for this script to stdout."""
    print("Usage: python songsPerYear.py {user_id}")


# The user id is the one required positional argument. Without it, show the
# usage text and exit with a non-zero status.
if len(sys.argv) < 2:
    printUsage()
    sys.exit(1)
user = sys.argv[1]

# Input files are read from a folder named after the user id, relative to the
# current working directory.
user_folder = './' + user
mapped_data_path = user_folder + '/mapped_data.tsv'
meta_path = user_folder + '/meta.tsv'

# Set up data: build the metadata frame, then join it with the mapped data on
# the shared track_id column.
allData = importMappedData(mapped_data_path)
data = mbzMeta(meta_path, allData).merge(allData, on='track_id')

# Select the attribute we want to visualise: number of rows per 'year' value.
attribute = data['year'].value_counts()

# Deal with unknown (empty string) years by giving them a readable label.
labels = ["Unknown" if year == '' else year for year in attribute.index.values]
frequency = attribute.tolist()

# Generate the y positions. Later, replace them with labels
y_pos = range(len(labels))
import matplotlib.pyplot as plt import pandas as pd import sys import os from functions import importMappedData, trackMeta, mbzMeta, getUsers from collections import OrderedDict, Counter from operator import itemgetter user = sys.argv[1] data = importMappedData() userData = getUsers() userInfo = userData[userData["user_id"] == user] gender = "" age = 0 country = "" try: gender = userInfo['gender'].values[0] age = userInfo['age'].values[0] country = userInfo['country'].values[0] except IndexError as err: pass print(user) print("gender: " + gender) print("age: " + str(age)) print("country: " + country) # Create a DF with data relevant to the specified user userPlays = pd.DataFrame(data[data["user_id"] == user],
from operator import itemgetter
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from functions import importMappedData, mbzMeta


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two sequences.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Args:
        y_true: Observed values; converted with ``np.array``.
        y_pred: Predicted values; converted with ``np.array``.

    Returns:
        The error expressed as a percentage.

    Note:
        Zeros in ``y_true`` are not guarded against, so the division yields
        inf/nan for those entries and the result is not meaningful.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


# SET UP DATA
# Include the metadata for every song
data = mbzMeta(importMappedData())

# Clean-up genres column: drop the brackets and quotes left over from the
# list-like string form so only the comma-separated genre names remain.
data['genres'] = data['genres'].apply(
    lambda x: x.replace("[", "").replace("]", "").replace("'", "")
)

# For joining two dataframes use track_id as key
trackids = data.index

# Convert artist_id to numerical. fit_transform learns the classes and returns
# the encoded labels in a single pass, so no separate transform call is needed.
artistids = data["artist_id"].tolist()
artistle = LabelEncoder()
encoded_artists = pd.Series(artistle.fit_transform(artistids))
artist_classes = artistle.classes_