def printUsage():
    """Print the expected command-line invocation of this script."""
    print("Usage: python songsPerYear.py {user_id}")


# The user id is the first positional command-line argument; without it
# there is nothing to plot, so show the usage line and stop.
if len(sys.argv) < 2:
    printUsage()
    sys.exit(1)
user = sys.argv[1]

# Every user has their own folder, relative to the working directory.
user_folder = f"./{user}"

# Set up data: load the user's mapped data, fetch the metadata through
# mbzMeta (defined elsewhere), then join both frames on the track id.
allData = importMappedData(user_folder + '/mapped_data.tsv')
data = mbzMeta(user_folder + '/meta.tsv', allData).merge(allData, on='track_id')

# Select the attribute we want to visualise: how often each year occurs.
attribute = data['year'].value_counts()

# An empty-string year means the year is unknown; give it a readable label.
labels = [x if x != '' else "Unknown" for x in attribute.index.values]
frequency = attribute.tolist()

# Generate the y positions. Later, replace them with labels.
y_pos = range(len(labels))

fig, ax = plt.subplots()
]) if userPlays.empty: print(user + " does not exist! ") sys.exit(1) # Sort by playcount sortedPlays = userPlays.sort_values(by='normalised_pc', ascending=False) num = 1 print(user + "'s listened to tracks in order of preference desc:") for index, row in sortedPlays.iterrows(): track_name, artist_name = trackMeta(sortedPlays, row['track_id']) print(str(num) + " " + track_name + " by " + artist_name) num = num + 1 print(sortedPlays) songData = mbzMeta(sortedPlays) # Clean-up genres column songData['genres'] = songData['genres'].apply( lambda x: x.replace("[", "").replace("]", "").replace("'", "")) genres = songData["genres"].tolist() genreList = [] for group in genres: genreList.append(group.split(", ")) d = {} for item in genreList: for genre in item: if genre in d: d[genre] = d[genre] + 1
from operator import itemgetter
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from functions import importMappedData, mbzMeta


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred* against *y_true*.

    Both arguments are converted to NumPy arrays. The result is the mean of
    ``|y_true - y_pred| / |y_true|`` expressed as a percentage.

    The error is divided by ``y_true`` element-wise, so a zero in ``y_true``
    makes the result non-finite (inf or nan).
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


# SET UP DATA
# Include the metadata for every song
data = mbzMeta(importMappedData())

# Clean-up genres column: drop the square brackets and single quotes.
data['genres'] = data['genres'].apply(
    lambda x: x.replace("[", "").replace("]", "").replace("'", ""))

# For joining two dataframes use track_id as key
# NOTE(review): presumably the frame is indexed by track_id -- confirm.
trackids = data.index

# Convert artist_id to numerical.
# fit_transform learns the classes and encodes in one pass; the original
# discarded that result and then ran transform() over the same ids again.
artistids = data["artist_id"].tolist()
artistle = LabelEncoder()
encoded_artists = pd.Series(artistle.fit_transform(artistids))
artist_classes = artistle.classes_