# Builds a playlist x artist sparse matrix (`ucm`) from the train and test
# interactions exposed by Datareader, then restricts both it and the URM to
# the train + test playlist ids. Everything runs at import time and the
# resulting shapes are printed.
from utils.definitions import *

# Datareader
dr = Datareader(mode='online', only_load=True)
#ev = Evaluator(dr)

# Dataframe with interactions: train rows followed by test rows.
df_train = dr.get_df_train_interactions()
df_test = dr.get_df_test_interactions()
df = pd.concat([df_train, df_test], axis=0, join='outer')

# `Series.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same ndarray on both old and current pandas.
playlists = df['pid'].values
tracks = df['tid'].values
dictionary = dr.get_track_to_artist_dict()

# Row selector used for both matrices: train pids first, then test pids.
pids = list(dr.get_train_pids()) + list(dr.get_test_pids())

# URM
urm = dr.get_urm()
urm = urm[pids]
print(urm.shape)

print('artists...')
# One artist per interaction row, positionally aligned with `playlists`.
artists = [dictionary[t] for t in tracks]

print('ucm...')
# One 1.0 entry per (playlist, artist) interaction row. Assuming `sparse` is
# scipy.sparse, repeated (row, col) pairs are summed by the constructor, so a
# cell ends up holding the number of interactions of that playlist with that
# artist rather than a 0/1 flag.
# NOTE(review): 1049361 is a hard-coded row count -- presumably the total
# number of playlist ids; confirm it matches the Datareader in use.
# The constructor already returns CSR, so no extra `tocsr()` call is needed.
ucm = sparse.csr_matrix(
    (np.ones(len(playlists)), (playlists, artists)),
    shape=(1049361, len(dr.get_artists())),
)
ucm = ucm[pids]
print(ucm.shape)
if mode == "online": nlp = NLP(dr, stopwords=[], norm=norm, work=work, split=split, date=date, skip_words=skip_words, porter=porter, porter2=porter2, lanca=lanca, lanca2=lanca2) pids = list(dr.get_train_pids()) + list(dr.get_test_pids()) test_playlists = dr.get_test_pids() ucm = nlp.get_UCM(data1=data1) dr_old = Datareader(mode='online', only_load='True', type='old') train = ucm[:1000000] test = ucm[1000000:] test_indices = [] for cat in range(1, 11): indices = dr_old.get_test_pids_indices(cat=cat) test_indices.extend(indices) new_indices = list(