def popularity(data, pidtotracks=None, name="DefaultName"):
    """Rank the tracks of each entry's predicted playlists by occurrence count.

    For every ``key`` in ``data`` the tracks of all predicted playlists are
    collected, scored with their value from the "trackOccurences" pickle and
    the (at most) 1000 highest-scored track ids are kept, best first.

    Args:
        data: Mapping of key -> iterable of playlist ids; each id must be
            convertible with ``int()``.
        pidtotracks: Mapping of integer playlist id -> whitespace-separated
            string of track ids. Loaded from the "pidtotracks" pickle when
            omitted.
        name: Name under which the result is cached via ``pickle_utility``.

    Returns:
        dict mapping each key of ``data`` to a list of track ids.

    Note:
        If a pickle called ``name`` already exists it is returned as-is,
        before ``data`` is looked at. The cache is keyed on ``name`` only, so
        callers must use distinct names for distinct inputs.
    """
    if pickle_utility.exists_file(name):
        return pickle_utility.load(name)

    # Identity check: ``== None`` would dispatch to a custom ``__eq__``.
    if pidtotracks is None:
        pidtotracks = pickle_utility.load("pidtotracks")
    occurences = pickle_utility.load("trackOccurences")

    pop_dict = {}
    print("Start popularity ranking...")
    for counter, (key, pid_predictions) in enumerate(data.items(), start=1):
        # The Counter is used as a score table (value assigned, not counted)
        # so that most_common() can pick the top entries.
        track_pop_dict = Counter()
        for pid in pid_predictions:
            for track in pidtotracks[int(pid)].split():
                track_pop_dict[track] = occurences[track]
        pop_dict[key] = [
            track for track, _ in track_pop_dict.most_common(1000)
        ]
        if counter % 100 == 0:
            print("Processed {} playlists".format(counter))

    pickle_utility.dump(pop_dict, name)
    return pop_dict
def getPopularityRankedTracks(playlists, name):
    """Rank, per playlist, the tracks of its similar playlists by popularity.

    Args:
        playlists: Mapping of playlist id -> iterable of similar playlist
            ids. Each similar id is converted with ``int(float(...))`` before
            it is looked up in the module-level ``pid_dict``.
        name: Passed through as the third argument of ``pickle_utility.dump``.

    Returns:
        dict mapping each playlist id to the list of distinct tracks found in
        its similar playlists, highest value from the "trackOccurences"
        pickle first. Tracks with equal values keep first-seen order.
    """
    trackPopularityDict = pickle_utility.load("trackOccurences")
    rankedSimilarTrackDict = {}
    total = len(playlists)

    for counter, (pid, similarPids) in enumerate(playlists.items()):
        print(counter, " / ", total)

        # A fresh score table per playlist: reusing one dict across
        # iterations would leak the tracks of earlier playlists into every
        # later ranking.
        rankedTrackDict = {}
        for similarPid in similarPids:
            for track in pid_dict[int(float(similarPid))].split(" "):
                if track not in rankedTrackDict:
                    rankedTrackDict[track] = trackPopularityDict[track]

        # Most popular tracks first; sorted() is stable, so ties keep
        # insertion order.
        rankedSimilarTrackDict[pid] = sorted(
            rankedTrackDict, key=rankedTrackDict.get, reverse=True
        )

    pickle_utility.dump(rankedSimilarTrackDict, "rankedTracks", name)
    return rankedSimilarTrackDict
def create_pid_dict(fileCount):
    """Build the playlist-id -> tracks lookup and pickle it as "pidtotracks".

    Args:
        fileCount: Forwarded to ``process_mpd`` together with
            ``app_settings.MPD_SET``.

    Returns:
        None. The mapping is only written through ``pickle_utility.dump``.
    """
    playlist_ids, track_entries = process_mpd(app_settings.MPD_SET, fileCount)

    print("Concat lists")
    # Pair ids and track entries positionally; a repeated id keeps its last
    # entry.
    lookup = dict(zip(playlist_ids, track_entries))

    print("Dump")
    pickle_utility.dump(lookup, "pidtotracks")
def create_yearDict():
    """Group playlist ids by the year mentioned in their name and pickle it.

    Every key of the module-level ``lookupDict`` is split on single spaces;
    the last token is taken as the playlist id and the tokens before it as
    the playlist name. The first four-digit run starting with 1 or 2 found in
    the name is used as the year. Keys without such a run are skipped.

    The resulting ``{year_string: [playlist_id, ...]}`` dict is written via
    ``pickle_utility.dump`` under the name "yearsDict".

    Returns:
        None.
    """
    year_pattern = re.compile(r"[1-2][0-9]{3}")
    years = {}

    for k in lookupDict:
        parts = k.split(" ")
        playlist_id = parts[-1]
        playlist_name = " ".join(parts[:-1])

        # Search the name itself. Extracting digits from ``str(match)`` (the
        # repr of a Match object) is unreliable: the repr truncates the
        # matched text and also contains the span numbers.
        found = year_pattern.search(playlist_name)
        if found is None:
            continue

        years.setdefault(found.group(), []).append(playlist_id)

    pickle_utility.dump(years, "yearsDict")
def create_wordDict():
    """Index playlist ids by their normalised, un-split name and pickle it.

    Every key of the module-level ``lookupDict`` is split on single spaces;
    the last token is taken as the playlist id. The remaining tokens are
    stripped of every character that is neither a word character nor
    whitespace, lower-cased and concatenated without separator.

    The resulting ``{normalised_name: [playlist_id, ...]}`` dict is written
    via ``pickle_utility.dump`` under "wordDict_NamesNotSplitted".

    NOTE(review): a second ``create_wordDict`` is defined later in the
    visible source; if both live in the same module the later definition
    replaces this one.

    Returns:
        None.
    """
    # Raw string: the same pattern as before, without invalid string escapes.
    non_word = re.compile(r"[^\w\s\_]")
    words = {}

    for k in lookupDict:
        parts = k.split(" ")
        playlist_id = parts[-1]
        out = "".join(
            non_word.sub("", token).lower() for token in parts[:-1]
        )
        words.setdefault(out, []).append(playlist_id)

    pickle_utility.dump(words, "wordDict_NamesNotSplitted")
def getPopularityRankedPlaylists(playlists, name):
    """Order each playlist's similar playlists by popularity, best first.

    Args:
        playlists: Mapping of playlist id -> iterable of similar playlist
            ids. Each similar id is converted with ``int(float(...))`` to
            look up its value in the "pidPopularityDict" pickle.
        name: Passed through as the third argument of ``pickle_utility.dump``.

    Returns:
        dict mapping each playlist id to its distinct similar playlist ids,
        highest popularity value first. Ties keep first-seen order.
    """
    popularity_by_pid = pickle_utility.load("pidPopularityDict")
    ranked = {}

    for pid, similarPids in playlists.items():
        # Score table for this playlist only.
        scores = {
            candidate: popularity_by_pid[int(float(candidate))]
            for candidate in similarPids
        }
        # Put the most popular similar playlists at the beginning.
        ranked[pid] = sorted(scores, key=scores.get, reverse=True)

    pickle_utility.dump(ranked, "rankedPlaylists", name)
    return ranked
def enrich_playlists(playlists, nr_new_tracks_per_track):
    """Extend every playlist with the most common followers of its tracks.

    The lookup table is unpickled from ``FILEPATH + "count_chain_file_dict"``
    and is indexed by track; each entry must offer ``most_common(n)``
    (presumably a ``collections.Counter`` -- confirm against the producer).

    Args:
        playlists: Mapping of key -> sequence of tracks.
        nr_new_tracks_per_track: Maximum number of candidates taken from the
            table for each track of a playlist.

    Returns:
        dict mapping each key to a new list: the original tracks followed by
        the candidates that are not among the original tracks. The same dict
        is also written via ``pickle_utility.dump`` under "MarkovOut".

    Raises:
        KeyError: If a track has no entry in the lookup table.
    """
    # Context manager so the handle is closed even if unpickling fails.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(FILEPATH + "count_chain_file_dict", "rb") as file:
        dic = pickle.load(file)

    outDict = {}
    for k, pl in playlists.items():
        # Only the original tracks are excluded; a candidate suggested by
        # several tracks is appended once per suggestion.
        trackSet = set(pl)
        tempList = list(pl)
        for track in pl:
            tempList.extend(
                entry[0]
                for entry in dic[track].most_common(nr_new_tracks_per_track)
                if entry[0] not in trackSet
            )
        outDict[k] = tempList

    pickle_utility.dump(outDict, "MarkovOut")
    return outDict
def create_wordDict():
    """Index playlist ids by their normalised name without irrelevant words.

    Every key of the module-level ``lookupDict`` is split on spaces,
    underscores and hyphens; the last token is taken as the playlist id. The
    remaining tokens are stripped of every character that is neither a word
    character nor whitespace and lower-cased. Tokens for which
    ``token in irrelevant`` holds are dropped and the rest are concatenated
    without separator.

    The resulting ``{normalised_name: [playlist_id, ...]}`` dict is written
    via ``pickle_utility.dump`` under "wordDict_withoutIrrelevant".

    NOTE(review): an earlier ``create_wordDict`` appears in the visible
    source; if both live in the same module this later definition is the one
    that stays bound to the name.
    NOTE(review): because the key is also split on "_" and "-", an id that
    contains one of these characters would be cut -- confirm the id format.

    Returns:
        None.
    """
    separators = re.compile(' |_|-')
    # Raw string: the same pattern as before, without invalid string escapes.
    non_word = re.compile(r"[^\w\s\_]")
    words = {}

    for k in lookupDict:
        parts = separators.split(k)
        playlist_id = parts[-1]
        cleaned = (non_word.sub("", token).lower() for token in parts[:-1])
        out = "".join(token for token in cleaned if token not in irrelevant)
        words.setdefault(out, []).append(playlist_id)

    pickle_utility.dump(words, "wordDict_withoutIrrelevant")