def __load(self):
    """Accumulate global track frequencies from the playlist files.

    Every ``.json`` file in the ``self.mpd`` directory is read, playlists
    whose ``pid`` belongs to the test set (``self.test``) are skipped, and
    ``self.track_frequencies`` is incremented once per track occurrence in
    the remaining playlists.
    """
    held_out = mpdutils.get_pids_from_dataset_json(self.test)
    json_files = [name for name in listdir(self.mpd) if name.endswith(".json")]
    for filename in json_files:
        print("Loading gtp data from %s" % filename)
        for playlist in mpdutils.read_dataset_json(join(self.mpd, filename)):
            # Test playlists must not contribute to the counts.
            if playlist["pid"] not in held_out:
                for track in playlist["tracks"]:
                    self.track_frequencies[track["track_uri"]] += 1
def _count_tracks(self):
    """Count, per track, the number of playlists that contain it.

    Reads every ``.json`` file in ``self.mpd`` and increments
    ``self.current_frequencies`` at most once per playlist for each track
    URI, so repeated occurrences inside one playlist are counted once.
    """
    json_files = [name for name in listdir(self.mpd) if name.endswith(".json")]
    for filename in json_files:
        print("Counting track frequencies %s" % filename)
        for playlist in mpdutils.read_dataset_json(join(self.mpd, filename)):
            seen = set()  # URIs already counted for this playlist
            for track in playlist["tracks"]:
                uri = track["track_uri"]
                if uri in seen:
                    continue
                self.current_frequencies[uri] += 1
                seen.add(uri)
def __load(self):
    """Accumulate per-title and global track counts from the playlist files.

    Every ``.json`` file in ``self.mpd`` is read and playlists whose ``pid``
    belongs to the test set (``self.test``) are skipped. For each remaining
    playlist the name is passed through ``normalize`` and, for every track
    occurrence, both ``self.title_memberships[<normalized name>][<uri>]`` and
    ``self.track_frequencies[<uri>]`` are incremented.
    """
    held_out = mpdutils.get_pids_from_dataset_json(self.test)
    json_files = [name for name in listdir(self.mpd) if name.endswith(".json")]
    for filename in json_files:
        print("Loading ntm data from %s" % filename)
        for playlist in mpdutils.read_dataset_json(join(self.mpd, filename)):
            # Test playlists must not contribute to the counts.
            if playlist["pid"] not in held_out:
                title = normalize(playlist["name"])
                for track in playlist["tracks"]:
                    self.title_memberships[title][track["track_uri"]] += 1
                    self.track_frequencies[track["track_uri"]] += 1
def run(self):
    """Produce recommendations for every test playlist and export them.

    Each playlist read from ``self.test`` is passed to
    ``self.model.recommend`` together with ``self.n``; the results are
    collected by playlist id and handed to ``self._export``. Progress is
    printed after every 100 playlists.
    """
    print("Generating recommendations...")
    recommendations = {}
    playlists = mpdutils.read_dataset_json(self.test)
    for processed, playlist in enumerate(playlists, start=1):
        recommendations[playlist["pid"]] = self.model.recommend(playlist, self.n)
        if processed % 100 == 0:
            print(processed)
    self._export(recommendations)
    print("Recommendations are produced %s" % self.out)
def serialize(self):
    """Write the tracks of all non-test playlists, grouped by lookup key.

    Playlists from every ``.json`` file in ``self.mpd`` are grouped by the
    key that ``LookupNormalization.lookup`` returns for their name; test
    playlists (by ``pid``) are skipped. The output file ``self.out`` gets one
    line per key: the key, a tab, then the space-separated track URIs.
    """
    held_out = mpdutils.get_pids_from_dataset_json(self.test)
    normalizer = LookupNormalization()
    tracks_by_key = defaultdict(list)

    json_files = [name for name in listdir(self.mpd) if name.endswith(".json")]
    for filename in json_files:
        print("Converting playlists from %s" % filename)
        for playlist in mpdutils.read_dataset_json(join(self.mpd, filename)):
            if playlist["pid"] in held_out:
                continue
            key = normalizer.lookup(playlist["name"])
            uris = [track["track_uri"] for track in playlist["tracks"]]
            tracks_by_key[key].extend(uris)

    with open(self.out, "w", encoding="utf-8") as out_file:
        for key, uris in tracks_by_key.items():
            out_file.write(key + "\t" + " ".join(uris) + "\n")
def analyze(self):
    """Print per-category and overall statistics of the test dataset.

    Nothing happens unless ``self.test`` ends with ``.json``. Otherwise the
    playlists are grouped by their ``category`` value and a table is printed
    (via ``tabulate`` with ``self.headers``) holding, for each category and
    for the dataset as a whole, the number of playlists and the mean of
    ``num_tracks``, ``num_samples`` and ``num_holdouts``.

    A dataset without any playlist yields a single ``overall`` row with a
    count of 0 and NaN means instead of raising ``StatisticsError``.
    """
    if not self.test.endswith(".json"):
        return

    print("Analyzing %s" % self.test)
    fields = ("num_tracks", "num_samples", "num_holdouts")

    stats = defaultdict(lambda: dict(
        instances=0, num_tracks=[], num_samples=[], num_holdouts=[]))
    for playlist in mpdutils.read_dataset_json(self.test):
        entry = stats[playlist["category"]]
        entry["instances"] += 1
        for field in fields:
            entry[field].append(playlist[field])

    summary = []
    total = 0
    overall = {field: [] for field in fields}
    for category, entry in sorted(stats.items()):
        # Per-category lists are never empty: an entry only exists once a
        # playlist has been appended to it.
        summary.append(
            [category, entry["instances"]]
            + [statistics.mean(entry[field]) for field in fields]
        )
        total += entry["instances"]
        for field in fields:
            overall[field].extend(entry[field])

    # statistics.mean() rejects empty data, which is exactly what an empty
    # dataset produces here; report NaN rather than crash.
    summary.append(
        ["overall", total]
        + [
            statistics.mean(overall[field]) if overall[field] else float("nan")
            for field in fields
        ]
    )

    print(tabulate(summary, headers=self.headers))
    print()
def _search(self, limit=100):
    """Reserve cold-start playlists drawn from randomly ordered files.

    Files in ``self.mpd`` are visited in shuffled order. From each file up
    to ``limit`` shuffled playlists satisfying the track-count bounds in
    ``mpdutils.COLD_START`` are considered (only the lower bound applies
    when ``self.mode`` is ``"custom"``). Every candidate accepted by
    ``self._is_applicable`` is appended to ``self.reservations`` with all
    of its tracks moved to ``holdouts`` (ordered by ``pos``), and each of
    its track occurrences decrements ``self.current_frequencies``.

    Args:
        limit: Maximum number of candidates examined per file.

    Raises:
        IndexError: If the files run out before ``self.size`` reservations
            have been collected.
    """
    low, up = mpdutils.COLD_START["lower"], mpdutils.COLD_START["upper"]
    files = [f for f in listdir(self.mpd) if f.endswith(".json")]
    random.shuffle(files)

    while True:
        filename = files.pop()
        print("Searching for candidate playlists %s" % filename)
        playlists = mpdutils.read_dataset_json(join(self.mpd, filename))
        random.shuffle(playlists)

        if self.mode == "custom":
            eligible = [p for p in playlists if low <= p["num_tracks"]]
        else:
            eligible = [p for p in playlists if low <= p["num_tracks"] <= up]

        for candidate in eligible[:limit]:
            if not self._is_applicable(candidate):
                continue
            self.reservations.append({
                "pid": candidate["pid"],
                "name": candidate["name"],
                "category": mpdutils.COLD_START["id"],
                "num_tracks": candidate["num_tracks"],
                "num_samples": mpdutils.COLD_START["seeds"],
                "num_holdouts": candidate["num_tracks"],
                "tracks": [],
                "holdouts": sorted(candidate["tracks"], key=lambda x: x["pos"]),
            })
            for track in candidate["tracks"]:
                self.current_frequencies[track["track_uri"]] -= 1

        # Checked once per file, so the final count may exceed self.size.
        if len(self.reservations) >= self.size:
            break
def __load_holdouts(test):
    """Return a dict mapping each playlist id to its holdout track URIs.

    Args:
        test: Dataset location passed to ``mpdutils.read_dataset_json``.

    Returns:
        ``{pid: [track_uri, ...]}`` built from every playlist's ``holdouts``.
    """
    return {
        playlist["pid"]: [track["track_uri"] for track in playlist["holdouts"]]
        for playlist in mpdutils.read_dataset_json(test)
    }