Exemple #1
0
    def __load(self):
        """Accumulate track occurrence counts from the playlist files.

        Each file directly under ``self.mpd`` whose name ends in ``.json`` is
        read with ``mpdutils.read_dataset_json``. Playlists whose ``pid`` is
        reported by ``mpdutils.get_pids_from_dataset_json(self.test)`` are
        skipped. For every remaining playlist, each entry of ``tracks`` adds
        one to ``self.track_frequencies[track_uri]``, so a track repeated
        inside one playlist is counted once per repetition.
        """
        excluded_pids = mpdutils.get_pids_from_dataset_json(self.test)
        # NOTE(review): the membership test below runs once per playlist; if
        # the helper returns a list rather than a set this is a linear scan
        # each time -- confirm against mpdutils.

        for name in listdir(self.mpd):
            if not name.endswith(".json"):
                continue

            print("Loading gtp data from %s" % name)
            for playlist in mpdutils.read_dataset_json(join(self.mpd, name)):
                if playlist["pid"] in excluded_pids:
                    continue

                for track in playlist["tracks"]:
                    self.track_frequencies[track["track_uri"]] += 1
    def _count_tracks(self):
        """Count, per track, how many playlists contain it.

        Reads every ``.json`` file directly under ``self.mpd`` and, for each
        playlist, adds one to ``self.current_frequencies[track_uri]`` for
        every *distinct* ``track_uri`` in its ``tracks`` list. Repeated
        entries of the same track within one playlist are counted once.
        """
        json_names = (name for name in listdir(self.mpd)
                      if name.endswith(".json"))

        for name in json_names:
            print("Counting track frequencies %s" % name)

            for playlist in mpdutils.read_dataset_json(join(self.mpd, name)):
                # Track URIs already counted for this playlist.
                counted = set()
                for track in playlist["tracks"]:
                    uri = track["track_uri"]
                    if uri in counted:
                        continue
                    self.current_frequencies[uri] += 1
                    counted.add(uri)
Exemple #3
0
    def __load(self):
        """Accumulate per-title and global track counts from the playlist files.

        Each ``.json`` file directly under ``self.mpd`` is read with
        ``mpdutils.read_dataset_json``. Playlists whose ``pid`` is reported by
        ``mpdutils.get_pids_from_dataset_json(self.test)`` are skipped. For
        every remaining playlist the name is passed through ``normalize`` and
        each entry of ``tracks`` adds one to both
        ``self.title_memberships[normalized_name][track_uri]`` and
        ``self.track_frequencies[track_uri]``.
        """
        excluded_pids = mpdutils.get_pids_from_dataset_json(self.test)
        # NOTE(review): if the helper returns a list, the per-playlist
        # membership test below is a linear scan -- confirm against mpdutils.

        for name in listdir(self.mpd):
            if not name.endswith(".json"):
                continue

            print("Loading ntm data from %s" % name)
            for playlist in mpdutils.read_dataset_json(join(self.mpd, name)):
                if playlist["pid"] in excluded_pids:
                    continue

                title = normalize(playlist["name"])
                for track in playlist["tracks"]:
                    uri = track["track_uri"]
                    # The title entry is only touched when the playlist
                    # actually has tracks, so the lookup stays in the loop.
                    self.title_memberships[title][uri] += 1
                    self.track_frequencies[uri] += 1
Exemple #4
0
    def run(self):
        """Produce recommendations for every playlist in ``self.test``.

        Each playlist read from ``self.test`` is passed to
        ``self.model.recommend`` together with ``self.n``; the result is
        stored under the playlist's ``pid``. A running count is printed after
        every 100 playlists. The collected mapping is then handed to
        ``self._export``.
        """
        print("Generating recommendations...")
        recommendations = {}

        test_playlists = mpdutils.read_dataset_json(self.test)
        for processed, playlist in enumerate(test_playlists, start=1):
            recommendations[playlist["pid"]] = self.model.recommend(
                playlist, self.n)
            # Progress indicator.
            if processed % 100 == 0:
                print(processed)

        self._export(recommendations)
        print("Recommendations are produced %s" % self.out)
    def serialize(self):
        """Write one line per looked-up playlist name with its track URIs.

        Playlists from every ``.json`` file directly under ``self.mpd`` are
        grouped by ``LookupNormalization().lookup(name)``; playlists whose
        ``pid`` is reported by
        ``mpdutils.get_pids_from_dataset_json(self.test)`` are left out. The
        track URIs of all playlists sharing a key are concatenated in reading
        order. The result is written to ``self.out`` (UTF-8) as
        ``<key>\\t<uri> <uri> ...\\n`` lines.
        """
        excluded_pids = mpdutils.get_pids_from_dataset_json(self.test)
        normalizer = LookupNormalization()

        uris_by_key = defaultdict(list)
        for name in listdir(self.mpd):
            if not name.endswith(".json"):
                continue

            print("Converting playlists from %s" % name)
            for playlist in mpdutils.read_dataset_json(join(self.mpd, name)):
                if playlist["pid"] in excluded_pids:
                    continue

                key = normalizer.lookup(playlist["name"])
                playlist_uris = [track["track_uri"]
                                 for track in playlist["tracks"]]
                uris_by_key[key].extend(playlist_uris)

        with open(self.out, "w", encoding="utf-8") as sink:
            for key, uris in uris_by_key.items():
                sink.write(key + "\t" + " ".join(uris) + "\n")
    def analyze(self):
        """Print a per-category summary table of the dataset at ``self.test``.

        Nothing happens unless ``self.test`` ends with ``.json``. Otherwise
        the playlists are grouped by their ``category`` value and, for each
        category (in sorted order), a row is produced with the number of
        playlists and the mean of ``num_tracks``, ``num_samples`` and
        ``num_holdouts``. A final ``"overall"`` row gives the same figures
        over all playlists. The table is rendered with ``tabulate`` using
        ``self.headers`` and followed by a blank line.

        Raises:
            statistics.StatisticsError: if the dataset contains no playlists,
                because the overall means are then taken over empty lists.
        """
        if not self.test.endswith(".json"):
            return

        print("Analyzing %s" % self.test)

        fields = ("num_tracks", "num_samples", "num_holdouts")

        per_category = defaultdict(lambda: dict(
            instances=0, num_tracks=[], num_samples=[], num_holdouts=[]))
        for playlist in mpdutils.read_dataset_json(self.test):
            bucket = per_category[playlist["category"]]
            bucket["instances"] += 1
            for field in fields:
                bucket[field].append(playlist[field])

        rows = []
        total = 0
        # Values of every playlist regardless of category, per field.
        pooled = {field: [] for field in fields}

        for category, bucket in sorted(per_category.items()):
            rows.append([
                category, bucket["instances"],
                *(statistics.mean(bucket[field]) for field in fields)
            ])
            total += bucket["instances"]

            for field in fields:
                pooled[field].extend(bucket[field])

        rows.append([
            "overall", total,
            *(statistics.mean(pooled[field]) for field in fields)
        ])

        print(tabulate(rows, headers=self.headers))
        print()
    def _search(self, limit=100):
        """Reserve cold-start playlists until ``self.size`` have been collected.

        The ``.json`` files directly under ``self.mpd`` are visited in random
        order. From each file the playlists are shuffled, filtered by track
        count, and at most ``limit`` of them are considered. With
        ``self.mode == "custom"`` only the lower bound
        ``mpdutils.COLD_START["lower"]`` applies; otherwise the upper bound
        ``mpdutils.COLD_START["upper"]`` applies as well. Every candidate
        accepted by ``self._is_applicable`` is appended to
        ``self.reservations`` with an empty ``tracks`` list and all of its
        tracks, ordered by ``pos``, as ``holdouts``.

        The size check is made only after a whole file has been processed, so
        more than ``self.size`` reservations may be collected.

        Args:
            limit: Maximum number of candidate playlists examined per file.

        Raises:
            IndexError: if all files are consumed before ``self.size``
                reservations have been made.
        """
        low, up = mpdutils.COLD_START["lower"], mpdutils.COLD_START["upper"]

        files = [f for f in listdir(self.mpd) if f.endswith(".json")]
        random.shuffle(files)

        while True:
            if not files:
                # Same exception type a bare pop() on an empty list raises,
                # but with a message that explains what went wrong.
                raise IndexError(
                    "ran out of .json files in %s before reserving %s playlists"
                    % (self.mpd, self.size))

            file = files.pop()
            print("Searching for candidate playlists %s" % file)

            playlists = mpdutils.read_dataset_json(join(self.mpd, file))
            random.shuffle(playlists)

            if self.mode == "custom":
                eligible = [p for p in playlists if low <= p["num_tracks"]]
            else:
                eligible = [p for p in playlists
                            if low <= p["num_tracks"] <= up]

            for c in eligible[:limit]:
                if not self._is_applicable(c):
                    continue

                self.reservations.append(dict(
                    pid=c["pid"],
                    name=c["name"],
                    category=mpdutils.COLD_START["id"],
                    num_tracks=c["num_tracks"],
                    num_samples=mpdutils.COLD_START["seeds"],
                    num_holdouts=c["num_tracks"],
                    tracks=[],
                    holdouts=sorted(c["tracks"], key=lambda x: x["pos"])))

                # _count_tracks adds one per *distinct* track per playlist,
                # so removing a playlist must subtract one per distinct track
                # too; decrementing per entry would over-subtract for
                # playlists that repeat a track. dict.fromkeys de-duplicates
                # while keeping first-occurrence order.
                for track_uri in dict.fromkeys(
                        t["track_uri"] for t in c["tracks"]):
                    self.current_frequencies[track_uri] -= 1

            if len(self.reservations) >= self.size:
                break
 def __load_holdouts(test):
     """Map each playlist ``pid`` in ``test`` to its holdout track URIs.

     ``test`` is passed to ``mpdutils.read_dataset_json``; for every playlist
     read, the ``track_uri`` of each entry in ``holdouts`` is collected in
     order. If a ``pid`` occurs more than once, the last playlist wins.
     """
     return {
         playlist["pid"]: [track["track_uri"]
                           for track in playlist["holdouts"]]
         for playlist in mpdutils.read_dataset_json(test)
     }