def main():
    """Evaluate the test SHS over the MSD, or compute the codes for it.

    Command-line entry point with two modes, selected by ``-codes``:

    * ``-codes PATH n`` given: load precomputed codes, either from a single
      pickle file (``PATH`` is a file) or through ``load_codes`` (otherwise),
      clean them with ``utils.clean_feats``, score them with ``score``,
      save the stats to ``stats.pk`` and log the summary metrics.
    * ``-codes`` omitted: optionally load the PCA/LDA models into the
      module-level ``pca``/``lda`` globals, run ``compute_codes`` on ``-N``
      worker processes and exit through ``sys.exit()`` without scoring.
    """
    # The loaded models are published as module globals.
    # NOTE(review): presumably compute_codes reads them in the worker
    # processes -- confirm against its definition.
    global lda
    global pca

    # Args parser
    parser = argparse.ArgumentParser(
        description="Evaluates the average rank and mean AP for the test SHS "
                    "over the entire MSD",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir", action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile", action="store", default="",
                        help="Pickle to the learned dictionary")
    parser.add_argument("-outdir", action="store", default="msd_codes",
                        help="Output directory for the features")
    parser.add_argument("-N", action="store", default=10, type=int,
                        help="Number of processors to use when computing "
                             "the codes for 1M tracks,")
    parser.add_argument("-lda", action="store", default=None,
                        help="LDA file")
    parser.add_argument("-pca", nargs=2, metavar=('f.pkl', 'n'),
                        default=(None, 0),
                        help="pca model saved in a pickle file, "
                             "use n dimensions")
    parser.add_argument("-codes", action="store", nargs=2, default=[None, 0],
                        dest="codesdir", metavar=("msd_codes/", "n"),
                        help="Path to the folder with all the codes and "
                             "version to evaluate")
    parser.add_argument("-orig_codes", action="store", default=None,
                        dest="origcodesdir",
                        help="Path to the folder with all the codes without "
                             "dimensionality reduction")
    parser.add_argument("-norm", action="store_true", dest="norm",
                        default=False,
                        help="Normalize before LDA/PCA or not")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_test.txt"

    # Sanity checks
    utils.assert_file(maindir)
    utils.assert_file(shsf)
    utils.create_dir(args.outdir)

    # Read all tracks (the cliques themselves are not needed here) and the
    # precomputed test ids.
    _, all_tracks = utils.read_shs_file(shsf)
    track_ids = utils.load_pickle("SHS/track_ids_test.pk")
    clique_ids = utils.load_pickle("SHS/clique_ids_test.pk")

    codesdir = args.codesdir[0]
    if codesdir is None:
        # ---- Mode 2: compute the codes in parallel, then exit ----
        # Parse the PCA dimension once, before any process is spawned, so
        # a malformed value fails early.
        pca_n = int(args.pca[1])

        # Read PCA file
        if args.pca[0] is not None:
            pca = utils.load_pickle(args.pca[0])[pca_n]

        # Read LDA file
        if args.lda is not None:
            lda = utils.load_pickle(args.lda)

        utils.assert_file(args.dictfile)

        # One job description per worker; "N" carries the worker index.
        jobs = [
            {
                "track_ids": track_ids,
                "maindir": maindir,
                "d": args.dictfile,
                "N": n,
                "clique_ids": clique_ids,
                "outdir": args.outdir,
                "origcodesdir": args.origcodesdir,
                "pca_n": pca_n,
                "norm": args.norm,
            }
            for n in range(args.N)
        ]

        # Start computing the codes, making sure the workers are always
        # released: close on success, terminate on error/interruption.
        pool = Pool(processes=args.N)
        try:
            pool.map(compute_codes, jobs)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        # Done!
        logger.info("Codes computation done!")
        logger.info("Took %.2f seconds" % (time.time() - start_time))
        sys.exit()

    # ---- Mode 1: read the precomputed codes and score them ----
    if os.path.isfile(codesdir):
        # A single pickle holding (feats, track_ids, clique_ids).
        loaded = utils.load_pickle(codesdir)
        feats = loaded[0]
        track_ids = loaded[1]
        clique_ids = loaded[2]
    else:
        feats, track_ids, clique_ids = load_codes(
            codesdir, lda_idx=int(args.codesdir[1]))
    logger.info("Codes files read")
    # Function-call form prints the same text on Python 2 and Python 3.
    print(feats.shape)

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids, N=len(all_tracks))

    # TODO: change file name
    utils.save_pickle(stats, "stats.pk")

    # Done
    logger.info('Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%'
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))
def main():
    """Run cover song ID on the training Second Hand Song dataset.

    Command-line entry point. Reads the training SHS file, derives and
    pickles the track/clique ids, obtains the features (computed with
    ``compute_feats`` unless a features pickle is given with ``-f``),
    cleans them with ``utils.clean_feats``, scores them with ``score``,
    saves the stats under ``results/`` and logs the summary metrics.
    """
    # Args parser
    parser = argparse.ArgumentParser(
        description="Cover song ID on the training Second Hand Song dataset",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir", action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile", action="store", default="",
                        help="Pickle to the learned dictionary")
    parser.add_argument("-lda", action="store", nargs=2, default=[None, 0],
                        help="LDA file and version",
                        metavar=('lda.pkl', 'n'))
    parser.add_argument("-codes", action="store", default=None,
                        dest="codesfile",
                        help="Pickle to the features file")
    parser.add_argument("-f", action="store", default="", dest="featfile",
                        help="Pickle to the final features")
    parser.add_argument("-pca", nargs=2, metavar=('f.pkl', 'n'),
                        default=("", 0),
                        help="pca model saved in a pickle file, "
                             "use n dimensions")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_train.txt"
    dictfile = args.dictfile

    # Sanity checks
    utils.assert_file(dictfile)
    utils.assert_file(maindir)
    utils.assert_file(shsf)

    # Fail early on a malformed PCA dimension, whichever feature source is
    # used below.
    pca_n = int(args.pca[1])

    # Read clique ids and track ids. The keys are materialized as a list so
    # they can be pickled regardless of the Python version.
    cliques, all_tracks = utils.read_shs_file(shsf)
    track_ids = list(all_tracks.keys())
    clique_ids = np.asarray(utils.compute_clique_idxs(track_ids, cliques))
    logger.info("Track ids and clique ids read")
    utils.save_pickle(clique_ids, "SHS/clique_ids_train.pk")
    utils.save_pickle(track_ids, "SHS/track_ids_train.pk")

    # Read LDA file (kept under a separate name from the path)
    lda_model = None
    if args.lda[0] is not None:
        lda_model = utils.load_pickle(args.lda[0])
        logger.info("LDA file read")

    # Read codes file
    codes = None
    if args.codesfile is not None:
        codes = utils.load_pickle(args.codesfile)
        logger.info("Codes file read")

    # Compute features if needed. The PCA file and dimension are forwarded
    # to compute_feats; the separate post-hoc PCA projection that used to
    # follow was guarded by "and False" and therefore never ran, so it has
    # been removed.
    if args.featfile == "":
        feats = compute_feats(track_ids, maindir, dictfile,
                              lda_file=lda_model, lda_n=int(args.lda[1]),
                              codes=codes,
                              pca=args.pca[0], pca_n=pca_n)
    else:
        feats = utils.load_pickle(args.featfile)

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids)

    # Save data
    if dictfile == "":
        dictfile = "thierry"  # For saving purposes
    # Make sure the hard-coded output directory exists so the results of
    # the run are not lost when saving.
    if not os.path.isdir("results"):
        os.makedirs("results")
    utils.save_pickle(stats,
                      "results/stats-" + os.path.basename(dictfile) + ".pk")

    # Done
    logger.info('Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%'
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))
# Example no. 3
# 0
def main():
    """Run cover song ID on the training Second Hand Song dataset.

    Command-line entry point. Reads the training SHS file, derives and
    pickles the track/clique ids, obtains the features (computed with
    ``compute_feats`` unless a features pickle is given with ``-f``),
    cleans them with ``utils.clean_feats``, scores them with ``score``,
    saves the stats under ``results/`` and logs the summary metrics.
    """
    # Args parser
    parser = argparse.ArgumentParser(
        description="Cover song ID on the training Second Hand Song dataset",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir",
                        action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile",
                        action="store",
                        default="",
                        help="Pickle to the learned dictionary")
    parser.add_argument("-lda",
                        action="store",
                        nargs=2,
                        default=[None, 0],
                        help="LDA file and version",
                        metavar=('lda.pkl', 'n'))
    parser.add_argument("-codes",
                        action="store",
                        default=None,
                        dest="codesfile",
                        help="Pickle to the features file")
    parser.add_argument("-f",
                        action="store",
                        default="",
                        dest="featfile",
                        help="Pickle to the final features")
    parser.add_argument("-pca",
                        nargs=2,
                        metavar=('f.pkl', 'n'),
                        default=("", 0),
                        help="pca model saved in a pickle file, "
                        "use n dimensions")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_train.txt"
    dictfile = args.dictfile

    # Sanity checks
    utils.assert_file(dictfile)
    utils.assert_file(maindir)
    utils.assert_file(shsf)

    # Fail early on a malformed PCA dimension, whichever feature source is
    # used below.
    pca_dim = int(args.pca[1])

    # Read clique ids and track ids. The keys are materialized as a list so
    # they can be pickled regardless of the Python version.
    cliques, all_tracks = utils.read_shs_file(shsf)
    track_ids = list(all_tracks.keys())
    clique_ids = np.asarray(utils.compute_clique_idxs(track_ids, cliques))
    logger.info("Track ids and clique ids read")
    utils.save_pickle(clique_ids, "SHS/clique_ids_train.pk")
    utils.save_pickle(track_ids, "SHS/track_ids_train.pk")

    # Read LDA file (the loaded object gets its own name, distinct from
    # the path it was read from)
    lda_path = args.lda[0]
    lda_obj = None
    if lda_path is not None:
        lda_obj = utils.load_pickle(lda_path)
        logger.info("LDA file read")

    # Read codes file
    codes_path = args.codesfile
    codes_obj = None
    if codes_path is not None:
        codes_obj = utils.load_pickle(codes_path)
        logger.info("Codes file read")

    # Compute features if needed. The PCA file and dimension are handed to
    # compute_feats; a later stand-alone PCA projection was disabled with
    # "and False" (unreachable), so that dead branch has been dropped.
    if args.featfile == "":
        feats = compute_feats(track_ids,
                              maindir,
                              dictfile,
                              lda_file=lda_obj,
                              lda_n=int(args.lda[1]),
                              codes=codes_obj,
                              pca=args.pca[0],
                              pca_n=pca_dim)
    else:
        feats = utils.load_pickle(args.featfile)

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids)

    # Save data
    if dictfile == "":
        dictfile = "thierry"  # For saving purposes
    # Create the hard-coded output directory when missing so the computed
    # stats can actually be written.
    if not os.path.isdir("results"):
        os.makedirs("results")
    utils.save_pickle(stats,
                      "results/stats-" + os.path.basename(dictfile) + ".pk")

    # Done
    logger.info('Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%'
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))
def main():
    """Evaluate the test SHS over the MSD, or compute the codes for it.

    Command-line entry point with two modes, selected by ``-codes``:

    * ``-codes PATH n`` given: load precomputed codes, either from a single
      pickle file (``PATH`` is a file) or through ``load_codes`` (otherwise),
      clean them with ``utils.clean_feats``, score them with ``score``,
      save the stats to ``stats.pk`` and log the summary metrics.
    * ``-codes`` omitted: optionally load the PCA/LDA models into the
      module-level ``pca``/``lda`` globals, run ``compute_codes`` on ``-N``
      worker processes and exit through ``sys.exit()`` without scoring.
    """
    # The loaded models are stored in module globals.
    # NOTE(review): presumably compute_codes picks them up inside the
    # worker processes -- confirm against its definition.
    global lda
    global pca

    # Args parser
    parser = argparse.ArgumentParser(
        description="Evaluates the average rank and mean AP for the test SHS "
        "over the entire MSD",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir",
                        action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile",
                        action="store",
                        default="",
                        help="Pickle to the learned dictionary")
    parser.add_argument("-outdir",
                        action="store",
                        default="msd_codes",
                        help="Output directory for the features")
    parser.add_argument("-N",
                        action="store",
                        default=10,
                        type=int,
                        help="Number of processors to use when computing "
                        "the codes for 1M tracks,")
    parser.add_argument("-lda", action="store", default=None, help="LDA file")
    parser.add_argument("-pca",
                        nargs=2,
                        metavar=('f.pkl', 'n'),
                        default=(None, 0),
                        help="pca model saved in a pickle file, "
                        "use n dimensions")
    parser.add_argument("-codes",
                        action="store",
                        nargs=2,
                        default=[None, 0],
                        dest="codesdir",
                        metavar=("msd_codes/", "n"),
                        help="Path to the folder with all the codes and "
                        "version to evaluate")
    parser.add_argument("-orig_codes",
                        action="store",
                        default=None,
                        dest="origcodesdir",
                        help="Path to the folder with all the codes without "
                        "dimensionality reduction")
    parser.add_argument("-norm",
                        action="store_true",
                        dest="norm",
                        default=False,
                        help="Normalize before LDA/PCA or not")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_test.txt"

    # Sanity checks
    utils.assert_file(maindir)
    utils.assert_file(shsf)
    utils.create_dir(args.outdir)

    # Read all tracks (the cliques are unused in this script) and the
    # precomputed test ids.
    _, all_tracks = utils.read_shs_file(shsf)
    track_ids = utils.load_pickle("SHS/track_ids_test.pk")
    clique_ids = utils.load_pickle("SHS/clique_ids_test.pk")

    codesdir = args.codesdir[0]
    if codesdir is None:
        # ---- No codes given: compute them in parallel, then exit ----
        # Convert the PCA dimension a single time, before spawning any
        # process, so an invalid value is reported immediately.
        n_pca_dims = int(args.pca[1])

        # Read PCA file
        if args.pca[0] is not None:
            pca = utils.load_pickle(args.pca[0])[n_pca_dims]

        # Read LDA file
        if args.lda is not None:
            lda = utils.load_pickle(args.lda)

        utils.assert_file(args.dictfile)

        # Prepare multiprocessing computation: one argument dictionary per
        # worker, with "N" holding the worker index.
        worker_args = [
            {
                "track_ids": track_ids,
                "maindir": maindir,
                "d": args.dictfile,
                "N": worker_idx,
                "clique_ids": clique_ids,
                "outdir": args.outdir,
                "origcodesdir": args.origcodesdir,
                "pca_n": n_pca_dims,
                "norm": args.norm,
            }
            for worker_idx in range(args.N)
        ]

        # Start computing the codes. The pool is always released: closed
        # after a successful run, terminated if anything goes wrong.
        pool = Pool(processes=args.N)
        try:
            pool.map(compute_codes, worker_args)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        # Done!
        logger.info("Codes computation done!")
        logger.info("Took %.2f seconds" % (time.time() - start_time))
        sys.exit()

    # ---- Codes given: read them and score ----
    if os.path.isfile(codesdir):
        # A single pickle holding (feats, track_ids, clique_ids).
        contents = utils.load_pickle(codesdir)
        feats = contents[0]
        track_ids = contents[1]
        clique_ids = contents[2]
    else:
        feats, track_ids, clique_ids = load_codes(
            codesdir, lda_idx=int(args.codesdir[1]))
    logger.info("Codes files read")
    # Function-call form prints the same text on Python 2 and Python 3.
    print(feats.shape)

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids, N=len(all_tracks))

    # TODO: change file name
    utils.save_pickle(stats, "stats.pk")

    # Done
    logger.info('Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%'
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))