# Shared imports for the two scripts below. `utils` (pickling, SHS parsing and
# other helpers) and `analyze_stats` are project-local modules; the alias used
# for `anst` and the logger setup are assumptions. `load_codes`,
# `compute_codes`, `compute_feats`, and `score` are defined elsewhere in these
# scripts.
import argparse
import logging
import os
import sys
import time
from multiprocessing import Pool

import numpy as np

import utils  # project-local helpers
import analyze_stats as anst  # project-local rank/MAP statistics (assumed name)

logger = logging.getLogger("cover_id")


# main() of the SHS-test evaluation script: computes codes for the full MSD
# in parallel, or scores previously computed codes.
def main():
    # Args parser
    parser = argparse.ArgumentParser(
        description="Evaluates the average rank and mean AP for the test SHS "
                    "over the entire MSD",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir", action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile", action="store", default="",
                        help="Pickle file with the learned dictionary")
    parser.add_argument("-outdir", action="store", default="msd_codes",
                        help="Output directory for the features")
    parser.add_argument("-N", action="store", default=10, type=int,
                        help="Number of processors to use when computing "
                             "the codes for 1M tracks")
    parser.add_argument("-lda", action="store", default=None,
                        help="LDA file")
    parser.add_argument("-pca", nargs=2, metavar=('f.pkl', 'n'),
                        default=(None, 0),
                        help="PCA model saved in a pickle file; "
                             "use n dimensions")
    parser.add_argument("-codes", action="store", nargs=2, default=[None, 0],
                        dest="codesdir", metavar=("msd_codes/", "n"),
                        help="Path to the folder with all the codes and "
                             "version to evaluate")
    parser.add_argument("-orig_codes", action="store", default=None,
                        dest="origcodesdir",
                        help="Path to the folder with all the codes without "
                             "dimensionality reduction")
    parser.add_argument("-norm", action="store_true", dest="norm",
                        default=False,
                        help="Normalize before LDA/PCA or not")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_test.txt"

    global lda
    global pca

    # Sanity checks
    utils.assert_file(maindir)
    utils.assert_file(shsf)
    utils.create_dir(args.outdir)

    # Read cliques and all tracks
    cliques, all_tracks = utils.read_shs_file(shsf)
    track_ids = utils.load_pickle("SHS/track_ids_test.pk")
    clique_ids = utils.load_pickle("SHS/clique_ids_test.pk")

    # Read codes file (either a single pickle or a folder of code files)
    codesdir = args.codesdir[0]
    if codesdir is not None:
        if os.path.isfile(codesdir):
            c = utils.load_pickle(codesdir)
            feats = c[0]
            track_ids = c[1]
            clique_ids = c[2]
        else:
            feats, track_ids, clique_ids = load_codes(
                codesdir, lda_idx=int(args.codesdir[1]))
        logger.info("Codes files read")
        logger.info("Feats shape: %s" % str(feats.shape))
    else:
        # Read PCA file
        if args.pca[0] is not None:
            pca = utils.load_pickle(args.pca[0])[int(args.pca[1])]

        # Read LDA file
        lda_file = args.lda
        if lda_file is not None:
            lda = utils.load_pickle(lda_file)

        utils.assert_file(args.dictfile)

        # Prepare the multiprocessing computation: one argument dict per
        # worker, each identified by its index n
        pool_args = []
        pool = Pool(processes=args.N)
        for n in xrange(args.N):
            arg = {}
            arg["track_ids"] = track_ids
            arg["maindir"] = maindir
            arg["d"] = args.dictfile
            arg["N"] = n
            arg["clique_ids"] = clique_ids
            arg["outdir"] = args.outdir
            arg["origcodesdir"] = args.origcodesdir
            arg["pca_n"] = int(args.pca[1])
            arg["norm"] = args.norm
            pool_args.append(arg)

        # Start computing the codes
        pool.map(compute_codes, pool_args)

        # Done!
        logger.info("Codes computation done!")
        logger.info("Took %.2f seconds" % (time.time() - start_time))
        sys.exit()

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids, N=len(all_tracks))

    # TODO: change file name
    utils.save_pickle(stats, "stats.pk")

    # Done
    logger.info("Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%"
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))
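
# Entry-point guard, assuming this main() lives in its own script file:
if __name__ == "__main__":
    main()

# A minimal usage sketch for the script above. The script name
# (`cover_id_msd.py`) and all paths are hypothetical; adjust them to your
# setup. The first call computes the codes over the full MSD with 16 worker
# processes and exits; the second scores a previously computed codes folder,
# evaluating version 0 of the codes:
#
#   python cover_id_msd.py /path/to/MSD -dictfile dict.pk -N 16 -outdir msd_codes
#   python cover_id_msd.py /path/to/MSD -codes msd_codes/ 0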
# main() of the training-set script: cover song ID on the SHS training set.
def main():
    # Args parser
    parser = argparse.ArgumentParser(
        description="Cover song ID on the training Second Hand Song dataset",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("msd_dir", action="store",
                        help="Million Song Dataset main directory")
    parser.add_argument("-dictfile", action="store", default="",
                        help="Pickle file with the learned dictionary")
    parser.add_argument("-lda", action="store", nargs=2, default=[None, 0],
                        help="LDA file and version", metavar=('lda.pkl', 'n'))
    parser.add_argument("-codes", action="store", default=None,
                        dest="codesfile",
                        help="Pickle file with the codes")
    parser.add_argument("-f", action="store", default="", dest="featfile",
                        help="Pickle file with the final features")
    parser.add_argument("-pca", nargs=2, metavar=('f.pkl', 'n'),
                        default=("", 0),
                        help="PCA model saved in a pickle file; "
                             "use n dimensions")

    args = parser.parse_args()
    start_time = time.time()
    maindir = args.msd_dir
    shsf = "SHS/shs_dataset_train.txt"
    dictfile = args.dictfile

    # Sanity checks
    utils.assert_file(dictfile)
    utils.assert_file(maindir)
    utils.assert_file(shsf)

    # Read clique ids and track ids
    cliques, all_tracks = utils.read_shs_file(shsf)
    track_ids = all_tracks.keys()
    clique_ids = np.asarray(utils.compute_clique_idxs(track_ids, cliques))
    logger.info("Track ids and clique ids read")
    utils.save_pickle(clique_ids, "SHS/clique_ids_train.pk")
    utils.save_pickle(track_ids, "SHS/track_ids_train.pk")

    # Read LDA file
    lda_file = args.lda[0]
    if lda_file is not None:
        lda_file = utils.load_pickle(lda_file)
        logger.info("LDA file read")

    # Read codes file
    codesfile = args.codesfile
    if codesfile is not None:
        codesfile = utils.load_pickle(codesfile)
        logger.info("Codes file read")

    # Compute features if needed
    if args.featfile == "":
        feats = compute_feats(track_ids, maindir, dictfile,
                              lda_file=lda_file, lda_n=int(args.lda[1]),
                              codes=codesfile, pca=args.pca[0],
                              pca_n=int(args.pca[1]))
    else:
        feats = utils.load_pickle(args.featfile)

    # Apply PCA. NOTE: this branch is deliberately disabled with "and False",
    # presumably because compute_feats() above is already handed the PCA
    # model, so applying it again here would reduce the features twice.
    pcafile = args.pca[0]
    pcadim = int(args.pca[1])
    if pcafile != "" and False:
        trainedpca = utils.load_pickle(pcafile)
        assert pcadim > 0
        logger.info("Trained PCA loaded")
        pcafeats = np.zeros((feats.shape[0], pcadim))
        for i, feat in enumerate(feats):
            pcafeats[i] = trainedpca.apply_newdata(feat, ndims=pcadim)
        feats = pcafeats

    # Scores
    feats, clique_ids, track_ids = utils.clean_feats(feats, clique_ids,
                                                     track_ids)
    stats = score(feats, clique_ids)

    # Save data
    if dictfile == "":
        dictfile = "thierry"  # For saving purposes
    utils.save_pickle(stats,
                      "results/stats-" + os.path.basename(dictfile) + ".pk")

    # Done
    logger.info("Average rank per track: %.2f, clique: %.2f, MAP: %.2f%%"
                % (anst.average_rank_per_track(stats),
                   anst.average_rank_per_clique(stats),
                   anst.mean_average_precision(stats) * 100))
    logger.info("Done! Took %.2f seconds" % (time.time() - start_time))
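
# Entry-point guard, again assuming a separate script file:
if __name__ == "__main__":
    main()

# A minimal usage sketch for the training-set script above (the script name
# `cover_id_train.py` and paths are hypothetical). This evaluates the SHS
# training set with a learned dictionary and version 0 of an LDA model:
#
#   python cover_id_train.py /path/to/MSD -dictfile dict.pk -lda lda.pkl 0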