def updateViews(evalAlgos, dName):
    """Derive per-split ("_TRAIN"/"_VAL") result views from stored results.

    The loader registered under ``dName`` is split with ``randomSplit``.
    For each split, every registered dataset whose loader class matches the
    split's class gets a view saver named ``<dataset><suffix>``, filled with
    the stored results restricted to the titles contained in the split.

    Args:
        evalAlgos: Container of algorithm names; results of algorithms found
            in it are also printed via ``printResult``.
        dName: Key into ``DATASET_DIC`` selecting the loader to split.
    """
    train, val = DATASET_DIC[dName].randomSplit(CLF_SPLIT_RATIO, seed=RANDOM_SEED)
    # Same iteration order as the original mapping: validation first.
    for suffix, viewLoader in (("_VAL", val), ("_TRAIN", train)):
        for baseName, baseLoader in DATASET_DIC.items():
            # Only datasets served by the same loader class are relevant.
            if viewLoader.__class__ is not baseLoader.__class__:
                continue
            name = baseName + suffix
            logger.info(
                "----------------------------------------------------------"
            )
            logger.info(f"loader view, name={name}")
            viewSaver = Metrics_Saver(name)
            baseSaver = Metrics_Saver(baseName)
            baseSaver.load(EVAL_RESULT_DIR)
            for aName in baseSaver.algoNames:
                titles = [pair.title for pair in viewLoader.pathPairs]
                metrics, _ = baseSaver.getResult(aName, titles)
                viewSaver.addResult(aName, metrics, titles)
                if aName in evalAlgos:
                    printResult(aName, metrics)
            dumpResult(viewSaver)
def writeMirexOutput(mirexFmt, output):
    """Write a structure annotation as tab-separated ``begin end label`` rows.

    Args:
        mirexFmt: Pair ``(intervals, labels)``; each interval is indexable
            with ``[0]`` (begin) and ``[1]`` (end).
        output: Destination handed to ``np.savetxt``.

    Note:
        Rows are stored with dtype ``"f, f, U16"``, i.e. single-precision
        times and labels limited to 16 characters.
    """
    intervals, labels = mirexFmt
    rowType = np.dtype("f, f, U16")
    rows = [(span[0], span[1], tag) for span, tag in zip(intervals, labels)]
    table = np.array(rows, rowType)
    np.savetxt(output, table, fmt=["%.2f", "%.2f", "%s"], delimiter="\t")
    logger.info(f"mirex format music structure written to {output}")
def convertFileName(basedir, norm="NFD"):
    """Rename every entry of ``basedir`` to its Unicode-normalized name.

    Args:
        basedir: Directory whose direct entries are inspected.
        norm: Normalization form passed to ``unicodedata.normalize``.
    """
    for entry in os.listdir(basedir):
        normalized = unicodedata.normalize(norm, entry)
        src = os.path.join(basedir, entry)
        dst = os.path.join(basedir, normalized)
        # Entries that are already normalized are left alone.
        if src == dst:
            continue
        os.rename(src, dst)
        logger.info(f"rename, src='{src}' dst='{dst}'")
def build(self, preprocessor, force=False, num_workers=NUM_WORKERS):
    """Run ``self.storeFeature`` for every dataset index in a worker pool.

    Args:
        preprocessor: Stored on ``self.preprocessor`` before the pool starts.
        force: Stored on ``self.force_build`` before the pool starts.
        num_workers: Number of processes given to ``Pool``.
    """
    ownName = self.__class__.__name__
    sourceName = self.dataset.__class__.__name__
    logger.info(
        f"building <{ownName}> from <{sourceName}> with transform identifier=<{self.tid}>"
    )
    # Set the state first; presumably storeFeature reads it in the workers.
    self.preprocessor = preprocessor
    self.force_build = force
    with Pool(num_workers) as p:
        N = len(self.dataset)
        # Drain the lazy imap iterator; only the side effects matter.
        for _ in tqdm(p.imap(self.storeFeature, range(N)), total=N):
            pass
def writeAveResults(self, dirname):
    """Write the per-algorithm mean of every metric to a CSV file.

    The file is ``<dirname>/<datasetName>.csv`` with one row per stored
    algorithm: the algorithm name followed by the column-wise mean
    (``axis=0``) of its metrics, under the header ``["algo"] + METRIC_NAMES``.

    The table is assembled in one step, so rows get a running index
    (0, 1, 2, ...) like ``writeFullResults`` produces. Appending row by row
    with ``pd.concat`` left every row with index 0, and concatenating onto
    an empty frame is deprecated in recent pandas.

    Args:
        dirname: Directory that receives the CSV file.
    """
    aveOutputFile = os.path.join(dirname, f"{self.datasetName}.csv")
    columns = ["algo"] + METRIC_NAMES
    rows = [
        # ravel() mirrors the original flattening of the averaged metrics.
        [algoName, *np.ravel(np.mean(metrics, axis=0))]
        for algoName, metrics in zip(self.algoNames, self.metricsList)
    ]
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(aveOutputFile)
    logger.info(f"results written to '{aveOutputFile}'")
def removeResult(self, algoName):
    """Drop every stored result whose algorithm name equals ``algoName``.

    The name, metrics and titles lists are kept aligned by popping the same
    index from each. A log line is emitted once nothing is left to remove,
    including when there was nothing to remove in the first place.
    """
    while algoName in self.algoNames:
        idx = self.algoNames.index(algoName)
        self.algoNames.pop(idx)
        self.metricsList.pop(idx)
        self.titlesList.pop(idx)
    logger.info(f"all {algoName} result removed")
def dump(self, dirname):
    """Pickle the saver's state to ``<dirname>/<datasetName>.pkl``.

    The payload is the tuple
    ``(datasetName, algoNames, metricsList, titlesList)``.

    Returns:
        ``self``, so calls can be chained.
    """
    dumpFile = os.path.join(dirname, f"{self.datasetName}.pkl")
    payload = (
        self.datasetName,
        self.algoNames,
        self.metricsList,
        self.titlesList,
    )
    with open(dumpFile, "wb") as f:
        pickle.dump(payload, f, pickle.HIGHEST_PROTOCOL)
    logger.info(f"saver object written to '{dumpFile}'")
    return self
def loadData(self, dataFile):
    """Load a pickled ``(X, y)`` pair and log how many targets it holds.

    Args:
        dataFile: Path of the pickle file to read.

    Returns:
        The ``(X, y)`` tuple stored in the file.

    Raises:
        FileNotFoundError: If ``dataFile`` does not exist.
    """
    if not os.path.exists(dataFile):
        logger.error(f"build dataset for classifier first")
        raise FileNotFoundError(dataFile)
    # NOTE: unpickling runs arbitrary code; only trusted files belong here.
    with open(dataFile, "rb") as f:
        X, y = pickle.load(f)
    logger.info(f"<{self.__class__.__name__}> load data from '{dataFile}'")
    targetCount = sum(np.array(y) == CLF_TARGET_LABEL)
    logger.info(f"target(chorus)/total={targetCount}/{len(y)}")
    return X, y
def load(self, dirname):
    """Restore the saver's results from ``<dirname>/<datasetName>.pkl``.

    The file is expected to hold the tuple
    ``(datasetName, algoNames, metricsList, titlesList)``. A missing file
    is not an error: a warning is logged and the instance's current lists
    are left as they are. A stored dataset name that differs from
    ``self.datasetName`` is reported with a warning, but the results are
    still used.

    Warnings go through ``logger.warning``; the ``warn`` alias is deprecated
    in the standard ``logging`` module and removed in Python 3.13.

    Args:
        dirname: Directory that contains the pickle file.

    Returns:
        ``self``, so calls can be chained.
    """
    dumpFile = os.path.join(dirname, f"{self.datasetName}.pkl")
    try:
        # NOTE: unpickling runs arbitrary code; only trusted files belong here.
        with open(dumpFile, "rb") as f:
            dname, self.algoNames, self.metricsList, self.titlesList = pickle.load(f)
    except FileNotFoundError:
        logger.warning(
            f"saver object file '{dumpFile}' not found, set to empty")
        return self
    if dname != self.datasetName:
        logger.warning(
            f"old name:<{dname}> != new name:<{self.datasetName}>")
    logger.info(f"saver object loaded from '{dumpFile}'")
    return self
def writeFullResults(self, dirname):
    """Write every per-title metric row of every algorithm to a CSV file.

    The file is ``<dirname>/<datasetName>_full.csv`` with the columns
    ``["title", "algo"] + METRIC_NAMES`` and a running row index.

    Args:
        dirname: Directory that receives the CSV file.
    """
    fullOutputFile = os.path.join(dirname, f"{self.datasetName}_full.csv")
    cols = ["title", "algo"] + METRIC_NAMES
    # Start from the empty, fully-labelled frame so the header is written
    # even when no results are stored.
    frames = [pd.DataFrame(columns=cols)]
    for algoName, metrics, titles in zip(self.algoNames, self.metricsList,
                                         self.titlesList):
        head = np.column_stack([titles, [algoName] * len(titles)])
        headDf = pd.DataFrame(data=head, columns=cols[:2])
        metricDf = pd.DataFrame(data=metrics, columns=cols[2:])
        frames.append(pd.concat([headDf, metricDf], axis=1))
    df = pd.concat(frames, ignore_index=True)
    df.to_csv(fullOutputFile)
    logger.info(f"results written to '{fullOutputFile}'")
def main(force, dataset, algorithm):
    """Evaluate the selected algorithms on the selected dataset loaders.

    Args:
        force: When truthy, algorithms that already have stored results are
            evaluated again and their results rewritten.
        dataset: ``None`` for every entry of ``DATASET_DIC``, ``"auto"`` for
            the loaders returned by ``findLoader(USING_DATASET.__class__)``,
            otherwise a key of ``DATASET_DIC``.
        algorithm: ``None`` for every entry of ``algos``, otherwise a key of
            ``algos``.
    """
    if dataset is None:
        evalLoader = DATASET_DIC
    elif dataset == "auto":
        evalLoader = findLoader(USING_DATASET.__class__)
    else:
        evalLoader = {dataset: DATASET_DIC[dataset]}
    evalAlgos = algos if algorithm is None else {algorithm: algos[algorithm]}

    for dName, loader in evalLoader.items():
        logger.info(
            "-----------------------eval_algos---------------------------")
        logger.info(f"processing datasetloader, name={dName}")
        saver = Metrics_Saver(dName)
        # Previously stored results make the evaluation incremental.
        saver.load(EVAL_RESULT_DIR)
        for aName, algo in evalAlgos.items():
            known = aName in saver.algoNames
            if known and not force:
                # Already evaluated and no re-run requested.
                logger.info(f"!! skipping algo, name={aName}")
                continue
            redo = force and known
            if redo:
                logger.info(f"re-eval algo, name={aName}")
            else:
                logger.info(f"algo, name={aName}")
            if hasattr(algo, "clf"):
                algo.clf.train()
            evaluator = AlgoEvaluator(loader, algo)
            metrics, titles = evaluator()
            printResult(aName, metrics)
            if redo:
                saver.reWriteResult(aName, metrics, titles)
            else:
                saver.addResult(aName, metrics, titles)
            # Persist after each algorithm so finished work is not lost.
            saver.dump(EVAL_RESULT_DIR)
        dumpResult(saver)
        updateViews(evalAlgos, dName)
def writeJsonMetadata(audiofile, predicted, figure, output, gt=None):
    """Write a JSON metadata file describing a prediction.

    The JSON object has the keys ``audio``, ``annotation``,
    ``gt_annotation`` (``null`` when ``gt`` is ``None``) and ``figure``.

    Args:
        audiofile: Value stored under ``"audio"``.
        predicted: ``(intervals, labels)`` pair for the prediction.
        figure: Value stored under ``"figure"``.
        output: Path of the JSON file to write.
        gt: Optional ``(intervals, labels)`` pair for the ground truth.
    """

    def toEntries(mirexFmt):
        # One dict per segment, with times rounded to two decimals.
        return [
            {
                "begin": float("%.2f" % span[0]),
                "end": float("%.2f" % span[1]),
                "label": tag,
            }
            for span, tag in zip(*mirexFmt)
        ]

    predictedEntries = toEntries(predicted)
    gtEntries = None if gt is None else toEntries(gt)
    meta = {
        "audio": audiofile,
        "annotation": predictedEntries,
        "gt_annotation": gtEntries,
        "figure": figure,
    }
    with open(output, "w") as f:
        json.dump(meta, f)
    logger.info(f"metadata written to {output}")
def buildCCDataset(cpath, baseset, getData, force=True):
    """Build the pickled ``(X, y)`` clique-class data file at ``cpath``.

    Nothing happens when the file already exists and ``force`` is falsy.
    Otherwise ``starGetCliqueClassData`` is mapped over
    ``(getData, baseset, index)`` triples in a worker pool, and the returned
    features and labels are flattened into two lists and pickled.

    Args:
        cpath: Output pickle path.
        baseset: Dataset supplying ``len()``; passed along to every task.
        getData: Passed along to every task.
        force: Rebuild even if ``cpath`` exists (the default).
    """
    if os.path.exists(cpath) and not force:
        return
    logger.info(
        f"building clique class Data for <{baseset.__class__.__name__}> @ {cpath}"
    )
    with Pool(NUM_WORKERS) as p:
        N = len(baseset)
        tasks = zip([getData] * N, [baseset] * N, range(N))
        results = list(tqdm(p.imap(starGetCliqueClassData, tasks), total=N))
    X, y = [], []
    for features, clabels in results:
        X.extend(features)
        y.extend(clabels)
    with open(cpath, "wb") as f:
        pickle.dump((X, y), f)
def saveViolinPlot(self, dirname, plotMetric=PLOT_METRIC_FIELDS, order=None):
    """Save one violin plot per selected metric to an SVG file.

    The figure is written to ``<dirname>/<datasetName>.svg`` as a grid of
    subplots laid out like ``plotMetric``. Each subplot shows one violin
    per algorithm for that metric.

    Changes from the earlier implementation:
      * ``savefig`` no longer receives ``quality=100``. That keyword only
        ever applied to JPEG output and newer Matplotlib releases reject it.
      * The figure is closed after saving so repeated calls do not
        accumulate open figures.
      * ``squeeze=False`` keeps ``axes`` an array, so a 1x1 ``plotMetric``
        grid works as well.

    Args:
        dirname: Directory that receives the SVG file.
        plotMetric: 2-D nested list of metric names (entries of
            ``METRIC_NAMES``) giving the subplot layout.
        order: Optional sequence of algorithm names fixing the violin
            order; names without stored results are skipped.
    """
    matplotlib.use("Agg")
    pltOutputFile = os.path.join(dirname, f"{self.datasetName}.svg")
    rows, cols = len(plotMetric), len(plotMetric[0])
    axisNames = np.array(plotMetric).flatten()
    metricsList = np.array(self.metricsList)
    if order is not None:
        algoNames = [name for name in order if name in self.algoNames]
        metricsList = metricsList[
            [self.algoNames.index(aName) for aName in algoNames], :, :]
    else:
        algoNames = self.algoNames
    # Keep only the metric columns that are actually plotted.
    metricsFieldSelector = np.array(
        [METRIC_NAMES.index(name) for name in axisNames])
    metricsList = metricsList[:, :, metricsFieldSelector]

    pos = np.arange(len(algoNames), dtype=int) + 1
    fig, axes = plt.subplots(nrows=rows,
                             ncols=cols,
                             figsize=(cols * 4 * len(algoNames) / 10,
                                      rows * 4),
                             squeeze=False)
    try:
        flatAxes = axes.flatten()
        for i, axis in enumerate(flatAxes):
            data = [metrics[:, i] for metrics in metricsList]
            axis.violinplot(data, pos, showmeans=True, showextrema=True)
            axis.set_title(axisNames[i])
            plt.setp(axis.get_xticklabels(), rotation=45)
        plt.setp(flatAxes, xticks=pos, xticklabels=algoNames)
        fig.tight_layout()
        fig.subplots_adjust(top=0.9)
        fig.savefig(pltOutputFile)
    finally:
        plt.close(fig)
    logger.info(f"violin plot written to '{pltOutputFile}'")
def testCCDataset(method):
    """Train the chorus classifier for ``method`` and log its validation score.

    When the trained estimator exposes ``feature_importances_``, the
    importances (scaled by the number of features and sorted ascending) are
    logged as well.

    Args:
        method: Key into the train/validation data-file tables.
    """
    logger.info(f"testCC method:{method}")
    trainPath = CHORUS_CLASSIFIER_TRAIN_DATA_FILE[method]
    valPath = CHORUS_CLASSIFIER_VAL_DATA_FILE[method]
    wrapper = ChorusClassifier(trainPath)
    wrapper.train()
    clf = wrapper.clf
    Xt, yt = wrapper.loadData(valPath)
    with np.printoptions(precision=3, suppress=True):
        if hasattr(clf, "feature_importances_"):
            ranked = sorted(zip(clf.feature_importances_, wrapper.feature_names))
            report = [
                f"{s}={x*len(wrapper.feature_names):.3f}" for x, s in ranked
            ]
            logger.info(f"feature importance, {report}")
        logger.info(f"test classifier on valid data, score={clf.score(Xt, yt):.3f}")
def printResult(aName, metrics):
    """Log the metric names and the column-wise mean of ``metrics``.

    Args:
        aName: Algorithm name shown in the log header.
        metrics: Array-like averaged along ``axis=0``.
    """
    logger.info(f"average result, algoName={aName}:")
    logger.info(f"metricNames={METRIC_NAMES}")
    averaged = np.mean(metrics, axis=0)
    logger.info(f"metric={averaged}")
# main(audiofiles, outputdir, metaoutputdir, algo, force, workers)
#
# Prediction entry point. As far as the statements below show, it:
#   1. wraps `audiofiles` in a DummyDataset and runs the ExtractMel,
#      GenerateSSM and ExtractCliques transforms through
#      Preprocess_Dataset.build (honouring `force` and `workers`);
#   2. picks a predictor with switchPred(algo); for "mixed"/"highlighter" the
#      structure predictor is an AlgoSeqRecur built from
#      USE_MODEL_DIC["seqRecur"], otherwise it is the predictor itself;
#   3. for each entry of ddataset.pathPairs, computes features, cliques and a
#      chorusDetection result, post-processes it for algo "multi"/"single",
#      plots three matrices, and writes:
#        - <outputdir>/<name>.txt            (writeMirexOutput)
#        - <cwd>/data/test/predict_<name>.svg (plt.savefig)
#        - <metaoutputdir>/<name>_meta.json  (writeJsonMetadata)
#      and calls plt.show() when DEBUG is set.
#
# NOTE(review): line breaks and indentation of this function were lost, so
# the nesting cannot be read off the text. In particular, confirm how far
# the per-file loop extends and whether removeNumber/mergeIntervals belong
# inside `if algo not in ["single", "multi"]` before reformatting this code.
# NOTE(review): the figure path is relative to the current working directory
# ("data/test/"); presumably that directory must already exist. TODO confirm.
def main(audiofiles, outputdir, metaoutputdir, algo, force, workers): logger.debug(f"algo={algo}") logger.info(f"preprocess to generate features") ddataset = DummyDataset(audiofiles) transforms = [ ExtractMel(), GenerateSSM(dataset=ddataset), ExtractCliques(dataset=ddataset), ] for tf in transforms: preDataset = Preprocess_Dataset(tf.identifier, ddataset) preDataset.build(tf.preprocessor, force=force, num_workers=workers) predictor = switchPred(algo) predictorStruct = (predictor if algo not in ["mixed", "highlighter"] else AlgoSeqRecur(trainFile=USE_MODEL_DIC["seqRecur"])) for i, pair in enumerate(ddataset.pathPairs): audioFileName, audiofile, _ = pair audiofile = os.path.abspath(audiofile) output = os.path.join(outputdir, audioFileName + ".txt") metaOutput = os.path.join(metaoutputdir, audioFileName + "_meta.json") ssm_f, mels_f = getFeatures(ddataset, i) cliques = predictorStruct._process(ddataset, i, ssm_f) mirexFmt = chorusDetection(cliques, ssm_f[0], mels_f, predictorStruct.clf) if algo == "multi": mirexFmt = tuneIntervals(mirexFmt, mels_f, chorusDur=CHORUS_DURATION, window=TUNE_WINDOW) elif algo == "single": mirexFmt = maxOverlap(mirexFmt, chorusDur=CHORUS_DURATION_SINGLE, centering=False) mirexFmt = tuneIntervals( mirexFmt, mels_f, chorusDur=CHORUS_DURATION_SINGLE, window=TUNE_WINDOW, ) # plot mats tf = ExtractCliques(dataset=ddataset) origCliques = Preprocess_Dataset(tf.identifier, ddataset, transform=tf.transform)[i]["cliques"] olssm = getLabeledSSM(origCliques, ssm_f[1].shape[-1]) lssm = getLabeledSSM(cliques, ssm_f[1].shape[-1]) olssm = drawSegments(mirexFmt, mirexFmt, olssm, ssm_f[0]) mats = np.array([ssm_f[1], lssm, olssm]) titles = ["fused SSM", "result structure", "low level structure"] plotMats(mats, titles, show=False) # write output and viewer metadata if algo not in ["single", "multi"]: mirexFmt = predictor(ddataset, i) mirexFmt = removeNumber(mirexFmt) mirexFmt = mergeIntervals(mirexFmt) writeMirexOutput(mirexFmt, output) figurePath = 
os.path.join(os.getcwd(), f"data/test/predict_{audioFileName}.svg") plt.savefig(figurePath, bbox_inches="tight") writeJsonMetadata(audiofile, mergeIntervals(mirexFmt), figurePath, metaOutput) if DEBUG: plt.show()