Example #1
def testModel(experiment, testRun, setTarg):
    # print("Testing model ...")
    dataset = DataReader(experiment.data["path"])
    dataset.setDatasetClassic("test", experiment.data["feature"],
                              experiment.data["annotation"])
    if setTarg == "MeanStd": dataset.setTargetMeanStd()
    inp, tar = dataset[0]
    experiment.inputDim = inp.shape[1]
    experiment.outputDim = tar.shape[1]

    firstID1 = list(dataset.dataPart.keys())[0]
    firstID2 = list(dataset.dataPart[firstID1]["annotations"])[0]
    headers = dataset.dataPart[firstID1]["annotations"][firstID2]["headers"]
    if setTarg == "MeanStd": headers = ["mean", "std"]
    # print(headers)
    wrapper = getWrapper(experiment, getBest=True)
    modelOutPath = os.path.join(wrapper.savePath, "outputs")
    if testRun: dataset = keepOne(dataset)
    IDs = dataset.dataPart.keys()
    for key in experiment.evaluation.keys():
        metrics = {}
        for idx, ID in enumerate(IDs):
            savePath = os.path.join(modelOutPath, ID + ".csv")
            outputs = pd.read_csv(savePath).to_numpy()
            targets = dataset.targetReader(ID)
            # print(targets.shape, outputs.shape)
            if idx == 0:
                results = [[] for _ in range(targets.shape[1])]
                # bestresult = 0; bestID = "0"
                for dim in range(targets.shape[1]):
                    metrics[headers[dim]] = {}
            for dim in range(targets.shape[1]):
                output = outputs[:, dim]
                target = targets[:, dim]
                # Align output length to target length: pad with the last value or truncate.
                while target.shape[0] > output.shape[0]:
                    output = np.append(output, output[-1])
                if target.shape[0] < output.shape[0]:
                    output = output[:target.shape[0]]
                result = getMetric(target, output, metric=key)
                # if result > bestresult: bestresult=result; bestID = ID
                # print(ID, result, len(output))
                results[dim].append(result)
            printProgressBar(idx + 1,
                             len(IDs),
                             prefix='Testing model with ' + key + ':',
                             suffix='',
                             length="fit")
        for dim in range(targets.shape[1]):
            metrics[headers[dim]]['mean'] = np.mean(np.array(results[dim]))
            metrics[headers[dim]]['std'] = np.std(np.array(results[dim]))
        experiment.evaluation[key] = metrics
    return experiment
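Every example on this page reports progress through a printProgressBar helper that is not shown. A minimal sketch of such a helper, based on the common console progress-bar recipe; the length="fit" mode is a hypothetical reconstruction that sizes the bar to the terminal width, since the examples pass length="fit":

import shutil
import sys

def printProgressBar(iteration, total, prefix='', suffix='', length=30, fill='#'):
    # Sketch only; the helper actually used by these examples is not shown.
    # length="fit" (hypothetical) sizes the bar to the current terminal width.
    if length == "fit":
        cols = shutil.get_terminal_size().columns
        length = max(10, cols - len(prefix) - len(suffix) - 12)
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    percent = 100.0 * iteration / float(total)
    sys.stdout.write('\r%s |%s| %.1f%% %s' % (prefix, bar, percent, suffix))
    if iteration == total:
        sys.stdout.write('\n')  # move to a fresh line once complete
    sys.stdout.flush()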
Example #2
def main(featsList, jsonPath):
    """
    Standardize the listed features of a dataset and write references to the standardized copies into its JSON file.


    Example
    ----------
    python Standardize.py -f MFCC opensmile_ComParE_2016 MFB -j "/mnt/HD-Storage/Databases/RECOLA_46_P_S/data.json"
    python Standardize.py -f MFB -j "/mnt/HD-Storage/Datasets/Recola_46/data.json"
    python Standardize.py -f MFB -j "/mnt/HD-Storage/Datasets/AlloSat/data.json"
    python Standardize.py -f MFB -j "/mnt/HD-Storage/Datasets/Recola_46_S/data.json"
    """
    for featName in featsList:
        samples = loadFromJson(jsonPath)
        trainFilePaths = []
        print("feature:", featName)
        for i, ID in enumerate(samples.keys()):
            sample = samples[ID]
            featRef = sample["features"][featName]
            if sample["partition"] == "train":
                path = featRef["path"]
                fullPath = os.path.join(os.path.split(jsonPath)[0], path)
                trainFilePaths.append(fullPath)
        mean, std = getMeanStd(trainFilePaths)

        for i, ID in enumerate(samples.keys()):
            sample = samples[ID]
            featRef = sample["features"][featName]
            newfeatName = featName + "_standardized"
            path = featRef["path"]
            fullPath = os.path.join(os.path.split(jsonPath)[0], path)
            fileOutPath = os.path.join(
                os.path.split(jsonPath)[0],
                path.replace(featName, newfeatName))
            standardize(fullPath, fileOutPath, mean, std)
            featsDict = getFeatsDict(newfeatName, featRef["genre"],
                                     featRef["dimension"],
                                     path.replace(featName, newfeatName))
            samples[ID]["features"][featsDict["ID"]] = featsDict
            printProgressBar(i + 1,
                             len(samples),
                             prefix='Standardizing ' + featName + ' features',
                             suffix='Complete',
                             length="fit")

        with open(jsonPath, 'w') as jsonFile:
            json.dump(samples, jsonFile, indent=4, ensure_ascii=False)
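The standardize helper used above is not shown. A plausible sketch, assuming the feature CSVs follow the feat_<dim> column convention that getMeanStd (Example #7 below) relies on; eps is an added safeguard against zero variance:

import os
import pandas as pd

def standardize(inPath, outPath, mean, std, eps=1e-8):
    # Sketch: z-score each feat_<dim> column with the training-set statistics,
    # leaving any non-feature columns (e.g. timestamps) untouched.
    df = pd.read_csv(inPath)
    featCols = [c for c in df.columns if c.startswith("feat_")]
    for dim, col in enumerate(featCols):
        df[col] = (df[col] - mean[dim]) / (std[dim] + eps)
    os.makedirs(os.path.dirname(outPath), exist_ok=True)
    df.to_csv(outPath, index=False)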
Example #3
def main(inputPath, newPath):
    """
    Transform all audio files under a folder into WAV PCM, 16 kHz, 16-bit, signed-integer.

    Example:
        python Preprocess.py --input "../../Data/Wavs" --output "../../Data/WavsProcessed"
        python Preprocess.py --input "/mnt/HD-Storage/Databases/AlloSat_corpus/audio" --output "/mnt/HD-Storage/Datasets/AlloSat/Wavs"
    """
    path = os.path.join(inputPath, "**")  # e.g. "../PMDOM2FR/**/"
    theFiles = get_files_in_path(path)

    for i, filePath in enumerate(theFiles):
        # Making wav files
        fileNewPath = filePath.replace(inputPath, newPath)
        makeDirFor(fileNewPath)
        os.system('sox "' + filePath + '" -r 16000 -c 1 -b 16 -e signed-integer "' + fileNewPath + '"')
        printProgressBar(i + 1, len(theFiles), prefix = 'Transforming Files:', suffix = 'Complete')
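Even with the quoting added above, shelling out through os.system stays fragile. A sketch of the same sox call through subprocess, which passes the paths as an argument list and needs no shell quoting at all:

import subprocess

subprocess.run(
    ["sox", filePath, "-r", "16000", "-c", "1", "-b", "16",
     "-e", "signed-integer", fileNewPath],
    check=True,  # raise if sox fails instead of continuing silently
)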
Example #4
def writeOutForRegression(experiment, testRun, seed=0):
	dataset = DataReader(experiment.data["path"])
	dataset.setDatasetFeatOnly("test", experiment.data["feature"])
	if testRun: dataset = keepOne(dataset)
	dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)
	wrapper = getWrapper(experiment, seed=seed, getBest=True)
	modelOutPath = os.path.join(wrapper.savePath, "outputs")
	if not os.path.exists(modelOutPath): os.makedirs(modelOutPath)
	for idx, (ID, feat) in enumerate(dataloader):
		output = wrapper.forwardModel(feat)
		output = output.detach().cpu().numpy()
		# print(ID, feat.shape, output.shape)
		savePath = os.path.join(modelOutPath, ID[0]+".csv")
		headers = ["output_"+str(i) for i in range(output.shape[2])]
		df = pd.DataFrame(output[0], columns = headers)
		df.to_csv(savePath, index=False)
		printProgressBar(idx+1, len(dataloader), prefix = 'Writing outputs:', suffix = '', length = "fit")
Example #5
def writeOutForClassification(experiment, testRun, seed=0):
	dataset = DataReader(experiment.data["path"])
	dataset.setDatasetFeatOnly("test", experiment.data["feature"])
	if testRun: dataset = keepOne(dataset)
	dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)
	wrapper = getWrapper(experiment, seed=seed, getBest=True)
	modelOutPath = os.path.join(wrapper.savePath, "outputs")
	if not os.path.exists(modelOutPath): os.makedirs(modelOutPath)
	headers = []
	outputs = []
	for idx, (ID, feat) in enumerate(dataloader):
		printProgressBar(idx+1, len(dataloader), prefix = 'Writing outputs:', suffix = '', length = "fit")
		output = wrapper.forwardModel(feat)
		output = output.detach().cpu().numpy()
		# print(ID, feat.shape, output.shape)
		outputs = output if len(outputs)==0 else np.concatenate((outputs,output))
		headers.append(ID[0])
		# print(len(headers), outputs.shape)
	savePath = os.path.join(modelOutPath, "outputs.csv")
	df = pd.DataFrame(np.transpose(outputs), columns = headers)
	df.to_csv(savePath, index=False)
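The transpose means outputs.csv ends up with one column per test-file ID and one row per class score, which is exactly how testClassification (Example #10 below) reads it back via outputsCSV[ID]. A quick illustration with hypothetical IDs:

import numpy as np
import pandas as pd

# Two files, three class scores each -> one column per file ID.
outputs = np.array([[0.1, 0.7, 0.2],
                    [0.6, 0.3, 0.1]])
df = pd.DataFrame(np.transpose(outputs), columns=["file_a", "file_b"])
print(df["file_a"].to_numpy())        # [0.1 0.7 0.2]
print(int(np.argmax(df["file_b"])))   # 0 -> predicted class for file_b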
Example #6
def trainModel(self,
               datasetTrain,
               datasetDev,
               batchSize=1,
               maxEpoch=200,
               loadBefore=True,
               tolerance=15,
               minForTolerance=15):
    if loadBefore: self.loadCheckpoint()
    trainDataloader = DataLoader(dataset=datasetTrain,
                                 batch_size=batchSize,
                                 shuffle=True)
    devDataloader = DataLoader(dataset=datasetDev,
                               batch_size=batchSize,
                               shuffle=False)
    while self.currentEpoch <= maxEpoch:
        if self.noMoreTrain:
            if self.printLvl > 0:
                print("Early stopping has been achieved!")
            break
        self.trainEpoch(trainDataloader)
        devLoss = self.evaluateModel(devDataloader)
        self.modelStates[self.currentEpoch] = copy.deepcopy(
            self.model.state_dict())
        self.epochDevLosses.append(devLoss)
        if self.printLvl > 1:
            printProgressBar(self.currentEpoch,
                             maxEpoch,
                             prefix='Training model:',
                             suffix='| epoch loss: ' + str(devLoss),
                             length="fit")
            # print("loss", self.currentEpoch, devLoss)
        self.currentEpoch += 1
        # --- Early Stopping ---
        if (self.currentEpoch - self.getBestEpochIdx() >=
                tolerance) and self.currentEpoch > minForTolerance:
            self.noMoreTrain = True
        self.saveCheckpoint()
    self.saveLogToCSV()
    if self.printLvl > 0: print("Training the model has been finished!")
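getBestEpochIdx is not shown. Since epochDevLosses collects one dev loss per epoch and early stopping triggers when no improvement follows the best epoch, a plausible sketch (as a method on the same wrapper class) is simply the argmin, assuming lower dev loss is better:

import numpy as np

def getBestEpochIdx(self):
    # Sketch: the best epoch is the one with the lowest recorded dev loss.
    # Assumes self.epochDevLosses[i] holds the dev loss after epoch i.
    return int(np.argmin(np.array(self.epochDevLosses)))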
Example #7
def getMeanStd(filePaths):
    # filePaths = glob.glob(os.path.join(csvInFolder, "**", fileNamesWith+"*.csv"), recursive=True)
    df = pd.read_csv(filePaths[0])
    dims = len([aux for aux in df.keys() if "feat_" in aux])
    myFeats = [[] for _ in range(dims)]
    # print("dim", dim)
    for f, filePath in enumerate(filePaths):
        df = pd.read_csv(filePath)
        for dim in range(dims):
            keyWord = "feat_" + str(dim)
            col = list(df[keyWord])
            myFeats[dim] += col
        printProgressBar(f + 1,
                         len(filePaths),
                         prefix='Calculating Mean and Std:',
                         suffix='Complete',
                         length="fit")

    allFeats = np.array(myFeats)
    mean = np.mean(allFeats, axis=1)
    std = np.std(allFeats, axis=1)
    return mean, std
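Accumulating Python lists column by column works but is slow and memory-hungry for long recordings. A sketch of an equivalent computation that stacks numpy arrays instead (same feat_ convention, same per-dimension result):

import numpy as np
import pandas as pd

def getMeanStdVectorized(filePaths):
    # Sketch: stack the feat_ columns of every file, then reduce once.
    featCols, frames = None, []
    for filePath in filePaths:
        df = pd.read_csv(filePath)
        if featCols is None:
            featCols = [c for c in df.columns if c.startswith("feat_")]
        frames.append(df[featCols].to_numpy())
    allFeats = np.concatenate(frames, axis=0)  # (totalFrames, dims)
    return np.mean(allFeats, axis=0), np.std(allFeats, axis=0)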
Example #8
def main():
    Datasets_Path = "/mnt/HD-Storage/Datasets"
    main_path = os.path.join(Datasets_Path, "AlloSat")
    wavs_path = os.path.join(main_path, "Wavs")
    jsonPath = os.path.join(main_path, "data.json")

    path = os.path.join(wavs_path, "**", "*.wav")
    filesPaths = glob.glob(path, recursive=True)

    allDics = {}

    trainList, devList, testList = getParts(os.path.join(main_path, "Parts"))

    for i, filePath in enumerate(filesPaths):
        baseName = os.path.basename(filePath)[:-4]
        fileDict = AudioSample()

        partition = ""
        if baseName in trainList: partition = "train"
        if baseName in devList: partition = "dev"
        if baseName in testList: partition = "test"

        fileDict.setParams(baseName, filePath, partition)
        # fileDict.features = getFeatures(main_path, baseName)
        fileDict.annotations = getAnnots(main_path, baseName)
        fileDict.speaker_info.language = "French"

        dic = classToDic(fileDict.__dict__)
        # dic = changePaths(dic, main_path, ".")
        dic = localizePaths(dic, main_path)
        allDics[baseName] = dic
        printProgressBar(i + 1,
                         len(filesPaths),
                         prefix='Processing Files:',
                         suffix='Complete')

    with open(jsonPath, 'w') as fp:
        json.dump(allDics, fp, indent=4, ensure_ascii=False)
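localizePaths is not shown; judging by the commented-out changePaths(dic, main_path, ".") alternative, it presumably rewrites the absolute paths stored in the dictionary so they are relative to the dataset root. A sketch under that assumption:

import os

def localizePaths(dic, rootPath):
    # Sketch: make every path string under rootPath relative to it,
    # recursing into nested dicts (an assumed behavior).
    for key, value in dic.items():
        if isinstance(value, dict):
            dic[key] = localizePaths(value, rootPath)
        elif isinstance(value, str) and value.startswith(rootPath):
            dic[key] = os.path.relpath(value, rootPath)
    return dic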
Example #9
def main(featsFolder, jsonPath):
    """
    Add mel-frequency filterbank features to a given dataset and write their references to a JSON file.


    Example
    ----------
    python MelFilterBank.py -f "MFB" -j "/mnt/HD-Storage/Datasets/Recola_46/data.json"
    python MelFilterBank.py -f "MFB" -j "/mnt/HD-Storage/Datasets/AlloSat/data.json"
    python MelFilterBank.py -f "MFB" -j "/mnt/HD-Storage/Datasets/Recola_46_S/data.json"
    """
    samples = loadFromJson(jsonPath)
    for i, ID in enumerate(samples.keys()):
        sample = samples[ID]
        wavePath = sample["path"]
        wavsFolder = wavePath.split(os.sep)[0]
        waveFullPath = os.path.join(os.path.split(jsonPath)[0], wavePath)
        featsLocalPath = wavePath.replace(wavsFolder,
                                          featsFolder).replace(".wav", ".csv")
        featsLocalPath = os.path.join("Feats", featsLocalPath)
        featsFullPath = os.path.join(
            os.path.split(jsonPath)[0], featsLocalPath)

        # print(featsLocalPath, featsFullPath)
        dim = makeFeatsCsv(waveFullPath, featsFullPath)
        if dim == 0: continue
        featsDict = getFeatsDict(dim, featsFolder, featsLocalPath)
        samples[ID]["features"][featsDict["ID"]] = featsDict
        # saveToJson(jsonPath, sample)
        printProgressBar(i + 1,
                         len(samples),
                         prefix='Adding mel-frequency filterbank features:',
                         suffix='Complete',
                         length="fit")
    with open(jsonPath, 'w') as jsonFile:
        json.dump(samples, jsonFile, indent=4, ensure_ascii=False)
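makeFeatsCsv is not shown. A sketch of what it plausibly does, using librosa (an assumption; the original may use a different DSP stack) and emitting the feat_<dim> columns the other scripts expect:

import os
import librosa
import numpy as np
import pandas as pd

def makeFeatsCsv(wavPath, csvPath, nMels=40):
    # Sketch: log mel-filterbank features, one frame per row.
    try:
        y, sr = librosa.load(wavPath, sr=16000)
    except Exception:
        return 0  # the caller skips the sample when dim == 0
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=nMels)
    logMels = np.log(mels + 1e-6).T  # (frames, nMels)
    os.makedirs(os.path.dirname(csvPath), exist_ok=True)
    headers = ["feat_" + str(i) for i in range(nMels)]
    pd.DataFrame(logMels, columns=headers).to_csv(csvPath, index=False)
    return nMels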
Example #10
def testClassification(experiment, testRun, setTarg, seed=0):
	dataset = DataReader(experiment.data["path"])
	dataset.setDatasetClassic("test", experiment.data["feature"], experiment.data["annotation"])
	inp, tar = dataset[0]
	experiment.inputDim = inp.shape[1]
	firstID1 = list(dataset.dataPart.keys())[0]
	firstID2 = list(dataset.dataPart[firstID1]["annotations"])[0]
	headers = dataset.dataPart[firstID1]["annotations"][firstID2]["headers"]
	# print(headers)
	wrapper = getWrapper(experiment, seed=seed, getBest=True)
	modelOutPath = os.path.join(wrapper.savePath, "outputs")
	savePath = os.path.join(modelOutPath, "outputs.csv")
	outputsCSV = pd.read_csv(savePath)
	if testRun: dataset = keepOne(dataset)
	IDs = dataset.dataPart.keys()
	AllOuts = []
	AllTars = []
	for idx, ID in enumerate(IDs):
		outputs = outputsCSV[ID].to_numpy()
		targets = dataset.targetReader(ID)
		AllOuts.append(np.argmax(outputs))
		AllTars.append(targets[0,0])
		# print(np.argmax(outputs), targets[0,0])
		printProgressBar(idx+1, len(IDs), prefix = 'Testing model:', suffix = '', length = "fit")
		# if idx > 50: break
	target = np.array(AllTars)
	output = np.array(AllOuts)
	metrics = {}
	for key in experiment.evaluation.keys():
		metrics[key] = getMetric(target, output, metric=key)
	experiment.evaluation = metrics
	confMat = confMatrix(target, output, numTars=experiment.outputDim)
	# print(confMatrix(target, output, numTars=experiment.outputDim))
	savePath = os.path.join(wrapper.savePath, "confMat.csv")
	np.savetxt(savePath, confMat, delimiter=",")
	return experiment
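confMatrix is not shown; a minimal sketch consistent with how it is called here (integer targets and predictions, numTars classes, rows indexed by the true class):

import numpy as np

def confMatrix(target, output, numTars):
    # Sketch: mat[t, p] counts samples of true class t predicted as p.
    mat = np.zeros((numTars, numTars), dtype=int)
    for t, p in zip(target.astype(int), output.astype(int)):
        mat[t, p] += 1
    return mat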
Example #11
def main(featsFolder, jsonPath, modelPath, maxDur, normalised):
    """
    Add wav2vec2 features to a given dataset and write their references to a JSON file.


    Example
    ----------
    python wav2vec2.py -f "xlsr_53_56k_cut30" -d 29.99 -n True  -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/xlsr_53_56k.pt"
    python wav2vec2.py -f "FlowBERT_2952h_base_cut30_noNorm" -d 29.99 -n False -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/FlowBERT_2952h_base.pt"
    python wav2vec2.py -f "FlowBERT_2952h_base_cut30" -d 29.99 -n True -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/FlowBERT_2952h_base.pt"
    python wav2vec2.py -f "FlowBERT_2952h_large_cut30" -d 29.99 -n True -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/FlowBERT_2952h_large.pt"
    python wav2vec2.py -f "FlowBERT_2952h_large_noNorm_cut30" -d 29.99 -n False -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/FlowBERT_2952h_large.pt"
    
    python wav2vec2.py -f "xlsr_53_56k_cut30" -d 29.98 -n True  -j "/mnt/HD-Storage/Datasets/AlloSat/data.json" -m "/mnt/HD-Storage/Models/xlsr_53_56k.pt"
    python wav2vec2.py -f "FlowBERT_2952h_base_cut30" -d 29.99 -n False -j /home/getalp/alisamis/Datasets/AlloSat/data.json -m /home/getalp/nguyen35/flowbert_ssl_resources/wav2vec2.0_models/2952h_base/checkpoint_best.pt
    
    python wav2vec2.py -f "FlowBERT_2952h_base_cut30_noNorm" -d 29.99 -n False -j "/mnt/HD-Storage/Datasets/Recola_46_S/data.json" -m "/mnt/HD-Storage/Models/FlowBERT_2952h_base.pt"

    not working: python wav2vec2.py -f "wav2vec2-large-xlsr-53-french" -d 29.99 -n True -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/wav2vec2-large-xlsr-53-french.zip"

    python wav2vec2.py -f "mls_french_base_cut30" -d 29.98 -n False  -j "/home/getalp/alisamis/Datasets/AlloSat/data.json" -m "/home/getalp/nguyen35/flowbert_ssl_resources/wav2vec2.0_models/mls_french_base/checkpoint_best.pt"
    python wav2vec2.py -f "xlsr_53_56k_cut30" -d 29.98 -n True  -j "/home/getalp/alisamis/Datasets/AlloSat/data.json" -m "/home/getalp/alisamis/Models/xlsr_53_56k.pt"
    python wav2vec2.py -f "mls_french_base_cut30" -d 29.98 -n False  -j "/home/getalp/alisamis/Datasets/Recola_46/data.json" -m "/home/getalp/nguyen35/flowbert_ssl_resources/wav2vec2.0_models/mls_french_base/checkpoint_best.pt"
    python wav2vec2.py -f "mls_french_base_cut30" -d 29.98 -n False  -j "/mnt/HD-Storage/Datasets/Recola_46/data.json" -m "/mnt/HD-Storage/Models/mls_french_base/checkpoint_best.pt"
    
	python wav2vec2.py -f "libri960_big_cut30" -d 29.98 -n True  -j "/home/getalp/alisamis/Datasets/Recola_46/data.json" -m "/home/getalp/dinarelm/work/data/Exchance/wav2vec/models/libri960_big.pt"
	python wav2vec2.py -f "libri960_big_cut30" -d 29.98 -n True  -j "/home/getalp/alisamis/Datasets/AlloSat/data.json" -m "/home/getalp/dinarelm/work/data/Exchance/wav2vec/models/libri960_big.pt"
    """

    # Legacy loading path, kept for reference:
    # cp = torch.load(modelPath, map_location=torch.device('cpu'))
    # model = Wav2VecModel.build_model(cp['args'], task=None)
    # model.load_state_dict(cp['model'])
    # model.eval()
    model, cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
        [modelPath])
    model = model[0]
    model.eval()

    samples = loadFromJson(jsonPath)
    for i, ID in enumerate(samples.keys()):
        sample = samples[ID]
        wavePath = sample["path"]
        wavsFolder = wavePath.split(os.sep)[0]
        waveFullPath = os.path.join(os.path.split(jsonPath)[0], wavePath)
        featsLocalPath = wavePath.replace(wavsFolder,
                                          featsFolder).replace(".wav", ".csv")
        featsLocalPath = os.path.join("Feats", featsLocalPath)
        featsFullPath = os.path.join(
            os.path.split(jsonPath)[0], featsLocalPath)

        # print(featsLocalPath, featsFullPath)
        dim = makeFeatsCsv(waveFullPath, featsFullPath, model, maxDur,
                           normalised)
        if dim == 0: continue
        featsDict = getFeatsDict(dim, featsFolder, featsLocalPath)
        samples[ID]["features"][featsDict["ID"]] = featsDict
        # saveToJson(jsonPath, sample)
        printProgressBar(i + 1,
                         len(samples),
                         prefix='Adding wav2vec features:',
                         suffix='Complete',
                         length="fit")
    with open(jsonPath, 'w') as jsonFile:
        json.dump(samples, jsonFile, indent=4, ensure_ascii=False)
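makeFeatsCsv is not shown here either. A rough sketch under the assumption that it calls the fairseq wav2vec 2.0 model with features_only=True, cuts the audio at maxDur seconds (matching the *_cut30 feature names), and layer-norms the input when normalised is set:

import os
import pandas as pd
import soundfile as sf
import torch
import torch.nn.functional as F

def makeFeatsCsv(wavPath, csvPath, model, maxDur, normalised):
    # Sketch: encode (at most maxDur seconds of) audio with wav2vec 2.0
    # and dump the frame features as feat_<dim> columns.
    wav, sr = sf.read(wavPath, dtype="float32")
    if wav.ndim > 1:
        wav = wav.mean(axis=1)  # downmix to mono
    source = torch.from_numpy(wav[: int(maxDur * sr)]).unsqueeze(0)
    if normalised:  # models trained with normalize=True expect layer-normed input
        source = F.layer_norm(source, source.shape)
    with torch.no_grad():
        out = model(source=source, mask=False, features_only=True)
    feats = out["x"].squeeze(0).numpy()  # (frames, dim)
    if feats.size == 0:
        return 0  # the caller skips the sample when dim == 0
    os.makedirs(os.path.dirname(csvPath), exist_ok=True)
    headers = ["feat_" + str(i) for i in range(feats.shape[1])]
    pd.DataFrame(feats, columns=headers).to_csv(csvPath, index=False)
    return feats.shape[1]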