# -*- coding: utf-8 -*- """ Example LandKit Script Created on Thu Jan 21 15:48:07 2016 @author: jrkerlin """ import numpy as np from matplotlib.pyplot import * import landkit reload(landkit) lktable = landkit.LoadDict("test.csv") sc = landkit.SentCompare(lktable['SentWords'], lktable['OrigResponse']) sc.SpellCorrect() sc.ScoreWords() sc.GeneratePhonemes() sc.ScorePhonemes() sc.GeneratePhonemeTable() sc.SentenceAnalysis() #for snum,bools in enumerate(sc.hits_phonemes): # print sc.target_phonemes[snum] # print sc.source_phonemes[snum] # print bools # #sentenceACC = [] #for snum,bools in enumerate(sc.hits_phonemes): # print sc.target[snum] # print sc.source[snum] # booze = [int(boo) for boo in bools]
# Gather every per-block CSV under each subject's folder into one big
# response table, then score it with LandKit.
for subjectDir in folders:
    blockFolder = normjoin(dataDir, subjectDir)
    csvNames = [
        normjoin(blockFolder, f) for f in os.listdir(blockFolder)
        if fnmatch.fnmatch(f, '*.csv')
    ]
    for section in csvNames:
        lks = pd.read_csv(section)
        # NOTE(review): DataFrame.append in a loop copies the table each
        # iteration (quadratic) and is deprecated in modern pandas —
        # collecting frames and calling pd.concat once would be better.
        lktable = lktable.append(lks)
    print "Now Loading Subject..." + " ".join([subjectDir])
lktable = lktable.reset_index()

# Set all non-responses to a single blank
lktable['SourceSentence'] = lktable['SourceSentence'].fillna(" ")

# Score target vs. (blank-filled) responses; True presumably enables the
# phoneme-level pipeline — TODO confirm against landkit.SentCompare.
# NOTE(review): 'C:\Experiments\JK302' only works because \E and \J are
# not recognized escapes in Python — a raw string r'...' would be safer.
sc = landkit.SentCompare(list(lktable['TargetSentence']),
                         list(lktable['SourceSentence']), True,
                         normjoin('C:\Experiments\JK302'))
#filehandler = open(dataDir + '\\sc.pickle', 'w')
#pickle.dump(sc, filehandler)
sc.phonTable.to_csv(dataDir + '\\phonTable.csv')

# Make pandas table
sentT = pd.DataFrame(lktable)
#Join the ngram and IPHoD info and "correctness" at the word level
wordT = pd.concat([sc.tngram, sc.tphod], axis=1, join_axes=[sc.tngram.index])
#
#Table with phoneme Level info
phonT = sc.phonTable
# NOTE(review): this chunk is cut off mid-statement below.
bigPhonT = pd.concat([
#volSent = np.append(volSent,pd.DataFrame(phon,onset,offset,vol)) print(folder + ' ' + fname + ' Volume Tagging...') #volGrouped.append(volSent) tt['FileID'] = fCount tt['SpeechRMS'] = volume audioTable = pd.concat([audioTable, tt]) fCount += 1 audioTable.to_csv(os.path.normpath(r'C:\TCDTIMIT\Tables\audioTable_r1.csv')) #Match HTK forced alignments to landkit phonemes taggedCorpus = pd.DataFrame.from_csv(normjoin(outPath, 'posTags_r1.csv')) justSent = taggedCorpus.groupby(['Subject', 'File']).first().reset_index() sc = landkit.SentCompare(list(justSent['Sentence']), list(justSent['Sentence']), False, normjoin('C:\Experiments\JK302')) sc.GeneratePhonemes() audioTable = pd.DataFrame.from_csv( os.path.normpath(r'C:\TCDTIMIT\Tables\audioTable_r1.csv')) def source2target(sourceList, targetList, sourceVal, fillOpt='Neg1'): import difflib d = difflib.Differ() list(d.compare(sourceList, targetList)) instruct = [x[0] for x in list(d.compare(sourceList, targetList))] c = 0
print "Now Loading ..." + " ".join([folder, folder2]) lktable = lktable.reset_index() speechToText = [] for f in lktable['VerbalResponse']: try: transcript = landkit.ATTSR(f) except: pass transcript = " " speechToText.append(transcript) print speechToText[-1] lktable['speechToText'] = speechToText # Set all non-responses to a single black lktable['SourceSentence'] = lktable['SourceSentence'].fillna(" ") sc = landkit.SentCompare(list(lktable['TargetSentence']), list(lktable['SourceSentence']), True) filehandler = open(dataDir + '\\sc.pickle', 'w') pickle.dump(sc, filehandler) sc.phonTable.to_csv(dataDir + '\\phonTable.csv') # Make pandas table sentT = pd.DataFrame(lktable) #Join the ngram and IPHoD info and "correctness" at the word level wordT = pd.concat([sc.tngram, sc.tphod], axis=1, join_axes=[sc.tngram.index]) #wordT['HitIndex']= [y for x in sc.hits for y in x] #wordT['WordIndex']= [y for x in sc.hits for y,z in enumerate(x)] #
import numpy as np
from matplotlib import *
import sys

#Add path to landkit
sys.path.append(r'C:\Users\jrkerlin\Documents\GitHub\landk\Analysis')
import landkit

# Python 2 builtin reload: pick up edits to landkit without restarting.
reload(landkit)

##Just the spell correction and word -level scoring
#sc = landkit.SentCompare(['hello worlddsfg go','soul train with him'],['hellot mold','sol tarin'],False)
#sc.SpellCorrect()
#sc.ScoreWords()

# NOTE(review): `pd` is used below but pandas is not imported in this
# chunk — presumably imported elsewhere in the session/file; verify.
lktable = pd.read_csv(r'C:\TCDTIMIT\dataOut\sb\1456357335\sb1456357335.csv')
# False presumably skips the phoneme-level pipeline — TODO confirm
# against landkit.SentCompare.
sc = landkit.SentCompare(lktable['TargetSentence'],
                         lktable['SourceSentence'], False)

# Leftover table-building scratch, kept for reference:
## Make pandas table
#sentT = pd.DataFrame(lktable)
#
##Join the ngram and IPHoD info and "correctness" at the word level
#wordT = pd.concat([sc.tngram, sc.tphod], axis=1, join_axes=[sc.tngram.index])
#wordT['HitIndex']= [y for x in sc.hits for y in x]
#wordT['WordIndex']= [y for x in sc.hits for y,z in enumerate(x)]
#
##Table with phoneme Level info
#phonT = sc.phonTable
#bigPhonT = pd.concat([landkit.IndexFill(sentT,phonT['SentenceCount'],phonT['PhonemeCount']),landkit.IndexFill(wordT,phonT['WordCount'],phonT['PhonemeCount']),phonT], axis=1, join_axes=[phonT.index])
#bigPhonT = pd.concat([landkit.IndexFill(wordT,phonT['WordCount'],phonT['PhonemeCount']),phonT], axis=1, join_axes=[phonT.index])
#bigPhonT.to_csv('C:\TCDTIMIT\DataMine\bigP.csv')
#wordT.to_csv('C:\TCDTIMIT\DataMine\wordT.csv')
# NOTE(review): this chunk begins inside an if/else whose condition is
# outside this view (textIn appears to be a dialog result).
    print(textIn)
else:
    print('user cancelled')

#Record final response
if textIn:
    sourceSentence = textIn[0]
else:
    sourceSentence = " "
print sourceSentence
print "target " + targetSentence

# NOTE(review): chained indexing (table[col][trial] = ...) can silently
# write to a copy in pandas (SettingWithCopy); .loc[trial, col] is the
# reliable form — confirm `table` is not a slice before relying on this.
table['SourceSentence'][trial] = sourceSentence
table['TargetSentence'][trial] = targetSentence

#Just the spell correction and word -level scoring
sc = landkit.SentCompare([targetSentence], [sourceSentence], False)
#sc.SpellCorrect() enchant crashes this script at testing computer for unknown reason!!!
sc.ScoreWords()
wscore = sc.wscore
print wscore
table['SpellCorrSource'][trial] = sc.source[0]
table['SentenceWordScore'][trial] = wscore[0]

#Adapt dbSNR on every trial
# NOTE(review): wscore appears to be a sequence (wscore[0] is used above).
# In Python 2, `list > int` compares by type name and is ALWAYS True, so
# dBSNR would drop 3 dB on every trial regardless of the score.  This
# likely should be `wscore[0] > 50` / `wscore[0] == 50` — confirm the
# type of sc.wscore before fixing.
if wscore > 50:
    dBSNR += -3
elif wscore == 50:
    dBSNR += 0
# Gather every per-block CSV under each subject's folder into one big
# response table, then score it with LandKit (JK310 variant: scores the
# full sentence rather than the target sentence).
for subjectDir in folders:
    blockFolder = normjoin(dataDir, subjectDir)
    csvNames = [
        normjoin(blockFolder, f) for f in os.listdir(blockFolder)
        if fnmatch.fnmatch(f, '*.csv')
    ]
    for section in csvNames:
        lks = pd.read_csv(section)
        # NOTE(review): DataFrame.append in a loop copies the table each
        # iteration (quadratic) and is deprecated in modern pandas —
        # collecting frames and calling pd.concat once would be better.
        lktable = lktable.append(lks)
    print "Now Loading Subject..." + " ".join([subjectDir])
lktable = lktable.reset_index()

# Set all non-responses to a single blank
lktable['SourceSentence'] = lktable['SourceSentence'].fillna(" ")

# Score full sentence vs. (blank-filled) responses; True presumably
# enables the phoneme-level pipeline — TODO confirm against
# landkit.SentCompare.
# NOTE(review): 'C:\Experiments\JK310' only works because \E and \J are
# not recognized escapes in Python — a raw string r'...' would be safer.
sc = landkit.SentCompare(list(lktable['FullSentence']),
                         list(lktable['SourceSentence']), True,
                         normjoin('C:\Experiments\JK310'))
#filehandler = open(dataDir + '\\sc.pickle', 'w')
#pickle.dump(sc, filehandler)
sc.phonTable.to_csv(dataDir + '\\phonTable.csv')

# Make pandas table
sentT = pd.DataFrame(lktable)
#Join the ngram and IPHoD info and "correctness" at the word level
wordT = pd.concat([sc.tngram, sc.tphod], axis=1, join_axes=[sc.tngram.index])
#
#Table with phoneme Level info
phonT = sc.phonTable
# NOTE(review): this chunk is cut off mid-statement below.
bigPhonT = pd.concat([