normjoin('C:\Experiments\JK302')) #filehandler = open(dataDir + '\\sc.pickle', 'w') #pickle.dump(sc, filehandler) sc.phonTable.to_csv(dataDir + '\\phonTable.csv') # Make pandas table sentT = pd.DataFrame(lktable) #Join the ngram and IPHoD info and "correctness" at the word level wordT = pd.concat([sc.tngram, sc.tphod], axis=1, join_axes=[sc.tngram.index]) # #Table with phoneme Level info phonT = sc.phonTable bigPhonT = pd.concat([ landkit.IndexFill(sentT, phonT['SentenceCount'], phonT['PhonemeCount']), landkit.IndexFill(wordT, phonT['WordCount'], phonT['PhonemeCount']), phonT ], axis=1, join_axes=[phonT.index]) audioTableTM = pd.DataFrame.from_csv(normjoin(tablePath, 'audioTableTM.csv')) audioTableTM['PhonemeIndex'] = [int(x) for x in audioTableTM['PhonemeIndex']] #atm = audioTableTM.groupby(['Talker','SentenceID']).first().reset_index() bigPhonT = pd.merge(bigPhonT, audioTableTM, how='left', on=['Talker', 'SentenceID', 'PhonemeIndex']) del bigPhonT['TargetPhoneme_y'] bigPhonT = bigPhonT.rename(columns={'TargetPhoneme_x': 'TargetPhoneme'})
# Replace missing SourceSentence entries with a single space before the
# sentence comparison.
lktable['SourceSentence'] = lktable['SourceSentence'].fillna(" ")

# Raw string for the Windows path: '\E' and '\J' are invalid escape sequences
# in a normal literal (the resulting value is unchanged).
sc = landkit.SentCompare(list(lktable['FullSentence']),
                         list(lktable['SourceSentence']),
                         True,
                         normjoin(r'C:\Experiments\JK310'))
#filehandler = open(dataDir + '\\sc.pickle', 'w')
#pickle.dump(sc, filehandler)

# Save the phoneme table produced by the sentence comparison.
sc.phonTable.to_csv(dataDir + '\\phonTable.csv')

# Sentence-level table: lktable as a pandas DataFrame.
sentT = pd.DataFrame(lktable)

# Word-level table: join the ngram and IPHoD info and "correctness" side by
# side on the ngram table's index.  `join_axes` was removed from pd.concat in
# pandas 1.0, so the second table is reindexed explicitly instead.
wordT = pd.concat([sc.tngram, sc.tphod.reindex(sc.tngram.index)], axis=1)

# Phoneme-level table: expand the sentence- and word-level tables with
# landkit.IndexFill, align them to the phoneme table's index, and place all
# three next to each other column-wise.
phonT = sc.phonTable
sentFill = landkit.IndexFill(sentT, phonT['SentenceCount'],
                             phonT['PhonemeCount'])
wordFill = landkit.IndexFill(wordT, phonT['WordCount'],
                             phonT['PhonemeCount'])
bigPhonT = pd.concat(
    [sentFill.reindex(phonT.index), wordFill.reindex(phonT.index), phonT],
    axis=1)

# Load the audio table.  read_csv(index_col=0, parse_dates=True) is the
# documented equivalent of the removed DataFrame.from_csv.
audioTableTM = pd.read_csv(normjoin(tablePath, 'audioTableTM.csv'),
                           index_col=0, parse_dates=True)
# Force PhonemeIndex to integers so it can be used as a merge key.
audioTableTM['PhonemeIndex'] = [int(x) for x in audioTableTM['PhonemeIndex']]
#atm = audioTableTM.groupby(['Talker','SentenceID']).first().reset_index()

# Left-merge the audio columns onto every phoneme row.
bigPhonT = pd.merge(bigPhonT, audioTableTM, how='left',
                    on=['Talker', 'SentenceID', 'PhonemeIndex'])

# Both inputs carry a TargetPhoneme column, so the merge suffixes them with
# _x/_y; keep the left-hand copy under the original name.
del bigPhonT['TargetPhoneme_y']
bigPhonT = bigPhonT.rename(columns={'TargetPhoneme_x': 'TargetPhoneme'})

#subjectTable = pd.DataFrame.from_csv(normjoin(dataDir,'SubjectInfoJK302.csv')).reset_index()
#bigPhonT = pd.merge(bigPhonT,subjectTable,how='left',on=['Subject'])

# Load the part-of-speech tag table and move its index back into a column.
POSTable = pd.read_csv(normjoin(tablePath, 'posTags_r1.csv'),
                       index_col=0, parse_dates=True).reset_index()
# Rename its columns to the Talker / SentenceID names used as merge keys above.
POSTable = POSTable.rename(columns={'Subject': 'Talker',
                                    'File': 'SentenceID'})
# Prefix every talker label with 's'.
POSTable['Talker'] = POSTable['Talker'].apply(lambda x: 's' + x)