def main(currentFile):
    """Write n-gram features for one dialog to ``<currentFile>.features``.

    Every utterance returned by ``get_utterances_from_filename(currentFile)``
    becomes one tab-separated line: the act tag (``UNK`` when it is ``None``),
    then ``FirstUtterance=<text>`` or ``SpeakerChanged=Yes/No``, then
    unigram / bigram / trigram token and POS features, then one
    ``Transcript=`` feature per whitespace-separated word of the text.

    ``:`` and ``\\`` are rewritten to ``{colon}`` and ``{backslash}`` in the
    whole output. The original code called ``str.replace`` for this but threw
    the results away, so the escaping never took effect.

    Args:
        currentFile: Path passed to ``get_utterances_from_filename``; the
            output is written next to it with a ``.features`` suffix.
    """
    # Strips a single trailing non-word character or underscore.
    trailing_symbol = re.compile(r'[\W_]$')

    lines = []
    prev_speaker = ""
    for utterance in get_utterances_from_filename(currentFile):
        tag = utterance.act_tag
        if tag is None:
            tag = "UNK"
        speaker = utterance.speaker
        features = [tag]

        # An empty previous speaker marks the start of the dialog.
        if prev_speaker == "":
            features.append("FirstUtterance=" + utterance.text)
        elif prev_speaker != speaker:
            features.append("SpeakerChanged=Yes")
        else:
            features.append("SpeakerChanged=No")

        if utterance.pos is None:
            # No POS annotation: fall back to the raw text.
            features.append("Text=" + utterance.text)
        else:
            tokens = [word.token for word in utterance.pos]
            poss = [word.pos for word in utterance.pos]
            if len(tokens) == 1:
                # NOTE(review): any other single-token utterance gets no
                # token features here; this mirrors the original branching
                # as reconstructed from collapsed source -- confirm intent.
                if (tokens[0] == "." and poss[0] == ".") or tokens[0] is None:
                    features.append("Text=" + utterance.text)
            else:
                for size, label in ((1, "Unigram"), (2, "Bigram"), (3, "Trigram")):
                    for start in range(len(tokens) - size + 1):
                        if tokens[start] == "MUMBLEx":
                            features.append("Text=" + utterance.text)
                        else:
                            stop = start + size
                            features.append(label + "=" + "|".join(tokens[start:stop]))
                            features.append(label + " POS=" + "|".join(poss[start:stop]))

        # NOTE(review): emitted for every utterance; the original nesting of
        # this step could not be recovered from the source -- confirm.
        words = trailing_symbol.sub('', utterance.text)
        features.extend("Transcript=" + word for word in words.split())

        lines.append("\t".join(features).rstrip())
        prev_speaker = speaker

    output = "\n".join(lines)
    # str.replace returns a new string, so the result has to be kept.
    output = output.replace(":", "{colon}")
    output = output.replace("\\", "{backslash}")
    output = output.strip() + "\n\n"

    with open(currentFile + ".features", "w+") as handle:
        handle.write(output)
def evaluate(self, test_dir):
    """Score stored predictions against the ``.csv`` files in *test_dir*.

    For each ``.csv`` file the utterances from
    ``get_utterances_from_filename`` are compared position by position with
    ``self.tagged_data[file_name]``. ``self.correctly_classified_tags`` and
    ``self.total_tags`` are updated in place, then both counters and the
    resulting accuracy are printed.

    Files with no entry in ``self.tagged_data`` are reported by name and
    skipped. Utterances without a matching prediction count as incorrect.
    With nothing to score the accuracy is reported as ``0.0`` instead of
    raising ``ZeroDivisionError``.

    Args:
        test_dir: Directory whose ``.csv`` files are evaluated.
    """
    for file_name in os.listdir(test_dir):
        if not file_name.endswith(".csv"):
            continue
        utterances_list = get_utterances_from_filename(
            os.path.join(test_dir, file_name))
        try:
            tagged_labels = self.tagged_data[file_name]
        except KeyError:
            # Without predictions there is nothing to compare; carrying on
            # would reuse the previous file's labels (or fail on an unbound
            # name for the very first file).
            print(file_name)
            continue
        for utterance, label in zip(utterances_list, tagged_labels):
            if utterance.act_tag == label:
                self.correctly_classified_tags += 1
        # Count every utterance, including any beyond the end of the labels.
        self.total_tags += len(utterances_list)

    print(self.correctly_classified_tags)
    print(self.total_tags)
    if self.total_tags:
        accuracy = self.correctly_classified_tags / self.total_tags
    else:
        accuracy = 0.0
    print("Accuracy: " + str(accuracy))
def main(currentFile):
    """Write token/POS features for one dialog to ``<currentFile>.features``.

    Every utterance returned by ``get_utterances_from_filename(currentFile)``
    becomes one tab-separated line: the act tag (``UNK`` when it is ``None``),
    then ``FirstUtterance=<text>`` or ``SpeakerChanged=Yes/No``, then one
    ``Token=`` feature per token followed by one ``POS=`` feature per tag.

    ``:`` and ``\\`` are rewritten to ``{colon}`` and ``{backslash}`` in the
    whole output. The original code called ``str.replace`` for this but threw
    the results away, so the escaping never took effect.

    Args:
        currentFile: Path passed to ``get_utterances_from_filename``; the
            output is written next to it with a ``.features`` suffix.
    """
    lines = []
    prev_speaker = ""
    for utterance in get_utterances_from_filename(currentFile):
        tag = utterance.act_tag
        if tag is None:
            tag = "UNK"
        speaker = utterance.speaker
        features = [tag]

        # An empty previous speaker marks the start of the dialog.
        if prev_speaker == "":
            features.append("FirstUtterance=" + utterance.text)
        elif prev_speaker != speaker:
            features.append("SpeakerChanged=Yes")
        else:
            features.append("SpeakerChanged=No")

        if utterance.pos is None:
            # No POS annotation: fall back to the raw text.
            features.append("Text=" + utterance.text)
        else:
            tokens = [word.token for word in utterance.pos]
            poss = [word.pos for word in utterance.pos]
            if len(tokens) == 1:
                # NOTE(review): any other single-token utterance gets no
                # token features here; this mirrors the original branching
                # as reconstructed from collapsed source -- confirm intent.
                if (tokens[0] == "." and poss[0] == ".") or tokens[0] is None:
                    features.append("Text=" + utterance.text)
            else:
                for token in tokens:
                    if token == "MUMBLEx":
                        features.append("Text=" + utterance.text)
                    else:
                        features.append("Token=" + token)
                features.extend("POS=" + pos for pos in poss)

        lines.append("\t".join(features).rstrip())
        prev_speaker = speaker

    output = "\n".join(lines)
    # str.replace returns a new string, so the result has to be kept.
    output = output.replace(":", "{colon}")
    output = output.replace("\\", "{backslash}")
    output = output.strip() + "\n\n"

    with open(currentFile + ".features", "w+") as handle:
        handle.write(output)
def main(currentFile):
    """Write token/POS features for one dialog to ``<currentFile>.features``.

    Every utterance returned by ``get_utterances_from_filename(currentFile)``
    becomes one tab-separated line: the act tag (``UNK`` when it is ``None``),
    then ``FirstUtterance=<text>`` or ``SpeakerChanged=Yes/No``, then one
    ``Token=`` feature per token followed by one ``POS=`` feature per tag.

    ``:`` and ``\\`` are rewritten to ``{colon}`` and ``{backslash}`` in the
    whole output. The original code called ``str.replace`` for this but threw
    the results away, so the escaping never took effect.

    Args:
        currentFile: Path passed to ``get_utterances_from_filename``; the
            output is written next to it with a ``.features`` suffix.
    """
    lines = []
    prev_speaker = ""
    for utterance in get_utterances_from_filename(currentFile):
        tag = utterance.act_tag
        if tag is None:
            tag = "UNK"
        speaker = utterance.speaker
        features = [tag]

        # An empty previous speaker marks the start of the dialog.
        if prev_speaker == "":
            features.append("FirstUtterance=" + utterance.text)
        elif prev_speaker != speaker:
            features.append("SpeakerChanged=Yes")
        else:
            features.append("SpeakerChanged=No")

        if utterance.pos is None:
            # No POS annotation: fall back to the raw text.
            features.append("Text=" + utterance.text)
        else:
            tokens = [word.token for word in utterance.pos]
            poss = [word.pos for word in utterance.pos]
            if len(tokens) == 1:
                # NOTE(review): any other single-token utterance gets no
                # token features here; this mirrors the original branching
                # as reconstructed from collapsed source -- confirm intent.
                if (tokens[0] == "." and poss[0] == ".") or tokens[0] is None:
                    features.append("Text=" + utterance.text)
            else:
                for token in tokens:
                    if token == "MUMBLEx":
                        features.append("Text=" + utterance.text)
                    else:
                        features.append("Token=" + token)
                features.extend("POS=" + pos for pos in poss)

        lines.append("\t".join(features).rstrip())
        prev_speaker = speaker

    output = "\n".join(lines)
    # str.replace returns a new string, so the result has to be kept.
    output = output.replace(":", "{colon}")
    output = output.replace("\\", "{backslash}")
    output = output.strip() + "\n\n"

    with open(currentFile + ".features", "w+") as handle:
        handle.write(output)
# Score a prediction file against the act tags of the referenced dialogs.
#
# The prediction file (sys.argv[2]) is read as blocks: a header line ending
# in `.csv"` that names a dialog file, followed by one predicted tag per
# line. A block ends at a blank line, at the next header, or at end of file.
outputFile = sys.argv[2]
filePath = ""
predictTags = []
predictList = []
hasPendingBlock = False
with open(outputFile, 'r') as inputStream:
    for line in inputStream:
        stripped = line.strip()
        if stripped.endswith(".csv\""):
            # A new header also closes a block that had no blank line.
            if hasPendingBlock:
                predictList.append((filePath, predictTags))
            fileName = stripped.replace("\"", "").replace("Filename=", "")
            filePath = os.path.join(testFolder, fileName)
            predictTags = []
            hasPendingBlock = True
        elif stripped:
            predictTags.append(stripped)
        elif hasPendingBlock:
            # Record each block once; repeated blank lines used to append
            # duplicate entries and so double-count the same file.
            predictList.append((filePath, predictTags))
            predictTags = []
            hasPendingBlock = False

# Keep the final block when the file does not end with a blank line.
if hasPendingBlock:
    predictList.append((filePath, predictTags))

numPredict = 0
numCorrect = 0
for filePath, predictTags in predictList:
    dialogUtterances = taTool.get_utterances_from_filename(filePath)
    actualTags = [d.act_tag for d in dialogUtterances]
    # zip stops at the shorter list, so only aligned pairs are scored.
    for predictTag, actualTag in zip(predictTags, actualTags):
        if predictTag == actualTag:
            numCorrect += 1
        numPredict += 1

# Report 0.0 rather than dividing by zero when nothing was compared.
accuracy = (numCorrect + 0.0) / numPredict if numPredict else 0.0
print("accuracy:{}".format(accuracy))
def main(currentFile):
    """Write n-gram features for one dialog to ``<currentFile>.features``.

    Every utterance returned by ``get_utterances_from_filename(currentFile)``
    becomes one tab-separated line: the act tag (``UNK`` when it is ``None``),
    then ``FirstUtterance=<text>`` or ``SpeakerChanged=Yes/No``, then
    unigram / bigram / trigram token and POS features, then one
    ``Transcript=`` feature per whitespace-separated word of the text.

    ``:`` and ``\\`` are rewritten to ``{colon}`` and ``{backslash}`` in the
    whole output. The original code called ``str.replace`` for this but threw
    the results away, so the escaping never took effect.

    Args:
        currentFile: Path passed to ``get_utterances_from_filename``; the
            output is written next to it with a ``.features`` suffix.
    """
    # Strips a single trailing non-word character or underscore.
    trailing_symbol = re.compile(r'[\W_]$')

    lines = []
    prev_speaker = ""
    for utterance in get_utterances_from_filename(currentFile):
        tag = utterance.act_tag
        if tag is None:
            tag = "UNK"
        speaker = utterance.speaker
        features = [tag]

        # An empty previous speaker marks the start of the dialog.
        if prev_speaker == "":
            features.append("FirstUtterance=" + utterance.text)
        elif prev_speaker != speaker:
            features.append("SpeakerChanged=Yes")
        else:
            features.append("SpeakerChanged=No")

        if utterance.pos is None:
            # No POS annotation: fall back to the raw text.
            features.append("Text=" + utterance.text)
        else:
            tokens = [word.token for word in utterance.pos]
            poss = [word.pos for word in utterance.pos]
            if len(tokens) == 1:
                # NOTE(review): any other single-token utterance gets no
                # token features here; this mirrors the original branching
                # as reconstructed from collapsed source -- confirm intent.
                if (tokens[0] == "." and poss[0] == ".") or tokens[0] is None:
                    features.append("Text=" + utterance.text)
            else:
                for size, label in ((1, "Unigram"), (2, "Bigram"), (3, "Trigram")):
                    for start in range(len(tokens) - size + 1):
                        if tokens[start] == "MUMBLEx":
                            features.append("Text=" + utterance.text)
                        else:
                            stop = start + size
                            features.append(label + "=" + "|".join(tokens[start:stop]))
                            features.append(label + " POS=" + "|".join(poss[start:stop]))

        # NOTE(review): emitted for every utterance; the original nesting of
        # this step could not be recovered from the source -- confirm.
        words = trailing_symbol.sub('', utterance.text)
        features.extend("Transcript=" + word for word in words.split())

        lines.append("\t".join(features).rstrip())
        prev_speaker = speaker

    output = "\n".join(lines)
    # str.replace returns a new string, so the result has to be kept.
    output = output.replace(":", "{colon}")
    output = output.replace("\\", "{backslash}")
    output = output.strip() + "\n\n"

    with open(currentFile + ".features", "w+") as handle:
        handle.write(output)
def read_all_files(self, input_dir):
    """Load the utterances of every ``.csv`` file found in *input_dir*.

    Each matching file name is joined onto *input_dir* and passed to
    ``get_utterances_from_filename``. The visible code only binds the result
    to a local name; nothing is stored or returned.

    Args:
        input_dir: Directory to scan for ``.csv`` files.
    """
    csv_names = [name for name in os.listdir(input_dir) if name.endswith(".csv")]
    for csv_name in csv_names:
        csv_path = os.path.join(input_dir, csv_name)
        utterances_list = get_utterances_from_filename(csv_path)
print ('D1',end='\t') #feature if speaker same or not elif prev[1]!=utter[1]: print ('S_change',end='\t') #feature for the token if utter[2] == None or utter[2] == './.': T = utter[3].split() for t in T: t = t.replace('\\','/') print('TOKEN_',t.replace(':','-'),sep='',end='\t') else: for each in utter[2]: temp1 = each[0].replace('\\','/') print('TOKEN_',temp1.replace(':','-'),sep='',end='\t') #feature for pos tag for each in utter[2]: temp2 = each[1].replace('\\','/') print('POS_',temp2.replace(':','-'),sep='',end='\t') print(end='\n') dialog = hw3_corpus_tool.get_utterances_from_filename(sys.argv[1]) d_prev = "" for d in dialog: generate_feature(d,d_prev) d_prev = d print(end='\n') #print(d)