def __new__(self, annotationFile="../annotate/emo20q.txt"):
    """Build a question/answer multigraph from the human-human tournament.

    Question glosses that occur fewer than twice in the whole tournament
    are pruned.  Within each match the retained questions are chained
    together starting at the root node ``"Emotion"``.  Every edge carries
    an ``ans`` attribute holding the answer ("yes", "no" or "other") that
    was given to the question the edge starts from; edges leaving the root
    use "yes".  Guesses (glosses of the form ``e==<word>``) and the
    emotion(s) the match was actually about are attached as children of
    the current question without advancing the chain.

    Note that this returns the graph itself, not an instance of the
    enclosing class (presumably intentional, since this is ``__new__`` --
    confirm against the class definition).

    Args:
        annotationFile: path passed to ``HumanHumanTournament``.

    Returns:
        An ``nx.MultiDiGraph`` containing the edges of all matches.
    """
    tournament = HumanHumanTournament(annotationFile)

    # Count how often each question gloss occurs; rare ones are pruned.
    qcounts = defaultdict(int)
    for m in tournament.matches():
        for t in m.turns():
            qcounts[t.qgloss] += 1

    # Loop-invariant patterns, compiled once.
    guess_re = re.compile(r'^e==(\w+)$')
    # Trailing "word/word/..." run of the emotion label.
    synonyms_re = re.compile(r'\w+(?:/\w+)*$')

    G = nx.MultiDiGraph()
    G.add_node("Emotion")

    for m in tournament.matches():
        # Per-match graph, merged into G at the end of the iteration.
        H = nx.MultiDiGraph()
        parent = "Emotion"
        prevAns = "yes"
        H.add_node(parent)

        for t in m.turns():
            if qcounts[t.qgloss] < 2:
                continue

            # Guesses become leaves below the current question.
            guess = guess_re.search(t.qgloss)
            if guess:
                H.add_edge(parent, guess.group(1), ans=prevAns)
                continue

            # Turns that are not yes/no questions are ignored.
            if t.qgloss.startswith(("non-yes-no", "giveup")):
                continue

            # Reduce the answer gloss to yes/no/other.
            if t.agloss.startswith("yes"):
                ans = "yes"
            elif t.agloss.startswith("no"):
                ans = "no"
            else:
                ans = "other"

            H.add_edge(parent, t.qgloss, ans=prevAns)
            parent = t.qgloss
            prevAns = ans

        # Attach the emotion(s) the match was about to the last question.
        # The label may list several synonyms separated by "/".  Splitting
        # the matched run keeps *all* of them: a repeated capture group
        # would only retain its last repetition and silently drop the
        # middle synonyms of "a/b/c".  Labels without any word characters
        # are skipped instead of raising AttributeError on a failed match.
        label = synonyms_re.search(m.emotion())
        if label is not None:
            for synonym in label.group(0).split("/"):
                H.add_edge(parent, synonym, ans=prevAns)

        G.add_edges_from(H.edges(data=True))

    return G
def __new__(self, annotationFile="../annotate/emo20q.txt"):
    """Build a question/answer multigraph from the human-human tournament.

    Question glosses that occur fewer than twice in the whole tournament
    are pruned.  Within each match the retained questions are chained
    together starting at the root node ``"Emotion"``.  Every edge carries
    an ``ans`` attribute holding the answer ("yes", "no" or "other") that
    was given to the question the edge starts from; edges leaving the root
    use "yes".  Guesses (glosses of the form ``e==<word>``) and the
    emotion(s) the match was actually about are attached as children of
    the current question without advancing the chain.

    Note that this returns the graph itself, not an instance of the
    enclosing class (presumably intentional, since this is ``__new__`` --
    confirm against the class definition).

    Args:
        annotationFile: path passed to ``HumanHumanTournament``.

    Returns:
        An ``nx.MultiDiGraph`` containing the edges of all matches.
    """
    tournament = HumanHumanTournament(annotationFile)

    # Count how often each question gloss occurs; rare ones are pruned.
    qcounts = defaultdict(int)
    for m in tournament.matches():
        for t in m.turns():
            qcounts[t.qgloss] += 1

    # Loop-invariant patterns, compiled once.
    guess_re = re.compile(r'^e==(\w+)$')
    # Trailing "word/word/..." run of the emotion label.
    synonyms_re = re.compile(r'\w+(?:/\w+)*$')

    G = nx.MultiDiGraph()
    G.add_node("Emotion")

    for m in tournament.matches():
        # Per-match graph, merged into G at the end of the iteration.
        H = nx.MultiDiGraph()
        parent = "Emotion"
        prevAns = "yes"
        H.add_node(parent)

        for t in m.turns():
            if qcounts[t.qgloss] < 2:
                continue

            # Guesses become leaves below the current question.
            guess = guess_re.search(t.qgloss)
            if guess:
                H.add_edge(parent, guess.group(1), ans=prevAns)
                continue

            # Turns that are not yes/no questions are ignored.
            if t.qgloss.startswith(("non-yes-no", "giveup")):
                continue

            # Reduce the answer gloss to yes/no/other.
            if t.agloss.startswith("yes"):
                ans = "yes"
            elif t.agloss.startswith("no"):
                ans = "no"
            else:
                ans = "other"

            H.add_edge(parent, t.qgloss, ans=prevAns)
            parent = t.qgloss
            prevAns = ans

        # Attach the emotion(s) the match was about to the last question.
        # The label may list several synonyms separated by "/".  Splitting
        # the matched run keeps *all* of them: a repeated capture group
        # would only retain its last repetition and silently drop the
        # middle synonyms of "a/b/c".  Labels without any word characters
        # are skipped instead of raising AttributeError on a failed match.
        label = synonyms_re.search(m.emotion())
        if label is not None:
            for synonym in label.group(0).split("/"):
                H.add_edge(parent, synonym, ans=prevAns)

        G.add_edges_from(H.edges(data=True))

    return G
def __init__(self):
    """Index the surface questions of the human-human tournament.

    After construction ``self._dictionary`` maps each question id (as
    returned by ``turn.questionId()``) to the list of ``turn.q`` values
    recorded under that id, in the order the turns are visited.
    """
    corpus = HumanHumanTournament()
    # Bind the attribute first so it exists while the index is filled.
    self._dictionary = index = defaultdict(list)
    for match in corpus.matches():
        for turn in match.turns():
            bucket = index[turn.questionId()]
            bucket.append(turn.q)
def __new__(self, annotationFile="../annotate/emo20q.txt"):
    """Build a directed question graph from the human-human tournament.

    Only question glosses that occur more than once in the tournament are
    used.  Each retained question becomes a node ``(qgloss, ans)`` where
    ``ans`` is the answer it received ("yes", "no" or "other"); the nodes
    of a match are chained together starting at the root ``"Emotion"``.
    Guesses (glosses of the form ``e==<word>``) and the emotion(s) the
    match was actually about are attached as plain string nodes below the
    current question without advancing the chain.

    Note that this returns the graph itself, not an instance of the
    enclosing class (presumably intentional, since this is ``__new__`` --
    confirm against the class definition).

    Args:
        annotationFile: path passed to ``HumanHumanTournament``.

    Returns:
        An ``nx.DiGraph`` containing the edges of all matches.
    """
    tournament = HumanHumanTournament(annotationFile)

    # Count how often each question gloss occurs; rare ones are pruned.
    qcounts = defaultdict(int)
    for m in tournament.matches():
        for t in m.turns():
            qcounts[t.qgloss] += 1

    # Loop-invariant patterns, compiled once.
    guess_re = re.compile(r'^e==(\w+)$')
    # Trailing "word/word/..." run of the emotion label.
    synonyms_re = re.compile(r'\w+(?:/\w+)*$')

    G = nx.DiGraph()
    G.add_node("Emotion")

    for m in tournament.matches():
        # Per-match graph, merged into G at the end of the iteration.
        H = nx.DiGraph()
        parent = "Emotion"
        H.add_node(parent)

        for t in m.turns():
            if qcounts[t.qgloss] <= 1:
                continue

            # Guesses become leaves below the current question.
            guess = guess_re.search(t.qgloss)
            if guess:
                H.add_edge(parent, guess.group(1))
                continue

            # Turns that are not yes/no questions are ignored.
            if t.qgloss.startswith(("non-yes-no", "giveup")):
                continue

            # Reduce the answer gloss to yes/no/other.
            if t.agloss.startswith("yes"):
                ans = "yes"
            elif t.agloss.startswith("no"):
                ans = "no"
            else:
                ans = "other"

            # The answer is part of the node identity, so the same question
            # answered differently leads into a different branch.
            newNode = (t.qgloss, ans)
            H.add_edge(parent, newNode)
            parent = newNode

        # Attach the emotion(s) the match was about to the last question.
        # The label may list several synonyms separated by "/".  Splitting
        # the matched run keeps *all* of them: a repeated capture group
        # would only retain its last repetition and silently drop the
        # middle synonyms of "a/b/c".  Labels without any word characters
        # are skipped instead of raising AttributeError on a failed match.
        label = synonyms_re.search(m.emotion())
        if label is not None:
            for synonym in label.group(0).split("/"):
                H.add_edge(parent, synonym)

        G.add_edges_from(H.edges())

    return G
#!/usr/bin/python import sys sys.path.append('/home/abe/emo20qgoogle/python') #update this for your own installation from emo20q.data.base import HumanHumanTournament, HumanComputerTournament, Match, Turn import time import couchdb db = couchdb.client.Database(url='http://ark.usc.edu:5984/emo20q') import json from uuid import uuid4 #doc_id = uuid4().hex hh = HumanHumanTournament(annotationFile="/home/abe/emo20qgoogle/annotate/emo20q.txt") hc = HumanComputerTournament(annotationFile="/home/abe/emo20qgoogle/lists/onlineResults_2011-10-28.txt") # note: currently human human dialogs have more data than human computer # in the data sources that are currently being used # the biggest difference is that human-computer dialogs don't have the surface # question. This needs to be addressed asap! def match2JsonEncoder(x): out = {} if isinstance(x,Match): out['type'] = 'Dialog' if x.type: out['param'] = {'type':x.type} if x.provenance: out['param'] = {'provenance':x.provenance} out['container'] = [{'type':'Match',
def __init__(self):
    """Estimate label and feature distributions from the tournament data.

    Labels are emotions and features are question ids.  The constructor
    fills these attributes:

    * ``_labels``: set of emotion names; synonyms separated by "/" in a
      match's emotion are treated as separate labels.
    * ``_features``: set of retained question ids.
    * ``_label_freqdist``: count of matches per label.
    * ``_feature_freqdist``: per ``(label, question id)`` pair, the counts
      of each answer id.
    * ``_feature_values``: per question id, the set of answer values seen
      (plus the values added in the "unseen" pass below).
    * ``_label_probdist`` / ``_feature_probdist``: ``ELEProbDist`` objects
      built from the two frequency distributions.

    NOTE(review): ``FreqDist`` and ``ELEProbDist`` look like the NLTK
    classes of the same name -- confirm against the file's imports.
    NOTE(review): the source of this block was collapsed onto a single
    line; the nesting shown here is reconstructed from the statement
    order and should be checked against the original file.
    """
    # read in tournament, do some testing, get some stats
    #tournament = HumanHumanTournament()
    #tournament = (HumanHumanTournament()+HumanComputerTournament()+HumanComputerCouchDbTournament())
    # workaround: use json dump of couch instead of db connection
    tournament = (HumanHumanTournament() + HumanComputerTournament() +
                  HumanComputerCouchJsonTournament())

    # count turns per question id in a dict, for pruning
    qcounts = defaultdict(int)
    for m in tournament.matches():
        for t in m.turns():
            qcounts[t.questionId()] += 1
    # question ids seen fewer times than this are not used as features
    feature_count_threshold = 2

    # get list of emotions(entities/labels) and a list of
    # questions(properties/features)
    self._labels = set()
    self._features = set()

    # get FreqDist of emotions(entities/labels)
    self._label_freqdist = FreqDist()
    # get FreqDist of questions(properties/features) given emotions
    self._feature_freqdist = defaultdict(FreqDist)
    self._feature_values = defaultdict(set)

    for m in tournament.matches():
        #print m.emotion()
        emotions = m.emotion().split(
            "/")  # deal with synonyms (sep'd w/ '/' )
        for e in emotions:
            self._labels.add(e)
            self._label_freqdist[e] += 1
            # every synonym is credited with all turns of the match
            for t in m.turns():
                qid = t.questionId()
                if (qcounts[qid] >= feature_count_threshold):
                    # skip turns that are not usable yes/no questions
                    if (qid.find("non-yes-no") == 0):
                        continue
                    if (qid.find("giveup") == 0):
                        continue
                    self._features.add(qid)
                    # answer id of the turn (presumably already reduced
                    # to yes/no/other by answerId() -- TODO confirm)
                    ans = t.answerId()
                    #if ans == "other": continue
                    #deal with guesses:
                    #guess = re.search(r'^e==(\w+)$',t.qgloss )
                    #if(guess):
                    self._feature_freqdist[e, qid][ans] += 1
                    self._feature_values[qid].add(ans)

    # assign "None" to properties of entities when property is unseen
    for e in self._labels:
        # NOTE(review): num_samples is assigned but never read below
        num_samples = self._label_freqdist[e]
        for fname in self._features:
            # indexing the defaultdict creates an empty FreqDist for
            # (label, feature) pairs that were never observed
            count = self._feature_freqdist[e, fname].N()
            if count == 0:
                self._feature_freqdist[e, fname][None] += 1
                self._feature_values[fname].add(None)
                # these next 3 lines are questionable
                # NOTE(review): they may instead belong one level up
                # (run for every fname) -- nesting is reconstructed
                self._feature_values[fname].add("yes")
                self._feature_values[fname].add("no")
                self._feature_values[fname].add("other")

    # Create the P(label) distribution
    self._label_probdist = ELEProbDist(self._label_freqdist)

    # Create the P(fval|label, fname) distribution
    self._feature_probdist = {}
    for ((label, fname), freqdist) in self._feature_freqdist.items():
        # bins = number of distinct values recorded for this feature
        probdist = ELEProbDist(freqdist,
                               bins=len(self._feature_values[fname]))
        self._feature_probdist[label, fname] = probdist
def __new__(self, annotationFile="../annotate/emo20q.txt"):
    """Build a weighted question tree from human-human and human-computer data.

    The root node is the 1-tuple ``("Emotion",)``.  Each usable question
    turn adds a node ``(parent, (qgloss, ans))``; because the parent node
    is embedded in the child, a node identifies the whole question/answer
    path leading to it.  The edge into a question node is weighted by the
    answer that question received, as classified by ``nlp.classifyYN``:
    -1 for "yes" (classifier result 1), 0 for "no" (result -1) and 1 for
    anything else ("other").  The emotion(s) the match was about are
    attached to the end of the path with weight 20.  Guess,
    "non-yes-no", "clarification" and "giveup" turns are skipped.

    Note that this returns the graph itself, not an instance of the
    enclosing class (presumably intentional, since this is ``__new__`` --
    confirm against the class definition).

    Args:
        annotationFile: path passed to ``HumanHumanTournament``; the
            ``HumanComputerTournament`` is built with its defaults.

    Returns:
        An ``nx.DiGraph`` containing the weighted edges of all matches.
    """
    tournament = (HumanHumanTournament(annotationFile) +
                  HumanComputerTournament())

    # Count how often each question gloss occurs, for pruning.
    qcounts = defaultdict(int)
    for m in tournament.matches():
        for t in m.turns():
            qcounts[t.qgloss] += 1

    # Loop-invariant patterns, compiled once.
    guess_re = re.compile(r'^e==(\w+)(\|\|.*)*$')
    # Trailing "word/word/..." run of the emotion label.
    synonyms_re = re.compile(r'\w+(?:/\w+)*$')
    skipped_prefixes = ("non-yes-no", "clarification", "giveup")

    G = nx.DiGraph()
    G.add_node(("Emotion", ))

    for m in tournament.matches():
        # Per-match graph, merged into G at the end of the iteration.
        H = nx.DiGraph()
        parent = ("Emotion", )
        H.add_node(parent)

        for t in m.turns():
            # Threshold of one occurrence: as written this keeps every
            # gloss that was counted above.
            if qcounts[t.qgloss] <= 0:
                continue

            # Guesses are not part of the question tree.
            if guess_re.search(t.qgloss):
                continue

            # Turns that are not yes/no questions are ignored.
            if t.qgloss.startswith(skipped_prefixes):
                continue

            # Classify the raw answer once and derive label and weight.
            verdict = nlp.classifyYN(t.a)
            if verdict == -1:
                ans, weight = "no", 0
            elif verdict == 1:
                ans, weight = "yes", -1
            else:
                ans, weight = "other", 1

            newNode = parent, (t.qgloss, ans)
            H.add_edge(parent, newNode, weight=weight)
            parent = newNode

            # Debugging aid: dump the whole match when "happiness" is
            # annotated as not having positive valence.
            if (t.qgloss == "e.valence==positive"
                    and m.emotion() == "happiness" and ans == "no"):
                print("wtf!")
                print("%s %s %s" % (t.qgloss, t.a, m.emotion()))
                # separate name so the outer loop variable is not shadowed
                for logged in m.turns():
                    print("%s %s" % (logged.qgloss, logged.a))

        # Attach the emotion(s) the match was about to the end of the path.
        # The label may list several synonyms separated by "/".  Splitting
        # the matched run keeps *all* of them: a repeated capture group
        # would only retain its last repetition and silently drop the
        # middle synonyms of "a/b/c".  Labels without any word characters
        # are skipped instead of raising AttributeError on a failed match.
        label = synonyms_re.search(m.emotion())
        if label is not None:
            for synonym in label.group(0).split("/"):
                H.add_edge(parent, synonym, weight=20)

        G.add_edges_from(H.edges(data=True))

    return G
#!/usr/bin/python from emo20q.data.base import HumanHumanTournament import networkx as nx from networkx import graphviz_layout import matplotlib.pyplot as plt from collections import defaultdict import re # read in tournament, do some testing, get some stats tournament = HumanHumanTournament("../annotate/emo20q.txt") #count turns in a dict, for pruning qcounts = defaultdict(int) for m in tournament.matches(): for t in m.turns(): qcounts[t.qgloss] += 1 #create graph G = nx.DiGraph() G.add_node(("Emotion", )) #add emo20q data to graph for m_idx, m in enumerate(tournament.matches()): H = nx.DiGraph() parent = "Emotion", #parent = () H.add_node(parent) for t in m.turns(): if (qcounts[t.qgloss] > 10): #deal with guesses:
#!/usr/bin/python from emo20q.data.base import HumanHumanTournament import networkx as nx from networkx import graphviz_layout import matplotlib.pyplot as plt from collections import defaultdict import re # read in tournament, do some testing, get some stats tournament = HumanHumanTournament("../annotate/emo20q.txt") #count turns in a dict, for pruning qcounts = defaultdict(int) for m in tournament.matches(): for t in m.turns(): qcounts[t.qgloss]+=1 #create graph G = nx.DiGraph() G.add_node(("Emotion",)) #add emo20q data to graph for m_idx,m in enumerate(tournament.matches()): H = nx.DiGraph() parent = "Emotion", #parent = () H.add_node(parent) for t in m.turns():
#!/usr/bin/python import sys sys.path.append( '/home/abe/emo20qgoogle/python') #update this for your own installation from emo20q.data.base import HumanHumanTournament, HumanComputerTournament, Match, Turn import time import couchdb db = couchdb.client.Database(url='http://ark.usc.edu:5984/emo20q') import json from uuid import uuid4 #doc_id = uuid4().hex hh = HumanHumanTournament( annotationFile="/home/abe/emo20qgoogle/annotate/emo20q.txt") hc = HumanComputerTournament( annotationFile="/home/abe/emo20qgoogle/lists/onlineResults_2011-10-28.txt") # note: currently human human dialogs have more data than human computer # in the data sources that are currently being used # the biggest difference is that human-computer dialogs don't have the surface # question. This needs to be addressed asap! def match2JsonEncoder(x): out = {} if isinstance(x, Match): out['type'] = 'Dialog' if x.type: out['param'] = {'type': x.type}
#!/usr/bin/python
import sys
import time
import json
from uuid import uuid4

# The project package is located through this path entry, so it has to be
# appended before the emo20q import below.
sys.path.append(
    '/home/abe/emo20qgoogle/python')  #update this for your own installation

from emo20q.data.base import HumanHumanTournament, HumanComputerTournament, Match, Turn
import couchdb

# Handle on the CouchDB database.
db = couchdb.client.Database(url='http://ark.usc.edu:5984/emo20q')

#doc_id = uuid4().hex

# The two data sets, loaded from their annotation files.
hh = HumanHumanTournament(annotationFile="../../annotate/emo20q.txt")
hc = HumanComputerTournament(
    annotationFile="../../lists/onlineResults_2011-10-28.txt")

# note: currently human human dialogs have more data than human computer
# in the data sources that are currently being used
# the biggest difference is that human-computer dialogs don't have the surface
# question.  This needs to be addressed asap!


def match2JsonEncoder(x):
    """Turn a ``Match`` into a plain dict.

    For a ``Match`` the result holds its ``type`` and ``provenance`` (each
    only when truthy) and an ``events`` list with one entry wrapping
    ``x.turns()``.  Any other object yields an empty dict.
    """
    if not isinstance(x, Match):
        return {}

    encoded = {}
    # Optional attributes are copied only when they carry a value.
    for attr in ('type', 'provenance'):
        value = getattr(x, attr)
        if value:
            encoded[attr] = value
    encoded['events'] = [{'type': 'Match', 'turns': x.turns()}]
    #encoded['turns'] = []
    return encoded