Exemple #1
0
def lemmacategoryvector(lemma):
    """Return a binary feature vector marking, per category in
    DV.ALL_CATEGORIES, whether *lemma* belongs to that category."""
    return [truth(lemma in category) for category in DV.ALL_CATEGORIES]
Exemple #2
0
def lemmavector(lemma):
    """Return a binary vector whose i-th entry flags whether *lemma*
    equals the i-th known VPE trigger from the WSJ list."""
    return [truth(lemma == trigger) for trigger in DV.VPE_TRIGGERS_IN_WSJ]
def antecedent_description(trig_sentdict, ant_sentdict, ant, POS_TAGS):
    """Build a flat feature vector describing candidate antecedent *ant*.

    Features (in order): exact subtree/word-span match, antecedent length,
    whether the antecedent subtree dominates the trigger subtree, a POS-tag
    histogram over the antecedent span, and flags for whether the antecedent
    head is an auxiliary / verb / adjective.
    """
    vector = []

    ant_words = ant.get_words()
    subtree = ant.get_subtree()

    # Feature 1.
    # True when the parse subtree covers exactly the antecedent's words.
    vector.append(truth(len(subtree.leaves()) == len(ant_words)))

    # Feature 2.
    # Raw length of the antecedent, in words.
    vector.append(len(ant_words))

    # Feature 3.
    # Does the antecedent subtree dominate the trigger's subtree?
    vector.append(truth(NT.dominates(subtree.root(), subtree, ant.get_trigger().get_subtree())))

    # Features 4.
    # POS-tag counts over the antecedent span within its context.
    pos_tags_dict = {}
    for tag in POS_TAGS:
        pos_tags_dict[tag] = 0

    idx = get_antecedent_head_index(ant.get_context(), ant)
    # NOTE(review): the slice end is len(ant_words), not idx + len(ant_words);
    # confirm this is the intended span. Also, a tag outside POS_TAGS would
    # raise KeyError on the increment below — presumably POS_TAGS is exhaustive.
    for tag in ant.get_context()['pos'][idx:len(ant_words)]:
        pos_tags_dict[tag] += 1

    vector += [pos_tags_dict[tag] for tag in pos_tags_dict]

    # Feature 5: if the antecedent starts with an auxiliary, verb, adj.
    vector.append(truth(DV.isauxiliary(ant_sentdict, idx)))
    vector.append(truth(DV.isverb(ant_sentdict['pos'][idx])))
    vector.append(truth(DV.isadj(ant_sentdict['pos'][idx])))

    return vector
def antecedent_description(trig_sentdict, ant_sentdict, ant, POS_TAGS):
    """Describe candidate antecedent *ant* as a flat feature vector:
    span-match flag, length, dominance over the trigger, a POS histogram
    over the antecedent span, and head-word category flags."""
    words = ant.get_words()
    tree = ant.get_subtree()

    features = [
        # Feature 1: parse subtree covers exactly the antecedent words.
        truth(len(tree.leaves()) == len(words)),
        # Feature 2: antecedent length in words.
        len(words),
        # Feature 3: antecedent subtree dominates the trigger subtree.
        truth(NT.dominates(tree.root(), tree, ant.get_trigger().get_subtree())),
    ]

    # Feature set 4: POS-tag counts over the antecedent span.
    counts = dict((tag, 0) for tag in POS_TAGS)
    head_idx = get_antecedent_head_index(ant.get_context(), ant)
    for tag in ant.get_context()['pos'][head_idx:len(words)]:
        counts[tag] += 1
    features += [counts[tag] for tag in counts]

    # Feature set 5: does the antecedent start with an aux / verb / adjective?
    features.append(truth(DV.isauxiliary(ant_sentdict, head_idx)))
    features.append(truth(DV.isverb(ant_sentdict['pos'][head_idx])))
    features.append(truth(DV.isadj(ant_sentdict['pos'][head_idx])))

    return features
Exemple #5
0
def auxwordvector(word, all_auxs):
    """Return a binary vector flagging which auxiliary in *all_auxs*
    equals *word* (one entry per auxiliary)."""
    return [truth(word == aux) for aux in all_auxs]
Exemple #6
0
    def addauxs(self, mrgmatrix, gsdict, gs_sent_list, make_file=False):
        """Walk every auxiliary in *mrgmatrix* and record, in self.aux_bools,
        whether each one is a gold-standard VPE trigger.

        gsdict maps gold-standard auxiliaries; gs_sent_list lists sentence
        numbers that contain VPE. When make_file is True the auxiliary
        surface forms are also collected into self.auxiliary_names.
        Side effects: appends to self.aux_bools / self.auxiliary_names and
        increments self.nth_aux / self.missed_sentences.
        """
        # Current position: sentence index and auxiliary index within it.
        crt_sentnum,crt_auxidx = 0,-1
        found_aux = False
        sent_has_vpe = False

        #print gsdict
        #print gs_sent_list

        while crt_sentnum < len(mrgmatrix):

            try:
                old_sentnum = crt_sentnum

                # Reassign the values for the next auxiliary, recursively.
                crt_sentnum,crt_auxidx = nextaux(mrgmatrix, crt_sentnum, crt_auxidx+1)

                if make_file: self.auxiliary_names.append(mrgmatrix[crt_sentnum]['words'][crt_auxidx].lower())

                # This is to check if we missed a GS aux by accident.
                # We only re-check once we have moved on to the next sentence.
                if old_sentnum+1 == crt_sentnum:
                    if sent_has_vpe and not found_aux:

                        auxs = getauxs(mrgmatrix[old_sentnum])
                        # Index of the first aux of the previous sentence
                        # within the flat self.aux_bools list.
                        crt_auxnum_out_of_total = len(self.aux_bools) - len(auxs)

                        # NOTE(review): this unpacking assumes getauxs yields
                        # (index, aux) pairs — confirm against its definition.
                        for idx,aux in auxs:

                            if auxindict(aux, idx, gsdict):
                                # Retroactively flip the earlier entry to True.
                                self.aux_bools[crt_auxnum_out_of_total] = truth(True)
                                found_aux = True
                                print 'RULE 1 Added sentence.\n'

                            crt_auxnum_out_of_total += 1

                        if not found_aux:
                            print 'We missed the sentence below.'#sentence: %d'%old_sentnum
                            printsent(mrgmatrix, old_sentnum)
                            print dict(zip(gs_sent_list,gsdict))
                            print '*',
                            self.missed_sentences += 1

                    # Reset per-sentence state for the new sentence.
                    found_aux = False
                    sent_has_vpe = False

            except TypeError:
                # nextaux signals exhaustion by returning a non-unpackable
                # value; treat it as end of input.
                return

            self.nth_aux += 1

            if crt_sentnum in gs_sent_list:
                sent_has_vpe = True

                if auxandidxindict(mrgmatrix[crt_sentnum]['words'][crt_auxidx], crt_auxidx, gsdict): #idxindict(crt_auxidx, gsdict, gs_sent_list, crt_sentnum):
                    found_aux = True
                    self.aux_bools.append(truth(True))
                else:
                    self.aux_bools.append(truth(False))
            else:
                self.aux_bools.append(truth(False))
Exemple #7
0
def lemmavector(lemma):
    """One-hot-style vector over the WSJ VPE trigger list: entry i is
    truth(True) exactly when *lemma* equals trigger i."""
    return [truth(candidate == lemma) for candidate in DV.VPE_TRIGGERS_IN_WSJ]
Exemple #8
0
def lemmacategoryvector(lemma):
    """Membership vector over DV.ALL_CATEGORIES: one truth() flag per
    category, set when *lemma* is contained in that category."""
    return [truth(lemma in cat) for cat in DV.ALL_CATEGORIES]
Exemple #9
0
def auxwordvector(word, all_auxs):
    """Equality vector over *all_auxs*: one truth() flag per auxiliary,
    set when it matches *word* exactly."""
    return [truth(aux == word) for aux in all_auxs]
def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS,
                        AUX_WORDS):
    """Build a flat feature vector describing VPE trigger *trigger*.

    Features: exact subtree/word-span match, trigger length, a POS-tag
    histogram over the trigger span, then lemma-category, lemma and
    surface-word vectors for the auxiliary itself (via VC helpers).
    """
    vector = []

    trig_words = trigger.get_words()
    subtree = trigger.get_subtree()
    context_idx = 0

    # Locate the trigger word's position inside its own context window;
    # stops at the FIRST occurrence of that surface form.
    for w in trigger.get_context()['words']:
        if w == trig_sentdict['words'][trigger.get_idx()]: break
        context_idx += 1

    # Features 1,2
    vector.append(truth(len(subtree.leaves()) == len(trig_words)))
    vector.append(len(trig_words))

    # Feature set 3.
    # POS-tag counts over the trigger span within its context.
    pos_tags_dict = {}

    for tag in POS_TAGS:
        pos_tags_dict[tag] = 0

    # NOTE(review): slice end is len(trig_words), not
    # context_idx + len(trig_words) — confirm intended span. A tag missing
    # from POS_TAGS would raise KeyError here.
    for tag in trigger.get_context()['pos'][context_idx:len(trig_words)]:
        pos_tags_dict[tag] += 1

    vector += [pos_tags_dict[tag] for tag in pos_tags_dict]

    # Feature sets 4,5,6. Description of the auxiliary.
    vector += VC.lemmacategoryvector(
        trig_sentdict['lemmas'][trigger.get_idx()])
    vector += VC.lemmavector(trig_sentdict['lemmas'][trigger.get_idx()])
    vector += VC.auxwordvector(trig_sentdict['words'][trigger.get_idx()],
                               AUX_WORDS)

    return vector
def trigger_description(trig_sentdict, ant_sentdict, trigger, POS_TAGS, AUX_WORDS):
    """Describe VPE trigger *trigger* as a flat feature vector: span-match
    flag, length, POS histogram over the trigger span, then lemma-category,
    lemma and surface-word descriptions of the auxiliary."""
    words = trigger.get_words()
    tree = trigger.get_subtree()

    # Find where the trigger's surface form first appears in its context.
    ctx_idx = 0
    for token in trigger.get_context()['words']:
        if token == trig_sentdict['words'][trigger.get_idx()]:
            break
        ctx_idx += 1

    # Features 1,2: exact span match and trigger length.
    vec = [truth(len(tree.leaves()) == len(words)), len(words)]

    # Feature set 3: POS-tag histogram over the trigger span.
    counts = dict((tag, 0) for tag in POS_TAGS)
    for tag in trigger.get_context()['pos'][ctx_idx:len(words)]:
        counts[tag] += 1
    vec += [counts[tag] for tag in counts]

    # Feature sets 4,5,6: describe the auxiliary itself.
    vec += VC.lemmacategoryvector(trig_sentdict['lemmas'][trigger.get_idx()])
    vec += VC.lemmavector(trig_sentdict['lemmas'][trigger.get_idx()])
    vec += VC.auxwordvector(trig_sentdict['words'][trigger.get_idx()], AUX_WORDS)

    return vec
Exemple #12
0
def verblocativevector(sentdict, auxidx):
    """Locative features relating the auxiliary at *auxidx* to the other
    verbs in its sentence.

    Features (in order): distance to the closest verb, distance to the
    closest preceding verb, distance to the closest following verb
    (each truth(False) when absent), the number of verbs, and the number
    of auxiliaries in the sentence.

    Bug fix: the "closest previous/following verb" loops computed
    abs(auxidx - i) using the stale loop variable from the first scan
    instead of the current index `idx`, so those two features were wrong.
    """
    vector = []
    verb_locations = []
    num_auxiliaries = 0

    # 99 is the "no verb found" sentinel.
    closest = 99
    for i in range(0, len(sentdict["pos"])):
        if DV.isverb(sentdict["pos"][i]) and not i == auxidx:
            verb_locations.append(i)
            closest = min(closest, abs(auxidx - i))
        if sentdict["lemmas"][i] in DV.VPE_TRIGGERS_IN_WSJ:
            num_auxiliaries += 1

    # The first feature is the distance between the Auxiliary and the closest verb.
    if closest != 99:
        vector.append(closest)
    else:
        vector.append(truth(False))

    # Distance between auxiliary and closest previous verb.
    closest = 99
    for idx in verb_locations:
        if idx < auxidx:
            closest = min(closest, abs(auxidx - idx))  # was abs(auxidx - i): stale index
    if closest != 99:
        vector.append(closest)
    else:
        vector.append(truth(False))

    # Distance between auxiliary and closest following verb.
    closest = 99
    for idx in verb_locations:
        if idx > auxidx:
            closest = min(closest, abs(auxidx - idx))  # was abs(auxidx - i): stale index
    if closest != 99:
        vector.append(closest)
    else:
        vector.append(truth(False))

    # This next feature is the number of verbs in the auxiliary's sentence.
    vector.append(len(verb_locations))

    # This feature is the number of auxiliary's in the sentence.
    vector.append(num_auxiliaries)

    return vector
Exemple #13
0
    def makestructtypevector(sentdict, idx, key_in_sentdict, lst):
        """One-hot vector over *lst* for the value sentdict[key_in_sentdict][idx].

        Words are compared case-insensitively. An out-of-range index yields
        an all-False vector. Only the FIRST matching entry of lst is flagged.

        Bug fix: the `got` flag was never set to True, so every duplicate of
        the matched value in lst was (incorrectly) flagged as well.
        """
        vector = []

        try:
            test_val = sentdict[key_in_sentdict][idx]
            if key_in_sentdict == 'words': test_val = test_val.lower()

        except IndexError:
            # Sentinel guarantees the all-False branch below.
            test_val = '-~NONE~-'

        if test_val == '-~NONE~-':
            for val in lst:
                vector.append(truth(False))

        else:
            got = False
            for val in lst:
                if not got and val == test_val:
                    vector.append(truth(True))
                    got = True  # flag only the first match
                else:
                    vector.append(truth(False))

        return vector
Exemple #14
0
def verblocativevector(sentdict, auxidx):
    """Locative features relating the auxiliary at *auxidx* to the other
    verbs in its sentence: distances to the closest / closest-preceding /
    closest-following verb (truth(False) when absent), the verb count,
    and the auxiliary count.

    Bug fix: the previous/following-verb loops used the stale index `i`
    from the first scan in abs(auxidx - i) instead of `idx`.
    """
    vector = []
    verb_locations = []
    num_auxiliaries = 0

    # 99 is the "no verb found" sentinel.
    closest = 99
    for i in range(0, len(sentdict['pos'])):
        if DV.isverb(sentdict['pos'][i]) and not i == auxidx:
            verb_locations.append(i)
            closest = min(closest, abs(auxidx - i))
        if sentdict['lemmas'][i] in DV.VPE_TRIGGERS_IN_WSJ:
            num_auxiliaries += 1

    # The first feature is the distance between the Auxiliary and the closest verb.
    if closest != 99: vector.append(closest)
    else: vector.append(truth(False))

    # Distance between auxiliary and closest previous verb.
    closest = 99
    for idx in verb_locations:
        if idx < auxidx:
            closest = min(closest, abs(auxidx - idx))  # was abs(auxidx - i): stale index
    if closest != 99: vector.append(closest)
    else: vector.append(truth(False))

    # Distance between auxiliary and closest following verb.
    closest = 99
    for idx in verb_locations:
        if idx > auxidx:
            closest = min(closest, abs(auxidx - idx))  # was abs(auxidx - i): stale index
    if closest != 99: vector.append(closest)
    else: vector.append(truth(False))

    # This next feature is the number of verbs in the auxiliary's sentence.
    vector.append(len(verb_locations))

    # This feature is the number of auxiliary's in the sentence.
    vector.append(num_auxiliaries)

    return vector
Exemple #15
0
    def compare(self,
                gs,
                my_alg,
                end_training_set,
                multiplier=1,
                verbose=False):
        """Score predictions *my_alg* against gold standard *gs*.

        Tallies tp/fp/fn/tn, rescales tp and fn by *multiplier* (to undo
        oversampling), then prints the counts and the f1() scores.
        end_training_set is used to map indices back to auxiliary numbers.
        Exits the process via quit() if the two vectors differ in length.
        """
        results = {'tp': 0, 'fp': 0, 'fn': 0, 'tn': 0}

        if len(gs) != len(my_alg):
            print 'Error -> the vectors are not the same size!'
            print 'GS length %d, comparison length %d' % (len(gs), len(my_alg))
            quit()

        try:
            # Offset of this evaluation slice within the full auxiliary list.
            training_data_length = len(self.getgsdata(-1, end_training_set))
        except KeyError:
            training_data_length = 0

        for i in range(0, len(gs)):
            # print '%dv%d'%(gs[i],my_alg[i]),

            mapped_index = i + training_data_length

            if gs[i] == truth(True) and my_alg[i] == truth(True):
                results['tp'] += 1
                # NOTE(review): `and False` makes this branch dead code —
                # true-positive printing is deliberately disabled.
                if verbose and False:
                    print '\nTrue positive: %s' % self.gold_standard_auxs.auxiliary_names[
                        mapped_index],
                    self.each_sentence.printsentence(
                        self.auxnum_to_sent_map[mapped_index])

            elif gs[i] == truth(True) and my_alg[i] == truth(False):
                results['fn'] += 1
                if verbose:
                    print '\nFalse negative: %s' % self.gold_standard_auxs.auxiliary_names[
                        mapped_index],
                    self.each_sentence.printsentence(
                        self.auxnum_to_sent_map[mapped_index])

            elif gs[i] == truth(False) and my_alg[i] == truth(True):
                results['fp'] += 1
                if verbose:
                    print '\nFalse positive: %s' % self.gold_standard_auxs.auxiliary_names[
                        mapped_index],
                    self.each_sentence.printsentence(
                        self.auxnum_to_sent_map[mapped_index])

            else:
                results['tn'] += 1

        # Undo oversampling on the positive-class counts.
        # NOTE(review): under Python 2 this is integer (floor) division —
        # confirm that truncation is acceptable here.
        for k in results:
            if k in ['tp', 'fn']:
                results[k] /= multiplier

        print results
        scores = f1(results)
        for k in scores:
            print k.capitalize() + ' : %0.2f' % scores[k]
Exemple #16
0
    def oversample(self, start_section, end_section, multiplier):
        new_feature_vector,new_gs_bools = [],[]

        print 'Adding x%d oversample vectors...'%multiplier
        for i in range(0, len(self.getgsdata(start_section, end_section))):
            if self.gold_standard_auxs.aux_bools[i] == truth(True):
                for k in range(0, multiplier):
                    new_feature_vector.append(self.getfeaturevector(i))
                    new_gs_bools.append(self.getgsentry(i))
            else:
                new_feature_vector.append(self.getfeaturevector(i))
                new_gs_bools.append(self.getgsentry(i))

        return new_feature_vector,new_gs_bools
Exemple #17
0
    def oversample(self, start_section, end_section, multiplier):
        """Return (feature_vectors, gs_bools) for the given sections with
        each positive gold-standard example duplicated *multiplier* times.

        Negative examples are kept exactly once; with multiplier == 0,
        positives are dropped entirely.
        """
        new_feature_vector, new_gs_bools = [], []

        print 'Adding x%d oversample vectors...' % multiplier
        for i in range(0, len(self.getgsdata(start_section, end_section))):
            if self.gold_standard_auxs.aux_bools[i] == truth(True):
                # Positive example: emit `multiplier` copies.
                for k in range(0, multiplier):
                    new_feature_vector.append(self.getfeaturevector(i))
                    new_gs_bools.append(self.getgsentry(i))
            else:
                # Negative example: emit a single copy.
                new_feature_vector.append(self.getfeaturevector(i))
                new_gs_bools.append(self.getgsentry(i))

        return new_feature_vector, new_gs_bools
Exemple #18
0
    def makestructtypevector(sentdict, idx, key_in_sentdict, lst):
        """One-hot vector over *lst* for the value sentdict[key_in_sentdict][idx].

        Words are compared case-insensitively; an out-of-range index yields
        an all-False vector. Only the FIRST matching entry of lst is flagged.

        Bug fix: `got` was never set to True, so duplicates of the matched
        value in lst were all (incorrectly) flagged.
        """
        vector = []

        try:
            test_val = sentdict[key_in_sentdict][idx]
            if key_in_sentdict == "words":
                test_val = test_val.lower()

        except IndexError:
            # Sentinel guarantees the all-False branch below.
            test_val = "-~NONE~-"

        if test_val == "-~NONE~-":
            for val in lst:
                vector.append(truth(False))

        else:
            got = False
            for val in lst:
                if not got and val == test_val:
                    vector.append(truth(True))
                    got = True  # flag only the first match
                else:
                    vector.append(truth(False))

        return vector
Exemple #19
0
	def call_graph(self,graph_type,params):
		"""Build data sources for the time window given in *params* and plot
		the requested *graph_type* ("occupancy", "average", "weighted",
		"joined" or "heat").

		params[4][1] / params[5][1] hold the start / end timestamps as
		"dd-mm-yyyy/hh:mm:ss" strings. Side effects: opens a Mongo
		connection, stores helpers and the window on self, draws figures,
		and advances self.fig.
		"""
		self.m = mongo('10.0.0.1', 27017,'rktest','sensors')
		self.a = analysis()
		self.p = plotting()
		self.t = truth('test.txt')
		self.z = zone('zone.txt')
		print params
		# Parse "dd-mm-yyyy/hh:mm:ss" into a datetime (note day/month order).
		dates = params[4][1].split("/")[0].split("-")
		times = params[4][1].split("/")[1].split(":")
		start = datetime.datetime(int(dates[2]),int(dates[1]),int(dates[0]),int(times[0]),int(times[1]),int(times[2]));
		dates = params[5][1].split("/")[0].split("-")
		times = params[5][1].split("/")[1].split(":")
		end = datetime.datetime(int(dates[2]),int(dates[1]),int(dates[0]),int(times[0]),int(times[1]),int(times[2]));
		# Convert to epoch seconds, shifted back 36000s (10h).
		# NOTE(review): presumably a fixed UTC+10 timezone offset — confirm.
		start = time.mktime(start.timetuple())-36000
		end = time.mktime(end.timetuple())-36000
		self.start = start
		self.end = end
		print start
		print end
		self.m_time = self.m.get_array_time(start, end,self.m.get_array())
		if(graph_type == "occupancy"):
			self.p.new(self.fig)
			# NOTE(review): bare except silently swallows ALL errors from the
			# occupancy computation (best-effort plot) — consider narrowing.
			try:
				c = self.a.room_occupency(self.m, 0,start,end)
				self.p.add_line(c[0], c[1], 'b')
				self.p.show_legend()
			except:
				a = 1
			self.p.show()
		if(graph_type == "average"):
			self.p.new(self.fig)
			self.average(params)
			self.p.show_legend()
			self.p.show()
		if(graph_type == "weighted"):
			self.p.new(self.fig)
			self.weighted_vote(params)
			self.p.show_legend()
			self.p.show()
		if(graph_type == "joined"):
			# Overlay both the average and the weighted-vote curves.
			self.p.new(self.fig)
			self.average(params)
			self.weighted_vote(params)
			self.p.show_legend()
			self.p.show()
		if(graph_type == "heat"):
			self.heatmap(params)
		# Next call draws into a fresh figure.
		self.fig = self.fig+1
Exemple #20
0
def testmyrules(classifier, section_start, section_end):
    """Run the hand-written per-auxiliary VPE rules over the auxiliaries in
    sections [section_start, section_end] and compare against the gold
    standard via classifier.compare().

    Each auxiliary is dispatched to the rule for its lexical class
    (modal / be / have / do / to / so); auxiliaries in none of the classes
    produce no prediction.
    """
    gs_vector = classifier.getgsdata(section_start, section_end)

    # Auxiliary-count boundaries of the requested section range.
    aux_start, aux_end = classifier.section_split[
        section_start], classifier.section_split[section_end]

    my_rules_return_vector = []
    count = 0
    for sentdict in classifier.each_sentence.sentences:
        for i in range(0, len(sentdict['lemmas'])):
            word = sentdict['lemmas'][i]
            if isauxiliary(sentdict, i):
                count += 1
                # Only evaluate auxiliaries inside the requested slice.
                if aux_start < count <= aux_end:
                    tree = NT.maketree(sentdict['tree'][0])
                    subtree_positions = NT.get_smallest_subtree_positions(tree)
                    if word in MODALS:
                        my_rules_return_vector.append(
                            truth(
                                modalcheck(sentdict, i, tree,
                                           subtree_positions))
                        )  #Todo: I modified these b/c they were incorrectly written.
                    elif word in BE:
                        my_rules_return_vector.append(
                            truth(becheck(sentdict, i, tree,
                                          subtree_positions)))
                    elif word in HAVE:
                        my_rules_return_vector.append(
                            truth(
                                havecheck(sentdict, i, tree,
                                          subtree_positions)))
                    elif word in DO:
                        my_rules_return_vector.append(
                            truth(docheck(sentdict, i, tree,
                                          subtree_positions)))
                    elif word in TO:
                        my_rules_return_vector.append(
                            truth(tocheck(sentdict, i, tree,
                                          subtree_positions)))
                    elif word in SO:
                        my_rules_return_vector.append(
                            truth(socheck(sentdict, i, tree,
                                          subtree_positions)))

    classifier.compare(gs_vector,
                       my_rules_return_vector,
                       section_start - 1,
                       verbose=False)
Exemple #21
0
    def compare(self, gs, my_alg, end_training_set, multiplier=1, verbose=False):
        """Score predictions *my_alg* against gold standard *gs*.

        Tallies tp/fp/fn/tn, rescales tp and fn by *multiplier* (to undo
        oversampling), then prints the counts and the f1() scores.
        Exits the process via quit() if the vectors differ in length.
        """
        results = {'tp': 0, 'fp': 0, 'fn': 0, 'tn':0}

        if len(gs) != len(my_alg):
            print 'Error -> the vectors are not the same size!'
            print 'GS length %d, comparison length %d'%(len(gs), len(my_alg))
            quit()

        try:
            # Offset of this evaluation slice within the full auxiliary list.
            training_data_length = len(self.getgsdata(-1,end_training_set))
        except KeyError:
            training_data_length = 0

        for i in range(0, len(gs)):
            # print '%dv%d'%(gs[i],my_alg[i]),

            mapped_index = i+training_data_length

            if gs[i] == truth(True) and my_alg[i] == truth(True):
                results['tp'] += 1
                # NOTE(review): `and False` makes this branch dead code —
                # true-positive printing is deliberately disabled.
                if verbose and False:
                    print '\nTrue positive: %s'%self.gold_standard_auxs.auxiliary_names[mapped_index],
                    self.each_sentence.printsentence(self.auxnum_to_sent_map[mapped_index])

            elif gs[i] == truth(True) and my_alg[i] == truth(False):
                results['fn'] += 1
                if verbose:
                    print '\nFalse negative: %s'%self.gold_standard_auxs.auxiliary_names[mapped_index],
                    self.each_sentence.printsentence(self.auxnum_to_sent_map[mapped_index])

            elif gs[i] == truth(False) and my_alg[i] == truth(True):
                results['fp'] += 1
                if verbose:
                    print '\nFalse positive: %s'%self.gold_standard_auxs.auxiliary_names[mapped_index],
                    self.each_sentence.printsentence(self.auxnum_to_sent_map[mapped_index])

            else:
                results['tn'] += 1

        # Undo oversampling on the positive-class counts.
        # NOTE(review): integer (floor) division under Python 2 — confirm.
        for k in results:
            if k in ['tp', 'fn']:
                results[k] /= multiplier

        print results
        scores = f1(results)
        for k in scores:
            print k.capitalize()+' : %0.2f' %scores[k]
Exemple #22
0
def makeposbigramsvector(sentdict, auxidx, postags, combine=False):
    """Binary features for the POS bigrams in a 6-token window around the
    auxiliary at *auxidx* (tokens auxidx-3 .. auxidx+2, each paired with
    its successor).

    With combine=False, each window position emits a full one-hot vector
    over all len(postags)**2 bigrams. With combine=True, matches are
    collected as indices and a single combined vector is emitted, where a
    repeated bigram contributes truth(True) * its occurrence count.
    """
    vector = []
    true_idxs = []

    for i in range(auxidx - 3, auxidx + 3):
        try:
            crtpos = sentdict["pos"][i]
            nextpos = sentdict["pos"][i + 1]

        except IndexError:
            # Window position falls off the sentence: emit an all-False
            # one-hot slot (nothing is recorded in combine mode).
            # NOTE(review): negative i wraps around instead of raising —
            # confirm that is intended for windows near the sentence start.
            for p1 in postags:
                for p2 in postags:
                    if not combine:
                        vector.append(truth(False))
            continue

        got = False
        count = 0
        for k in range(0, len(postags)):
            for j in range(0, len(postags)):
                count += 1
                if not got:
                    if crtpos == postags[k] and nextpos == postags[j]:
                        if not combine:
                            vector.append(truth(True))
                        else:
                            # NOTE(review): count is incremented BEFORE being
                            # recorded, so these indices are 1-based while the
                            # combine loop below tests 0-based i — possible
                            # off-by-one; confirm intent.
                            true_idxs.append(count)

                        got = True
                    else:
                        if not combine:
                            vector.append(truth(False))
                else:
                    if not combine:
                        vector.append(truth(False))

    if combine:
        # NOTE(review): (len-1)**2 does not match the len(postags)**2 bigram
        # space enumerated above — confirm whether the last tag is meant to
        # be excluded here.
        length_of_bigrams_set = (len(postags) - 1) ** 2
        for i in range(0, length_of_bigrams_set):
            if i in true_idxs:
                vector.append(truth(True) * true_idxs.count(i))
            else:
                vector.append(truth(False))

    return vector
Exemple #23
0
def makeposbigramsvector(sentdict, auxidx, postags, combine=False):
    """Binary features for the POS bigrams in a 6-token window around the
    auxiliary at *auxidx* (tokens auxidx-3 .. auxidx+2, paired with their
    successors).

    combine=False emits a one-hot vector over all len(postags)**2 bigrams
    per window position; combine=True emits one combined vector where a
    repeated bigram contributes truth(True) * its occurrence count.
    """
    vector = []
    true_idxs = []

    for i in range(auxidx - 3, auxidx + 3):
        try:
            crtpos = sentdict['pos'][i]
            nextpos = sentdict['pos'][i + 1]

        except IndexError:
            # Off-sentence window position: all-False one-hot slot.
            # NOTE(review): negative i wraps around instead of raising.
            for p1 in postags:
                for p2 in postags:
                    if not combine:
                        vector.append(truth(False))
            continue

        got = False
        count = 0
        for k in range(0, len(postags)):
            for j in range(0, len(postags)):
                count += 1
                if not got:
                    if crtpos == postags[k] and nextpos == postags[j]:
                        if not combine:
                            vector.append(truth(True))
                        else:
                            # NOTE(review): count is 1-based here but the
                            # combine loop below tests 0-based indices —
                            # possible off-by-one; confirm intent.
                            true_idxs.append(count)

                        got = True
                    else:
                        if not combine: vector.append(truth(False))
                else:
                    if not combine: vector.append(truth(False))

    if combine:
        # NOTE(review): (len-1)**2 does not match the len(postags)**2 bigram
        # space enumerated above — confirm.
        length_of_bigrams_set = (len(postags) - 1)**2
        for i in range(0, length_of_bigrams_set):
            if i in true_idxs:
                vector.append(truth(True) * true_idxs.count(i))
            else:
                vector.append(truth(False))

    return vector
Exemple #24
0
def testmyrules(classifier, section_start, section_end):
    """Run the hand-written per-auxiliary VPE rules over the auxiliaries in
    sections [section_start, section_end] and compare the predictions
    against the gold standard via classifier.compare()."""
    gs_vector = classifier.getgsdata(section_start, section_end)

    # Auxiliary-count boundaries of the requested section range.
    aux_start = classifier.section_split[section_start]
    aux_end = classifier.section_split[section_end]

    predictions = []
    seen = 0
    for sentdict in classifier.each_sentence.sentences:
        for i in range(0, len(sentdict['lemmas'])):
            lemma = sentdict['lemmas'][i]
            if not isauxiliary(sentdict, i):
                continue
            seen += 1
            if not (aux_start < seen <= aux_end):
                continue
            tree = NT.maketree(sentdict['tree'][0])
            positions = NT.get_smallest_subtree_positions(tree)
            # Dispatch to the rule for this auxiliary's lexical class;
            # first matching class wins, unknown classes emit nothing.
            for lexicon, check in ((MODALS, modalcheck), (BE, becheck),
                                   (HAVE, havecheck), (DO, docheck),
                                   (TO, tocheck), (SO, socheck)):
                if lemma in lexicon:
                    predictions.append(truth(check(sentdict, i, tree, positions)))
                    break

    classifier.compare(gs_vector, predictions, section_start-1, verbose=False)
def alignment_comparison(trig_sentdict, ant_sentdict, ant, trigger,
                         word2vec_dict):
    """Features comparing a candidate antecedent *ant* to its trigger.

    Covers: shared auxiliaries, relative position, lexical overlap of the
    two context windows, word/sentence distance, word2vec NP similarity
    (angle + raw averaged vectors), post-head similarity, a sentenial-
    complement check, and counts of phrase types between the two spans.
    """
    vector = []

    ant_context_sentdict = ant.get_context()
    trig_context_sentdict = trigger.get_context()

    ant_head_idx = get_antecedent_head_index(ant_sentdict, ant)

    # Feature 1.
    # Does any auxiliary lemma of the antecedent sentence also occur in
    # the trigger sentence?
    ant_auxs = []
    for i in range(0, len(ant_sentdict['words'])):
        if DV.isauxiliary(ant_sentdict, i):
            ant_auxs.append(ant_sentdict['lemmas'][i])

    found = False
    for aux in ant_auxs:
        if aux in trig_sentdict['lemmas']:
            vector.append(truth(True))
            found = True
            break

    if not found:
        vector.append(truth(False))

    # Feature 2.
    # Relative order of antecedent head vs. trigger (same sentence only).
    if ant.get_sentnum() == trigger.get_sentnum():
        vector.append(truth(ant_head_idx > trigger.get_idx()))
        vector.append(truth(ant_head_idx == trigger.get_idx()))
        vector.append(truth(ant_head_idx < trigger.get_idx()))
    else:
        vector += [0, 0, 0]

    # Features 3,4,5.
    # Lexical overlap (count + Dice coefficient) between the two context
    # windows, over words, lemmas and POS tags.
    for k in ['words', 'lemmas', 'pos']:
        total = len(ant_context_sentdict[k]) + len(trig_context_sentdict[k])
        common = len(
            set(ant_context_sentdict[k]).intersection(
                trig_context_sentdict[k]))
        vector.append(common)
        vector.append((2.0 * float(common)) / float(total))

    # Feature 6 - number of words between trigger and antecedent.
    vector.append(ant.get_sentnum() - trigger.get_sentnum())
    if ant.get_sentnum() == trigger.get_sentnum():
        vector.append(ant_head_idx - trigger.get_idx())
    else:
        crt_sentnum = trigger.get_sentnum()
        distance = ant_head_idx
        # NOTE(review): this adds the TRIGGER sentence's length for every
        # intervening sentence, not each sentence's own length — confirm
        # the approximation is intended.
        while crt_sentnum < ant.get_sentnum():
            distance += len(trig_sentdict['words'])
            crt_sentnum += 1
        vector.append(distance)

    # Feature 7.
    # First we get the vecs from the Ant NP and average them.
    blank_np = False

    ant_np_word2vec = []
    ant_np_location = ant.get_context()['np']

    # (-1, -1) marks "no NP found" in the context.
    if ant_np_location != (-1, -1):
        ant_np_word2vec = get_average_np_vec(word2vec_dict, ant_sentdict,
                                             ant_np_location[0],
                                             ant_np_location[1])
    else:
        blank_np = True

    # Next we do the same for the Trigger NP.
    trig_np_word2vec = []
    trig_np_location = trigger.get_context()['np']

    if trig_np_location != (-1, -1):
        trig_np_word2vec = get_average_np_vec(word2vec_dict, trig_sentdict,
                                              trig_np_location[0],
                                              trig_np_location[1])
    else:
        blank_np = True

    # Adding the angle of the vector between the trigger NP and antecedent NP.
    if not blank_np:
        ant_length = vector_length(ant_np_word2vec)
        trig_length = vector_length(trig_np_word2vec)
        try:
            angle = angle_btwn_vectors(ant_np_word2vec,
                                       trig_np_word2vec,
                                       v1_length=ant_length,
                                       v2_length=trig_length)
        except ValueError:
            # Degenerate vectors: fall back to a neutral 90-degree angle.
            angle = 90.0
        vector.append(angle)
        vector.append(truth(angle == 0.0))
    else:
        # Missing NP on either side: neutral angle, flag always False.
        vector.append(90.0)
        vector.append(truth(90.0 == 0.0))

    # Pad with zeros when an averaged NP vector is unavailable, so the
    # feature vector keeps a fixed length.
    if not ant_np_word2vec:
        vector += [0 for _ in range(0, WORD2VEC_LENGTH)]
    else:
        vector += ant_np_word2vec
    if not trig_np_word2vec:
        vector += [0 for _ in range(0, WORD2VEC_LENGTH)]
    else:
        vector += trig_np_word2vec

    # Now for what comes after the head.
    ant_head_idx = get_antecedent_head_index(ant_sentdict, ant)
    ant_post_head_w2vec = get_average_np_vec(word2vec_dict, ant_sentdict,
                                             ant_head_idx,
                                             len(ant_sentdict['words']))

    # if not ant_post_head_w2vec: vector += [0 for i in range(0,WORD2VEC_LENGTH)]
    # else: vector += ant_post_head_w2vec

    # Post-trigger span runs up to the first punctuation lemma.
    stop_idx = len(trig_sentdict['words'])
    for i in range(trigger.get_idx(), len(trig_sentdict['words'])):
        if DV.ispunctuation(trig_sentdict['lemmas'][i]):
            stop_idx = i
            break

    post_trig_w2vec = get_average_np_vec(word2vec_dict, trig_sentdict,
                                         trigger.get_idx(), stop_idx)

    # if not post_trig_w2vec: vector += [0 for i in range(0,WORD2VEC_LENGTH)]
    # else: vector += post_trig_w2vec

    # Angle between post-head and post-trigger averaged vectors.
    if ant_post_head_w2vec and post_trig_w2vec:
        try:
            post_angle = angle_btwn_vectors(ant_post_head_w2vec,
                                            post_trig_w2vec)
        except ValueError:
            post_angle = 90.0
        vector.append(post_angle)
        vector.append(truth(post_angle == 0.0))
    else:
        vector.append(90.0)
        vector.append(truth(90.0 == 0.0))

    # Sentenial complement check.
    tree = NT.maketree(ant_sentdict['tree'][0])
    if NT.dominates(tree, ant.get_subtree(), trigger.get_subtree()):
        vector.append(
            truth(
                NT.has_phrases_between_trees(
                    ant.get_subtree(), trigger.get_subtree(),
                    NIELSON_SENTENIAL_COMPLEMENT_PHRASES)))
    else:
        vector.append(truth(False))

    # Features to account for the number of each phrase type between the antecedent and trigger.
    phrases_between = [0 for _ in ALL_PHRASES]

    # Only computed when both spans share a sentence; each phrase type is
    # flagged at most once (0/1), despite the += 1.
    if ant.get_sentnum() == trigger.get_sentnum():
        for i in range(0, len(phrases_between)):
            if NT.has_phrases_between_trees(ant.get_subtree(),
                                            trigger.get_subtree(),
                                            [ALL_PHRASES[i]]):
                phrases_between[i] += 1

    vector += phrases_between
    vector.append(sum(phrases_between))

    return vector
Exemple #26
0
# Ad-hoc analysis script: pull the last 800 seconds of sensor data from
# Mongo, parse it against the ground-truth table, and compute per-node
# collective-average and weighted-vote signals.
from mongo import *
from truth import *
from analysis import *
from plotting import *
import pylab

t = truth('test.txt')
m = mongo('10.0.0.1', 27017,'rktest','sensors')
a = analysis()
p = plotting()

master = m.get_array()
#print m.get_nodes()
# Restrict the data to the final 800-second window.
end_time = a.get_time_bounds(master)[1]
start_time = end_time-800
master = m.get_array_time(start_time,end_time,master)
#print m.get_array_time(0,end_time,master)
#print m.get_sensortype(master)
#print m.get_sensortype_activation('VC',master)
#print m.get_sensortype_activation('ACCL',master) 
#print t.get_table()
tmaster = t.parse_raw(master)
i = 0
# Per-node analysis over the windowed data.
for d in m.get_nodes():
	print d
	col = a.collective_average(m,t,{d:master[d]})
	wv = a.weighted_vote(m,t,{d:master[d]})
	#print col
	#print wv
	pos = []
	neg = []
def alignment_comparison(trig_sentdict, ant_sentdict, ant, trigger, word2vec_dict):
    """Build a numeric feature vector comparing a candidate antecedent with
    its VPE trigger.

    Features, in order: shared auxiliary lemma; relative position of the
    antecedent head vs. the trigger; word/lemma/POS context overlap; sentence
    and word distance; word2vec NP angles and raw averaged NP embeddings;
    post-head vs. post-trigger embedding angle; a sentenial-complement check;
    and per-phrase-type counts between antecedent and trigger.

    Args:
        trig_sentdict: sentence dict ('words'/'lemmas'/'pos'/'tree'/...) for
            the trigger's sentence.
        ant_sentdict: sentence dict for the antecedent's sentence.
        ant: antecedent object (get_context/get_sentnum/get_subtree/...).
        trigger: trigger object (get_context/get_idx/get_sentnum/...).
        word2vec_dict: word -> embedding lookup used by get_average_np_vec.

    Returns:
        A flat list of truth() flags, counts, angles, and raw embedding
        components. The ordering is load-bearing for downstream classifiers.
    """
    vector = []

    ant_context_sentdict = ant.get_context()
    trig_context_sentdict = trigger.get_context()

    ant_head_idx = get_antecedent_head_index(ant_sentdict, ant)

    # Feature 1: does any auxiliary lemma from the antecedent's sentence also
    # appear among the trigger sentence's lemmas?
    ant_auxs = []
    for i in range(0,len(ant_sentdict['words'])):
        if DV.isauxiliary(ant_sentdict, i):
            ant_auxs.append(ant_sentdict['lemmas'][i])

    found = False
    for aux in ant_auxs:
        if aux in trig_sentdict['lemmas']:
            vector.append(truth(True))
            found = True
            break

    if not found:
        vector.append(truth(False))

    # Feature 2: relative position of the antecedent head vs. the trigger —
    # only meaningful within the same sentence (three zeros otherwise).
    if ant.get_sentnum() == trigger.get_sentnum():
        vector.append(truth(ant_head_idx > trigger.get_idx()))
        vector.append(truth(ant_head_idx == trigger.get_idx()))
        vector.append(truth(ant_head_idx < trigger.get_idx()))
    else: vector += [0,0,0]

    # Features 3,4,5: raw and Dice-style normalized overlap between the
    # antecedent and trigger contexts, over words, lemmas, and POS tags.
    for k in ['words','lemmas','pos']:
        total = len(ant_context_sentdict[k])+len(trig_context_sentdict[k])
        common = len(set(ant_context_sentdict[k]).intersection(trig_context_sentdict[k]))
        vector.append(common)
        vector.append((2.0*float(common))/float(total))

    # Feature 6 - sentence distance, then word distance between trigger and
    # antecedent head.
    vector.append(ant.get_sentnum()-trigger.get_sentnum())
    if ant.get_sentnum() == trigger.get_sentnum(): vector.append(ant_head_idx - trigger.get_idx())
    else:
        # NOTE(review): the cross-sentence distance adds the TRIGGER
        # sentence's length once per intervening sentence, rather than each
        # intervening sentence's own length — looks like an approximation
        # (or a bug); confirm intent before changing.
        crt_sentnum = trigger.get_sentnum()
        distance = ant_head_idx
        while crt_sentnum < ant.get_sentnum():
            distance += len(trig_sentdict['words'])
            crt_sentnum += 1
        vector.append(distance)

    # Feature 7: word2vec comparison of the antecedent NP vs. the trigger NP.
    # First we get the vecs from the Ant NP and average them.
    blank_np = False

    ant_np_word2vec = []
    ant_np_location = ant.get_context()['np']

    # (-1,-1) marks a missing NP span in the context dict.
    if ant_np_location != (-1,-1):
        ant_np_word2vec = get_average_np_vec(word2vec_dict, ant_sentdict, ant_np_location[0], ant_np_location[1])
    else: blank_np = True

    # Next we do the same for the Trigger NP.
    trig_np_word2vec = []
    trig_np_location = trigger.get_context()['np']

    if trig_np_location != (-1,-1):
        trig_np_word2vec = get_average_np_vec(word2vec_dict, trig_sentdict, trig_np_location[0], trig_np_location[1])
    else: blank_np = True

    # Adding the angle of the vector between the trigger NP and antecedent NP.
    # A missing NP — or a zero-length vector, surfacing as ValueError —
    # defaults to 90 degrees, i.e. "orthogonal / unrelated".
    if not blank_np:
        ant_length = vector_length(ant_np_word2vec)
        trig_length = vector_length(trig_np_word2vec)
        try:
            angle = angle_btwn_vectors(ant_np_word2vec, trig_np_word2vec, v1_length=ant_length, v2_length=trig_length)
        except ValueError:
            angle = 90.0

        vector.append(angle)
        vector.append(truth(angle == 0.0))
    else:
        vector.append(90.0)
        vector.append(truth(90.0 == 0.0))

    # The raw averaged NP embeddings themselves, zero-padded when missing so
    # the overall vector length stays fixed.
    if not ant_np_word2vec:
        vector += [0 for _ in range(0,WORD2VEC_LENGTH)]
    else:
        vector += ant_np_word2vec
    if not trig_np_word2vec:
        vector += [0 for _ in range(0,WORD2VEC_LENGTH)]
    else:
        vector += trig_np_word2vec

    # Now for what comes after the head.
    # NOTE(review): ant_head_idx was already computed above; this
    # recomputation is redundant (kept byte-identical here).
    ant_head_idx = get_antecedent_head_index(ant_sentdict, ant)
    ant_post_head_w2vec = get_average_np_vec(word2vec_dict, ant_sentdict, ant_head_idx, len(ant_sentdict['words']))

    # if not ant_post_head_w2vec: vector += [0 for i in range(0,WORD2VEC_LENGTH)]
    # else: vector += ant_post_head_w2vec

    # Average everything from the trigger up to (not including) the next
    # punctuation lemma.
    stop_idx = len(trig_sentdict['words'])
    for i in range(trigger.get_idx(), len(trig_sentdict['words'])):
        if DV.ispunctuation(trig_sentdict['lemmas'][i]):
            stop_idx = i
            break

    post_trig_w2vec = get_average_np_vec(word2vec_dict, trig_sentdict, trigger.get_idx(), stop_idx)

    # if not post_trig_w2vec: vector += [0 for i in range(0,WORD2VEC_LENGTH)]
    # else: vector += post_trig_w2vec

    # Angle between the post-head and post-trigger averaged embeddings, with
    # the same 90-degree fallback as above.
    if ant_post_head_w2vec and post_trig_w2vec:
        try:
            post_angle = angle_btwn_vectors(ant_post_head_w2vec, post_trig_w2vec)
        except ValueError: post_angle = 90.0
        vector.append(post_angle)
        vector.append(truth(post_angle == 0.0))
    else:
        vector.append(90.0)
        vector.append(truth(90.0 == 0.0))

    # Sentenial complement check: only applies when the antecedent subtree
    # dominates the trigger subtree within the antecedent sentence's tree.
    tree = NT.maketree(ant_sentdict['tree'][0])
    if NT.dominates(tree, ant.get_subtree(), trigger.get_subtree()):
        vector.append(truth( NT.has_phrases_between_trees(ant.get_subtree(), trigger.get_subtree(), NIELSON_SENTENIAL_COMPLEMENT_PHRASES)))
    else:
        vector.append(truth(False))

    # Features to account for the number of each phrase type between the antecedent and trigger.
    phrases_between = [0 for _ in ALL_PHRASES]

    if ant.get_sentnum() == trigger.get_sentnum():
        for i in range(0,len(phrases_between)):
            if NT.has_phrases_between_trees(ant.get_subtree(), trigger.get_subtree(), [ALL_PHRASES[i]]):
                phrases_between[i] += 1

    vector += phrases_between
    vector.append(sum(phrases_between))

    return vector
Exemple #28
0
def myfeaturesvector(sentdict, idx, features):
    """Build the truth-encoded feature vector for the auxiliary at ``idx``.

    The ``features`` collection selects which groups are emitted:
    'my_features' (syntactic c-command / context checks), 'my_rules'
    (per-auxiliary-class rule checks), 'square_rules' (pairwise conjunctions
    of everything emitted so far), and 'combine_aux_type' (each feature
    conjoined with the auxiliary's class membership).

    Args:
        sentdict: sentence dict with 'tree', 'words' and 'lemmas'.
        idx: index of the auxiliary word within the sentence.
        features: collection of feature-group names to include.

    Returns:
        list of truth()-encoded feature values; ordering is fixed because
        downstream consumers address features by position.
    """
    vector = []

    tree = NT.maketree(sentdict['tree'][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(
        tree, subtree_list=subtrees)
    aux = sentdict['lemmas'][idx]

    if 'my_features' in features:
        # Syntactic/context checks in fixed order (order defines the feature
        # indices downstream).
        checks = [
            DV.auxccommandsverb(sentdict, idx, tree, subtree_positions),
            DV.auxccommandsverbthatcomesafter(sentdict, idx, tree,
                                              subtree_positions),
            DV.auxisccommandedbyverb(sentdict, idx, tree, subtree_positions),
            DV.auxislocallyccommandedbyverb(sentdict, idx, tree,
                                            subtree_positions),
            DV.auxlocallyccommandsverb(sentdict, idx, tree,
                                       subtree_positions),
            DV.isccommandedbycontinuationword(sentdict, idx, tree,
                                              subtree_positions),
            DV.nexttopunct(sentdict, idx, tree, subtree_positions),
            DV.isfollowedbypunct(sentdict, idx, end=['.']),
            DV.previouswordisasorsoorthan(sentdict['words'], idx),
            DV.thesamecheck(sentdict['words'], idx),
            DV.toprecedesaux(sentdict, idx),
            DV.verbfollowsaux(sentdict, idx),
            DV.nextwordistoo(sentdict, idx),  # newer feature, kept last
        ]
        vector.extend(truth(check) for check in checks)

    if 'my_rules' in features:
        # One rule per auxiliary class: fires only when the lemma belongs to
        # the class AND the class-specific check passes.
        for lemma_class, rule in ((DV.MODALS, DV.modalcheck),
                                  (DV.BE, DV.becheck),
                                  (DV.HAVE, DV.havecheck),
                                  (DV.DO, DV.docheck),
                                  (DV.TO, DV.tocheck),
                                  (DV.SO, DV.socheck)):
            vector.append(
                truth(aux in lemma_class
                      and rule(sentdict, idx, tree, subtree_positions)))

    if 'square_rules' in features:
        # Second-order layer: conjunction of every ordered pair of distinct
        # features emitted so far.
        size = len(vector)
        for first in range(size):
            for second in range(size):
                if first != second:
                    vector.append(
                        truth(untruth(vector[first])
                              and untruth(vector[second])))

    if 'combine_aux_type' in features:
        # Cross every feature so far with the auxiliary's class membership.
        memberships = [aux in DV.MODALS, aux in DV.BE, aux in DV.HAVE,
                       aux in DV.DO, aux in DV.TO, aux in DV.SO]
        for value in list(vector):
            for member in memberships:
                vector.append(truth(untruth(value) and member))

    return vector
Exemple #29
0
 def addsentences(self, gsvpelist, size):
     """Append one truth flag per sentence index 0..size-1: True iff the
     index appears in the gold-standard VPE list ``gsvpelist``."""
     self.sentence_booleans.extend(
         truth(i in gsvpelist) for i in range(size))
Exemple #30
0
 def addsentences(self, gsvpelist, size):
     """Record, for each of the first ``size`` sentences, whether it appears
     in the gold-standard VPE sentence list ``gsvpelist``."""
     flags = [truth(sent_idx in gsvpelist) for sent_idx in range(size)]
     self.sentence_booleans += flags
Exemple #31
0
def myfeaturesvector(sentdict, idx, features):
    """Compute truth-encoded features for the auxiliary at position ``idx``.

    ``features`` toggles four groups: "my_features" (c-command/context
    checks), "my_rules" (per-auxiliary-class rules), "square_rules"
    (pairwise conjunctions of earlier features) and "combine_aux_type"
    (features crossed with auxiliary-class membership). Ordering is fixed:
    consumers address features by position.
    """
    vector = []

    tree = NT.maketree(sentdict["tree"][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(tree, subtree_list=subtrees)
    aux = sentdict["lemmas"][idx]

    def emit(flag):
        # Append the project-standard truth encoding of a boolean feature.
        vector.append(truth(flag))

    if "my_features" in features:
        emit(DV.auxccommandsverb(sentdict, idx, tree, subtree_positions))
        emit(DV.auxccommandsverbthatcomesafter(sentdict, idx, tree, subtree_positions))
        emit(DV.auxisccommandedbyverb(sentdict, idx, tree, subtree_positions))
        emit(DV.auxislocallyccommandedbyverb(sentdict, idx, tree, subtree_positions))
        emit(DV.auxlocallyccommandsverb(sentdict, idx, tree, subtree_positions))
        emit(DV.isccommandedbycontinuationword(sentdict, idx, tree, subtree_positions))
        emit(DV.nexttopunct(sentdict, idx, tree, subtree_positions))
        emit(DV.isfollowedbypunct(sentdict, idx, end=["."]))
        emit(DV.previouswordisasorsoorthan(sentdict["words"], idx))
        emit(DV.thesamecheck(sentdict["words"], idx))
        emit(DV.toprecedesaux(sentdict, idx))
        emit(DV.verbfollowsaux(sentdict, idx))
        # Newer feature kept last so the earlier indices stay stable.
        emit(DV.nextwordistoo(sentdict, idx))

    if "my_rules" in features:
        emit(aux in DV.MODALS and DV.modalcheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.BE and DV.becheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.HAVE and DV.havecheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.DO and DV.docheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.TO and DV.tocheck(sentdict, idx, tree, subtree_positions))
        emit(aux in DV.SO and DV.socheck(sentdict, idx, tree, subtree_positions))

    if "square_rules" in features:
        # Conjunction of every ordered pair of distinct features so far.
        base_size = len(vector)
        for a in range(base_size):
            for b in range(base_size):
                if a != b:
                    emit(untruth(vector[a]) and untruth(vector[b]))

    if "combine_aux_type" in features:
        # Cross every feature so far with the auxiliary's class membership.
        class_flags = (aux in DV.MODALS, aux in DV.BE, aux in DV.HAVE,
                       aux in DV.DO, aux in DV.TO, aux in DV.SO)
        for prior in vector[:]:
            for in_class in class_flags:
                emit(untruth(prior) and in_class)

    return vector
Exemple #32
0
def counttruth(vector):
    """Return the number of entries in ``vector`` equal to truth(True)."""
    positive = truth(True)
    return sum(1 for val in vector if val == positive)
Exemple #33
0
def counttruth(vector):
    """Count how many feature values carry the positive truth encoding."""
    return sum(entry == truth(True) for entry in vector)
Exemple #34
0
    def addauxs(self, mrgmatrix, gsdict, gs_sent_list, make_file=False):
        """Walk every auxiliary in ``mrgmatrix`` and append one truth flag per
        auxiliary to ``self.aux_bools`` (True iff it matches a gold-standard
        VPE trigger), back-patching sentences the primary check missed.

        Args:
            mrgmatrix: list of sentence dicts (each with a 'words' list).
            gsdict: gold-standard mapping consumed by auxindict /
                auxandidxindict.
            gs_sent_list: sentence numbers known to contain VPE.
            make_file: when True, also record each auxiliary's lowercased
                surface form in ``self.auxiliary_names``.

        Side effects: mutates ``self.aux_bools``, ``self.auxiliary_names``,
        ``self.nth_aux`` and ``self.missed_sentences``; prints diagnostics
        (this is Python 2 ``print``-statement code).
        """
        crt_sentnum, crt_auxidx = 0, -1
        found_aux = False
        sent_has_vpe = False

        #print gsdict
        #print gs_sent_list

        while crt_sentnum < len(mrgmatrix):

            try:
                old_sentnum = crt_sentnum

                # Reassign the values for the next auxiliary, recursively.
                # NOTE(review): nextaux presumably returns a non-unpackable
                # value past the last auxiliary, producing the TypeError
                # caught below as the loop's exit path — confirm.
                crt_sentnum, crt_auxidx = nextaux(mrgmatrix, crt_sentnum,
                                                  crt_auxidx + 1)

                if make_file:
                    self.auxiliary_names.append(
                        mrgmatrix[crt_sentnum]['words'][crt_auxidx].lower())

                # This is to check if we missed a GS aux by accident: when we
                # just crossed one sentence boundary and the sentence we left
                # had VPE but no flagged aux, re-scan its auxiliaries.
                if old_sentnum + 1 == crt_sentnum:
                    if sent_has_vpe and not found_aux:

                        auxs = getauxs(mrgmatrix[old_sentnum])
                        # Position of the previous sentence's first aux
                        # within the flat aux_bools list.
                        crt_auxnum_out_of_total = len(
                            self.aux_bools) - len(auxs)

                        for idx, aux in auxs:

                            if auxindict(aux, idx, gsdict):
                                self.aux_bools[
                                    crt_auxnum_out_of_total] = truth(True)
                                found_aux = True
                                print 'RULE 1 Added sentence.\n'

                            crt_auxnum_out_of_total += 1

                        # Still nothing matched: count the sentence as missed.
                        if not found_aux:
                            print 'We missed the sentence below.'  #sentence: %d'%old_sentnum
                            printsent(mrgmatrix, old_sentnum)
                            print dict(zip(gs_sent_list, gsdict))
                            print '*',
                            self.missed_sentences += 1

                    # Reset per-sentence state after handling the boundary.
                    found_aux = False
                    sent_has_vpe = False

            except TypeError:
                # No further auxiliaries to visit — normal termination.
                return

            self.nth_aux += 1

            # Primary check: flag this aux iff its sentence contains VPE and
            # the (word, index) pair matches the gold standard.
            if crt_sentnum in gs_sent_list:
                sent_has_vpe = True

                if auxandidxindict(
                        mrgmatrix[crt_sentnum]['words'][crt_auxidx],
                        crt_auxidx, gsdict
                ):  #idxindict(crt_auxidx, gsdict, gs_sent_list, crt_sentnum):
                    found_aux = True
                    self.aux_bools.append(truth(True))
                else:
                    self.aux_bools.append(truth(False))
            else:
                self.aux_bools.append(truth(False))
Exemple #35
0
 def number_of_positive_auxs(self):
     """Return how many entries in ``aux_bools`` are marked positive."""
     positive = truth(True)
     return sum(1 for flag in self.aux_bools if flag == positive)
Exemple #36
0
 def number_of_positive_auxs(self):
     """Count the auxiliary flags recorded as true (VPE) instances."""
     return len([flag for flag in self.aux_bools if flag == truth(True)])