Esempio n. 1
0
def linguistic_features_vector(sentdict, aux, features):
    """Assemble the integer feature vector for one auxiliary.

    Args:
        sentdict: sentence object; must provide ``get_nltk_tree()``.
        aux: auxiliary object; its ``type`` attribute is read when
            'combine_aux_type' is requested.
        features: container tested with ``in`` for the group names
            'my_features', 'my_rules', 'old_rules', 'square_rules' and
            'combine_aux_type'.

    Returns:
        list: the requested base groups in the order listed above, followed
        by the derived 'square_rules' and 'combine_aux_type' entries.
    """
    tree = sentdict.get_nltk_tree()
    subtree_positions = nt.get_smallest_subtree_positions(tree)
    vector = []

    # Base groups: every builder receives the same four arguments.
    if 'my_features' in features:
        vector.extend(
            my_features_vector(sentdict, aux, tree, subtree_positions))

    if 'my_rules' in features:
        vector.extend(my_rules_vector(sentdict, aux, tree, subtree_positions))

    if 'old_rules' in features:
        vector.extend(
            old_rules_vector(sentdict, aux, tree, subtree_positions))

    if 'square_rules' in features:
        # Conjunction of every unordered pair of the entries gathered so far.
        # A snapshot is read so the entries appended here are never paired.
        base = list(vector)
        width = len(base)
        for first in range(width):
            for second in range(first + 1, width):
                both = int_to_bool(base[first]) and int_to_bool(base[second])
                vector.append(bool_to_int(both))

    if 'combine_aux_type' in features:
        # Conjunction of every entry gathered so far (including any
        # 'square_rules' entries) with each of the six type indicators.
        aux_type = aux.type
        type_flags = [
            aux_type == name
            for name in ('modal', 'be', 'have', 'do', 'to', 'so')
        ]
        for value in list(vector):
            for flag in type_flags:
                vector.append(bool_to_int(flag and int_to_bool(value)))

    return vector
Esempio n. 2
0
    def test_rules(self, train_auxs):
        """Apply the hand-written rule matching each auxiliary's type.

        Args:
            train_auxs: indexable, sized collection of auxiliaries; each item
                exposes ``sentnum`` and ``type``.

        Returns:
            list: 1 where the selected ``wc`` rule returned a truthy value,
            0 otherwise. An auxiliary whose type is not one of 'modal', 'be',
            'have', 'do', 'so' or 'to' adds no entry.
        """
        predictions = []
        for position in range(len(train_auxs)):
            aux = train_auxs[position]
            sendict = self.sentences[aux.sentnum]
            tree = sendict.get_nltk_tree()
            word_subtree_positions = nt.get_smallest_subtree_positions(tree)

            kind = aux.type
            # Only the modal and do rules take the tree and its positions.
            if kind == 'modal':
                verdict = wc.modal_rule(sendict, aux, tree,
                                        word_subtree_positions)
            elif kind == 'be':
                verdict = wc.be_rule(sendict, aux)
            elif kind == 'have':
                verdict = wc.have_rule(sendict, aux)
            elif kind == 'do':
                verdict = wc.do_rule(sendict, aux, tree,
                                     word_subtree_positions)
            elif kind == 'so':
                verdict = wc.so_rule(sendict, aux)
            elif kind == 'to':
                verdict = wc.to_rule(sendict, aux)
            else:
                # Unhandled type: nothing is recorded for this auxiliary.
                continue

            predictions.append(1 if verdict else 0)

        return predictions
Esempio n. 3
0
def linguistic_features_vector(sentdict, aux, features):
    """Return the feature list for ``aux`` within the sentence ``sentdict``.

    The parse tree is taken from ``sentdict.get_nltk_tree()`` and handed,
    together with its smallest-subtree positions, to each requested builder.

    Args:
        sentdict: sentence object providing ``get_nltk_tree()``.
        aux: auxiliary object; ``aux.type`` is read for 'combine_aux_type'.
        features: container of group names, tested with ``in``.

    Returns:
        list: base entries ('my_features', 'my_rules', 'old_rules', in that
        order), then the 'square_rules' entries, then the 'combine_aux_type'
        entries, for whichever groups were requested.
    """
    tree = sentdict.get_nltk_tree()
    subtree_positions = nt.get_smallest_subtree_positions(tree)
    vector = []

    if 'my_features' in features:
        vector.extend(
            my_features_vector(sentdict, aux, tree, subtree_positions))

    if 'my_rules' in features:
        vector.extend(my_rules_vector(sentdict, aux, tree, subtree_positions))

    if 'old_rules' in features:
        vector.extend(
            old_rules_vector(sentdict, aux, tree, subtree_positions))

    if 'square_rules' in features:
        # Pairwise AND over the entries present before this step; the tuple
        # snapshot keeps the newly added entries out of the pairing.
        snapshot = tuple(vector)
        vector.extend(
            bool_to_int(int_to_bool(left) and int_to_bool(right))
            for offset, left in enumerate(snapshot)
            for right in snapshot[offset + 1:])

    if 'combine_aux_type' in features:
        # AND of every entry so far with each auxiliary-type indicator.
        aux_type = aux.type
        indicators = [
            aux_type == 'modal', aux_type == 'be', aux_type == 'have',
            aux_type == 'do', aux_type == 'to', aux_type == 'so'
        ]
        snapshot = tuple(vector)
        vector.extend(
            bool_to_int(indicator and int_to_bool(entry))
            for entry in snapshot
            for indicator in indicators)

    return vector
Esempio n. 4
0
    def test_my_rules(self, original_rules=False, idxs=None):
        """Fill ``self.predictions`` with rule-based 0/1 predictions.

        Auxiliaries at positions ``self.pre_oversample_length`` up to
        ``len(self.all_auxiliaries)`` are visited. For each one the rule that
        matches ``aux.type`` is evaluated and its result, passed through
        ``vc.bool_to_int``, is appended to ``self.predictions``.

        Args:
            original_rules: when false, the ``wc.*_rule`` functions are used
                (called with the auxiliary object); when true, the
                ``dv.*check`` functions are used (called with
                ``aux.wordnum``).
            idxs: optional container of positions; when given, only positions
                contained in it are evaluated. ``None`` means all positions.

        Returns:
            None. ``self.predictions`` is reset and refilled, and one summary
            line is printed. An auxiliary whose type is not one of "modal",
            "be", "have", "do", "so" or "to" adds no prediction.
        """
        self.predictions = []
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Length of test set: %d, length of All_auxs-training vectors: %d" % (
            len(self.test_classes),
            len(self.all_auxiliaries) - len(self.train_vectors),
        ))
        for i in range(self.pre_oversample_length, len(self.all_auxiliaries)):
            # Identity test against None: ``==`` may be overridden by the
            # container type and is not a reliable "no filter" check.
            if idxs is not None and i not in idxs:
                continue

            aux = self.all_auxiliaries.get_aux(i)
            sendict = self.sentences.get_sentence(aux.sentnum)
            tree = sendict.get_nltk_tree()
            word_subtree_positions = nt.get_smallest_subtree_positions(tree)

            kind = aux.type
            if not original_rules:
                # Only the modal and do rules take the tree and positions.
                if kind == "modal":
                    outcome = wc.modal_rule(sendict, aux, tree, word_subtree_positions)
                elif kind == "be":
                    outcome = wc.be_rule(sendict, aux)
                elif kind == "have":
                    outcome = wc.have_rule(sendict, aux)
                elif kind == "do":
                    outcome = wc.do_rule(sendict, aux, tree, word_subtree_positions)
                elif kind == "so":
                    outcome = wc.so_rule(sendict, aux)
                elif kind == "to":
                    outcome = wc.to_rule(sendict, aux)
                else:
                    continue
            else:
                # The original checks all share one signature and take the
                # word index instead of the auxiliary object.
                auxidx = aux.wordnum
                if kind == "modal":
                    outcome = dv.modalcheck(sendict, auxidx, tree, word_subtree_positions)
                elif kind == "be":
                    outcome = dv.becheck(sendict, auxidx, tree, word_subtree_positions)
                elif kind == "have":
                    outcome = dv.havecheck(sendict, auxidx, tree, word_subtree_positions)
                elif kind == "do":
                    outcome = dv.docheck(sendict, auxidx, tree, word_subtree_positions)
                elif kind == "so":
                    outcome = dv.socheck(sendict, auxidx, tree, word_subtree_positions)
                elif kind == "to":
                    outcome = dv.tocheck(sendict, auxidx, tree, word_subtree_positions)
                else:
                    continue

            self.predictions.append(vc.bool_to_int(outcome))
Esempio n. 5
0
def myfeaturesvector(sentdict, idx, features):
    """Build the feature vector for the lemma at position ``idx``.

    Args:
        sentdict: mapping read at the keys "tree", "lemmas" and "words".
        idx: index into ``sentdict["lemmas"]``; the lemma found there is the
            auxiliary the features describe.
        features: container tested with ``in`` for "my_features", "my_rules",
            "square_rules" and "combine_aux_type".

    Returns:
        list: ``truth(...)`` results. "my_features" adds 13 entries and
        "my_rules" adds 6. "square_rules" then adds one entry per ordered
        pair of distinct positions, and "combine_aux_type" adds six entries
        per entry present at that point.
    """
    tree = NT.maketree(sentdict["tree"][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(tree, subtree_list=subtrees)
    aux = sentdict["lemmas"][idx]
    vector = []

    if "my_features" in features:
        # Checks sharing the (sentdict, idx, tree, subtree_positions) signature.
        tree_checks = (
            DV.auxccommandsverb,
            DV.auxccommandsverbthatcomesafter,
            DV.auxisccommandedbyverb,
            DV.auxislocallyccommandedbyverb,
            DV.auxlocallyccommandsverb,
            DV.isccommandedbycontinuationword,
            DV.nexttopunct,
        )
        for check in tree_checks:
            vector.append(truth(check(sentdict, idx, tree, subtree_positions)))

        vector.append(truth(DV.isfollowedbypunct(sentdict, idx, end=["."])))

        # Checks that look only at the surface words.
        for check in (DV.previouswordisasorsoorthan, DV.thesamecheck):
            vector.append(truth(check(sentdict["words"], idx)))

        # Checks taking just the sentence and the index. The last one,
        # nextwordistoo, was marked in the original as a newly added feature.
        for check in (DV.toprecedesaux, DV.verbfollowsaux, DV.nextwordistoo):
            vector.append(truth(check(sentdict, idx)))

    if "my_rules" in features:
        # A rule contributes only when the lemma belongs to its lemma set;
        # otherwise the membership result itself is passed to truth().
        rule_table = (
            (DV.MODALS, DV.modalcheck),
            (DV.BE, DV.becheck),
            (DV.HAVE, DV.havecheck),
            (DV.DO, DV.docheck),
            (DV.TO, DV.tocheck),
            (DV.SO, DV.socheck),
        )
        for lemma_set, rule in rule_table:
            vector.append(truth(aux in lemma_set and rule(sentdict, idx, tree, subtree_positions)))

    if "square_rules" in features:
        # Adds a second layer by AND-ing every ordered pair of distinct
        # entries collected so far (both (i, j) and (j, i) are emitted).
        base = list(vector)
        for i, left in enumerate(base):
            for j, right in enumerate(base):
                if i == j:
                    continue
                vector.append(truth(untruth(left) and untruth(right)))

    if "combine_aux_type" in features:
        # AND of every entry so far with each lemma-set membership flag.
        memberships = [aux in lemma_set for lemma_set in (DV.MODALS, DV.BE, DV.HAVE, DV.DO, DV.TO, DV.SO)]
        for value in list(vector):
            for member in memberships:
                vector.append(truth(untruth(value) and member))

    return vector
Esempio n. 6
0
def testmyrules(classifier, section_start, section_end):
    """Run the hand-written checks over a span of auxiliaries and compare.

    Auxiliaries are counted in corpus order across
    ``classifier.each_sentence.sentences``. Those whose running count lies in
    ``(section_split[section_start], section_split[section_end]]`` are
    evaluated with the check matching their lemma. The results are passed to
    ``classifier.compare`` together with the data from
    ``classifier.getgsdata``.

    Args:
        classifier: object providing ``getgsdata``, ``section_split``,
            ``each_sentence.sentences`` and ``compare``.
        section_start: key into ``classifier.section_split`` (lower bound).
        section_end: key into ``classifier.section_split`` (upper bound).

    Returns:
        None. The result is reported through ``classifier.compare``.
    """
    gs_vector = classifier.getgsdata(section_start, section_end)

    aux_start = classifier.section_split[section_start]
    aux_end = classifier.section_split[section_end]

    my_rules_return_vector = []
    seen = 0
    for sentdict in classifier.each_sentence.sentences:
        for i, word in enumerate(sentdict['lemmas']):
            if not isauxiliary(sentdict, i):
                continue
            seen += 1
            if not (aux_start < seen <= aux_end):
                continue

            tree = NT.maketree(sentdict['tree'][0])
            subtree_positions = NT.get_smallest_subtree_positions(tree)

            # Original author's note: these calls were modified because they
            # had been written incorrectly before.
            if word in MODALS:
                check = modalcheck
            elif word in BE:
                check = becheck
            elif word in HAVE:
                check = havecheck
            elif word in DO:
                check = docheck
            elif word in TO:
                check = tocheck
            elif word in SO:
                check = socheck
            else:
                # Counted as an auxiliary but in none of the lemma sets:
                # no entry is recorded.
                continue

            my_rules_return_vector.append(
                truth(check(sentdict, i, tree, subtree_positions)))

    classifier.compare(gs_vector,
                       my_rules_return_vector,
                       section_start - 1,
                       verbose=False)
Esempio n. 7
0
    def test_rules(self, train_auxs):
        """Return 0/1 rule predictions for the given auxiliaries.

        For every auxiliary the sentence ``self.sentences[aux.sentnum]`` is
        looked up and its tree and smallest-subtree positions are computed.
        The ``wc`` rule selected by ``aux.type`` is then evaluated.

        Args:
            train_auxs: indexable, sized collection of auxiliaries.

        Returns:
            list: 1 for a truthy rule result, 0 otherwise. Auxiliaries of a
            type other than 'modal', 'be', 'have', 'do', 'so' or 'to' are
            skipped without adding an entry.
        """
        predictions = []
        for index in range(len(train_auxs)):
            aux = train_auxs[index]
            sendict = self.sentences[aux.sentnum]
            tree = sendict.get_nltk_tree()
            word_subtree_positions = nt.get_smallest_subtree_positions(tree)

            aux_type = aux.type
            if aux_type == 'modal':
                fired = wc.modal_rule(sendict, aux, tree, word_subtree_positions)
            elif aux_type == 'be':
                fired = wc.be_rule(sendict, aux)
            elif aux_type == 'have':
                fired = wc.have_rule(sendict, aux)
            elif aux_type == 'do':
                fired = wc.do_rule(sendict, aux, tree, word_subtree_positions)
            elif aux_type == 'so':
                fired = wc.so_rule(sendict, aux)
            elif aux_type == 'to':
                fired = wc.to_rule(sendict, aux)
            else:
                # No rule for this type, so no prediction is added.
                continue

            predictions.append(1 if fired else 0)

        return predictions
Esempio n. 8
0
File: vpesvm.py Progetto: kiankd/vpe
def testmyrules(classifier, section_start, section_end):
    """Evaluate the rule checks on one section and hand them to ``compare``.

    Every lemma for which ``isauxiliary`` is true increments a running count
    across all sentences. Only auxiliaries whose count is greater than
    ``section_split[section_start]`` and at most ``section_split[section_end]``
    are evaluated.

    Args:
        classifier: object providing ``getgsdata``, ``section_split``,
            ``each_sentence.sentences`` and ``compare``.
        section_start: key into ``classifier.section_split``.
        section_end: key into ``classifier.section_split``.

    Returns:
        None. ``classifier.compare`` receives the data from ``getgsdata``,
        the rule results, ``section_start - 1`` and ``verbose=False``.
    """
    gs_vector = classifier.getgsdata(section_start, section_end)

    splits = classifier.section_split
    aux_start, aux_end = splits[section_start], splits[section_end]

    my_rules_return_vector = []
    count = 0
    for sentdict in classifier.each_sentence.sentences:
        for i, word in enumerate(sentdict['lemmas']):
            if not isauxiliary(sentdict, i):
                continue
            count += 1
            if count <= aux_start or count > aux_end:
                continue

            tree = NT.maketree(sentdict['tree'][0])
            subtree_positions = NT.get_smallest_subtree_positions(tree)

            # Original author's note: these calls were modified because they
            # had been written incorrectly before.
            if word in MODALS:
                result = modalcheck(sentdict, i, tree, subtree_positions)
            elif word in BE:
                result = becheck(sentdict, i, tree, subtree_positions)
            elif word in HAVE:
                result = havecheck(sentdict, i, tree, subtree_positions)
            elif word in DO:
                result = docheck(sentdict, i, tree, subtree_positions)
            elif word in TO:
                result = tocheck(sentdict, i, tree, subtree_positions)
            elif word in SO:
                result = socheck(sentdict, i, tree, subtree_positions)
            else:
                # Auxiliary outside every lemma set: nothing is recorded.
                continue

            my_rules_return_vector.append(truth(result))

    classifier.compare(gs_vector, my_rules_return_vector, section_start-1, verbose=False)
Esempio n. 9
0
def myfeaturesvector(sentdict, idx, features):
    """Return the list of ``truth(...)`` features for the lemma at ``idx``.

    Args:
        sentdict: mapping read at the keys 'tree', 'lemmas' and 'words'.
        idx: index into ``sentdict['lemmas']`` selecting the auxiliary.
        features: container tested with ``in`` for the group names
            'my_features', 'my_rules', 'square_rules', 'combine_aux_type'.

    Returns:
        list: base entries ('my_features' then 'my_rules'), followed by the
        'square_rules' entries (one per ordered pair of distinct positions)
        and the 'combine_aux_type' entries (six per entry present at that
        point), for whichever groups were requested.
    """
    tree = NT.maketree(sentdict['tree'][0])
    subtrees = NT.getsmallestsubtrees(tree)
    subtree_positions = NT.get_smallest_subtree_positions(
        tree, subtree_list=subtrees)
    aux = sentdict['lemmas'][idx]
    vector = []

    def with_tree(check):
        # Shared call shape of the tree-based checks and rules.
        return check(sentdict, idx, tree, subtree_positions)

    if 'my_features' in features:
        structural = (
            DV.auxccommandsverb,
            DV.auxccommandsverbthatcomesafter,
            DV.auxisccommandedbyverb,
            DV.auxislocallyccommandedbyverb,
            DV.auxlocallyccommandsverb,
            DV.isccommandedbycontinuationword,
            DV.nexttopunct,
        )
        for check in structural:
            vector.append(truth(with_tree(check)))

        vector.append(truth(DV.isfollowedbypunct(sentdict, idx, end=['.'])))
        vector.append(
            truth(DV.previouswordisasorsoorthan(sentdict['words'], idx)))
        vector.append(truth(DV.thesamecheck(sentdict['words'], idx)))
        vector.append(truth(DV.toprecedesaux(sentdict, idx)))
        vector.append(truth(DV.verbfollowsaux(sentdict, idx)))

        # Marked in the original as a newly added feature.
        vector.append(truth(DV.nextwordistoo(sentdict, idx)))

    if 'my_rules' in features:
        # Each rule is evaluated only when the lemma is in its lemma set.
        pairs = (
            (DV.MODALS, DV.modalcheck),
            (DV.BE, DV.becheck),
            (DV.HAVE, DV.havecheck),
            (DV.DO, DV.docheck),
            (DV.TO, DV.tocheck),
            (DV.SO, DV.socheck),
        )
        for lemma_set, rule in pairs:
            vector.append(truth(aux in lemma_set and with_tree(rule)))

    if 'square_rules' in features:
        # New layer of features: AND of every ordered pair of distinct
        # entries collected so far.
        collected = list(vector)
        size = len(collected)
        for i in range(size):
            for j in range(size):
                if i == j:
                    continue
                vector.append(
                    truth(untruth(collected[i]) and untruth(collected[j])))

    if 'combine_aux_type' in features:
        # AND of every entry so far with each lemma-set membership flag.
        bools = [
            aux in lemma_set
            for lemma_set in (DV.MODALS, DV.BE, DV.HAVE, DV.DO, DV.TO, DV.SO)
        ]
        for entry in list(vector):
            for flag in bools:
                vector.append(truth(untruth(entry) and flag))

    return vector
Esempio n. 10
0
    def test_my_rules(self, original_rules=False, idxs=None):
        """Fill ``self.predictions`` with rule-based 0/1 predictions.

        Auxiliaries at positions ``self.pre_oversample_length`` up to
        ``len(self.all_auxiliaries)`` are visited. For each one the rule that
        matches ``aux.type`` is evaluated and its result, passed through
        ``vc.bool_to_int``, is appended to ``self.predictions``.

        Args:
            original_rules: when false, the ``wc.*_rule`` functions are used
                (called with the auxiliary object); when true, the
                ``dv.*check`` functions are used (called with
                ``aux.wordnum``).
            idxs: optional container of positions; when given, only positions
                contained in it are evaluated. ``None`` means all positions.

        Returns:
            None. ``self.predictions`` is reset and refilled, and one summary
            line is printed. An auxiliary whose type is not one of 'modal',
            'be', 'have', 'do', 'so' or 'to' adds no prediction.
        """
        self.predictions = []
        # A parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Length of test set: %d, length of All_auxs-training vectors: %d' % (
            len(self.test_classes),
            len(self.all_auxiliaries) - len(self.train_vectors)))
        for i in range(self.pre_oversample_length, len(self.all_auxiliaries)):
            # Identity test against None: ``==`` may be overridden by the
            # container type and is not a reliable "no filter" check.
            if idxs is not None and i not in idxs:
                continue

            aux = self.all_auxiliaries.get_aux(i)
            sendict = self.sentences.get_sentence(aux.sentnum)
            tree = sendict.get_nltk_tree()
            word_subtree_positions = nt.get_smallest_subtree_positions(tree)

            kind = aux.type
            if not original_rules:
                # Only the modal and do rules take the tree and positions.
                if kind == 'modal':
                    outcome = wc.modal_rule(sendict, aux, tree,
                                            word_subtree_positions)
                elif kind == 'be':
                    outcome = wc.be_rule(sendict, aux)
                elif kind == 'have':
                    outcome = wc.have_rule(sendict, aux)
                elif kind == 'do':
                    outcome = wc.do_rule(sendict, aux, tree,
                                         word_subtree_positions)
                elif kind == 'so':
                    outcome = wc.so_rule(sendict, aux)
                elif kind == 'to':
                    outcome = wc.to_rule(sendict, aux)
                else:
                    continue
            else:
                # The original checks all share one signature and take the
                # word index instead of the auxiliary object.
                auxidx = aux.wordnum
                if kind == 'modal':
                    outcome = dv.modalcheck(sendict, auxidx, tree,
                                            word_subtree_positions)
                elif kind == 'be':
                    outcome = dv.becheck(sendict, auxidx, tree,
                                         word_subtree_positions)
                elif kind == 'have':
                    outcome = dv.havecheck(sendict, auxidx, tree,
                                           word_subtree_positions)
                elif kind == 'do':
                    outcome = dv.docheck(sendict, auxidx, tree,
                                         word_subtree_positions)
                elif kind == 'so':
                    outcome = dv.socheck(sendict, auxidx, tree,
                                         word_subtree_positions)
                elif kind == 'to':
                    outcome = dv.tocheck(sendict, auxidx, tree,
                                         word_subtree_positions)
                else:
                    continue

            self.predictions.append(vc.bool_to_int(outcome))