Ejemplo n.º 1
0
            text = utils.textClean(np.getText().lower()).strip()
            if (text in data.ALL_PRONOUNS):
                continue

            #if specificity_utils.isProper(np):
            #    continue
            anaphor_np = gold_nps.getAnnotBySpan(np.getStart(), np.getEnd())
            if anaphor_np["PROPER_NAME"] != "true" and anaphor_np["PROPER_NOUN"] != "true":
                continue

            #print text
            add_stats(nominals, doc, np, text)

        #true singletons -- TODO: double check that these numbers are correct
        for key in list(nominals.keys()):
            nominals[key].singletons += doc.getSingletonCount(key)

    sys.stdout.write("\r \r\n")

    #histogram for sentence distance
    nominals_total_sent_histo = {}
    for key in list(nominals.keys()):
        h = nominals[key].sent_distance_histogram()
        for dist in list(h.keys()):
            nominals_total_sent_histo[dist] = nominals_total_sent_histo.get(dist, 0) + h[dist]

    #combine histograms
    make_chart(nominals_total_sent_histo, DATASET, "sentence_distance", "Sentence Distance",
            max_x=10)

    #histogram for word distance
Ejemplo n.º 2
0
            text = utils.textClean(np.getText().lower()).strip()
            if text in data.THIRD_PERSON:
                #then it is he, him, she
                add_stats(third_person, doc, np, text)
            elif (text in data.IT) and (text != "i"):
                #then we have 'it' or 'its'
                add_stats(it, doc, np, text)
            elif text in data.THIRD_PERSON_PLURAL:
                #we have 'they' or 'them'
                add_stats(third_person_plural, doc, np, text)
            else:
                continue

        #true singletons -- TODO: double check that these numbers are correct
        for key in list(it.keys()):
            it[key].singletons += doc.getSingletonCount(key)
        for key in list(third_person.keys()):
            third_person[key].singletons += doc.getSingletonCount(key)
        for key in list(third_person_plural.keys()):
            third_person_plural[key].singletons += doc.getSingletonCount(key)

        #this word exists outside of annotations
        #This needs to take place at the document level and cycle over all the
        #pronouns we want to check

    sys.stdout.write("\r \r\n")
    #print "{0:5} : {1}".format("it", it["it"].getCount())
    #print it["it"].wd_distance_histogram()
    #print it["it"].sent_distance_histogram()
    #print "{0} : total antecedents".format(sum(it["it"].sent_distance_histogram().values()))
    #print "{0} : total antecedents".format(sum(it["it"].wd_distance_histogram().values()))
Ejemplo n.º 3
0
            elif specificity_utils.isNominal(np):
                add_stats(noun_classes["nominal"], doc, np, text)
                #sys.stderr.write("{0}\n".format(text))
            elif specificity_utils.isProper(np):
                add_stats(noun_classes["proper"], doc, np, text)
                #sys.stderr.write("{0}\n".format(text))
            else:
                #sys.stderr.write("Word not found: {0}\n".format(text))
                continue

        #true singletons -- TODO double check that these numbers are correct
        #this word exists outside of annotations
        #This needs to take place at the document level and cycle over all the
        #pronouns we want to check
        for cls in list(noun_classes.keys()):
            for word in noun_classes[cls]:
                noun_classes[cls][word].singletons += doc.getSingletonCount(
                    word)

    sys.stdout.write("\r \r\n")

    #TODO print out the stats
    #with open("nouns.stats", "a") as outFile:
    for cls in sorted(noun_classes.keys()):
        total_antecendents = 0
        total_productivity = 0
        total_nominal_antecedents = 0
        total_proper_antecedents = 0
        total_pronoun_antecedents = 0
        total_subject_antecedents = 0
        total_object_antecedents = 0
        total_self_subject = 0
        total_self_object = 0