Example #1
def processACE(f, np, heads2qp):
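    #Update heads2qp with statistics for one ACE NP: if its gold annotation is
    #nominal, record its head, bump the document/occurrence counters, and note
    #whether it is a gold singleton or the first mention of a gold chain.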
    ace_annots = reconcile.parseGoldAnnots(f)
    stanford_deps = reconcile.getStanfordDep(f)
    gold_chains = reconcile.getGoldChains(f)
    ace_np = ace_annots.getAnnotBySpan(np.getStart(), np.getEnd())

    if ace_np["is_nominal"]:
        head = utils.textClean(ace_np["HEAD"].strip().lower())
        text = utils.textClean(np.getText())

        #bookkeeping
        if head not in heads2qp:
            heads2qp[head] = QuasiPronoun(head)
        else:
            heads2qp[head].updateDocs(f)
            heads2qp[head].updateCount()

        if ace_np["GOLD_SINGLETON"]:
            heads2qp[head].singleton += 1
        else:
            #does it start the chain?
            for gc in gold_chains:
                if gold_chains[gc][0] == np:
                    heads2qp[head].starts_chain += 1
                    break

        process(f, np, head, text, heads2qp, stanford_deps)
Example #2
def getACECommonNouns(f):
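    #Return an AnnotationSet of the NPs in file f whose gold ACE annotation is
    #a nominal that is not a gold singleton.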
    nps = reconcile.getNPs(f)
    gold_nps = reconcile.parseGoldAnnots(f)
    common_nps = AnnotationSet("common_nouns")
    for np in nps:
        gold_np = gold_nps.getAnnotBySpan(np.getStart(), np.getEnd())
        if not gold_np["GOLD_SINGLETON"] and gold_np["is_nominal"]:
            common_nps.add(np)
    return common_nps
Example #3
def gold_singletons(base_directory):
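    #Return the response NPs that overlap no gold annotation: the for/else
    #below only appends an NP when no gold span contains it or is contained
    #by it.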
    nps = reconcile.getNPs_annots(base_directory)
    golds = reconcile.parseGoldAnnots(base_directory)
    existentials = []

    for n in nps:
        for g in golds:
            if n.contains(g) or g.contains(n):
                break
        else:
            if n not in existentials:
                existentials.append(n)
    return existentials
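
A minimal usage sketch for this helper, assuming gold_singletons above is defined in (or imported into) the same module and that a Reconcile document directory is passed on the command line; getATTR("TEXT_CLEAN") is the accessor used on getNPs_annots() results in example #6:

import sys

if __name__ == "__main__":
    #usage sketch: report the response NPs that overlap no gold annotation
    base_directory = sys.argv[1].strip()
    existentials = gold_singletons(base_directory)
    print("%d NPs with no gold counterpart" % len(existentials))
    for np in existentials:
        print(np.getATTR("TEXT_CLEAN"))
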
Example #4
def processACE(f, head2qp):
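    #Collect head statistics for every NP in file f: only pronouns when
    #PRONOUNS is set, otherwise only gold nominals.  Gold singletons (and
    #indefinite "a "/"an " singletons) are counted directly; chain-starting
    #mentions are handled by process_gold and syntactic context by
    #process_syntax.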
    global USE_GOLD
    ace_annots = reconcile.parseGoldAnnots(f)
    nps = reconcile.getNPs(f)
    stanford_deps = reconcile.getStanfordDep(f)
    gold_chains = reconcile.getGoldChains(f)
    for np in nps:
        ace_np = ace_annots.getAnnotBySpan(np.getStart(), np.getEnd())
        head = None
        text = None
        if PRONOUNS:
            if qp_utils.isPronoun(np):
                head = ace_np["HEAD"].lower()
                text = np.getText()
            else:
                continue
        else:
            if ace_np["is_nominal"]:
                head = utils.textClean(ace_np["HEAD"].strip().lower())
                text = utils.textClean(np.getText())
            else:
                continue

        #bookkeeping
        if head not in head2qp:
            head2qp[head] = QuasiPronoun(head)
        else:
            head2qp[head].updateDocs(f)
            head2qp[head].updateCount()

        if ace_np["GOLD_SINGLETON"]:
            head2qp[head].singleton += 1
            if (text.startswith("a ") or text.startswith("an ")):
                head2qp[head].faux_ba += 1
        else:
            #does it start the chain?
            if USE_GOLD:
                process_gold(f, np, head, text, head2qp, gold_chains)
        process_syntax(f, np, head, text, head2qp, stanford_deps)
Example #5
    #set up for bare definites
    #            ACE_HEADS.append("the " + line)
    #            ACE_HEADS.append("that " + line)
    #            ACE_HEADS.append("this " + line)
    #            used.append(line)
    #set up for all commons
    #ACE_HEADS.append(line)

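    #For each listed file, collect its common-noun ("faux pronoun") mentions,
    #either from the gold ACE annotations (dropping post-modified ones) or
    #from the Reconcile NPs, and write them to an annotations/faux_pronouns file.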
    for f in files:
        f = f.strip()
        print("Working on file: {0}".format(f))
        this_files_common_nouns = []
        if ACE:
            tokens = reconcile.getTokens(f)
            pos = reconcile.getPOS(f)
            ace_annots = reconcile.parseGoldAnnots(f)
            this_files_common_nouns_orig = []
            collectACEFPs(ace_annots, this_files_common_nouns_orig)

            #remove post modded commons
            for fp in this_files_common_nouns_orig:
                if not checkForModification(fp, tokens, pos):
                    this_files_common_nouns.append(fp)
        else:
            gold_nps = reconcile.getNPs(f)
            collectFPs(gold_nps, this_files_common_nouns)

        #output common nouns to file
        i = 0
        with open(f + "/annotations/faux_pronouns", 'w') as outFile:
            for annot in this_files_common_nouns:
                #the write format below is an assumption (the example stops at
                #the loop header): one numbered span per line
                outFile.write("%d\t%d,%d\t%s\n" % (i, annot.getStart(),
                                                   annot.getEnd(),
                                                   annot.getText()))
                i += 1
Example #6
# Created By : Nathan Gilbert
#
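# Print every distinct gold mention string (pronouns excluded) found in the
# documents listed in the supplied filelist.
#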
import sys

from pyconcile import reconcile
from pyconcile import data

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s <first-argument>" % (sys.argv[0]))
        sys.exit(1)

    V = []
    fList = open(sys.argv[1], 'r')
    for f in fList:
        f = f.strip()
        if f.startswith("#"):
            continue

        gold_annots = reconcile.parseGoldAnnots(f, True)

        for g in gold_annots:
            text = g.getText().replace("\n", "").replace("\r", "")
            if text in data.ALL_PRONOUNS:
                continue
            if text not in V:
                V.append(text)

    for v in V:
        print("\"%s\"," % v)
    commons = []   #not initialized in the excerpt; used below as a list of strings
    stats_file = open(sys.argv[2], 'r')
    for line in stats_file:
        if line.startswith("TEXT:"):
            text = line.replace("TEXT:", "").strip()
            commons.append(text)
    stats_file.close()

    test_docs = open(sys.argv[1], 'r')

    found_gold = 0
    found_response = 0
    total_gold_nps = 0
    total_response_nps = 0
    for doc in test_docs:
        doc = doc.strip()
        gold_nps = reconcile.parseGoldAnnots(doc)
        response_nps = reconcile.getNPs_annots(doc)

        for a in gold_nps:
            total_gold_nps += 1
            #print a.getATTR("TEXT_CLEAN").lower()
            if a.getATTR("TEXT_CLEAN").lower() in commons:
                found_gold += 1

        for a in response_nps:
            total_response_nps += 1
            if a.getATTR("TEXT_CLEAN").lower() in commons:
                found_response += 1
    test_docs.close()

    print("Found: %d/%d (%0.2f) from stats" %
    text2docs2spans = defaultdict(dict)
    class2annots = defaultdict(list)

    #text -> semantic_class -> decision 
    cache = defaultdict(dict)
    readInCache(cache)
    needed_annotations = 0

    for f in filelist:
        if f.startswith("#"):
            continue

        f = f.strip()

        gold_annots = reconcile.parseGoldAnnots(options.dir + "/" + f)
        annots = getSpecificNPs(gold_annots)
        needed_annotations = len(annots)

        #for a in annots:
        #    if a.getATTR("GOLD_SEMANTIC") in ("ORGANIZATION", "GPE"):
        #        class2annots["ORG"].append(a)
        #    elif a.getATTR("GOLD_SEMANTIC") == "PERSON":
        #        class2annots["PER"].append(a)
        #    elif a.getATTR("GOLD_SEMANTIC") == "LOC":
        #        class2annots["LOC"].append(a)

        #actually perform the annotations   
        #for sem in class2annots.keys():
        #check the cache to see if we already have an answer
        num = 0