# Example #1 (score: 0)
def call_cort(text_blob,
              model_path='/Users/ryanpanos/Documents/code/cort_experiments/models/model-pair-train+dev.obj',
              corenlp_path='/Users/ryanpanos/Documents/code/StanfordNLP/stanford-corenlp-full-2016-10-31/'):
    """Run cort coreference resolution over a raw text blob.

    Args:
        text_blob: Raw document text to preprocess and resolve.
        model_path: Path to the pickled (priors, weights) ranking model.
            Defaults to the developer's local model; pass your own path
            instead of editing the code.
        corenlp_path: Path to a Stanford CoreNLP distribution used by the
            preprocessing pipeline.

    Returns:
        A list with one simple-output string per document in the corpus.
    """
    # Features computed on a single mention.
    mention_features = [
        features.fine_type, features.gender, features.number,
        features.sem_class, features.deprel, features.head_ner,
        features.length, features.head, features.first, features.last,
        features.preceding_token, features.next_token, features.governor,
        features.ancestry
    ]

    # Features computed on a (mention, candidate antecedent) pair.
    pairwise_features = [
        features.exact_match, features.head_match, features.same_speaker,
        features.alias, features.sentence_distance, features.embedding,
        features.modifier, features.tokens_contained, features.head_contained,
        features.token_distance
    ]

    print("Loading model . ... (this takes a while) ")
    # Use a context manager so the model file handle is not leaked.
    with open(model_path, "rb") as model_file:
        priors, weights = pickle.load(model_file)
    print("Model loaded.")

    # Mention-ranking perceptron with trained parameters; cost scaling is
    # disabled at prediction time.
    perceptron = RankingPerceptron(priors=priors,
                                   weights=weights,
                                   cost_scaling=0)

    extractor = instance_extractors.InstanceExtractor(
        extract_substructures,
        mention_features,
        pairwise_features,
        cost_functions.null_cost,
        perceptron.get_labels())

    logging.info("Reading in and preprocessing data.")
    p = pipeline.Pipeline(corenlp_path)

    testing_corpus = p.run_on_blob("corpus", text_blob)

    logging.info("Extracting system mentions.")
    for doc in testing_corpus:
        doc.system_mentions = mention_extractor.extract_system_mentions(doc)

    mention_entity_mapping, antecedent_mapping = experiments.predict(
        testing_corpus,
        extractor,
        perceptron,
        all_ante)

    testing_corpus.read_coref_decisions(mention_entity_mapping,
                                        antecedent_mapping)

    logging.info("Write output to file.")

    output_ls = []
    for doc in testing_corpus:
        output = doc.to_simple_output()
        print(" output: \n" + output)
        output_ls.append(output)

    logging.info("Done.")

    # Bug fix: results were collected but never returned (the original
    # ended with a bare `return` followed by unreachable dead code).
    return output_ls

# Script-level driver: load a serialized coreference model and run
# prediction over a test corpus read from disk.
# NOTE(review): this excerpt is truncated — `args` (an argparse namespace,
# presumably), `mention_features` and `pairwise_features` are defined
# earlier in the full example and are not visible here.
logging.info("Loading model.")
# The model file pickles a (priors, weights) pair produced at training time.
priors, weights = pickle.load(open(args.model, "rb"))

# Dynamically import the perceptron class named on the command line and
# instantiate it with the trained parameters; cost scaling is disabled
# at test time.
perceptron = import_helper.import_from_path(args.perceptron)(priors=priors,
                                                             weights=weights,
                                                             cost_scaling=0)

# The extractor builds training/test instances from the feature lists using
# the substructure extractor named on the command line.
extractor = instance_extractors.InstanceExtractor(
    import_helper.import_from_path(args.extractor), mention_features,
    pairwise_features, cost_functions.null_cost, perceptron.get_labels())

logging.info("Reading in data.")
# Input is expected to be a CoNLL-style file; read as UTF-8.
testing_corpus = corpora.Corpus.from_file(
    "testing", codecs.open(args.input_filename, "r", "utf-8"))

logging.info("Extracting system mentions.")
# Replace gold mentions with system-predicted mentions on every document.
for doc in testing_corpus:
    doc.system_mentions = mention_extractor.extract_system_mentions(doc)

# Predict coreference: returns mention -> entity and mention -> antecedent
# mappings, clustered by the clusterer named on the command line.
mention_entity_mapping, antecedent_mapping = experiments.predict(
    testing_corpus, extractor, perceptron,
    import_helper.import_from_path(args.clusterer))

# Write the predicted decisions back onto the corpus documents.
testing_corpus.read_coref_decisions(mention_entity_mapping, antecedent_mapping)
# Example #3 (score: 0)
    def __init__(self):
        """Set up the cort demo: load the ranking model, start the CoreNLP
        pipeline, and assemble the Tk user interface."""
        # Features computed on a single mention.
        single_mention_feats = [
            features.fine_type, features.gender, features.number,
            features.sem_class, features.deprel, features.head_ner,
            features.length, features.head, features.first, features.last,
            features.preceding_token, features.next_token, features.governor,
            features.ancestry
        ]

        # Features computed on a (mention, candidate antecedent) pair.
        pair_feats = [
            features.exact_match, features.head_match, features.same_speaker,
            features.alias, features.sentence_distance, features.embedding,
            features.modifier, features.tokens_contained,
            features.head_contained, features.token_distance
        ]

        self.extractor = instance_extractors.InstanceExtractor(
            mention_ranking.extract_substructures, single_mention_feats,
            pair_feats, cost_functions.null_cost)

        logging.info("Loading model.")

        # The pickle holds the trained (priors, weights) pair.
        model_file = open("latent-model-train.obj", "rb")
        priors, weights = pickle.load(model_file)

        self.perceptron = mention_ranking.RankingPerceptron(priors=priors,
                                                            weights=weights,
                                                            cost_scaling=0)

        logging.info("Loading CoreNLP models.")
        self.p = pipeline.Pipeline(
            "/home/sebastian/Downloads/stanford-corenlp-full-2015-04-20")

        self.root = tki.Tk()
        self.root.title("cort Demo")

        # Frame hosting the text widget plus its scrollbar.
        self.txt_frm = tki.Frame(self.root, width=400, height=200)
        self.txt_frm.pack(fill="both", expand=True)

        # Keep the window at a fixed size regardless of widget contents.
        self.txt_frm.grid_propagate(False)

        # Let cell (0, 0) absorb any resizing.
        self.txt_frm.grid_columnconfigure(0, weight=1)
        self.txt_frm.grid_rowconfigure(0, weight=1)

        # Main text-entry widget.
        self.txt = tki.Text(self.txt_frm, borderwidth=3, relief="sunken")
        self.txt.config(font=("consolas", 12), undo=True, wrap="word")
        self.txt.grid(row=0, column=0, sticky="nsew", padx=2, pady=2)

        # Vertical scrollbar wired to the text widget.
        vscroll = tki.Scrollbar(self.txt_frm, command=self.txt.yview)
        vscroll.grid(row=0, column=1, sticky="nsew")
        self.txt.config(yscrollcommand=vscroll.set)

        # Button that triggers coreference resolution on the entered text.
        self.button = tki.Button(self.root,
                                 text='Resolve Coreference',
                                 command=self.do_coreference)

        self.button.pack()