Example #1
    def run(self):
        logging.info('running QA...')
        input_file = self.cfg.get('qa', 'input_file')
        for entry in QAParser.parse_file(input_file):
            logging.info('processing text...')
            all_text = "\n".join([doc['text'] for doc in entry['docs']])
            model = self.text_to_4lang.process(
                all_text, dep_dir=self.dep_dir, fn='text')
            print_text_graph(model, self.graph_dir)
            model_graph = MachineGraph.create_from_machines(model.values())
            for question in entry['questions']:
                answer = self.answer_question(question, model, model_graph)
                print answer['text']
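
The loop above only touches a handful of fields on each parsed entry. A minimal sketch of the structure it assumes, with field names taken from how run above and the answer_question example below access them, and all concrete values hypothetical:

entry = {
    'docs': [
        {'text': 'The dog chased the cat.'},   # joined into all_text
    ],
    'questions': [
        {
            'id': 1,
            'text': 'What did the dog chase?',
            'answers': [
                # score_answer() is expected to add 'score' and 'evidence' later
                {'id': 1, 'text': 'the cat'},
                {'id': 2, 'text': 'a ball'},
            ],
        },
    ],
}
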
Example #2
    def process_deps(self, fn, name="none"):
        sen_machines = []
        c = 0
        for line in open(fn):
            data = json.loads(line)
            deps, corefs = data['deps'], data['corefs']
            for sen_deps in deps:
                # logging.info("processing sentences...")
                machines = self.dep_to_4lang.get_machines_from_deps_and_corefs(
                    [sen_deps], corefs)

                print("process_deps")
                print(machines)

                if self.expand:
                    if self.abstract:
                        self.dep_to_4lang.lexicon_exp.expand(machines,
                                                             abstract=True)
                    else:
                        self.dep_to_4lang.lexicon.expand(machines,
                                                         abstract=False)

                if self.cfg.getboolean('text', 'expand'):
                    self.dep_to_4lang.lexicon.expand(machines)

                if self.cfg.getboolean('text', 'print_graphs'):
                    fn = print_text_graph(machines, self.graphs_dir, fn=name)

                sen_machines.append(machines)
                c += 1

        return sen_machines
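
process_deps expects a JSON-lines file: each line holds one JSON object with the parsed dependencies and coreference data for a document. A minimal sketch of such a record, showing only the two keys the method reads; the inner dependency format is an assumption and depends on the parser that produced the file:

import json

record = {
    'deps': [[], []],   # one dependency list per sentence (contents omitted here)
    'corefs': []        # coreference chains for the whole document
}
line = json.dumps(record)     # one such line per document in the input file
data = json.loads(line)
deps, corefs = data['deps'], data['corefs']
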
Example #3
def main_sen_sim(cfg):
    graph_dir = cfg.get("sim", "graph_dir")
    dep_dir = cfg.get("sim", "deps_dir")
    ensure_dir(graph_dir)
    ensure_dir(dep_dir)

    text_to_4lang = TextTo4lang(cfg)
    for i, line in enumerate(sys.stdin):
        preprocessed_line = line.decode("utf-8").strip().lower()
        sen1, sen2 = preprocessed_line.split("\t")
        machines1 = text_to_4lang.process(
            sen1, dep_dir=dep_dir, fn="{0}a".format(i))
        machines2 = text_to_4lang.process(
            sen2, dep_dir=dep_dir, fn="{0}b".format(i))

        print_text_graph(machines1, graph_dir, fn="{0}a".format(i))
        print_text_graph(machines2, graph_dir, fn="{0}b".format(i))

        graph1, graph2 = map(
            MachineGraph.create_from_machines,
            (machines1.values(), machines2.values()))
        print GraphSimilarity.graph_similarity(graph1, graph2)
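
main_sen_sim reads one sentence pair per line from standard input, the two sentences separated by a single tab. A minimal sketch of the preprocessing applied to each line; the sentences are hypothetical, and the decode call reflects that under Python 2 sys.stdin yields byte strings:

raw_line = u'A dog barks.\tA cat sleeps.\n'.encode('utf-8')   # one stdin line, as bytes
sen1, sen2 = raw_line.decode('utf-8').strip().lower().split('\t')
print(sen1 + ' / ' + sen2)   # -> a dog barks. / a cat sleeps.
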
Example #4
    def answer_question(self, question, model, model_graph):
        logging.info('processing question: {0}...'.format(question['text']))
        question['machines'] = self.text_to_4lang.process(
            question['text'], dep_dir=self.dep_dir,
            fn="q{0}".format(question['id']))

        for answer in question['answers']:
            logging.info('processing answer: {0}...'.format(answer['text']))
            answer['machines'] = self.text_to_4lang.process(
                answer['text'], dep_dir=self.dep_dir,
                fn="q{0}a{1}".format(question['id'], answer['id']))
            print_text_graph(
                answer['machines'], self.graph_dir, fn="q{0}a{1}".format(
                    question['id'], answer['id']))
            self.score_answer(answer, model, model_graph)
            logging.info('score: {0}, evidence: {1}'.format(
                answer['score'], answer['evidence']))

        top_answer = sorted(question['answers'], key=lambda a: -a['score'])[0]
        return top_answer
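
The final two lines select the answer with the highest score by sorting on the negated score and taking the first element; since sorted is stable, ties go to the earlier answer. An equivalent and slightly more direct formulation, shown only as a sketch and not as a change to the project code:

top_answer = max(question['answers'], key=lambda a: a['score'])
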
Example #5
    def process_phrase(self, phrase):
        preproc_sens = []
        preproc_sens.append(
            TextTo4lang.preprocess_text(phrase.strip().decode('utf-8')))
        deps, corefs, _ = self.parser_wrapper.parse_text(
            "\n".join(preproc_sens))
        machine = self.dep_to_4lang.get_machines_from_deps_and_corefs(
            [deps[0]], corefs)
        if self.cfg.getboolean('text', 'expand'):
            self.dep_to_4lang.lexicon.expand(machine)

        file_name = phrase.replace(' ', '_')
        file_name = file_name.replace('.', '')

        if self.cfg.getboolean('text', 'print_graphs'):
            fn = print_text_graph(machine, self.graphs_dir, fn=file_name)
        return machine
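
process_phrase derives the graph's file name from the phrase itself: spaces become underscores and periods are dropped. A worked example with a hypothetical phrase:

phrase = 'the dog barks.'
file_name = phrase.replace(' ', '_').replace('.', '')
print(file_name)   # -> the_dog_barks
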
Example #6
def main():
    logging.basicConfig(
        level=__LOGLEVEL__,
        format="%(asctime)s : " +
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")
    cfg_file = sys.argv[1] if len(sys.argv) > 1 else None
    max_sens = int(sys.argv[2]) if len(sys.argv) > 2 else None

    cfg = get_cfg(cfg_file)
    text_to_4lang = TextTo4lang(cfg)

    input_fn = cfg.get('data', 'input_sens')
    sens = [line.strip() for line in open(input_fn)]
    if max_sens is not None:
        sens = sens[:max_sens]

    words_to_machines = text_to_4lang.process(sens, print_deps=True)
    fn = print_text_graph(words_to_machines, cfg.get('machine', 'graph_dir'))
    logging.info('wrote graph to {0}'.format(fn))
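
main takes an optional config file path as its first command-line argument and an optional sentence limit as its second; both default to None when omitted. A sketch of the argument handling with a simulated command line; the script and config file names are placeholders:

import sys

sys.argv = ['text_to_4lang_demo.py', 'my_config.cfg', '100']   # simulated invocation
cfg_file = sys.argv[1] if len(sys.argv) > 1 else None          # -> 'my_config.cfg'
max_sens = int(sys.argv[2]) if len(sys.argv) > 2 else None     # -> 100
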
Example #7
    def process_deps(self, fn):
        sen_machines = []
        c = 0
        for line in open(fn):
            data = json.loads(line)
            deps, corefs = data['deps'], data['corefs']
            for sen_deps in deps:
                # logging.info("processing sentences...")
                machines = self.dep_to_4lang.get_machines_from_deps_and_corefs(
                    [sen_deps], corefs)
                if self.cfg.getboolean('text', 'expand'):
                    self.dep_to_4lang.lexicon.expand(machines)

                if self.cfg.getboolean('text', 'print_graphs'):
                    fn = print_text_graph(machines, self.graphs_dir, fn=c)

                sen_machines.append(machines)
                c += 1

        return sen_machines
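
Taken together, the examples above read their settings from a small set of configuration sections. A minimal sketch of a config covering those options; the section and option names come from the cfg.get and cfg.getboolean calls in the examples, while every path and value is a placeholder:

# The kind of file passed to get_cfg() and then to TextTo4lang(cfg):
cfg_text = """
[qa]
input_file = qa_input.txt

[sim]
graph_dir = graphs/sim
deps_dir = deps/sim

[data]
input_sens = input_sens.txt

[machine]
graph_dir = graphs/text

[text]
expand = false
print_graphs = true
"""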