Ejemplo n.º 1
0
def ioput_baseline_predict(ff):
    """Write a noun-phrase baseline prediction file for the sentences in *ff*.

    Every line of *ff* is handled as one sentence: it is stripped, run
    through ``PreProcessor.process`` and parsed with ``RParser``.  Each item
    returned by ``RParser.return_NPs`` is stored, as its ``repr``, under
    ``sen<N> -> action1 -> input_output_<M>`` (both counters start at 1).
    The collected mapping is dumped as block-style YAML.

    The output path is derived from *ff*: in the last '/'-separated
    component, the second and third dot-separated parts are replaced with
    ``ioput_baseline`` and ``yaml`` (``data/3.raw.txt`` becomes
    ``data/3.ioput_baseline.yaml``).  Names with fewer than three parts are
    extended instead of raising ``IndexError``.

    :param ff: '/'-separated path of the text file to read.
    :returns: None; the result is written to disk.
    """
    with open(ff, 'r') as f:
        text = f.read().splitlines()

    output_yaml = {}

    for i, orig_text in enumerate(text):
        rp = RParser()
        # Preprocess: process() takes a list, so wrap the single stripped
        # line and pull the single processed sentence back out.
        pre_p = PreProcessor()
        process_text, sub_table = pre_p.process([orig_text.strip()])
        sen = process_text[0]
        # RParse
        # NOTE(review): verb_parent/method_parent/ioput_phrases are only
        # needed by the post-processing step, which is disabled for this
        # baseline.  The calls are kept because they may affect parser
        # state or raise on bad input -- confirm before removing.
        verb_parent, method_parent = rp.parse_v_method(sen)
        ioput_phrases = rp.parse_input_output(sen, verb_parent, method_parent)
        noun_phrases = rp.return_NPs(sen)
        # Postprocess is intentionally skipped (PostProcessor not used here):
        # the baseline labels every returned noun phrase as input/output.
        summary = {'action1': {}}
        for ioput_seq_num, noun_phrase in enumerate(noun_phrases, 1):
            key = 'input_output_{}'.format(ioput_seq_num)
            summary['action1'][key] = repr(noun_phrase)

        output_yaml['sen{}'.format(i + 1)] = summary

    file_path = ff.split('/')
    output_file_name_list = file_path[-1].split('.')
    # Slice assignment gives the same result as assigning indexes 1 and 2 when
    # both exist, and grows the list when the name has fewer than three parts.
    output_file_name_list[1:3] = ['ioput_baseline', 'yaml']
    file_path[-1] = '.'.join(output_file_name_list)
    output_file_path = '/'.join(file_path)
    with open(output_file_path, 'w') as f:
        f.write(yaml.dump(output_yaml, default_flow_style=False))
Ejemplo n.º 2
0
def ioput_baseline_predict(ff):
    """Produce the noun-phrase baseline YAML for the sentence file *ff*.

    The file is read line by line; each stripped line goes through
    ``PreProcessor.process`` and is then parsed by a fresh ``RParser``.
    Everything returned by ``RParser.return_NPs`` is written, via ``repr``,
    to ``sen<N> -> action1 -> input_output_<M>`` with 1-based counters, and
    the whole mapping is dumped as block-style YAML to the path computed by
    ``_baseline_output_path``.

    :param ff: '/'-separated path of the text file to read.
    :returns: None; the result is written to disk.
    """
    with open(ff, 'r') as f:
        text = f.read().splitlines()

    output_yaml = {}

    for i, orig_text in enumerate(text):
        rp = RParser()
        # Preprocess: process() works on a list of sentences, so a
        # one-element list is passed and the single result is taken back.
        pre_p = PreProcessor()
        process_text, sub_table = pre_p.process([orig_text.strip()])
        sen = process_text[0]
        # RParse
        # NOTE(review): the next two results feed only the post-processing
        # step, which this baseline does not run; the calls are preserved in
        # case they have side effects on the parser -- confirm.
        verb_parent, method_parent = rp.parse_v_method(sen)
        ioput_phrases = rp.parse_input_output(sen, verb_parent, method_parent)
        noun_phrases = rp.return_NPs(sen)
        # Postprocess is skipped: every returned noun phrase is reported.
        action = {}
        for ioput_seq_num, noun_phrase in enumerate(noun_phrases, 1):
            action['input_output_{}'.format(ioput_seq_num)] = repr(noun_phrase)

        output_yaml['sen{}'.format(i + 1)] = {'action1': action}

    with open(_baseline_output_path(ff), 'w') as f:
        f.write(yaml.dump(output_yaml, default_flow_style=False))


def _baseline_output_path(ff):
    """Return the baseline output path that corresponds to input path *ff*.

    Only the last '/'-separated component is changed: its second and third
    dot-separated parts become ``ioput_baseline`` and ``yaml``, e.g.
    ``data/3.raw.txt`` -> ``data/3.ioput_baseline.yaml``.  A name with fewer
    than three parts is extended rather than raising ``IndexError``.
    """
    directory, separator, file_name = ff.rpartition('/')
    name_parts = file_name.split('.')
    # Replaces indexes 1 and 2 when present, appends otherwise.
    name_parts[1:3] = ['ioput_baseline', 'yaml']
    return directory + separator + '.'.join(name_parts)
Ejemplo n.º 3
0
__author__ = 'Shaun Rong'
__version__ = '0.1'
__maintainer__ = 'Shaun Rong'
__email__ = '*****@*****.**'


# Load the local environment configuration.
with open('environ.yaml', 'r') as f:
    # safe_load instead of yaml.load(f): calling yaml.load without an explicit
    # Loader is deprecated in PyYAML 5.1+ and rejected by PyYAML 6.  A config
    # made of plain scalars/mappings loads identically.
    env = yaml.safe_load(f)

stanford_parser_folder = env['stanford_parser_folder']

# Both variables point at the same folder.
# NOTE(review): presumably these are read by the Stanford parser wrapper used
# behind RParser -- confirm.
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

cfuf = PreProcessor()
with open('data/3.raw.txt', 'r') as f:
    text = f.read().splitlines()

# Pre-process every line of the raw file in one call.
process_text, sub_table = cfuf.process(text)


# Inspect a single sentence: the fourth processed entry.
sen = process_text[3]

rp = RParser()

verb_parent, method_parent = rp.parse_v_method(sen)

# Parenthesized single-argument print gives the same output on Python 2 and
# is valid syntax on Python 3.
print(verb_parent)
print(method_parent)
Ejemplo n.º 4
0
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; `env`, `args`, `train_file` and `train_summary` are
    # defined outside this fragment.
    stanford_parser_folder = env['stanford_parser_folder']
    # Both environment variables are set to the same configured folder.
    os.environ['STANFORD_PARSER'] = stanford_parser_folder
    os.environ['STANFORD_MODELS'] = stanford_parser_folder

    tree_parser = stanford.StanfordParser(model_path=env['model_path'])

    # For every entry of train_file, read "<ff>.raw.txt" (one sentence per
    # line) and the matching "<ff>.gold.yaml" from the directory args.f.
    for ff in train_file:
        with open(os.path.join(args.f, "{}.raw.txt".format(ff)), 'r') as f:
            text = f.read().splitlines()

        with open(os.path.join(args.f, "{}.gold.yaml".format(ff)), 'r') as f:
            # NOTE(review): yaml.load without an explicit Loader is deprecated
            # in PyYAML 5.1+ and rejected by PyYAML 6; consider yaml.safe_load.
            gold_ticket = yaml.load(f)

        for i, orig_text in enumerate(text):
            rp = RParser()
            # Preprocess: process() is given a one-element list holding the
            # stripped line; the single processed sentence is taken back out.
            pre_p = PreProcessor()
            process_text, sub_table = pre_p.process([orig_text.strip()])
            sen = process_text[0]
            # RParse: collect the candidate noun phrases of the sentence.
            NPs = rp.return_NPs(sen)

            # Gold entries are keyed "sen<N>" with N starting at 1.
            # NOTE(review): extract_gold_NPs is defined elsewhere; presumably
            # it returns the gold noun phrases in a form comparable to NPs.
            gold_NPs = extract_gold_NPs(gold_ticket['sen{}'.format(i+1)], sub_table, tree_parser)

            # Bucket each candidate by whether it appears among the gold
            # phrases; the repr of the phrase is what gets stored.
            for NP in NPs:
                if NP in gold_NPs:
                    train_summary['input_output'].append(repr(NP))
                else:
                    train_summary['else'].append(repr(NP))
Ejemplo n.º 5
0
from PreProcessor import PreProcessor
from RParser import RParser

__author__ = 'Shaun Rong'
__version__ = '0.1'
__maintainer__ = 'Shaun Rong'
__email__ = '*****@*****.**'

# Read the local environment configuration.
with open('environ.yaml', 'r') as f:
    # yaml.load(f) without a Loader argument is deprecated since PyYAML 5.1
    # and fails on PyYAML 6; safe_load reads a plain config the same way.
    env = yaml.safe_load(f)

stanford_parser_folder = env['stanford_parser_folder']

# The same folder is exported under both variable names.
# NOTE(review): presumably consumed by the Stanford parser wrapper that
# RParser relies on -- confirm.
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

cfuf = PreProcessor()
with open('data/3.raw.txt', 'r') as f:
    text = f.read().splitlines()

# One pre-processing call over all lines of the raw file.
process_text, sub_table = cfuf.process(text)

# Pick the fourth processed sentence for inspection.
sen = process_text[3]

rp = RParser()

verb_parent, method_parent = rp.parse_v_method(sen)

# Single-argument print with parentheses: identical output on Python 2,
# valid syntax on Python 3.
print(verb_parent)
print(method_parent)