def __cons_and_start_subprocess_component(command,
                                          arguments,
                                          input_forming_func,
                                          output_forming_func,
                                          state_mutator = None):
    """Start `command` as a child process and wrap it in a function component.

    The child is launched with its stdin and stdout connected to pipes. The
    wrapped function writes one stripped line to the child's stdin per call
    and returns the stripped line it reads back from the child's stdout.

    Parameters:
        command: executable to launch.
        arguments: iterable of extra command-line arguments.
        input_forming_func, output_forming_func, state_mutator: passed
            through unchanged to `cons_function_component`.

    Returns:
        A `(component, process)` tuple: the component built by
        `cons_function_component` and the `subprocess.Popen` object. The
        caller owns the process and must terminate/wait on it.

    Raises:
        Whatever `cons_function_component` raises; the child process is
        terminated and reaped before the exception propagates.
    """
    args = [command]
    args.extend(arguments)
    pipe = subprocess.Popen(args,
                            stdin = subprocess.PIPE,
                            stdout = subprocess.PIPE)

    def get_process_function(process_pipe):
        def process_function(a, s):
            # Falsy input is not sent to the child at all; the result is None.
            new_a = None
            if a:
                # One request line out, one response line back.
                process_pipe.stdin.write(str(a).strip() + "\n")
                # Flush so the child sees the line before we block on readline().
                process_pipe.stdin.flush()
                new_a = process_pipe.stdout.readline().strip()
            return new_a
        return process_function

    try:
        arrow = cons_function_component(get_process_function(pipe),
                                        input_forming_func,
                                        output_forming_func,
                                        state_mutator)
    except Exception:
        # Do not leak the child if the component could not be built.
        pipe.terminate()
        pipe.wait()
        # Bare raise keeps the original traceback.
        raise

    # Callers index the result: [0] is the component, [1] is the process.
    return (arrow, pipe)
def test_pypeline_with_subprocess_and_function_components(self):
    """Chain two subprocess components and one function component.

    The input string is wrapped as {'input': ...}, passed through two
    `reverse.sh` subprocess components joined by a dictionary wire that
    renames 'output' to 'input', converted back to a string and finally
    upper-cased by a plain function component. The expected result is the
    upper-cased input, with the state list holding one message per component
    in execution order.
    """
    if sys.platform.startswith('win'):
        self.fail("Currently only this unit test is only supported on non-Windows platforms")

    rev_msg_one = "reverse(1)"
    rev_msg_two = "reverse(2)"
    upper_msg = "upper"

    reverse_command = os.path.join("src", "pypeline", "helpers", "tests", "reverse.sh")

    comp_proc_one = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
        reverse_command, tuple(),
        lambda a, s: str(a['input']),
        lambda a, s: {'output': str(a)},
        state_mutator = lambda s: s.append(rev_msg_one) or s)
    try:
        comp_proc_two = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
            reverse_command, tuple(),
            lambda a, s: str(a['input']),
            lambda a, s: {'output': str(a)},
            state_mutator = lambda s: s.append(rev_msg_two) or s)
        try:
            comp_one = comp_proc_one[0]
            comp_two = comp_proc_two[0]

            upper_func = lambda a_string, s: a_string.upper()
            comp_three = cons_function_component(
                upper_func,
                state_mutator = lambda s: s.append(upper_msg) or s)

            # Wires: wrap the raw input, rename 'output' -> 'input' between
            # the two subprocess components, then unwrap back to a string.
            input_wire_func = lambda a, s: {'input': a}
            input_wire = cons_wire(input_wire_func)

            wire = cons_dictionary_wire({'output': 'input'})

            output_to_string_func = lambda a, s: str(a['output'])
            to_upper_wire = cons_wire(output_to_string_func)

            pipeline = cons_pipeline(input_wire,
                                     cons_wired_components(comp_one, comp_two, wire),
                                     to_upper_wire)
            pipeline = cons_composed_component(pipeline, comp_three)

            value = "hello world"
            target = (upper_func(value, None), [rev_msg_one, rev_msg_two, upper_msg])
            result = run_pipeline(pipeline, value, list())

            self.assertEqual(target, result)
        finally:
            # Always reap the second child, even if the assertions fail.
            comp_proc_two[1].terminate()
            comp_proc_two[1].wait()
    finally:
        # Always reap the first child.
        comp_proc_one[1].terminate()
        comp_proc_one[1].wait()
def test_pypeline_with_split_and_unsplit_wires(self):
    """Run a subprocess reverse and a function reverse in parallel.

    The input is wrapped as {'input': ...}, split, fed to a `reverse.sh`
    subprocess component and to an in-process reversing component side by
    side, and the two outputs are merged by an unsplit wire into a dict with
    'subprocess_output' and 'function_output' keys. Both entries are expected
    to equal the reversed input, and the state list is expected to hold the
    subprocess message followed by the function message.
    """
    if sys.platform.startswith('win'):
        self.fail("Currently only this unit test is only supported on non-Windows platforms")

    rev_msg_one = "reverse(subprocess)"
    rev_msg_two = "reverse(function)"

    reverse_command = os.path.join("src", "pypeline", "helpers", "tests", "reverse.sh")

    reverse_func = lambda a, s: a[::-1]
    input_func = lambda a, s: str(a['input'])
    output_func = lambda a, s: {'output': str(a)}

    comp_proc_one = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
        reverse_command, tuple(),
        input_func,
        output_func,
        state_mutator = lambda s: s.append(rev_msg_one) or s)
    try:
        comp_one = comp_proc_one[0]
        comp_two = cons_function_component(
            reverse_func,
            input_func,
            output_func,
            state_mutator = lambda s: s.append(rev_msg_two) or s)

        parallel_reverse_comp = cons_parallel_component(comp_one, comp_two)
        split_wire = cons_split_wire()
        unsplit_func = lambda a, b: {'subprocess_output' : a['output'],
                                     'function_output': b['output']}
        unsplit_wire = cons_unsplit_wire(unsplit_func)
        input_wire = cons_wire(lambda a, s: {'input': a})
        pipeline = cons_pipeline(input_wire,
                                 cons_composed_component(split_wire, parallel_reverse_comp),
                                 unsplit_wire)

        value = "hello world"
        result = run_pipeline(pipeline, value, list())

        # Both branches should produce the same reversed string.
        target_dict = {'output': reverse_func(value, None)}
        target_value = unsplit_func(target_dict, target_dict)
        target = (target_value, [rev_msg_one, rev_msg_two])

        self.assertEqual(target, result)
    finally:
        # Always reap the child process, even if the assertions fail.
        comp_proc_one[1].terminate()
        comp_proc_one[1].wait()
def initialise(config):
    """Build the component that filters a pair of line-aligned text files.

    The returned component (built with `cons_function_component`) receives a
    configuration mapping and a value mapping. It reads
    `value['src_filename']` and `value['trg_filename']` in lockstep and
    copies a line pair to `value['cleaned_src_filename']` /
    `value['cleaned_trg_filename']` only when BOTH lines contain fewer than
    `config['segment_length']` space characters. Its result is a dict holding
    the two cleaned filenames.

    Parameters:
        config: not read here; the wrapped function receives its own
            configuration argument when the component runs.
    """
    def _filter(limit, ifh1, ofh1, ifh2, ofh2):
        def _short(line):
            # "Short" means fewer than `limit` space characters in the line.
            return line.count(" ") < limit

        # zip() stops at the end of the shorter input.
        for (l1, l2) in zip(ifh1, ifh2):
            if _short(l1) and _short(l2):
                # Lines keep their own trailing newline, so write them as-is.
                ofh1.write(l1)
                ofh2.write(l2)

    def _filter_main(config, value):
        limit = config['segment_length']
        # Inputs are opened before outputs, so a missing input file raises
        # before any output file is created or truncated. The with-statement
        # closes every handle that was opened, even if another close fails.
        with open(value['src_filename'], "r") as ifh1, \
             open(value['trg_filename'], "r") as ifh2, \
             open(value['cleaned_src_filename'], "w") as ofh1, \
             open(value['cleaned_trg_filename'], "w") as ofh2:
            _filter(limit, ifh1, ofh1, ifh2, ofh2)
        return {'cleaned_src_filename': value['cleaned_src_filename'],
                'cleaned_trg_filename': value['cleaned_trg_filename']}

    return cons_function_component(_filter_main)
if __name__ == '__main__':
    # Manual smoke test: configure and initialise the component, run it on a
    # fixed tokenised corpus file, and raise if the reported output filenames
    # differ from the ones expected under the current working directory.
    from pypeline.helpers.helpers import eval_pipeline, cons_function_component

    model_dir = os.environ["PWD"]
    irstlm_home = os.environ["IRSTLM"]

    settings = {
        'irstlm_root': irstlm_home,
        'irstlm_smoothing_method': 'improved-kneser-ney',
        'language_model_directory': model_dir,
    }
    component_config = configure(settings)
    component = initialise(component_config)

    pipeline_input = {
        'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en',
    }
    actual = eval_pipeline(cons_function_component(component),
                           pipeline_input,
                           component_config)

    expected = {
        'add_start_end_filename': os.path.join(model_dir, 'news-commentary-v7.fr-en.sb.en'),
        'lm_filename': os.path.join(model_dir, 'news-commentary-v7.fr-en.lm.en.gz'),
        'compiled_lm_filename': os.path.join(model_dir, 'news-commentary-v7.fr-en.arpa.en'),
    }
    print("Target: %s" % expected)

    if actual != expected:
        raise Exception("Massive fail!")
# NOTE(review): the statements up to `return process` are the tail of a definition whose header is not visible here: it collects the three output filenames into a dict, prints it and returns it. The `if __name__ == '__main__':` part that follows is a manual smoke test: it configures and initialises the component, runs it on a fixed tokenised corpus file, and raises if the result differs from the filenames expected under $PWD.
output = {'add_start_end_filename': start_end_output_filename, 'lm_filename': lm_filename, 'compiled_lm_filename': compiled_lm_filename} print "IRSTLM Build: Output = %s" % output return output return process if __name__ == '__main__': from pypeline.helpers.helpers import eval_pipeline, cons_function_component lm_dir = os.environ["PWD"] configuration = {'irstlm_root': os.environ["IRSTLM"], 'irstlm_smoothing_method': 'improved-kneser-ney', 'language_model_directory': lm_dir} component_config = configure(configuration) component = initialise(component_config) value = eval_pipeline(cons_function_component(component), {'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'}, component_config) target = {'add_start_end_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.sb.en'), 'lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.lm.en.gz'), 'compiled_lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.arpa.en')} print "Target: %s" % target if value != target: raise Exception("Massive fail!")