Ejemplo n.º 1
0
     def __cons_and_start_subprocess_component(command,
                                               arguments,
                                               input_forming_func,
                                               output_forming_func,
                                               state_mutator = None):
          """Start a child process and wrap it in a function component.

          The process is launched as ``[command] + arguments`` with both its
          stdin and stdout connected to pipes.  The wrapped function sends one
          line to the process per call and reads one line back.

          command -- executable to launch.
          arguments -- iterable of extra command line arguments.
          input_forming_func, output_forming_func, state_mutator -- passed
               through unchanged to cons_function_component.

          If building the component raises, the child process is terminated
          and waited on before the exception is re-raised.

          Callers in this file index the result as [0] (the component) and
          [1] (an object offering terminate() and wait()).
          """
          # Build the argv list: the command followed by its arguments.
          args = [command]
          args.extend(arguments)
          pipe = subprocess.Popen(args,
                                  stdin = subprocess.PIPE,
                                  stdout = subprocess.PIPE)

          # Factory that binds the process handle into the function handed
          # to the component constructor.
          def get_process_function(process_pipe):
               def process_function(a, s):
                    # The state argument `s` is not used here.
                    new_a = None
                    # Falsy inputs are never sent to the process; the result
                    # is None in that case.
                    if a:
                         # One request line out, flushed so it is not left in
                         # this side's buffer, then one response line back.
                         print >> process_pipe.stdin, str(a).strip()
                         process_pipe.stdin.flush()
                         new_a = process_pipe.stdout.readline().strip()
                    return new_a
               return process_function

          try:
               arrow = cons_function_component(get_process_function(pipe),
                                               input_forming_func,
                                               output_forming_func,
                                               state_mutator)
          except Exception, ex:
               # Do not leave the child process running when component
               # construction fails.
               pipe.terminate()
               pipe.wait()
               raise ex
Ejemplo n.º 2
0
     def test_pypeline_with_subprocess_and_function_components(self):
          """Run a pipeline of two subprocess components and one function component.

          Two components backed by the reverse.sh helper script are wired
          together, and their result is passed through an upper-casing
          function component.  Each component appends a marker to the state
          list.  The expected result is the upper-cased input paired with the
          three markers in execution order (this presumes the two reverse.sh
          stages cancel each other out).

          The test fails outright on Windows platforms.
          """
          if sys.platform.startswith('win'):
               self.fail("Currently only this unit test is only supported on non-Windows platforms")

          rev_msg_one = "reverse(1)"
          rev_msg_two = "reverse(2)"
          upper_msg = "upper"

          # Path is relative, so the test expects to be run from the
          # directory that contains "src".
          reverse_command = os.path.join("src", "pypeline", "helpers", "tests", "reverse.sh")

          comp_proc_one = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
               reverse_command, tuple(),
               lambda a, s: str(a['input']),
               lambda a, s: {'output': str(a)},
               state_mutator = lambda s: s.append(rev_msg_one) or s)
          try:
               comp_proc_two = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
                    reverse_command, tuple(),
                    lambda a, s: str(a['input']),
                    lambda a, s: {'output': str(a)},
                    state_mutator = lambda s: s.append(rev_msg_two) or s)
               try:
                    comp_one = comp_proc_one[0]
                    comp_two = comp_proc_two[0]

                    upper_func = lambda a_string, s: a_string.upper()
                    comp_three = cons_function_component(upper_func,
                                                         state_mutator = lambda s: s.append(upper_msg) or s)

                    # Wraps the raw input into the dictionary shape the
                    # subprocess components read from.
                    input_wire = cons_wire(lambda a, s: {'input': a})

                    # Connects the first component's 'output' to the second
                    # component's 'input'.
                    wire = cons_dictionary_wire({'output': 'input'})

                    # Unwraps the dictionary into a plain string for the
                    # upper-casing component.
                    to_upper_wire = cons_wire(lambda a, s: str(a['output']))

                    pipeline = cons_pipeline(input_wire,
                                             cons_wired_components(comp_one, comp_two, wire),
                                             to_upper_wire)
                    pipeline = cons_composed_component(pipeline, comp_three)

                    value = "hello world"
                    target = (upper_func(value, None), [rev_msg_one, rev_msg_two, upper_msg])
                    # Feed the same `value` used to build the target so the
                    # two cannot drift apart.
                    result = run_pipeline(pipeline, value, list())

                    self.assertEqual(target, result)
               finally:
                    # Always stop the second child process.
                    comp_proc_two[1].terminate()
                    comp_proc_two[1].wait()
          finally:
               # Always stop the first child process.
               comp_proc_one[1].terminate()
               comp_proc_one[1].wait()
Ejemplo n.º 3
0
     def test_pypeline_with_split_and_unsplit_wires(self):
          """Run a subprocess component and a function component in parallel.

          The input is split, fed to a reverse.sh-backed subprocess component
          and to an in-process reversing function component, and the two
          outputs are merged into one dictionary by an unsplit wire.  Both
          branches are expected to yield the reversed input, and the state
          list is expected to hold both markers (subprocess marker first).

          The test fails outright on Windows platforms.
          """
          if sys.platform.startswith('win'):
               self.fail("Currently only this unit test is only supported on non-Windows platforms")

          rev_msg_one = "reverse(subprocess)"
          rev_msg_two = "reverse(function)"

          # Path is relative, so the test expects to be run from the
          # directory that contains "src".
          reverse_command = os.path.join("src", "pypeline", "helpers", "tests", "reverse.sh")

          reverse_func = lambda a, s: a[::-1]
          input_func = lambda a, s: str(a['input'])
          output_func = lambda a, s: {'output': str(a)}

          comp_proc_one = PypelineHelperUnitTest.__cons_and_start_subprocess_component(
               reverse_command, tuple(),
               input_func,
               output_func,
               state_mutator = lambda s: s.append(rev_msg_one) or s)
          try:
               comp_one = comp_proc_one[0]
               comp_two = cons_function_component(
                    reverse_func,
                    input_func,
                    output_func,
                    state_mutator = lambda s: s.append(rev_msg_two) or s)

               parallel_reverse_comp = cons_parallel_component(comp_one, comp_two)
               split_wire = cons_split_wire()
               # Merges the two branch results into a single dictionary.
               unsplit_func = lambda a, b: {'subprocess_output' : a['output'],
                                            'function_output': b['output']}
               unsplit_wire = cons_unsplit_wire(unsplit_func)
               input_wire = cons_wire(lambda a, s: {'input': a})
               pipeline = cons_pipeline(input_wire,
                                        cons_composed_component(split_wire, parallel_reverse_comp),
                                        unsplit_wire)

               value = "hello world"
               # Feed the same `value` used to build the target so the two
               # cannot drift apart.
               result = run_pipeline(pipeline, value, list())
               target_dict = {'output': reverse_func(value, None)}
               target_value = unsplit_func(target_dict, target_dict)
               target = (target_value, [rev_msg_one, rev_msg_two])
               self.assertEqual(target, result)
          finally:
               # Always stop the child process.
               comp_proc_one[1].terminate()
               comp_proc_one[1].wait()
Ejemplo n.º 4
0
def initialise(config):
  """Build a function component that filters out long line pairs.

  The component reads two files in lockstep and copies a pair of lines to
  the two output files only when both lines contain fewer than
  ``segment_length`` space characters.

  The ``config`` argument of this function is not read here; the limit is
  taken from the ``config`` mapping handed to the component when it runs.

  Returns whatever ``cons_function_component`` returns for the filter.
  """
  def _filter(limit, ifh1, ofh1, ifh2, ofh2):
    # Copy paired lines whose space counts are both below `limit`.
    def _short(line):
      # Length is measured as the number of space characters in the line.
      return line.count(" ") < limit

    # zip stops at the end of the shorter input, so unpaired trailing lines
    # are dropped.
    for (l1, l2) in zip(ifh1, ifh2):
      if _short(l1) and _short(l2):
        # Lines keep their own line endings, hence end=''.
        print(l1, end='', file=ofh1)
        print(l2, end='', file=ofh2)

  def _filter_main(config, value):
    # Reads 'segment_length' from `config` and the four file names from
    # `value`; returns a dictionary naming the two cleaned files.
    limit = config['segment_length']
    # The with statement closes every handle that was opened, even if an
    # open, the filtering, or another handle's close raises.
    with open(value['src_filename'], "r") as ifh1, \
         open(value['trg_filename'], "r") as ifh2, \
         open(value['cleaned_src_filename'], "w") as ofh1, \
         open(value['cleaned_trg_filename'], "w") as ofh2:
      _filter(limit, ifh1, ofh1, ifh2, ofh2)

    return {'cleaned_src_filename': value['cleaned_src_filename'],
            'cleaned_trg_filename': value['cleaned_trg_filename']}

  return cons_function_component(_filter_main)
Ejemplo n.º 5
0

if __name__ == '__main__':
    # Manual check: build the component from environment-derived settings,
    # run it on a fixed input file and compare the result with the expected
    # output file names.
    from pypeline.helpers.helpers import eval_pipeline, cons_function_component

    lm_dir = os.environ["PWD"]
    configuration = {
        'irstlm_root': os.environ["IRSTLM"],
        'irstlm_smoothing_method': 'improved-kneser-ney',
        'language_model_directory': lm_dir
    }
    component_config = configure(configuration)
    component = initialise(component_config)

    # Input handed to the pipeline.
    pipeline_input = {
        'input_filename':
        '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'
    }
    wrapped_component = cons_function_component(component)
    value = eval_pipeline(wrapped_component, pipeline_input, component_config)

    # Expected output: three file names inside the language model directory.
    start_end_path = os.path.join(lm_dir, 'news-commentary-v7.fr-en.sb.en')
    lm_path = os.path.join(lm_dir, 'news-commentary-v7.fr-en.lm.en.gz')
    compiled_lm_path = os.path.join(lm_dir, 'news-commentary-v7.fr-en.arpa.en')
    target = {
        'add_start_end_filename': start_end_path,
        'lm_filename': lm_path,
        'compiled_lm_filename': compiled_lm_path
    }

    print("Target: %s" % target)
    if value != target:
        raise Exception("Massive fail!")
Ejemplo n.º 6
0
        output = {'add_start_end_filename': start_end_output_filename,
                  'lm_filename': lm_filename,
                  'compiled_lm_filename': compiled_lm_filename}

        print "IRSTLM Build: Output = %s" % output

        return output

    return process


if __name__ == '__main__':
    # Manual check of the component: configure it from the environment, run
    # it once and verify the reported output file names.
    from pypeline.helpers.helpers import eval_pipeline, cons_function_component

    lm_dir = os.environ["PWD"]

    # Assemble the raw configuration one entry at a time.
    configuration = {}
    configuration['irstlm_root'] = os.environ["IRSTLM"]
    configuration['irstlm_smoothing_method'] = 'improved-kneser-ney'
    configuration['language_model_directory'] = lm_dir

    component_config = configure(configuration)
    component = initialise(component_config)

    value = eval_pipeline(
        cons_function_component(component),
        {'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'},
        component_config)

    # Every expected file name lives in the language model directory.
    target = {}
    target['add_start_end_filename'] = os.path.join(lm_dir, 'news-commentary-v7.fr-en.sb.en')
    target['lm_filename'] = os.path.join(lm_dir, 'news-commentary-v7.fr-en.lm.en.gz')
    target['compiled_lm_filename'] = os.path.join(lm_dir, 'news-commentary-v7.fr-en.arpa.en')

    print("Target: %s" % target)
    if value != target:
        raise Exception("Massive fail!")