def __init__(self, config):
    """Create the aligner, extractor and annotator from a config file.

    ``config`` is handed to ``SafeConfigParser.read``; the resulting parser
    object is then passed to each component's constructor.
    """
    self.logger = logging.getLogger('translation_log.updater')

    # parse config file
    settings = SafeConfigParser()
    settings.read(config)

    # every component is configured from the same parsed settings
    self.Aligner_object = Aligner_IBM1(settings)
    self.Extractor_object = Extractor_Moses(settings)
    self.Annotator_object = Annotator_onlinecache(settings)
def __init__(self, config):
    """Create the aligner, extractor and annotator from a config file.

    ``config`` is handed to ``SafeConfigParser.read``; the resulting parser
    object is then passed to each component's constructor.
    """
    self.logger = logging.getLogger('translation_log.updater')

    # parse config file
    settings = SafeConfigParser()
    settings.read(config)

    # every component is configured from the same parsed settings
    self.Aligner_object = Aligner_GIZA(settings)
    self.Extractor_object = Extractor_Moses(settings)
    self.Annotator_object = Annotator_onlinecache(settings)
class UpdaterProc(object):
    """Run a (source, correction) pair through aligner, extractor and annotator.

    The three components are built once in the constructor from a shared
    configuration parser; ``update`` drives them in sequence and ``reset``
    returns the markup carrying the two "clear" commands.
    """

    def __init__(self, config):
        """Create the aligner, extractor and annotator from a config file.

        ``config`` is handed to ``SafeConfigParser.read``; the resulting
        parser object is then passed to each component's constructor.
        """
        self.logger = logging.getLogger('translation_log.updater')

        # parse config file
        settings = SafeConfigParser()
        settings.read(config)

        self.Aligner_object = Aligner_onlineGIZA(settings)
        self.Extractor_object = Extractor_Moses(settings)
        self.Annotator_object = Annotator_onlinecache(settings)

    def update(self, source="", target=""):
        """Align, extract and push updates for one (source, correction) pair.

        Logs the aligner input and output and the three extracted values,
        forwards them to the annotator's ``cbtm_update`` and ``cblm_update``,
        and returns whatever the annotator's ``annotate`` yields for an
        empty source string.
        """
        # get alignment information for the (source, correction) pair
        for label, text in (("ALIGNER_INPUT source: ", source),
                            ("ALIGNER_INPUT correction: ", target)):
            self.log(label + str(text))
        alignment = self.Aligner_object.align(source=source, correction=target)
        self.log("ALIGNER_OUTPUT: " + str(alignment))

        # get phrase pairs from the alignment information
        bias, new, full = self.Extractor_object.extract_phrases(
            source, target, alignment)
        for label, phrases in (("BIAS: ", bias),
                               ("NEW: ", new),
                               ("FULL: ", full)):
            self.log(label + str(phrases))

        annotator = self.Annotator_object
        annotator.cbtm_update(new=new, bias=bias, full=full)
        annotator.cblm_update(target)

        # read and annotate the next sentence (an empty placeholder source)
        return annotator.annotate("")

    def reset(self):
        """Return the markup holding the cblm and cbtm "clear" commands."""
        clear_commands = (
            '<dlt cblm-command="clear"/>',
            '<dlt cbtm-command="clear"/>',
        )
        return ''.join(clear_commands)

    def log(self, message):
        """Write ``message`` to the updater's logger at INFO level."""
        self.logger.info(message)
class UpdaterProc(object):
    """Run a (source, correction) pair through aligner, extractor and annotator.

    The three components are built once in the constructor from a shared
    configuration parser; ``update`` drives them in sequence and ``reset``
    returns the markup carrying the two "clear" commands.
    """

    def __init__(self, config):
        """Create the aligner, extractor and annotator from a config file.

        ``config`` is handed to ``SafeConfigParser.read``; the resulting
        parser object is then passed to each component's constructor.
        """
        self.logger = logging.getLogger('translation_log.updater')

        # parse config file
        settings = SafeConfigParser()
        settings.read(config)

        self.Aligner_object = Aligner_GIZA(settings)
        self.Extractor_object = Extractor_Moses(settings)
        self.Annotator_object = Annotator_onlinecache(settings)

    def update(self, source="", target=""):
        """Align, extract and push updates for one (source, correction) pair.

        Logs the aligner input and output and the three extracted values,
        forwards them to the annotator's ``cbtm_update`` and ``cblm_update``,
        and returns whatever the annotator's ``annotate`` yields for an
        empty source string.
        """
        # get alignment information for the (source, correction) pair
        for label, text in (("ALIGNER_INPUT source: ", source),
                            ("ALIGNER_INPUT correction: ", target)):
            self.log(label + str(text))
        alignment = self.Aligner_object.align(source=source, correction=target)
        self.log("ALIGNER_OUTPUT: " + str(alignment))

        # get phrase pairs from the alignment information
        bias, new, full = self.Extractor_object.extract_phrases(
            source, target, alignment)
        for label, phrases in (("BIAS: ", bias),
                               ("NEW: ", new),
                               ("FULL: ", full)):
            self.log(label + str(phrases))

        annotator = self.Annotator_object
        annotator.cbtm_update(new=new, bias=bias, full=full)
        annotator.cblm_update(target)

        # read and annotate the next sentence (an empty placeholder source)
        return annotator.annotate("")

    def reset(self):
        """Return the markup holding the cblm and cbtm "clear" commands."""
        clear_commands = (
            '<dlt cblm-command="clear"/>',
            '<dlt cbtm-command="clear"/>',
        )
        return ''.join(clear_commands)

    def log(self, message):
        """Write ``message`` to the updater's logger at INFO level."""
        self.logger.info(message)
sys.exit(1) if extractor_type == "Moses": Extractor_object = Extractor_Moses(parser) elif extractor_type == "Constrained_Search": Extractor_object = Extractor_Constrained_Search(parser) elif extractor_type == "Dummy": Extractor_object = Extractor_Dummy(parser) else: logging.info("This extractor tool is UNKNOWN") sys.exit(1) if annotator_type == "onlinexml": Annotator_object = Annotator_onlinexml(parser) elif annotator_type == "onlinecache": Annotator_object = Annotator_onlinecache(parser) else: logging.info("This annotation tool is UNKNOWN:") sys.exit(1) # main loop # initialize: first sentence has no history source = input.readline().strip() annotated_source = source s_id = 1 while source: logging.info(str(s_id)) # talk to decoder logging.info("DECODER_IN: " + annotated_source) decoder_out, decoder_err = Decoder_object.communicate(annotated_source) logging.info("DECODER_OUT: " + decoder_out)