def compare_hlds_variants():
    """
    Write three XML variants of every testbed sentence to ``xmltest/``.

    For each sentence provided by ``hlds.HLDSReader(hlds.testbed_file)``
    the following files are written (``NNN`` is the zero-padded index):

    - ``sentenceNNN-converted-test.xml``: ``hlds.create_hlds_file`` output
      in ``"test"`` mode
    - ``sentenceNNN-converted-realize.xml``: ``hlds.create_hlds_file``
      output in ``"realize"`` mode
    - ``sentenceNNN-original-test.xml``: the reader's original ``<item>``
      element, wrapped in a new ``<regression>`` root element

    TODO: kill bugs
    BUG1: sentence001-original-test contains 2(!) <item> sentences.
    """
    hlds_reader = hlds.HLDSReader(hlds.testbed_file)

    for i, sentence in enumerate(hlds_reader.sentences):
        sentence_id = str(i).zfill(3)

        xml_sentence_test = hlds.create_hlds_file(sentence, mode="test",
                                                  output="xml")
        util.write_to_file(
            xml_sentence_test,
            "xmltest/sentence{0}-converted-test.xml".format(sentence_id))

        # The realize variant used to be generated in "test" mode and the
        # *test* string was written to the realize file (copy-paste slip).
        xml_sentence_realize = hlds.create_hlds_file(sentence, mode="realize",
                                                     output="xml")
        util.write_to_file(
            xml_sentence_realize,
            "xmltest/sentence{0}-converted-realize.xml".format(sentence_id))

    for i, item_etree in enumerate(hlds_reader.xml_sentences):
        # wrap the original <item> element in a fresh <regression> document
        root = etree.Element("regression")
        doc = etree.ElementTree(root)
        root.insert(0, item_etree)
        xml_sentence_original = hlds.etreeprint(doc)
        util.write_to_file(
            xml_sentence_original,
            "xmltest/sentence{0}-original-test.xml".format(str(i).zfill(3)))
def compare_hlds_variants():
    """
    Write three XML variants of every testbed sentence to ``xmltest/``.

    For each sentence provided by ``hlds.HLDSReader(hlds.testbed_file)``
    the following files are written (``NNN`` is the zero-padded index):

    - ``sentenceNNN-converted-test.xml``: ``hlds.create_hlds_file`` output
      in ``"test"`` mode
    - ``sentenceNNN-converted-realize.xml``: ``hlds.create_hlds_file``
      output in ``"realize"`` mode
    - ``sentenceNNN-original-test.xml``: the reader's original ``<item>``
      element, wrapped in a new ``<regression>`` root element

    TODO: kill bugs
    BUG1: sentence001-original-test contains 2(!) <item> sentences.
    """
    hlds_reader = hlds.HLDSReader(hlds.testbed_file)

    for i, sentence in enumerate(hlds_reader.sentences):
        sentence_id = str(i).zfill(3)

        xml_sentence_test = hlds.create_hlds_file(sentence, mode="test",
                                                  output="xml")
        util.write_to_file(
            xml_sentence_test,
            "xmltest/sentence{0}-converted-test.xml".format(sentence_id))

        # The realize variant used to be generated in "test" mode and the
        # *test* string was written to the realize file (copy-paste slip).
        xml_sentence_realize = hlds.create_hlds_file(sentence, mode="realize",
                                                     output="xml")
        util.write_to_file(
            xml_sentence_realize,
            "xmltest/sentence{0}-converted-realize.xml".format(sentence_id))

    for i, item_etree in enumerate(hlds_reader.xml_sentences):
        # wrap the original <item> element in a fresh <regression> document
        root = etree.Element("regression")
        doc = etree.ElementTree(root)
        root.insert(0, item_etree)
        xml_sentence_original = hlds.etreeprint(doc)
        util.write_to_file(
            xml_sentence_original,
            "xmltest/sentence{0}-original-test.xml".format(str(i).zfill(3)))
def main():
    """
    This is the pypolibox commandline interface. It allows you to query
    the database and generate book recommendations, which will either be
    handed to OpenCCG for generating sentences or printed to stdout in
    an XML format representing the text plans.

    The command line arguments (``sys.argv[1:]``) are parsed by ``Query``.
    If the requested output format is not one of the valid formats, an
    error message is written to stderr and the program exits with
    status 1.

    NOTE(review): only the 'openccg' and 'hlds' formats are acted upon in
    this function; 'textplan-xml' and 'textplan-featstruct' pass the
    validation but produce no output here -- confirm whether that is
    intended.
    """
    query = Query(sys.argv[1:])
    output_format = query.query_args.output_format
    valid_output_formats = ['openccg', 'hlds', 'textplan-xml',
                            'textplan-featstruct']
    if output_format not in valid_output_formats:
        sys.stderr.write(
            "Output format must be one of: {}\n".format(valid_output_formats))
        sys.exit(1)

    textplans = generate_textplans(query)

    # NOTE: every print below takes one parenthesized expression, which
    # produces the same output as the Python 2 print statement.
    if output_format == 'openccg':
        openccg = initialize_openccg()
        print("{} text plans will be generated.".format(
            len(textplans.document_plans)))
        for i, textplan in enumerate(textplans.document_plans):
            print("Generating text plan #%i:\n" % i)
            check_and_realize_textplan(openccg, textplan)

    elif output_format == 'hlds':
        # imported lazily: only this branch needs them
        from copy import deepcopy
        from hlds import (Diamond, diamond2sentence, add_nom_prefixes,
                          create_hlds_file)

        for i, textplan in enumerate(textplans.document_plans):
            print("Text plan #%i:\n" % i)

            # TODO: refactor to avoid code duplication w/
            # check_and_realize_textplan()
            msg_blocks = linearize_textplan(textplan)
            for msg_block in msg_blocks:
                try:
                    lexicalized_msg_block = lexicalize_message_block(msg_block)
                    print("The {0} message block can be realized "
                          "as follows:\n".format(
                              msg_block[Feature("msgType")]))
                    for lexicalized_phrase in lexicalized_msg_block:
                        lexicalized_sentence = phrase2sentence(
                            lexicalized_phrase)

                        # TODO: refactor to avoid duplication w/
                        # OpenCCG.realize
                        # work on a copy, so the lexicalized sentence itself
                        # is left untouched
                        temp_sentence = deepcopy(lexicalized_sentence)
                        if isinstance(lexicalized_sentence, Diamond):
                            temp_sentence = diamond2sentence(temp_sentence)
                        add_nom_prefixes(temp_sentence)
                        print(create_hlds_file(temp_sentence, mode="realize",
                                               output="xml"))
                except NotImplementedError as err:
                    print(err)
                    # the single space after the message block mirrors the
                    # separator the former multi-item print statement wrote
                    print("The message block contains these messages:\n"
                          + str(msg_block) + " \n\n**********\n\n")
def realize(self, featstruct, raw_output=True):
    """
    converts a ``Diamond`` or ``Sentence`` feature structure into
    HLDS-XML, writes it to a temporary file, realizes this file with
    ``tccg`` and parses the output it returns.

    The conversion works on a deep copy of ``featstruct``. The output
    returned by ``self.realize_hlds`` is stored in ``self.tccg_output``
    before it is parsed.

    :param featstruct: the feature structure to be realized
    :type featstruct: ``Diamond`` or ``Sentence``
    :param raw_output: not used by this method
    :return: the result of ``parse_tccg_generator_output`` applied to
        ``self.tccg_output``
    """
    temp_sentence = deepcopy(featstruct)
    if isinstance(featstruct, Diamond):
        temp_sentence = diamond2sentence(temp_sentence)
    add_nom_prefixes(temp_sentence)
    sentence_xml_str = create_hlds_file(temp_sentence, mode="realize",
                                        output="xml")

    tmp_file_path = os.path.abspath("pypolibox-tccg.tmp")
    # the context manager closes the file even if write() raises
    with open(tmp_file_path, "w") as tmp_file:
        tmp_file.write(sentence_xml_str)

    self.tccg_output = self.realize_hlds(tmp_file_path)
    # NOTE(review): the temporary file is left on disk; its removal is
    # disabled -- confirm whether that is intentional.
    #os.remove(tmp_file_path)
    return parse_tccg_generator_output(self.tccg_output)
def realize(self, featstruct, raw_output=True):
    """
    converts a C{Diamond} or C{Sentence} feature structure into
    HLDS-XML, writes it to a temporary file, realizes this file with
    I{tccg} and parses the output it returns.

    The conversion works on a deep copy of C{featstruct}. The output
    returned by C{self.realize_hlds} is stored in C{self.tccg_output}
    before it is parsed.

    @param featstruct: the feature structure to be realized
    @type featstruct: C{Diamond} or C{Sentence}
    @param raw_output: not used by this method
    @return: the result of C{parse_tccg_generator_output} applied to
        C{self.tccg_output}
    """
    temp_sentence = deepcopy(featstruct)
    if isinstance(featstruct, Diamond):
        temp_sentence = diamond2sentence(temp_sentence)
    add_nom_prefixes(temp_sentence)
    sentence_xml_str = create_hlds_file(temp_sentence, mode="realize",
                                        output="xml")

    tmp_file_path = os.path.abspath("pypolibox-tccg.tmp")
    # the context manager closes the file even if write() raises
    with open(tmp_file_path, "w") as tmp_file:
        tmp_file.write(sentence_xml_str)

    self.tccg_output = self.realize_hlds(tmp_file_path)
    # NOTE(review): the temporary file is left on disk; its removal is
    # disabled -- confirm whether that is intentional.
    #os.remove(tmp_file_path)
    return parse_tccg_generator_output(self.tccg_output)
def main():
    """
    This is the pypolibox commandline interface. It allows you to query
    the database and generate book recommendations, which will either be
    handed to OpenCCG for generating sentences or printed to stdout in
    an XML format representing the text plans.

    The command line arguments (``sys.argv[1:]``) are parsed by ``Query``.
    If the requested output format is not one of the valid formats, an
    error message is written to stderr and the program exits with
    status 1.

    The lexicalization code is chosen by output language: the modules
    ``lexicalize_messageblocks_<lang>`` and ``lexicalization_<lang>`` are
    imported by name. An ``ImportError`` is raised if either of them
    cannot be imported.

    NOTE(review): only the 'openccg' and 'hlds' formats are acted upon in
    this function; 'textplan-xml' and 'textplan-featstruct' pass the
    validation but produce no output here -- confirm whether that is
    intended.
    """
    query = Query(sys.argv[1:])
    output_format = query.query_args.output_format
    valid_output_formats = ['openccg', 'hlds', 'textplan-xml',
                            'textplan-featstruct']
    if output_format not in valid_output_formats:
        sys.stderr.write(
            "Output format must be one of: {}\n".format(valid_output_formats))
        sys.exit(1)

    # An ImportError simply propagates to the caller; the former
    # ``try: ... except ImportError: raise`` wrappers did nothing else.
    # level=-1 is the Python 2 default (relative import first, then
    # absolute).
    language = query.query_args.output_language
    lexicalize_messageblocks = __import__(
        "lexicalize_messageblocks_%s" % language,
        globals(), locals(), [], -1)
    lexicalization = __import__(
        "lexicalization_%s" % language,
        globals(), locals(), [], -1)

    lexicalize_message_block = lexicalize_messageblocks.lexicalize_message_block
    phrase2sentence = lexicalization.phrase2sentence

    textplans = generate_textplans(query)

    # NOTE: every print below takes one parenthesized expression, which
    # produces the same output as the Python 2 print statement.
    if output_format == 'openccg':
        openccg = initialize_openccg(lang=query.query_args.output_language)
        print("{} text plans will be generated.".format(
            len(textplans.document_plans)))
        for i, textplan in enumerate(textplans.document_plans):
            print("Generating text plan #%i:\n" % i)
            check_and_realize_textplan(openccg, textplan,
                                       lexicalize_message_block,
                                       phrase2sentence)

    elif output_format == 'hlds':
        # imported lazily: only this branch needs them
        from copy import deepcopy
        from hlds import (Diamond, diamond2sentence, add_nom_prefixes,
                          create_hlds_file)

        for i, textplan in enumerate(textplans.document_plans):
            print("Text plan #%i:\n" % i)

            # TODO: refactor to avoid code duplication w/
            # check_and_realize_textplan()
            msg_blocks = linearize_textplan(textplan)
            for msg_block in msg_blocks:
                try:
                    lexicalized_msg_block = lexicalize_message_block(msg_block)
                    print("The {0} message block can be realized "
                          "as follows:\n".format(
                              msg_block[Feature("msgType")]))
                    for lexicalized_phrase in lexicalized_msg_block:
                        lexicalized_sentence = phrase2sentence(
                            lexicalized_phrase)

                        # TODO: refactor to avoid duplication w/
                        # OpenCCG.realize
                        # work on a copy, so the lexicalized sentence itself
                        # is left untouched
                        temp_sentence = deepcopy(lexicalized_sentence)
                        if isinstance(lexicalized_sentence, Diamond):
                            temp_sentence = diamond2sentence(temp_sentence)
                        add_nom_prefixes(temp_sentence)
                        print(create_hlds_file(temp_sentence, mode="realize",
                                               output="xml"))
                except NotImplementedError as err:
                    print(err)
                    # the single space after the message block mirrors the
                    # separator the former multi-item print statement wrote
                    print("The message block contains these messages:\n"
                          + str(msg_block) + " \n\n**********\n\n")