Esempio n. 1
0
def compare_hlds_variants():
    """
    Write several XML renderings of every testbed sentence to ``xmltest/``
    so that they can be compared with each other.

    For each sentence that ``hlds.HLDSReader`` yields for
    ``hlds.testbed_file``, two files are written:

    - ``sentenceNNN-converted-test.xml``: ``hlds.create_hlds_file`` output
      in ``mode="test"``
    - ``sentenceNNN-converted-realize.xml``: ``hlds.create_hlds_file``
      output in ``mode="realize"``

    For each element of ``hlds_reader.xml_sentences``, the element is
    wrapped in a new ``<regression>`` root and written to
    ``sentenceNNN-original-test.xml``. ``NNN`` is the zero-padded,
    zero-based index of the sentence.

    TODO: kill bugs

    BUG1: sentence001-original-test contains 2(!) <item> sentences.
    """
    hlds_reader = hlds.HLDSReader(hlds.testbed_file)
    for i, sentence in enumerate(hlds_reader.sentences):
        index = str(i).zfill(3)
        # One output file per mode. Previously the "realize" file was
        # generated with mode="test" and written from the "test" string,
        # so both files always had identical content.
        for mode in ("test", "realize"):
            xml_sentence = hlds.create_hlds_file(sentence, mode=mode,
                                                 output="xml")
            util.write_to_file(
                xml_sentence,
                "xmltest/sentence{0}-converted-{1}.xml".format(index, mode))

    for i, item_etree in enumerate(hlds_reader.xml_sentences):
        # wrap the original element in a fresh <regression> root element
        root = etree.Element("regression")
        doc = etree.ElementTree(root)
        root.insert(0, item_etree)
        xml_sentence_original = hlds.etreeprint(doc)
        util.write_to_file(
            xml_sentence_original,
            "xmltest/sentence{0}-original-test.xml".format(str(i).zfill(3)))
Esempio n. 2
0
def compare_hlds_variants():
    """
    Dump each testbed sentence in three XML variants below ``xmltest/``
    for side-by-side comparison.

    The sentences are read with ``hlds.HLDSReader`` from
    ``hlds.testbed_file``. For every entry in ``hlds_reader.sentences``
    the result of ``hlds.create_hlds_file`` is written twice, once with
    ``mode="test"`` (``sentenceNNN-converted-test.xml``) and once with
    ``mode="realize"`` (``sentenceNNN-converted-realize.xml``).

    Every entry in ``hlds_reader.xml_sentences`` is inserted into a new
    ``<regression>`` root element and written to
    ``sentenceNNN-original-test.xml``. ``NNN`` is the zero-based index,
    zero-padded to three digits.

    TODO: kill bugs

    BUG1: sentence001-original-test contains 2(!) <item> sentences.
    """
    hlds_reader = hlds.HLDSReader(hlds.testbed_file)
    for i, sentence in enumerate(hlds_reader.sentences):
        sentence_id = str(i).zfill(3)

        xml_sentence_test = hlds.create_hlds_file(sentence,
                                                  mode="test",
                                                  output="xml")
        util.write_to_file(
            xml_sentence_test,
            "xmltest/sentence{0}-converted-test.xml".format(sentence_id))

        # The realize variant must be generated in "realize" mode and its
        # own string must be written; the earlier version reused the
        # "test" settings and the "test" string here.
        xml_sentence_realize = hlds.create_hlds_file(sentence,
                                                     mode="realize",
                                                     output="xml")
        util.write_to_file(
            xml_sentence_realize,
            "xmltest/sentence{0}-converted-realize.xml".format(sentence_id))

    for i, item_etree in enumerate(hlds_reader.xml_sentences):
        # wrap the original element in a fresh <regression> root element
        root = etree.Element("regression")
        doc = etree.ElementTree(root)
        root.insert(0, item_etree)
        xml_sentence_original = hlds.etreeprint(doc)
        util.write_to_file(
            xml_sentence_original,
            "xmltest/sentence{0}-original-test.xml".format(str(i).zfill(3)))
Esempio n. 3
0
def main():
    """
    This is the pypolibox commandline interface. It allows you to query
    the database and generate book recommendatins, which will either be
    handed to OpenCCG for generating sentences or printed to stdout in
    an XML format representing the text plans.
    """
    query = Query(sys.argv[1:])

    output_format = query.query_args.output_format
    valid_output_formats = ['openccg', 'hlds', 'textplan-xml', 'textplan-featstruct']
    if output_format not in valid_output_formats:
        sys.stderr.write("Output format must be one of: {}\n".format(valid_output_formats))
        sys.exit(1)

    textplans = generate_textplans(query)

    if output_format == 'openccg':
        openccg = initialize_openccg()
        print "{} text plans will be generated.".format(len(textplans.document_plans))
        for i, textplan in enumerate(textplans.document_plans):
            print "Generating text plan #%i:\n" % i
            check_and_realize_textplan(openccg, textplan)
    elif output_format == 'hlds':
        from copy import deepcopy
        from hlds import (Diamond, Sentence, diamond2sentence,
            add_nom_prefixes, create_hlds_file)

        for i, textplan in enumerate(textplans.document_plans):
            print "Text plan #%i:\n" % i

            # TODO: refactor to avoid code duplication w/
            # check_and_realize_textplan()
            msg_blocks = linearize_textplan(textplan)
            for msg_block in msg_blocks:
                try:
                    lexicalized_msg_block = lexicalize_message_block(msg_block)
                    print "The {0} message block can be realized " \
                          "as follows:\n".format(msg_block[Feature("msgType")])
                    for lexicalized_phrase in lexicalized_msg_block:
                        lexicalized_sentence = phrase2sentence(lexicalized_phrase)

                        # TODO: refactor to avoid duplication w/ OpenCCG.realize
                        temp_sentence = deepcopy(lexicalized_sentence)

                        if isinstance(lexicalized_sentence, Diamond):
                            temp_sentence = diamond2sentence(temp_sentence)

                        add_nom_prefixes(temp_sentence)
                        print create_hlds_file(temp_sentence,
                            mode="realize", output="xml")

                except NotImplementedError, err:
                    print err
                    print "The message block contains these messages:\n", msg_block, \
                          "\n\n**********\n\n"
Esempio n. 4
0
    def realize(self, featstruct, raw_output=True):
        """
        converts a ``Diamond`` or ``Sentence`` feature structure into HLDS-XML,
        writes it to a temporary file, realizes this file with ``tccg`` and
        parses the output it returns.

        All conversion steps operate on a deep copy of ``featstruct``. The
        unparsed result of ``self.realize_hlds()`` is stored in
        ``self.tccg_output``.

        :type featstruct: ``Diamond`` or ``Sentence``
        :param featstruct: the feature structure to be realized
        :param raw_output: currently not used by this method
        :return: the result of ``parse_tccg_generator_output()`` applied
            to the tccg output
        """
        temp_sentence = deepcopy(featstruct)

        if isinstance(featstruct, Diamond):
            temp_sentence = diamond2sentence(temp_sentence)

        add_nom_prefixes(temp_sentence)
        sentence_xml_str = create_hlds_file(temp_sentence,
                                            mode="realize",
                                            output="xml")

        tmp_file_path = os.path.abspath("pypolibox-tccg.tmp")
        # the context manager closes the file even if writing fails
        with open(tmp_file_path, "w") as tmp_file:
            tmp_file.write(sentence_xml_str)

        self.tccg_output = self.realize_hlds(tmp_file_path)
        # NOTE: the temporary file is left on disk; its removal is disabled.
        #os.remove(tmp_file_path)
        return parse_tccg_generator_output(self.tccg_output)
Esempio n. 5
0
    def realize(self, featstruct, raw_output=True):
        """
        converts a C{Diamond} or C{Sentence} feature structure into HLDS-XML,
        writes it to a temporary file, realizes this file with I{tccg} and
        parses the output it returns.

        The conversion is done on a deep copy of C{featstruct}. The unparsed
        result of C{self.realize_hlds()} is kept in C{self.tccg_output}.

        @type featstruct: C{Diamond} or C{Sentence}
        @param featstruct: the feature structure to be realized
        @param raw_output: currently not used by this method
        @return: the result of C{parse_tccg_generator_output()} applied to
        the tccg output
        """
        temp_sentence = deepcopy(featstruct)

        if isinstance(featstruct, Diamond):
            temp_sentence = diamond2sentence(temp_sentence)

        add_nom_prefixes(temp_sentence)
        sentence_xml_str = create_hlds_file(temp_sentence, mode="realize",
                                            output="xml")

        tmp_file_path = os.path.abspath("pypolibox-tccg.tmp")
        # 'with' guarantees that the file handle is closed, even if the
        # write raises an exception
        with open(tmp_file_path, "w") as tmp_file:
            tmp_file.write(sentence_xml_str)

        self.tccg_output = self.realize_hlds(tmp_file_path)
        # NOTE: the temporary file is left on disk; its removal is disabled.
        #os.remove(tmp_file_path)
        return parse_tccg_generator_output(self.tccg_output)
Esempio n. 6
0
def main():
    """
    This is the pypolibox commandline interface. It allows you to query
    the database and generate book recommendatins, which will either be
    handed to OpenCCG for generating sentences or printed to stdout in
    an XML format representing the text plans.
    """
    query = Query(sys.argv[1:])

    output_format = query.query_args.output_format
    valid_output_formats = ['openccg', 'hlds', 'textplan-xml', 'textplan-featstruct']
    if output_format not in valid_output_formats:
        sys.stderr.write("Output format must be one of: {}\n".format(valid_output_formats))
        sys.exit(1)

    try:
        lexicalize_messageblocks = \
            __import__("lexicalize_messageblocks_%s" % query.query_args.output_language, globals(), locals(), [], -1)
    except ImportError:
        raise

    try:
        lexicalization = \
            __import__("lexicalization_%s" % query.query_args.output_language, globals(), locals(), [], -1)
    except ImportError:
        raise

    lexicalize_message_block = lexicalize_messageblocks.lexicalize_message_block
    phrase2sentence = lexicalization.phrase2sentence


    textplans = generate_textplans(query)

    if output_format == 'openccg':
        openccg = initialize_openccg(lang=query.query_args.output_language)
        print "{} text plans will be generated.".format(len(textplans.document_plans))
        for i, textplan in enumerate(textplans.document_plans):
            print "Generating text plan #%i:\n" % i
            check_and_realize_textplan(openccg, textplan, lexicalize_message_block, phrase2sentence)
    elif output_format == 'hlds':
        from copy import deepcopy
        from hlds import (Diamond, Sentence, diamond2sentence,
            add_nom_prefixes, create_hlds_file)

        for i, textplan in enumerate(textplans.document_plans):
            print "Text plan #%i:\n" % i

            # TODO: refactor to avoid code duplication w/
            # check_and_realize_textplan()
            msg_blocks = linearize_textplan(textplan)
            for msg_block in msg_blocks:
                try:
                    lexicalized_msg_block = lexicalize_message_block(msg_block)
                    print "The {0} message block can be realized " \
                          "as follows:\n".format(msg_block[Feature("msgType")])
                    for lexicalized_phrase in lexicalized_msg_block:
                        lexicalized_sentence = phrase2sentence(lexicalized_phrase)

                        # TODO: refactor to avoid duplication w/ OpenCCG.realize
                        temp_sentence = deepcopy(lexicalized_sentence)

                        if isinstance(lexicalized_sentence, Diamond):
                            temp_sentence = diamond2sentence(temp_sentence)

                        add_nom_prefixes(temp_sentence)
                        print create_hlds_file(temp_sentence,
                            mode="realize", output="xml")

                except NotImplementedError, err:
                    print err
                    print "The message block contains these messages:\n", msg_block, \
                          "\n\n**********\n\n"