def __message_strkey_tupleval2xml(msg, key, val):
    """
    helper function for __message2xml(). Serializes one message entry whose
    key is a plain string (i.e. most of them) as a new child element of
    C{msg}. The key becomes the tag name of that child.

    @type msg: C{etree._Element}
    @param msg: the XML element that receives the new child element
    @type key: C{str}
    @param key: the message key, used as the tag name of the child element
    @type val: C{tuple}
    @param val: a (value, rating) pair. The value is either a C{frozenset}
    or a single value (e.g. C{str} or C{int}); the rating is stored in the
    child's 'rating' attribute.
    """
    value, rating = val

    if not isinstance(value, frozenset):
        # single values (title, language, year, pages etc.) are stored
        # directly in the 'value' attribute of the child element
        etree.SubElement(msg, key, value=ensure_unicode(value), rating=rating)
        return

    # set values (authors, keywords, proglangs etc.) get one nested
    # <value> element per member
    container = etree.SubElement(msg, key, rating=rating)
    for member in value:
        member_node = etree.SubElement(container, "value")
        member_node.text = ensure_unicode(member)
def __diamond_fs2xml(diamond):
    """
    builds the HLDS XML representation (an etree element) of a ``Diamond``
    feature structure, recursing into all Diamonds nested inside it.

    :type diamond: ``Diamond``
    :param diamond: a Diamond feature structure containing
        nom? prop? diamond* elements
    :rtype: ``etree._Element``
    :return: a <diamond> element whose children are, in this order, the
        optional <nom>, the optional <prop> and one <diamond> element per
        nested Diamond (in sorted key order)
    """
    make = ElementMaker()
    diamond_etree = make.diamond(mode=ensure_unicode(diamond[Feature("mode")]))

    # <prop> is inserted first; inserting <nom> at index 0 afterwards moves
    # <prop> to the second position, since nom(inal) has to be the first
    # argument/sub tag of a diamond (if present).
    if "prop" in diamond:
        diamond_etree.insert(0, make.prop(name=ensure_unicode(diamond["prop"])))
    if "nom" in diamond:
        diamond_etree.insert(0, make.nom(name=ensure_unicode(diamond["nom"])))

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    nested_diamonds = [diamond[key] for key in sorted(diamond.keys())
                       if isinstance(diamond[key], Diamond)]

    # convert all nested Diamonds before attaching any of them
    nested_etrees = [__diamond_fs2xml(nested) for nested in nested_diamonds]
    for nested_etree in nested_etrees:
        diamond_etree.append(nested_etree)
    return diamond_etree
def __diamond_fs2xml(diamond):
    """
    builds the HLDS XML representation (an etree element) of a C{Diamond}
    feature structure, recursing into all Diamonds nested inside it.

    @type diamond: C{Diamond}
    @param diamond: a Diamond feature structure containing
    nom? prop? diamond* elements

    @rtype: C{etree._Element}
    @return: a <diamond> element whose children are, in this order, the
    optional <nom>, the optional <prop> and one <diamond> element per
    nested Diamond (in sorted key order)
    """
    make = ElementMaker()
    diamond_etree = make.diamond(mode=ensure_unicode(diamond[Feature("mode")]))

    # <prop> is inserted first; inserting <nom> at index 0 afterwards moves
    # <prop> to the second position, since nom(inal) has to be the first
    # argument/sub tag of a diamond (if present).
    if "prop" in diamond:
        diamond_etree.insert(0, make.prop(name=ensure_unicode(diamond["prop"])))
    if "nom" in diamond:
        diamond_etree.insert(0, make.nom(name=ensure_unicode(diamond["nom"])))

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    nested_diamonds = [diamond[key] for key in sorted(diamond.keys())
                       if isinstance(diamond[key], Diamond)]

    # convert all nested Diamonds before attaching any of them
    nested_etrees = [__diamond_fs2xml(nested) for nested in nested_diamonds]
    for nested_etree in nested_etrees:
        diamond_etree.append(nested_etree)
    return diamond_etree
def __sentence_fs2xml(sentence, mode="test"):
    """
    transforms a sentence (in NLTK feature structure notation) into its
    corresponding HLDS XML structure.

    :type sentence: ``Sentence``
    :param sentence: a sentence in NLTK feature structure notation

    :type mode: ``str``
    :param mode: "test", if the sentence will be part of a (regression)
        testbed file (ccg-test). "realize", if the sentence will be put in a
        file on its own (ccg-realize). Every value other than "test" is
        handled like "realize".

    :rtype: ``etree._Element``
    :return: the input sentence in HLDS XML format (represented as an etree
        element): an <item> element wrapping <xml><lf> in "test" mode, the
        bare <lf> element otherwise
    """
    # Compare by value, not by identity: an ``is`` check against a string
    # literal only succeeds if the caller's string happens to be the very
    # same (interned) object, which is an implementation detail.
    testbed_mode = mode == "test"

    if testbed_mode:
        expected_parses = sentence[Feature("expected_parses")]
        text = sentence[Feature("text")]
        item = etree.Element("item", numOfParses=str(expected_parses),
                             string=ensure_unicode(text))
        xml = etree.SubElement(item, "xml")
        lf = etree.SubElement(xml, "lf")
    else:  # mode is "realize"
        item = None
        lf = etree.Element("lf")

    root_nom = sentence[Feature("root_nom")]
    satop = etree.SubElement(lf, "satop", nom=root_nom)

    if Feature("root_prop") in sentence:
        root_prop = sentence[Feature("root_prop")]
        etree.SubElement(satop, "prop", name=root_prop)

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    diamonds = [sentence[key] for key in sorted(sentence.keys())
                if isinstance(sentence[key], Diamond)]

    # convert every Diamond first, then attach them behind the optional <prop>
    etree_diamonds = [__diamond_fs2xml(diamond) for diamond in diamonds]
    for etree_diamond in etree_diamonds:
        satop.append(etree_diamond)

    if testbed_mode:
        return item
    return lf
def __sentence_fs2xml(sentence, mode="test"):
    """
    transforms a sentence (in NLTK feature structure notation) into its
    corresponding HLDS XML structure.

    @type sentence: C{Sentence}
    @param sentence: a sentence in NLTK feature structure notation

    @type mode: C{str}
    @param mode: "test", if the sentence will be part of a (regression)
    testbed file (ccg-test). "realize", if the sentence will be put in a
    file on its own (ccg-realize). Every value other than "test" is
    handled like "realize".

    @rtype: C{etree._Element}
    @return: the input sentence in HLDS XML format (represented as an etree
    element): an <item> element wrapping <xml><lf> in "test" mode, the
    bare <lf> element otherwise
    """
    # Compare by value, not by identity: an C{is} check against a string
    # literal only succeeds if the caller's string happens to be the very
    # same (interned) object, which is an implementation detail.
    testbed_mode = mode == "test"

    if testbed_mode:
        expected_parses = sentence[Feature("expected_parses")]
        text = sentence[Feature("text")]
        item = etree.Element("item", numOfParses=str(expected_parses),
                             string=ensure_unicode(text))
        xml = etree.SubElement(item, "xml")
        lf = etree.SubElement(xml, "lf")
    else:  # mode is "realize"
        item = None
        lf = etree.Element("lf")

    root_nom = sentence[Feature("root_nom")]
    satop = etree.SubElement(lf, "satop", nom=root_nom)

    if Feature("root_prop") in sentence:
        root_prop = sentence[Feature("root_prop")]
        etree.SubElement(satop, "prop", name=root_prop)

    # keys need to be sorted, otherwise Diamonds within a Sentence will have a
    # different order than before. Diamond keys seem ordered, but they aren't
    # (keys beginning with numbers seem to be in descending order, those
    # beginning with letters in ascending order)
    diamonds = [sentence[key] for key in sorted(sentence.keys())
                if isinstance(sentence[key], Diamond)]

    # convert every Diamond first, then attach them behind the optional <prop>
    etree_diamonds = [__diamond_fs2xml(diamond) for diamond in diamonds]
    for etree_diamond in etree_diamonds:
        satop.append(etree_diamond)

    if testbed_mode:
        return item
    return lf
def answer_simple_qa(question, simple=True):
    """
    Sends a question to the graph QA HTTP service and returns the answer
    as a list of triples.

    :param question: the question text; it is converted with
        ``ensure_unicode`` and UTF-8 encoded before being appended to the URL
    :param simple: if true, the ``simple_qa`` endpoint is queried,
        otherwise the ``qa`` endpoint
    :return: ``None`` if the service replies with its literal
        "No answer !" message. Otherwise a list with one
        ``[name, relation, name]`` entry per item under the ``'triples'``
        key of the JSON reply (empty if there is no such key); both names
        are passed through ``delete_source_prefix``.
    """
    # NOTE(review): the question is appended without percent-encoding --
    # presumably get_data_url() copes with that; confirm.
    if simple:
        endpoint = 'http://10.1.1.28:8000/api/graph/simple_qa/?q='
    else:
        endpoint = 'http://10.1.1.28:8000/api/graph/qa/?q='
    ask_url = endpoint + ensure_unicode(question).encode('utf-8')

    data = get_data_url(ask_url)
    if data == '\"No answer !\"':
        return None

    ans_dict = json.loads(data)
    # only the 'triples' entry of the reply is used
    return [
        [
            delete_source_prefix(triple[0]['name']),
            triple[1],
            delete_source_prefix(triple[2]['name'])
        ]
        for triple in ans_dict.get('triples', [])
    ]
def __message_strkey_tupleval2xml(msg, key, val):
    """
    helper function for __message2xml(). Serializes one message entry whose
    key is a plain string (i.e. most of them) as a new child element of
    ``msg``. The key becomes the tag name of that child.

    :type msg: ``etree._Element``
    :param msg: the XML element that receives the new child element
    :type key: ``str``
    :param key: the message key, used as the tag name of the child element
    :type val: ``tuple``
    :param val: a (value, rating) pair. The value is either a ``frozenset``
        or a single value (e.g. ``str`` or ``int``); the rating is stored in
        the child's 'rating' attribute.
    """
    value, rating = val

    if not isinstance(value, frozenset):
        # single values (title, language, year, pages etc.) are stored
        # directly in the 'value' attribute of the child element
        etree.SubElement(msg, key, value=ensure_unicode(value), rating=rating)
        return

    # set values (authors, keywords, proglangs etc.) get one nested
    # <value> element per member
    container = etree.SubElement(msg, key, rating=rating)
    for member in value:
        member_node = etree.SubElement(container, "value")
        member_node.text = ensure_unicode(member)
def __message_featurekey2xml(xml_msg, msg_key, msg_val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is of type C{Feature}, i.e. *reference_authors* and
    *reference_title*. The new child element of C{xml_msg} is tagged with
    the feature's name and marked with the attribute feature="true".

    @type xml_msg: C{etree._Element}
    @param xml_msg: the XML element that receives the new child element
    @type msg_key: C{Feature}
    @param msg_key: the message key; its C{name} is used as the tag name
    @type msg_val: C{tuple}
    @param msg_val: a (value, rating) pair. The value is either a
    C{frozenset} (one nested <value> element per member) or a single value
    (stored in the 'value' attribute); the rating is stored in the
    'rating' attribute.
    """
    value, rating = msg_val

    if isinstance(value, frozenset):  # *reference_authors*
        featkey = etree.SubElement(xml_msg, msg_key.name, feature="true",
                                   rating=rating)
        for element in value:
            featval = etree.SubElement(featkey, "value")
            # Use ensure_unicode() like the scalar branch below (and like
            # __message_strkey_tupleval2xml) instead of str(), which fails
            # on non-ASCII unicode members under Python 2.
            featval.text = ensure_unicode(element)
    else:  # isinstance(value, str), i.e. *reference_title*
        etree.SubElement(xml_msg, msg_key.name, feature="true",
                         value=ensure_unicode(value), rating=rating)
def add_nom_prefixes(diamond):
    """
    Rewrites, in place, the 'nom' value of every C{Diamond} found while
    walking the given C{Diamond} or C{Sentence} structure, so that it
    carries a prefix/index. Without this, I{ccg-realize} will only produce
    gibberish.

    Every <nom> tag has a 'name' attribute, which contains a
    category/type-like description of the corresponding <prop> tag's name
    attribute, e.g.::

        <diamond mode="PRÄP">
            <nom name="v1:zugehörigkeit"/>
            <prop name="von"/>
        </diamond>

    Here 'zugehörigkeit' is the name of a category that the preposition
    'von' belongs to. The prefix character is chosen by
    C{__determine_nom_prefix()} (usually the first character of the prop
    name) and is followed by a running index that is counted separately
    for each prefix character, in the order in which C{walk()} yields the
    diamonds. In this example 'v1:zugehörigkeit' means that "von" is the
    first C{diamond} in the structure that starts with 'v' and belongs to
    the category 'zugehörigkeit'.

    @type diamond: C{Diamond} or C{Sentence}
    @param diamond: the feature structure whose noms are rewritten
    """
    prefix_counts = defaultdict(int)

    # the complete walk is collected before any nom is rewritten
    for node in list(diamond.walk()):
        # only exact Diamond instances that actually carry a nom are touched
        if type(node) is not Diamond or "nom" not in node.keys():
            continue

        prefix_char = __determine_nom_prefix(node)
        prefix_counts[prefix_char] += 1

        bare_nom = node["nom"]
        bare_nom_type = type(bare_nom)
        node["nom"] = "{0}{1}:{2}".format(ensure_utf8(prefix_char),
                                          prefix_counts[prefix_char],
                                          ensure_utf8(bare_nom))
        if bare_nom_type == unicode:
            # preserve unicode, if the string was unicode encoded before
            node["nom"] = ensure_unicode(node["nom"])
def add_nom_prefixes(diamond):
    """
    Rewrites, in place, the 'nom' value of every ``Diamond`` found while
    walking the given ``Diamond`` or ``Sentence`` structure, so that it
    carries a prefix/index. Without this, ``ccg-realize`` will only produce
    gibberish.

    Every <nom> tag has a 'name' attribute, which contains a
    category/type-like description of the corresponding <prop> tag's name
    attribute, e.g.::

        <diamond mode="PRÄP">
            <nom name="v1:zugehörigkeit"/>
            <prop name="von"/>
        </diamond>

    Here 'zugehörigkeit' is the name of a category that the preposition
    'von' belongs to. The prefix character is chosen by
    ``__determine_nom_prefix()`` (usually the first character of the prop
    name) and is followed by a running index that is counted separately
    for each prefix character, in the order in which ``walk()`` yields the
    diamonds. In this example 'v1:zugehörigkeit' means that "von" is the
    first ``diamond`` in the structure that starts with 'v' and belongs to
    the category 'zugehörigkeit'.

    :type diamond: ``Diamond`` or ``Sentence``
    :param diamond: the feature structure whose noms are rewritten
    """
    prefix_counts = defaultdict(int)

    # the complete walk is collected before any nom is rewritten
    for node in list(diamond.walk()):
        # only exact Diamond instances that actually carry a nom are touched
        if type(node) is not Diamond or "nom" not in node.keys():
            continue

        prefix_char = __determine_nom_prefix(node)
        prefix_counts[prefix_char] += 1

        bare_nom = node["nom"]
        bare_nom_type = type(bare_nom)
        node["nom"] = "{0}{1}:{2}".format(ensure_utf8(prefix_char),
                                          prefix_counts[prefix_char],
                                          ensure_utf8(bare_nom))
        if bare_nom_type == unicode:
            # preserve unicode, if the string was unicode encoded before
            node["nom"] = ensure_unicode(node["nom"])
def answer_seq2seq_qa(question):
    """
    Answers a question by turning it into a question pattern plus an entity
    name, looking the entity up and picking the first decoded relation that
    the entity actually has.

    Intermediate results (both NER taggings and the resulting
    pattern/entity pair) are printed to stdout.

    :param question: the question text
    :return: ``None`` if ``get_top_entity()`` finds nothing for the entity
        name; an empty list if none of the decoded relations is a key of the
        entity dict; otherwise a list holding the single triple
        ``[entity name, relation, value of that relation]``
    """
    seg_list, ner_list, simple_ner_list = ner_ques(question)
    print('crf: ' + ' '.join(ner_list))
    print('simple: ' + ' '.join(simple_ner_list))

    ques_pattern, entity_name = get_pattern(ner_list, seg_list)
    if ques_pattern == question:
        # the first tagging left the question unchanged, so retry with the
        # second ("simple") tagging
        ques_pattern, entity_name = get_pattern(simple_ner_list, seg_list)
    print('|'.join((ques_pattern, entity_name)))

    sub_entity_dict = get_top_entity(entity_name)
    if not sub_entity_dict:
        return None

    # NOTE(review): 20 is presumably the number of candidate relations to
    # decode -- confirm against decode_ques().
    rel_score_list = decode_ques(ques_pattern, 20)

    # take the first candidate (in the order returned by decode_ques) that
    # is a key of the entity dict; the scores themselves are not used
    for rel, _rel_score in rel_score_list:
        if rel in sub_entity_dict:
            break
    else:
        return []

    test_rel = ensure_unicode(rel)
    if not test_rel:
        return []

    # Fetch the value with the key that was actually found in the dict. The
    # converted ``test_rel`` is only used for the output, because looking it
    # up could miss the key when conversion changes the string type.
    return [[sub_entity_dict['name'], test_rel, sub_entity_dict[rel]]]
def __message_featurekey2xml(xml_msg, msg_key, msg_val):
    """
    helper function for __message2xml(). converts those parts of a message
    to XML whose key is of type ``Feature``, i.e. *reference_authors* and
    *reference_title*. The new child element of ``xml_msg`` is tagged with
    the feature's name and marked with the attribute feature="true".

    :type xml_msg: ``etree._Element``
    :param xml_msg: the XML element that receives the new child element
    :type msg_key: ``Feature``
    :param msg_key: the message key; its ``name`` is used as the tag name
    :type msg_val: ``tuple``
    :param msg_val: a (value, rating) pair. The value is either a
        ``frozenset`` (one nested <value> element per member) or a single
        value (stored in the 'value' attribute); the rating is stored in the
        'rating' attribute.
    """
    value, rating = msg_val

    if isinstance(value, frozenset):  # *reference_authors*
        featkey = etree.SubElement(xml_msg, msg_key.name, feature="true",
                                   rating=rating)
        for element in value:
            featval = etree.SubElement(featkey, "value")
            # Use ensure_unicode() like the scalar branch below (and like
            # __message_strkey_tupleval2xml) instead of str(), which fails
            # on non-ASCII unicode members under Python 2.
            featval.text = ensure_unicode(element)
    else:  # isinstance(value, str), i.e. *reference_title*
        etree.SubElement(xml_msg, msg_key.name, feature="true",
                         value=ensure_unicode(value), rating=rating)