Example #1
    def analyze(self, text, level, keyword_level):
        in_text = nlp_pb2.InputText()
        in_text.text = text
        in_text.lang = lang_pb2.kor
        in_text.split_sentence = True
        in_text.use_tokenizer = False
        in_text.level = level
        in_text.keyword_frequency_level = keyword_level

        ret = self.stub.Analyze(in_text)

        # Build a JSON object from the response message.
        printer = json_format._Printer(True, True)
        doc = printer._MessageToJsonObject(ret)
        print(doc)

        # Build JSON text from the response message.
        json_text = json_format.MessageToJson(ret, True, True)
        print(json_text)
        for sentence in ret.sentences:
            # Morphemes rendered as "lemma/type".
            morp = ""
            for analysis in sentence.morps:
                morp = morp + " " + analysis.lemma + "/" + analysis.type
            print('morp -> ' + morp.strip())
            # Named entities rendered as "text/type".
            for ne in sentence.nes:
                print('NE -> ' + ne.text + "/" + ne.type)
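The _Printer class and its _MessageToJsonObject method are private to google.protobuf.json_format; the public helpers cover the same ground. A minimal sketch of the equivalent public calls (keyword names as in protobuf 3.x/4.x; very recent releases renamed including_default_value_fields):

    from google.protobuf import json_format

    def message_to_json(message):
        # Public equivalents of the private _Printer calls above:
        # MessageToDict returns a plain dict, MessageToJson a JSON string.
        doc = json_format.MessageToDict(
            message,
            including_default_value_fields=True,
            preserving_proto_field_name=True)
        json_text = json_format.MessageToJson(
            message,
            including_default_value_fields=True,
            preserving_proto_field_name=True)
        return doc, json_text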
Example #2
    def get_json_data(self):
        """
        :return: The protobuf data created by the analysis as a json object.

        See get_protobuf_data for more details.
        The JSON fields are defined by https://github.com/SaltieRL/carball/tree/master/api
        """
        printer = _Printer()
        js = printer._MessageToJsonObject(self.protobuf_game)
        return js
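For context, a call site might look like the sketch below; the analyze_replay_file helper and the replay path are assumptions about carball's top-level API, not part of the example above.

    import carball  # assumed to expose analyze_replay_file()

    # Hypothetical replay file; the helper returns the analysis manager.
    manager = carball.analyze_replay_file("my_replay.replay")
    js = manager.get_json_data()
    print(sorted(js.keys()))  # top-level fields defined by the api .proto files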
Example #3
    def write_json_out_to_file(self, file: IO):
        """
        Writes the json data to the specified file, as text.

        NOTES:
            The data is written as text (i.e. string), and the buffer mode must be 'w'.
                E.g. open(file_name, 'w')

        :param file: The file object (or a buffer).
        """

        if 'b' in file.mode:
            raise IOError("JSON files cannot be binary; use open(path, \"w\")")
        printer = _Printer()
        js = printer._MessageToJsonObject(self.protobuf_game)
        json.dump(js, file, indent=2, cls=CarballJsonEncoder)
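A usage sketch for the mode check above, continuing the hypothetical manager from the previous sketch; the file must be opened in text mode or the IOError is raised:

    # 'w' keeps file.mode free of 'b', so the guard passes.
    with open("analysis.json", "w") as f:
        manager.write_json_out_to_file(f)

    # open("analysis.json", "wb") would raise the IOError above.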
Example #4
    def analyze(self, text, level, keyword_level):
        in_text = nlp_pb2.InputText()
        in_text.text = text
        in_text.lang = lang_pb2.kor
        in_text.split_sentence = True
        in_text.use_tokenizer = False
        in_text.level = level
        in_text.keyword_frequency_level = keyword_level

        ret = self.stub.Analyze(in_text)

        # Build a JSON object from the response message.
        printer = json_format._Printer(True, True)
        doc = printer._MessageToJsonObject(ret)
        # print(doc)

        # Convert to a plain dict (MessageToDict returns a dict, not a JSON string).
        result = json_format.MessageToDict(ret, True, True)
        return result
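Because MessageToDict returns a plain dict, it can be fed straight back into a message with the public ParseDict helper. A self-contained round-trip sketch using a stock message type:

    from google.protobuf import json_format
    from google.protobuf import descriptor_pb2

    # message -> dict -> message, public API only.
    msg = descriptor_pb2.FileDescriptorProto(name="example.proto", package="demo")
    as_dict = json_format.MessageToDict(msg)  # {'name': 'example.proto', 'package': 'demo'}
    restored = json_format.ParseDict(as_dict, descriptor_pb2.FileDescriptorProto())
    assert restored == msg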
Example #5
 def analyze(self, text, level, keyword_level):
     in_text = nlp_pb2.InputText()
     in_text.text = text
     in_text.lang = lang_pb2.kor
     in_text.split_sentence = True
     in_text.use_tokenizer = False
     in_text.level = level
     in_text.keyword_frequency_level = keyword_level
     ret = self.stub.Analyze(in_text)
     # Build a JSON object from the response message.
     printer = json_format._Printer(True, True)
     doc = printer._MessageToJsonObject(ret)
     ret_txt = text_format.MessageToString(ret, False, False)
     # print(doc)
     # Build JSON text from the response message.
     json_text = json_format.MessageToJson(ret, True, True)
     # print(json_text)
     readable_text = ''
     for sentence in ret.sentences:
         morp = ""
         for analysis in sentence.morps:
             morp += " {0}/{1}".format(analysis.lemma, analysis.type)
         add_morp = "morp -> {0}".format(morp.strip())
         # print(add_morp)
         readable_text += add_morp + '\n'
         for ne in sentence.nes:
             add_ne = 'NE -> ' + "{0}/{1}".format(ne.text, ne.type)
             # print(add_ne)
             readable_text += add_ne + '\n'
     return readable_text, json_text, ret
Example #6
def getjson(message,
            including_default_value_fields=False,
            preserving_proto_field_name=False,
            indent=2,
            sort_keys=False,
            use_integers_for_enums=False,
            descriptor_pool=None):
    try:
        # Accept either a message class or an already-built instance.
        message = message()
    except Exception:
        pass
    try:
        ms = message.ListFields()
        if not ms:
            # No fields set: serialize the empty message.
            printer = _Printer(including_default_value_fields,
                               preserving_proto_field_name,
                               use_integers_for_enums, descriptor_pool)
            return printer.ToJsonString(message, indent, sort_keys)
    except Exception:
        pass
    printer = Printer(including_default_value_fields,
                      preserving_proto_field_name, use_integers_for_enums,
                      descriptor_pool)
    return printer.ToJsonString(message, indent, sort_keys)
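A usage sketch: passing the message class (not an instance) exercises the message = message() branch above. Timestamp is just a convenient stock message type:

    from google.protobuf import timestamp_pb2

    # A class: getjson instantiates it, finds no set fields, and takes
    # the first printer branch.
    print(getjson(timestamp_pb2.Timestamp))

    # A populated instance: ListFields() is non-empty, so the second
    # printer branch serializes it.
    print(getjson(timestamp_pb2.Timestamp(seconds=1700000000)))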
Example #7
    def render_html(self):
        json_obj = {"nodes": [], "links": []}

        json_printer = _Printer()

        for op in self._proto.op:
            op_json = json_printer._MessageToJsonObject(op)
            op_json["id"] = op_json["name"]
            op_json["node_type"] = "op"
            json_obj["nodes"].append(op_json)

        for tensor in self._proto.tensors:
            tensor_json = json_printer._MessageToJsonObject(tensor)

            tensor_json["id"] = tensor_json["name"]
            if "floatData" in tensor_json and \
                    len(tensor_json["floatData"]) > THREASHOLD:
                del tensor_json["floatData"]
            if "int32Data" in tensor_json and \
                    len(tensor_json["int32Data"]) > THREASHOLD:
                del tensor_json["int32Data"]
            tensor_json["node_type"] = "tensor"
            json_obj["nodes"].append(tensor_json)

        node_ids = [node["id"] for node in json_obj["nodes"]]

        tensor_to_op = {}
        for op in self._proto.op:
            for tensor in op.output:
                tensor_to_op[tensor] = op.name

        for op in json_obj["nodes"]:
            if "input" in op:
                for input_name in op["input"]:
                    if input_name in node_ids and op["name"] in node_ids:
                        # for weights
                        json_obj["links"].append({
                            "source": input_name,
                            "target": op["name"]
                        })
                    elif input_name in tensor_to_op and \
                            tensor_to_op[input_name] in node_ids:
                        # for intermediate tensor
                        json_obj["links"].append({
                            "source": tensor_to_op[input_name],
                            "target": op["name"]
                        })
                    else:
                        # for input
                        json_obj["nodes"].append({
                            "id": input_name,
                            "name": input_name,
                            "node_type": "input"
                        })
                        json_obj["links"].append({
                            "source": input_name,
                            "target": op["name"]
                        })

        json_msg = json.dumps(json_obj, cls=NPEncoder)

        cwd = os.path.dirname(__file__)
        with open(os.path.join(cwd, "index.html")) as f:
            html = f.read()
            return html % json_msg
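For reference, the node-link structure handed to index.html has the shape d3-style force layouts expect; a hand-written instance (values illustrative) looks like:

    json_obj = {
        "nodes": [
            {"id": "conv1", "name": "conv1", "node_type": "op"},
            {"id": "conv1_w", "name": "conv1_w", "node_type": "tensor"},
            {"id": "data", "name": "data", "node_type": "input"},
        ],
        "links": [
            {"source": "conv1_w", "target": "conv1"},  # weight edge
            {"source": "data", "target": "conv1"},     # graph input edge
        ],
    }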
Example #8
 def write_json_out_to_file(self, file):
     printer = _Printer()
     js = printer._MessageToJsonObject(self.protobuf_game)
     json.dump(js, file, indent=2, cls=CarballJsonEncoder)
Example #9
 def analyze(self, text, level, keyword_level):
     in_text = nlp_pb2.InputText()
     in_text.text = text
     in_text.lang = lang_pb2.kor
     in_text.split_sentence = True
     in_text.use_tokenizer = False
     in_text.level = level
     in_text.keyword_frequency_level = keyword_level
     ret = self.stub.Analyze(in_text)
     # Build a JSON object from the response message.
     printer = json_format._Printer(True, True)
     doc = printer._MessageToJsonObject(ret)
     ret_txt = text_format.MessageToString(ret, False, False)
     # print(doc)
     # Build JSON text from the response message.
     json_text = json_format.MessageToJson(ret, True, True)
     # print(json_text)
     readable_text = ''
     for sentence in ret.sentences:
         morp = ""
         for analysis in sentence.morps:
             # Restore the dictionary-form ending '다' on predicate stems.
             if analysis.type in ('VV', 'VA', 'VX', 'VCP'):
                 morp += " {0}다/{1}".format(analysis.lemma, analysis.type)
             else:
                 morp += " {0}/{1}".format(analysis.lemma, analysis.type)
         add_morp = "morp -> {0}".format(morp.strip())
         # print(add_morp)
         readable_text += add_morp + '\n'
         for ne in sentence.nes:
             if ne.type in ('VV', 'VA', 'VX', 'VCP'):
                 add_ne = "NE -> {0}다/{1}".format(ne.text, ne.type)
             else:
                 add_ne = "NE -> {0}/{1}".format(ne.text, ne.type)
             # print(add_ne)
             readable_text += add_ne + '\n'
     # Build the plain NLP sentence.
     json_data = json.loads(json_text)
     predicate_tags = ('VV', 'VA', 'VX', 'VCP', 'VCN')
     word_list = list()
     for sentence in json_data['sentences']:
         for words in sentence['words']:
             for tagged_word in words['tagged_text'].split():
                 word = tagged_word.split("/")[0]
                 tag = tagged_word.split("/")[1]
                 if tag in predicate_tags:
                     word += u"다"
                 word_list.append(word)
     nlp_sent = " ".join(word_list)
     # Modify the JSON data the same way: insert '다' before each predicate tag.
     for sentence in json_data['sentences']:
         for words in sentence['words']:
             for tag in predicate_tags:
                 marker = '/' + tag
                 if marker in words['tagged_text']:
                     words['tagged_text'] = words['tagged_text'].replace(
                         marker, u"다" + marker)
     for sentence in json_data['sentences']:
         for morps in sentence['morps']:
             if morps['type'] in predicate_tags:
                 morps['lemma'] += u"다"
     return nlp_sent, json.dumps(json_data), ret
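The predicate-tag handling above repeats the same rule across the morp, NE, and JSON passes; a small helper could centralize it (a sketch, with the tag set copied from the example):

    PREDICATE_TAGS = ('VV', 'VA', 'VX', 'VCP', 'VCN')

    def restore_da(lemma, tag):
        # Append the dictionary-form ending '다' to Korean predicate stems.
        return lemma + u"다" if tag in PREDICATE_TAGS else lemma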