Beispiel #1
0
 def load_bd(self, filename: str) -> None:
     """
     Loads sentence view from big_dict structure file.

     The file is parsed as JSON and iterated as a sequence of items. Each
     item is expected to hold its sentences under
     ``item['constant']['text']``; every sentence is a sequence of token
     mappings from which one word node is built. Each sentence node gets
     ``type == "sentence"`` and a ``words`` node list, and is appended to
     ``self.sentences``. ``self.columns`` is set to the fixed list of word
     attributes written by this loader.

     Args:
         filename: file name <str>

     Raises:
         OSError: if the file cannot be opened.
         ValueError: if the content is not valid UTF-8 encoded JSON
             (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
         KeyError, IndexError: if an item or token lacks one of the fields
             read below.
     """
     # JSON is UTF-8 by specification, so do not depend on the locale's
     # default encoding. The file is only needed while parsing.
     with open(filename, encoding="utf-8") as f:
         data = json.load(f)

     self.columns = [
         "word", "pos_start", "pos_end", "type", "tag", "class", "tone"
     ]
     for item in data:
         for sentence in item['constant']['text']:
             cur_sentence = graph.Node(self.graph, {})
             cur_sentence["type"] = "sentence"
             cur_sentence["words"] = graph.NodeList(self.graph)
             for token in sentence:
                 word = graph.Node(self.graph, {})
                 # Attributes are written in the same order as self.columns.
                 word["word"] = token["word"]
                 # Positions are stored as strings: [start, end] in "pos".
                 word["pos_start"] = str(token["pos"][0])
                 word["pos_end"] = str(token["pos"][1])
                 word["type"] = token["type"]
                 word["tag"] = token["tag"]
                 # "class" and "tone" live under "variable"; only the first
                 # entry of each is kept.
                 word["class"] = token["variable"]["class"][0]
                 word["tone"] = token["variable"]["tone"][0]
                 cur_sentence["words"].append(word)
             self.sentences.append(cur_sentence)
Beispiel #2
0
    def get_text(self, text):
        """Build sentence nodes from raw text and append them to the view.

        ``text`` is passed through ``load_words_save_separators`` and the
        result, together with ``text``, through ``get_sentences``. For every
        sentence returned, a node with ``type == "sentence"`` and a ``words``
        node list is created; each entry of the sentence becomes a node with
        ``type == "word"`` plus its ``word``, ``pos_start`` and ``pos_end``
        values. ``self.columns`` is set to the attributes stored on words.

        Args:
            text: the text to load.
        """
        tokens = load_words_save_separators(text)
        grouped = get_sentences(tokens, text)

        self.columns = ["type", "pos_start", "pos_end", "word"]
        # Fields copied verbatim from each token, in assignment order.
        copied_fields = ("word", "pos_start", "pos_end")

        for token_group in grouped:
            sentence_node = graph.Node(self.graph, {})
            sentence_node["words"] = graph.NodeList(self.graph)
            sentence_node["type"] = "sentence"

            for token in token_group:
                word_node = graph.Node(self.graph, {})
                word_node["type"] = "word"
                for field in copied_fields:
                    word_node[field] = token[field]
                sentence_node["words"].append(word_node)

            self.sentences.append(sentence_node)
Beispiel #3
0
 def __init__(self, config=None):
     """Initialise the base view and create the ``texts`` node list.

     Args:
         config: optional configuration, forwarded unchanged to
             ``BaseView.__init__``.
     """
     BaseView.__init__(self, config=config)
     # NOTE(review): self.graph is not assigned here; presumably it is set
     # by BaseView.__init__ - confirm.
     self.texts = graph.NodeList(self.graph)
Beispiel #4
0
    def load_column(self,
                    filename: str,
                    separator=" ",
                    maxlen=1000,
                    default_class=None,
                    expand=False,
                    maxwords=None) -> None:
        """Load the sentence view from a column-formatted text file.

        The first line of the file is the header: it is split on
        ``separator`` into the column names, which are stored in
        ``self.columns``. Every following line is either a sentence boundary
        (an empty line or a line containing ``<STOP>``) or one word whose
        fields are split on ``separator`` and stored on a word node under
        the matching column name. Each word node finally gets
        ``type == "word"``; each sentence node gets ``type == "sentence"``
        and a ``words`` node list. Completed sentences are appended to
        ``self.sentences``.

        Args:
            filename: path of the file to read.
            separator: field separator handed to ``split``.
            maxlen: once a sentence holds more than this many words it is
                closed and the next word starts a new sentence.
            default_class: value used for trailing columns that are missing
                on a line. When ``None``, a line with too few fields is
                reported on stdout and skipped.
            expand: when true, every field is additionally attached to the
                word as a child node ``{"type": column, "value": field}``;
                fields beyond the header width are attached under the last
                column name. The value is also stored in ``self.extended``.
            maxwords: optional cap on the number of lines read after the
                header (boundary lines count too). NOTE: when the cap is
                hit the method returns immediately and the sentence being
                built at that moment is not appended.

        Raises:
            OSError: if the file cannot be opened.
        """

        def new_sentence():
            # Every sentence node, including the first one, is typed.
            node = graph.Node(self.graph, {})
            node["words"] = graph.NodeList(self.graph)
            node["type"] = "sentence"
            return node

        def attach(word, column, value):
            # Extended mode mirrors a field as a child node of the word.
            nn = graph.Node(self.graph, {"type": column, "value": value})
            word.Connect(nn)

        # The context manager closes the file on every exit path, including
        # the early return for maxwords.
        with open(filename, "r") as f:
            # A byte-order mark sits at the very start of the file, i.e. in
            # the header line, so it is removed here as well as on data lines.
            header = f.readline().strip().replace("\ufeff", "")
            column_names = split(header, separator)
            cur_sentence = new_sentence()
            self.columns = column_names
            self.extended = expand
            n_columns = len(column_names)
            line_num = 0

            for line in f:
                line = line.strip().replace("\ufeff", "")
                line_num += 1
                if maxwords is not None and line_num > maxwords:
                    return

                is_boundary = "<STOP>" in line or line == ""
                if is_boundary or len(cur_sentence["words"]) > maxlen:
                    if len(cur_sentence["words"]) > 0:
                        self.sentences.append(cur_sentence)
                        cur_sentence = new_sentence()
                    if is_boundary:
                        continue
                    # Length overflow only: the current line is still a
                    # word and must not be lost, so fall through.

                fields = split(line, separator)
                if len(fields) < n_columns:
                    if default_class is None:
                        print(
                            "Error: on col " + str(line_num) +
                            " not enough data and no default class provided, skipping"
                        )
                        # Actually skip the line instead of storing an
                        # empty word node.
                        continue
                    # Pad the missing trailing columns with the default.
                    values = list(fields) + [default_class] * (
                        n_columns - len(fields))
                else:
                    values = fields[:n_columns]

                word = graph.Node(self.graph, {})
                for column, value in zip(column_names, values):
                    word[column] = value
                    if expand:
                        attach(word, column, value)
                if expand:
                    # Surplus fields have no column of their own; they are
                    # recorded as extra values of the last column.
                    for extra in fields[n_columns:]:
                        attach(word, column_names[-1], extra)

                word["type"] = "word"
                cur_sentence["words"].append(word)

        # Flush the sentence that was still open at end of file.
        if len(cur_sentence["words"]) > 0:
            self.sentences.append(cur_sentence)
Beispiel #5
0
 def __init__(self, config=None):
     """Initialise the base view and the sentence-view attributes.

     Args:
         config: optional configuration, forwarded unchanged to
             ``BaseView.__init__``.
     """
     BaseView.__init__(self, config=config)
     # NOTE(review): self.graph is not assigned here; presumably it is set
     # by BaseView.__init__ - confirm.
     self.sentences = graph.NodeList(self.graph)
     # Starts as False; presumably overwritten by a loader that supports an
     # "expand" mode - verify against the loaders of this class.
     self.extended = False
     # Starts as an empty string; not written anywhere in the visible code.
     self.clean_text = ""