Beispiel #1
0
    def load_corpus(self, path) -> List[Example]:
        """Load a JSON-lines corpus and return one preprocessed Example per record.

        Every non-blank line of the file is parsed as a JSON object, e.g.
        ``{"syllable_contents": [3, 128, 200, 5, 30, 30, 268, 2, 130, 5, 69,
        6, 6, 6, 6, 4], "eval_reply": 0}``, and each key/value pair is copied
        onto a fresh ``Example`` as an attribute.  ``syllable_contents`` is
        then normalised exactly once per record:

        1. Runs of three or more identical tokens are shortened to two
           (``[1, 1, 1, 2, 2, 3, 3, 3] -> [1, 1, 2, 2, 3, 3]``).
        2. Token ids 0 and 1 are exchanged.

        Args:
            path: Location of the corpus file.  Any path containing the
                substring ``'raw'`` is redirected to the fixed raw training
                file.

        Returns:
            The list of populated ``Example`` objects in file order
            (presumably stored as ``self.examples`` by the caller -- this
            method itself does not assign it).

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        if 'raw' in path:
            path = '/data/hate_raw/train/raw.json'
        preprocessed = []
        with open(path, encoding='utf-8') as fp:
            for line in fp:
                # Lines read from a file keep their trailing newline, so a
                # plain truthiness test never filters anything; strip first
                # so blank lines are skipped instead of crashing json.loads.
                line = line.strip()
                if not line:
                    continue
                ex = Example()
                for k, v in json.loads(line).items():
                    setattr(ex, k, v)

                # Normalise only after *all* fields are set.  Doing it inside
                # the per-key loop repeats it once per JSON key, so the 0<->1
                # swap cancels itself out for records with an even number of
                # keys, and it fails when "syllable_contents" is not the
                # first key of the record.
                tokens = getattr(ex, 'syllable_contents', None)
                if tokens is not None:
                    # Drop a token when the two tokens before it (in the
                    # original sequence) are identical to it, which leaves at
                    # most two of any run.
                    collapsed = [
                        tok for pos, tok in enumerate(tokens)
                        if pos < 2
                        or not (tokens[pos - 2] == tokens[pos - 1] == tok)
                    ]
                    # Exchange ids 0 and 1 in a single pass.
                    # NOTE(review): presumably these are two special ids
                    # (e.g. padding/unknown) -- confirm against the vocab.
                    ex.syllable_contents = [
                        1 if tok == 0 else (0 if tok == 1 else tok)
                        for tok in collapsed
                    ]
                preprocessed.append(ex)
        return preprocessed
Beispiel #2
0
 def load_corpus(self, path) -> List[Example]:
     """Load a JSON-lines corpus and return one preprocessed Example per record.

     Every non-blank line of the file is parsed as a JSON object, e.g.
     ``{"syllable_contents": [3, 128, 200, 5, 30, 30, 268, 2, 130, 5, 69,
     6, 6, 6, 6, 4], "eval_reply": 0}``, and each key/value pair is copied
     onto a fresh ``Example`` as an attribute.  ``syllable_contents`` is
     then normalised exactly once per record:

     1. The first and last tokens are removed (``[1:-1]``).
     2. Token ids 0 and 1 are exchanged.

     Args:
         path: Location of the corpus file.  Any path containing the
             substring ``'raw'`` is redirected to the fixed raw training
             file.

     Returns:
         The list of populated ``Example`` objects in file order
         (presumably stored as ``self.examples`` by the caller -- this
         method itself does not assign it).

     Raises:
         OSError: If the file cannot be opened.
         json.JSONDecodeError: If a non-blank line is not valid JSON.
     """
     if 'raw' in path:
         path = '/data/hate_raw/train/raw.json'
     preprocessed = []
     with open(path, encoding='utf-8') as fp:
         for line in fp:
             # Lines read from a file keep their trailing newline, so a
             # plain truthiness test never filters anything; strip first so
             # blank lines are skipped instead of crashing json.loads.
             line = line.strip()
             if not line:
                 continue
             ex = Example()
             for k, v in json.loads(line).items():
                 setattr(ex, k, v)

             # Normalise only after *all* fields are set.  Doing it inside
             # the per-key loop repeats it once per JSON key: a two-key
             # record would lose two tokens from each end and have the
             # 0<->1 swap cancelled out, and it fails when
             # "syllable_contents" is not the first key of the record.
             tokens = getattr(ex, 'syllable_contents', None)
             if tokens is not None:
                 # NOTE(review): the trimmed first/last tokens are presumably
                 # sequence boundary markers (3 and 4 in the sample record)
                 # -- confirm against the vocab.
                 ex.syllable_contents = [
                     1 if tok == 0 else (0 if tok == 1 else tok)
                     for tok in tokens[1:-1]
                 ]
             preprocessed.append(ex)
     return preprocessed