def processLine(self, row):
     """Build one sample dict from a ``(text, label, label_name)`` row.

     The text is truncated to at most ``self.max_length``
     whitespace-separated tokens. Those tokens are passed to
     ``get_indices_tensor`` together with ``self.word_to_indx`` and
     ``self.max_length``.

     Args:
         row: A 3-item iterable unpacked as ``(text, label, label_name)``.

     Returns:
         A dict with keys ``'text'`` (the truncated tokens joined by single
         spaces), ``'x'`` (whatever ``get_indices_tensor`` returns),
         ``'y'`` (the label, unchanged) and ``'y_name'`` (the label name,
         unchanged).
     """
     raw_text, label, label_name = row
     # Tokenize once; re-splitting the joined text would yield the same list.
     tokens = raw_text.split()[:self.max_length]
     return {
         'text': " ".join(tokens),
         'x': get_indices_tensor(tokens, self.word_to_indx, self.max_length),
         'y': label,
         'y_name': label_name,
     }
Exemple #2
0
    def processLine(self, line, i):
        """Parse one tab-separated line into a sample dict.

        The first tab-separated field holds the label and the last one holds
        the text. The text is truncated to at most ``self.max_length``
        whitespace-separated tokens. As a side effect,
        ``self.args.num_class`` is set to 2 on every call.

        Args:
            line: The raw line, as ``str`` or ``bytes`` (bytes are decoded
                with the default codec).
            i: Index of the line; stored unchanged in the sample.

        Returns:
            A dict with keys ``'text'`` (truncated tokens joined by single
            spaces), ``'x'`` (whatever ``get_indices_tensor`` returns),
            ``'y'`` (the integer label) and ``'i'``.

        Raises:
            ValueError: If a token of the label field is not numeric.
            IndexError: If the label field is empty.
        """
        if isinstance(line, bytes):
            line = line.decode()
        fields = line.split('\t')
        # Split the label field into whitespace-separated tokens before
        # converting. Iterating over the field string itself walks it one
        # character at a time, so "10" would become [1.0, 0.0] and "0.5" or
        # "-1" would fail on the "." / "-" character.
        label_t = [float(v) for v in fields[0].split()]
        label = int(label_t[0])

        text_list = fields[-1].split()[:self.max_length]
        text = " ".join(text_list)
        self.args.num_class = 2
        x = get_indices_tensor(text_list, self.word_to_indx, self.max_length)
        sample = {'text': text, 'x': x, 'y': label, 'i': i}
        return sample
Exemple #3
0
 def processLine(self, line, i):
     """Turn one ``<label-char> <text>`` line into a sample dict.

     The first character of the line is converted to a float label and the
     text is taken from the third character onward, truncated to at most
     ``self.max_length`` whitespace-separated tokens. On success,
     ``self.args.num_class`` is set to 2.

     Args:
         line: The raw line, as ``str`` or ``bytes`` (bytes are decoded
             with the default codec).
         i: Index of the line; stored unchanged in the sample.

     Returns:
         A dict with keys ``'text'`` (truncated tokens joined by single
         spaces), ``'x'`` (whatever ``get_indices_tensor`` returns),
         ``'y'`` (the float label) and ``'i'``.

     Raises:
         NotImplementedError: If ``self.objective`` is ``'mse'``.
     """
     decoded = line.decode() if isinstance(line, bytes) else line
     # The label is parsed before the objective check, so a malformed line
     # fails on the label first.
     label = float(decoded[0])
     if self.objective == 'mse':
         raise NotImplementedError(
             "SST2 dataset only allows binary classification")
     self.args.num_class = 2
     tokens = decoded[2:].split()[:self.max_length]
     return {
         'text': " ".join(tokens),
         'x': get_indices_tensor(tokens, self.word_to_indx, self.max_length),
         'y': label,
         'i': i,
     }
Exemple #4
0
 def processLine(self, line, aspect_num, i):
     """Turn one multi-aspect line into a sample dict for a single aspect.

     Up to the first five whitespace-separated tokens of the line are
     parsed as float scores, and the score at ``aspect_num`` is selected.
     The text is the last tab-separated field, truncated to at most
     ``self.max_length`` whitespace-separated tokens.

     When ``self.objective`` is ``'mse'`` the label is the raw float score
     and ``self.args.num_class`` is set to 1. Otherwise the label is
     ``int(self.class_map[int(score * 10)])`` and ``self.args.num_class``
     is set to 3.

     Args:
         line: The raw line, as ``str`` or ``bytes`` (bytes are decoded
             with the default codec).
         aspect_num: Index into the parsed scores.
         i: Index of the line; stored unchanged in the sample.

     Returns:
         A dict with keys ``'text'`` (truncated tokens joined by single
         spaces), ``'x'`` (whatever ``get_indices_tensor`` returns),
         ``'y'`` (the label) and ``'i'``.
     """
     if isinstance(line, bytes):
         line = line.decode()
     aspect_scores = list(map(float, line.split()[:5]))
     regression = self.objective == 'mse'
     score = aspect_scores[aspect_num]
     if regression:
         label = score
         self.args.num_class = 1
     else:
         # Truncating conversion of the scaled score is the class_map key.
         bucket = int(score * 10)
         label = int(self.class_map[bucket])
         self.args.num_class = 3
     # Everything after the last tab (or the whole line if there is none).
     tokens = line.rpartition('\t')[2].split()[:self.max_length]
     return {
         'text': " ".join(tokens),
         'x': get_indices_tensor(tokens, self.word_to_indx, self.max_length),
         'y': label,
         'i': i,
     }