def get_dev_examples(self, df: pd.DataFrame, train_column_names: list, label_column_names: list):
    """See base class.

    Builds dev-split `InputExample`s from *df*.

    Args:
        df: DataFrame holding one example per row.
        train_column_names: single-element list naming the text column.
        label_column_names: list naming the label column(s).

    Returns:
        List of `InputExample` with guids "dev-<row index>".
    """
    examples = []
    # BUG FIX: the original looped `enumerate(df)`, which iterates a
    # DataFrame's COLUMN NAMES (strings), so `line[train_column_names]`
    # indexed a string with a list and raised TypeError. Iterate rows with
    # iterrows() and select columns the same way get_train_examples does.
    for (i, line) in df.iterrows():
        guid = "dev-%d" % (i)
        text_a = tokenization.convert_to_unicode(
            line.loc[train_column_names].values[0])
        # Mirror get_train_examples: labels are taken as a raw value array,
        # not converted to unicode (they may be multi-label numerics).
        label = line[label_column_names].values
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
    return examples
def get_test_examples(self, df: pd.DataFrame, test_column_names: list, label_num: int):
    """See base class.

    Builds test-split `InputExample`s from *df*. Test rows carry no gold
    labels, so every example gets an all-zero label vector of length
    *label_num* as a placeholder.
    """
    examples = []
    for idx, row in df.iterrows():
        text = tokenization.convert_to_unicode(
            row.loc[test_column_names].values[0])
        # Placeholder label: zeros, one slot per class.
        placeholder = np.array([0] * label_num)
        examples.append(
            InputExample(guid="test-%d" % idx, text_a=text, text_b=None,
                         label=placeholder))
    return examples
def read_examples(self):
    """Lazily yield one `InputExample` per line of ``self.input_file``.

    A line of the form ``"<a> ||| <b>"`` becomes a sentence pair; any other
    line becomes a single-sentence example. IDs are assigned starting at 1.
    """
    unique_id = 0
    with tf.io.gfile.GFile(self.input_file, "r") as reader:
        while True:
            raw = tokenization.convert_to_unicode(reader.readline())
            if not raw:
                # readline() returns "" only at EOF.
                break
            stripped = raw.strip()
            match = re.match(r"^(.*) \|\|\| (.*)$", stripped)
            if match:
                first, second = match.group(1), match.group(2)
            else:
                first, second = stripped, None
            unique_id += 1
            yield InputExample(unique_id=unique_id, text_a=first, text_b=second)
def read_examples(input_file):
    """Read a list of `InputExample`s from an input file.

    Eager (list-building) counterpart of the generator variant in this file:
    a line of the form ``"<a> ||| <b>"`` becomes a sentence pair, any other
    line a single-sentence example. NOTE: this variant numbers ids starting
    at 0 (incremented after append), unlike the generator which starts at 1;
    preserved as-is since callers may rely on it.

    Args:
        input_file: path readable by `tf.io.gfile.GFile`.

    Returns:
        List of `InputExample`.
    """
    examples = []
    unique_id = 0
    # CONSISTENCY FIX: `tf.gfile.GFile` is the removed TF1 alias; use
    # `tf.io.gfile.GFile` (already used elsewhere in this file) — identical
    # behavior, works under TF2.
    with tf.io.gfile.GFile(input_file, "r") as reader:
        while True:
            line = tokenization.convert_to_unicode(reader.readline())
            if not line:
                break
            line = line.strip()
            text_a = None
            text_b = None
            m = re.match(r"^(.*) \|\|\| (.*)$", line)
            if m is None:
                text_a = line
            else:
                text_a = m.group(1)
                text_b = m.group(2)
            examples.append(
                InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
            unique_id += 1
    return examples
def get_train_examples(self, df: pd.DataFrame, train_column_names: list, label_column_names: list):
    """See base class.

    Builds training `InputExample`s from *df*: text comes from the (single)
    column in *train_column_names*, labels are the raw value array of the
    columns in *label_column_names*.
    """
    tf.logging.info("loading data ...")
    examples = []
    for idx, row in df.iterrows():
        text = tokenization.convert_to_unicode(
            row.loc[train_column_names].values[0])
        # Labels kept as a numpy value array (supports multi-label targets).
        label_vec = row[label_column_names].values
        examples.append(
            InputExample(guid="train-%d" % idx, text_a=text, text_b=None,
                         label=label_vec))
    tf.logging.info("loading data finished...")
    return examples