Ejemplo n.º 1
0
 def get_dev_examples(self, df: pd.DataFrame, train_column_names: list,
                      label_column_names: list):
     """See base class.

     Builds one `InputExample` per DataFrame row for the dev set.

     Args:
         df: source data, one example per row.
         train_column_names: column(s) holding the input text; the first
             selected column is used as `text_a`.
         label_column_names: column(s) holding the label values.

     Returns:
         A list of `InputExample`s with guids of the form "dev-<row index>".
     """
     examples = []
     # BUG FIX: the original iterated `enumerate(df)`, which walks the
     # DataFrame's *column names* (strings), not its rows — `line[...]`
     # then indexed into a string. Iterate rows with `iterrows()` and pull
     # values the same way the sibling get_train_examples does.
     for (i, line) in df.iterrows():
         guid = "dev-%d" % (i)
         text_a = tokenization.convert_to_unicode(
             line.loc[train_column_names].values[0])
         # Label kept as the raw value array, matching get_train_examples.
         label = line[label_column_names].values
         examples.append(
             InputExample(guid=guid,
                          text_a=text_a,
                          text_b=None,
                          label=label))
     return examples
Ejemplo n.º 2
0
 def get_test_examples(self, df: pd.DataFrame, test_column_names: list,
                       label_num: int):
     """See base class.

     Builds one `InputExample` per DataFrame row for the test set, using
     an all-zero placeholder label (test data carries no ground truth).

     Args:
         df: source data, one example per row.
         test_column_names: column(s) holding the input text; the first
             selected column is used as `text_a`.
         label_num: number of label classes; sets the placeholder length.

     Returns:
         A list of `InputExample`s with guids of the form "test-<row index>".
     """
     examples = []
     for (i, line) in df.iterrows():
         guid = "test-%d" % (i)
         text_a = tokenization.convert_to_unicode(
             line.loc[test_column_names].values[0])
         # np.zeros replaces the hand-rolled `np.array([0 for i in ...])`;
         # dtype=int matches the integer dtype the list version produced.
         label = np.zeros(label_num, dtype=int)
         examples.append(
             InputExample(guid=guid,
                          text_a=text_a,
                          text_b=None,
                          label=label))
     return examples
Ejemplo n.º 3
0
 def read_examples(self):
   """Lazily yield one `InputExample` per line of `self.input_file`.

   A line of the form "<text_a> ||| <text_b>" becomes a sentence pair;
   any other line becomes a single-sentence example. unique_ids start at 1.
   """
   unique_id = 0
   with tf.io.gfile.GFile(self.input_file, "r") as reader:
     # readline() returns "" at EOF, so "" is the loop sentinel.
     for raw_line in iter(reader.readline, ""):
       stripped = tokenization.convert_to_unicode(raw_line).strip()
       unique_id += 1
       pair = re.match(r"^(.*) \|\|\| (.*)$", stripped)
       if pair:
         yield InputExample(unique_id=unique_id,
                            text_a=pair.group(1),
                            text_b=pair.group(2))
       else:
         yield InputExample(unique_id=unique_id,
                            text_a=stripped,
                            text_b=None)
Ejemplo n.º 4
0
def read_examples(input_file):
  """Read a list of `InputExample`s from an input file.

  A line of the form "<text_a> ||| <text_b>" becomes a sentence pair;
  any other line becomes a single-sentence example. unique_ids start at 0.
  """
  examples = []
  with tf.gfile.GFile(input_file, "r") as reader:
    # readline() returns "" at EOF, so "" is the loop sentinel; enumerate
    # reproduces the original 0-based unique_id numbering.
    for unique_id, raw_line in enumerate(iter(reader.readline, "")):
      line = tokenization.convert_to_unicode(raw_line).strip()
      m = re.match(r"^(.*) \|\|\| (.*)$", line)
      if m:
        text_a, text_b = m.group(1), m.group(2)
      else:
        text_a, text_b = line, None
      examples.append(
          InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
  return examples
Ejemplo n.º 5
0
 def get_train_examples(self, df: pd.DataFrame, train_column_names: list,
                        label_column_names: list):
     """See base class.

     Builds one `InputExample` per DataFrame row for the training set:
     `text_a` is taken from the first selected text column, the label is
     the raw value array of the label column(s).
     """
     tf.logging.info("loading data ...")
     examples = []
     for (row_idx, row) in df.iterrows():
         guid = "train-%d" % (row_idx)
         text_a = tokenization.convert_to_unicode(
             row.loc[train_column_names].values[0])
         # Keep the label as its raw numpy value array (no unicode
         # conversion) — it may be multi-valued.
         label = row[label_column_names].values
         examples.append(
             InputExample(guid=guid,
                          text_a=text_a,
                          text_b=None,
                          label=label))
     tf.logging.info("loading data finished...")
     return examples