def get_train_examples(self):
    """Build the training `InputExample`s.

    Loads pickled parallel lists ``train_titles.dat`` / ``train_labels.dat``
    from ``self.data_dir`` and pairs them by position.  Each example carries a
    ``train-<i>`` guid, the title as ``text_a``, no ``text_b``, and a list of
    unicode-normalized labels (multi-label task).

    Returns:
      list[InputExample]: one example per entry of the labels list.
    """
    # SECURITY NOTE(review): pickle.load executes arbitrary code from the
    # file — only safe because these .dat files are produced locally.
    with open(os.path.join(self.data_dir, "train_titles.dat"), 'rb') as fin:
        train_titles = pickle.load(fin)
    with open(os.path.join(self.data_dir, "train_labels.dat"), 'rb') as fin:
        train_labels = pickle.load(fin)

    examples = []
    # enumerate over labels (not titles): a short titles list should fail
    # loudly via IndexError rather than be silently truncated.
    for i, raw_labels in enumerate(train_labels):
        examples.append(InputExample(
            guid="train-%d" % i,
            text_a=tokenization.convert_to_unicode(train_titles[i]),
            text_b=None,
            label=[tokenization.convert_to_unicode(one) for one in raw_labels]))
    return examples
def get_test_examples(self):
    """Build the prediction-time `InputExample`s.

    Loads pickled parallel lists ``test_titles.dat`` / ``test_labels.dat``
    from ``self.data_dir`` and pairs them by position.  Each example carries a
    ``test-<i>`` guid, the title as ``text_a``, no ``text_b``, and a list of
    unicode-normalized labels (multi-label task).

    Returns:
      list[InputExample]: one example per entry of the labels list.
    """
    # SECURITY NOTE(review): pickle.load executes arbitrary code from the
    # file — only safe because these .dat files are produced locally.
    with open(os.path.join(self.data_dir, "test_titles.dat"), 'rb') as fin:
        test_titles = pickle.load(fin)
    with open(os.path.join(self.data_dir, "test_labels.dat"), 'rb') as fin:
        test_labels = pickle.load(fin)

    examples = []
    # enumerate over labels (not titles): a short titles list should fail
    # loudly via IndexError rather than be silently truncated.
    for i, raw_labels in enumerate(test_labels):
        examples.append(InputExample(
            guid="test-%d" % i,
            text_a=tokenization.convert_to_unicode(test_titles[i]),
            text_b=None,
            label=[tokenization.convert_to_unicode(one) for one in raw_labels]))
    return examples
def get_test_examples(self):
    """Build the prediction-time `InputExample`s from JSON data.

    Loads JSON parallel lists ``test_texts.txt`` / ``test_labels.txt`` from
    ``self.data_dir``.  Entries containing any label outside
    ``self.total_labels`` are skipped entirely; guids keep the ORIGINAL index
    (``test-<i>``), so guid numbering may have gaps after filtering.

    Returns:
      list[InputExample]: one example per kept entry, with ``text_a`` set to
      the text, ``text_b`` ``None``, and a list of unicode-normalized labels.
    """
    with open(os.path.join(self.data_dir, "test_texts.txt"), 'rt') as fin:
        test_texts = json.load(fin)
    with open(os.path.join(self.data_dir, "test_labels.txt"), 'rt') as fin:
        test_labels = json.load(fin)

    examples = []
    # enumerate over labels (not texts): a short texts list should fail
    # loudly via IndexError rather than be silently truncated.
    for i, raw_labels in enumerate(test_labels):
        # Drop the whole example if ANY of its labels is unknown.
        # (generator form: no throwaway list, short-circuits on first miss)
        if any(label not in self.total_labels for label in raw_labels):
            continue
        examples.append(InputExample(
            guid="test-%d" % i,
            text_a=tokenization.convert_to_unicode(test_texts[i]),
            text_b=None,
            label=[tokenization.convert_to_unicode(one) for one in raw_labels]))
    return examples
def get_dev_examples(self):
    """Build the validation `InputExample`s from JSON data.

    Loads JSON parallel lists ``valid_texts.txt`` / ``valid_labels.txt`` from
    ``self.data_dir``.  Entries containing any label outside
    ``self.total_labels`` are skipped entirely; guids keep the ORIGINAL index
    (``valid-<i>``), so guid numbering may have gaps after filtering.

    Returns:
      list[InputExample]: one example per kept entry, with ``text_a`` set to
      the text, ``text_b`` ``None``, and a list of unicode-normalized labels.
    """
    with open(os.path.join(self.data_dir, "valid_texts.txt"), 'rt') as fin:
        valid_texts = json.load(fin)
    with open(os.path.join(self.data_dir, "valid_labels.txt"), 'rt') as fin:
        valid_labels = json.load(fin)

    examples = []
    # enumerate over labels (not texts): a short texts list should fail
    # loudly via IndexError rather than be silently truncated.
    for i, raw_labels in enumerate(valid_labels):
        # Drop the whole example if ANY of its labels is unknown.
        # (generator form: no throwaway list, short-circuits on first miss)
        if any(label not in self.total_labels for label in raw_labels):
            continue
        examples.append(InputExample(
            guid="valid-%d" % i,
            text_a=tokenization.convert_to_unicode(valid_texts[i]),
            text_b=None,
            label=[tokenization.convert_to_unicode(one) for one in raw_labels]))
    return examples