def _create_examples(self, records, set_type):
    """Build three-way multiple-choice examples from story records.

    For every record, each of the three candidate sentences
    (``RandomMiddleSentenceQuiz1`` .. ``RandomMiddleSentenceQuiz3``) is
    appended to ``InputSentence1`` (space separated) to form one ``text_a``
    entry, and ``InputSentence5`` is repeated three times as ``text_b``.

    Args:
      records: iterable of dict-like rows providing the keys named above
        plus ``InputStoryid``, ``ending`` and ``AnswerRightEnding``.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExampleWithList``.  The label is the integer value of
      ``AnswerRightEnding`` minus one, rendered as text.
    """
    option_keys = (
        'RandomMiddleSentenceQuiz1',
        'RandomMiddleSentenceQuiz2',
        'RandomMiddleSentenceQuiz3',
    )
    examples = []
    for record in records:
        guid = "%s-%s-%s" % (set_type, record['InputStoryid'], record['ending'])
        opening = record['InputSentence1']
        closing = record['InputSentence5']
        candidates = [record[key] for key in option_keys]
        correct_index = int(record['AnswerRightEnding']) - 1
        contexts = [
            convert_to_unicode(' '.join([opening, candidate]))
            for candidate in candidates
        ]
        examples.append(
            InputExampleWithList(
                guid=guid,
                text_a=contexts,
                text_b=[closing] * 3,
                label=convert_to_unicode(str(correct_index))))
    return examples
def _create_examples(self, lines, set_type):
    """Turn fill-in-the-blank rows into two-way choice examples.

    The first row is skipped (treated as a header).  For each remaining row
    the sentence (column 1, double quotes removed) is cut right after the
    first occurrence of the conjunction (column 2).  The part up to and
    including the conjunction is the shared context; the stripped remainder,
    with every ``_`` replaced by column 3 or column 4 respectively, gives the
    two options.  Column 5 is used as the label.

    Args:
      lines: iterable of indexable rows.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExampleWithList`` with two entries per field.

    Raises:
      ValueError: if the conjunction does not occur in the sentence.
    """
    examples = []
    for row_number, row in enumerate(lines):
        if row_number == 0:
            continue
        sentence = convert_to_unicode(row[1]).replace("\"", "")
        conjunction = convert_to_unicode(row[2])
        split_at = sentence.index(conjunction) + len(conjunction)
        context = sentence[:split_at]
        remainder = sentence[split_at:].strip()
        options = [
            remainder.replace("_", convert_to_unicode(row[column]))
            for column in (3, 4)
        ]
        examples.append(
            InputExampleWithList(
                guid="%s-%s" % (set_type, row_number),
                text_a=[context, context],
                text_b=options,
                label=convert_to_unicode(row[5])))
    return examples
def _create_examples(self, lines, set_type):
    """Build single-sentence examples from indexable rows.

    Column 3 of each row becomes ``text_a`` and column 1 becomes the label;
    ``text_b`` is always ``None``.  The guid is ``<set_type>-<row position>``.

    Args:
      lines: iterable of indexable rows (no header row is skipped).
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExample``, one per row.
    """
    return [
        InputExample(
            guid="%s-%s" % (set_type, position),
            text_a=convert_to_unicode(row[3]),
            text_b=None,
            label=convert_to_unicode(row[1]))
        for position, row in enumerate(lines)
    ]
def _create_examples(self, lines, set_type):
    """Build single-sentence examples from a table exposing ``iterrows()``.

    Each row's ``'text'`` field becomes ``text_a`` and its ``'label'`` field
    becomes the label; ``text_b`` is always ``None``.  The guid combines
    ``set_type`` with the index value yielded by ``iterrows()``.

    Args:
      lines: object whose ``iterrows()`` yields ``(index, row)`` pairs.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExample``, one per row.
    """
    return [
        InputExample(
            guid="%s-%s" % (set_type, row_index),
            text_a=convert_to_unicode(row['text']),
            text_b=None,
            label=convert_to_unicode(row['label']))
        for row_index, row in lines.iterrows()
    ]
def _create_examples(self, lines, set_type):
    """Build sentence-pair examples from indexable rows.

    The first row is skipped (treated as a header).  For every other row,
    column 0 supplies the guid suffix, columns 8 and 9 supply ``text_a`` and
    ``text_b``, and the last column supplies the label.

    Args:
      lines: iterable of indexable rows.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExample``, one per non-header row.
    """
    return [
        InputExample(
            guid="%s-%s" % (set_type, convert_to_unicode(row[0])),
            text_a=convert_to_unicode(row[8]),
            text_b=convert_to_unicode(row[9]),
            label=convert_to_unicode(row[-1]))
        for position, row in enumerate(lines)
        if position != 0
    ]
def _create_examples(self, records, set_type):
    """Build multiple-choice examples with a variable number of options.

    The number of options is taken from the first record: it is the count of
    keys starting with ``RandomMiddleSentenceQuiz``.  ``self._labels`` is set
    to the strings ``"1"`` .. ``"<number of options>"``.

    For option ``k`` of a record, ``text_a`` holds ``InputSentence1`` followed
    by ``RandomMiddleSentenceQuiz<k>``, and ``text_b`` holds
    ``InputSentence5`` followed by ``" Because , "`` and ``CSK<k>``.

    Args:
      records: sized, indexable sequence of dict-like rows.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExampleWithListFourFields`` (``text_c`` and ``text_d``
      are always ``None``).  The label is the integer value of
      ``AnswerRightEnding`` minus one, rendered as text.  An empty ``records``
      yields an empty list and leaves ``self._labels`` untouched.
    """
    # Nothing to inspect: return early instead of failing on records[0].
    # len() is used rather than truthiness so array-like containers work too.
    if len(records) == 0:
        return []

    option_prefix = 'RandomMiddleSentenceQuiz'
    num_fields = sum(
        1 for key in records[0].keys() if key.startswith(option_prefix))
    # NOTE(review): these labels are 1-based while the per-example label
    # below is AnswerRightEnding - 1 -- confirm this is what the consumer of
    # self._labels expects.
    self._labels = [str(idx) for idx in range(1, num_fields + 1)]
    option_ids = [str(idx) for idx in range(1, num_fields + 1)]

    examples = []
    for record in records:
        guid = "%s-%s-%s" % (set_type, record['InputStoryid'], record['ending'])
        beginning = record['InputSentence1']
        ending = record['InputSentence5']
        text_a = [
            beginning + " " + record[option_prefix + option_id]
            for option_id in option_ids
        ]
        text_b = [
            ending + " Because , " + record['CSK' + option_id]
            for option_id in option_ids
        ]
        answer = int(record['AnswerRightEnding']) - 1
        examples.append(
            InputExampleWithListFourFields(
                guid=guid,
                text_a=text_a,
                text_b=text_b,
                text_c=None,
                text_d=None,
                label=convert_to_unicode(str(answer))))
    return examples
def read_examples(data, no_label=False):
    """Build three-text ``InputExample`` objects from parallel sequences.

    Args:
      data: sequence of parallel iterables that is unpacked into ``zip``.
        With ``no_label`` it must hold exactly (ids, sentences); otherwise
        exactly (ids, sentences, labels).  Each sentence entry is indexed at
        positions 0, 1 and 2 for ``text_a``, ``text_b`` and ``text_c``.
      no_label: when true, every example gets the label ``'others'``.

    Returns:
      A list of ``InputExample`` whose ``unique_id`` is the textual form of
      the corresponding id.

    Raises:
      ValueError: if the zipped tuples do not have the expected arity.
    """
    def build(identifier, sentences, label):
        return InputExample(
            unique_id=convert_to_unicode(str(identifier)),
            text_a=convert_to_unicode(sentences[0]),
            text_b=convert_to_unicode(sentences[1]),
            text_c=convert_to_unicode(sentences[2]),
            label=convert_to_unicode(label))

    if no_label:
        return [
            build(identifier, sentences, 'others')
            for identifier, sentences in zip(*data)
        ]
    return [
        build(identifier, sentences, label)
        for identifier, sentences, label in zip(*data)
    ]
def _create_examples(self, records, set_type):
    """Build sentence-pair examples from story records.

    ``text_a`` is ``InputSentence1``; ``text_b`` is
    ``RandomMiddleSentenceQuiz1`` followed by a space and ``InputSentence5``.
    The label is ``str(AnswerRightEnding)`` (not passed through
    ``convert_to_unicode``).  The guid is ``<set_type>-<record position>``.

    Args:
      records: iterable of dict-like rows providing the keys named above.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExample``, one per record.
    """
    examples = []
    for position, record in enumerate(records):
        guid = "%s-%s" % (set_type, position)
        opening = record['InputSentence1']
        closing = record['InputSentence5']
        middle = record['RandomMiddleSentenceQuiz1']
        answer = str(record['AnswerRightEnding'])
        example = InputExample(
            guid=guid,
            text_a=convert_to_unicode(opening),
            text_b=convert_to_unicode(middle + " " + closing),
            label=answer)
        examples.append(example)
    return examples
def _create_examples(self, data, set_type):
    """Expand each row into three examples, one per candidate.

    Row layout as used here: column 0 -> ``text_a``, column 1 -> ``text_c``,
    columns 2-4 -> the three candidates (each becomes ``text_b`` of its own
    example), column 5 -> the answer.  All three examples of a row share the
    same label: the position (``"0"``, ``"1"`` or ``"2"``) of the candidate
    that equals the answer column.  If several candidates are equal to the
    answer, the last one determines the label.

    Args:
      data: iterable of indexable rows with at least six columns.
      set_type: prefix placed at the front of each example guid.

    Returns:
      A list of ``InputExample`` with guids ``<set_type>-<row>-<candidate>``.

    Raises:
      ValueError: if no candidate of a row equals its answer column.
        Previously this reused the label of the preceding row, or failed with
        an unbound local variable on the first row.
    """
    examples = []
    for i, row in enumerate(data):
        matches = [k for k in range(3) if row[2 + k] == row[5]]
        if not matches:
            raise ValueError(
                "Row %d: answer %r does not match any of the three candidates"
                % (i, row[5]))
        # Keep the original tie-break: the last matching candidate wins.
        label = tokenization.convert_to_unicode(str(matches[-1]))
        # These two fields are identical for all three examples of the row.
        text_a = tokenization.convert_to_unicode(row[0])
        text_c = tokenization.convert_to_unicode(row[1])
        for k in range(3):
            examples.append(
                InputExample(
                    guid="%s-%s-%s" % (set_type, i, k),
                    text_a=text_a,
                    text_b=tokenization.convert_to_unicode(row[k + 2]),
                    label=label,
                    text_c=text_c))
    return examples
def parse_wsdm_fake_news_row(row):
    """Pick the id, four text fields and the label out of one row.

    Columns 0, 3, 4, 5, 6 and the last column are each passed through
    ``convert_to_unicode``.  The ``ch_``/``en_`` names presumably stand for
    the Chinese and English variants of the two texts -- confirm against the
    dataset description.

    Args:
      row: indexable row with at least seven columns.

    Returns:
      A tuple ``(guid, ch_text_a, ch_text_b, en_text_a, en_text_b, label)``.
    """
    row_id, ch_text_a, ch_text_b, en_text_a, en_text_b, label = [
        convert_to_unicode(row[column]) for column in (0, 3, 4, 5, 6, -1)
    ]
    guid = "%s" % row_id
    return guid, ch_text_a, ch_text_b, en_text_a, en_text_b, label
def convert_text_to_examples(text):
    """Convert an iterable of text lines into ``InputExample`` objects.

    Each line is converted to unicode and stripped.  A line of the form
    ``"<a> ||| <b>"`` becomes a pair (``text_a=<a>``, ``text_b=<b>``); any
    other line becomes a single-text example with ``text_b=None``.

    Args:
      text: iterable of lines.

    Returns:
      A list of ``InputExample`` whose ``unique_id`` counts up from 0 in
      input order.
    """
    examples = []
    for unique_id, raw_line in enumerate(text):
        line = tokenization.convert_to_unicode(raw_line).strip()
        match = re.match(r"^(.*) \|\|\| (.*)$", line)
        if match:
            text_a, text_b = match.groups()
        else:
            text_a, text_b = line, None
        examples.append(
            InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
    return examples
def read_examples_df(df, col):
    """Build ``InputExample`` objects from one column of a table.

    Rows are visited through ``df.iterrows()``.  The value in ``col`` is
    converted to unicode and stripped; ``"<a> ||| <b>"`` yields a text pair,
    anything else a single text with ``text_b=None``.

    Note that the first empty value stops processing: that row and every row
    after it are not turned into examples.

    Args:
      df: object whose ``iterrows()`` yields ``(index, row)`` pairs.
      col: key used to read the text from each row.

    Returns:
      A list of ``InputExample`` whose ``unique_id`` counts up from 0.
    """
    examples = []
    for unique_id, (_, row) in enumerate(df.iterrows()):
        line = convert_to_unicode(row[col])
        if not line:
            break
        line = line.strip()
        match = re.match(r"^(.*) \|\|\| (.*)$", line)
        if match:
            text_a, text_b = match.groups()
        else:
            text_a, text_b = line, None
        examples.append(
            InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
    return examples
def read_examples(input_file):
    """Read ``InputExample`` objects from a text file, one per line.

    Every line is converted to unicode and stripped.  A line of the form
    ``"<a> ||| <b>"`` becomes a text pair; any other line (including a blank
    one) becomes a single-text example with ``text_b=None``.

    Args:
      input_file: path of the file to read; opened in text mode ``"r"``.

    Returns:
      A list of ``InputExample`` whose ``unique_id`` counts up from 0 in file
      order.
    """
    examples = []
    with open(input_file, "r") as reader:
        for raw_line in reader:
            line = convert_to_unicode(raw_line)
            if not line:
                break
            line = line.strip()
            match = re.match(r"^(.*) \|\|\| (.*)$", line)
            if match:
                text_a, text_b = match.groups()
            else:
                text_a, text_b = line, None
            examples.append(
                InputExample(
                    unique_id=len(examples), text_a=text_a, text_b=text_b))
    return examples
def read_examples(list_of_rows):
    """Build ``InputExample`` objects from rows of one or two texts.

    Each row is either ``[line1]`` (single text, ``text_b=None``) or
    ``[line1, line2]`` (text pair).  Items are passed through
    ``convert_to_unicode``.  The input rows are not modified, so immutable
    rows such as tuples are accepted as well.

    Args:
      list_of_rows: iterable of sized, iterable rows.

    Returns:
      A list of ``InputExample`` whose ``unique_id`` counts up from 0 in
      input order.

    Raises:
      ValueError: if a row is empty or has more than two elements.
    """
    examples = []
    for unique_id, row in enumerate(list_of_rows):
        # Reject malformed rows before doing any conversion work.
        if len(row) < 1 or len(row) > 2:
            raise ValueError("Atleast 1 element is required inside the row and not more than two elements are valid")
        # Convert into a fresh list rather than writing back into the
        # caller's row.
        converted = [convert_to_unicode(item) for item in row]
        text_a = converted[0]
        text_b = converted[1] if len(converted) == 2 else None
        examples.append(
            InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
    return examples