def convert_single_example(self, ex_index, example):
    """Converts a single `InputExample` into a single `InputFeatures`."""
    tokens, input_ids, input_mask, segment_ids = self.create_input_features(example)
    label_id = self.create_label_features(example, tokens)

    # Log the first few examples so the feature pipeline can be sanity-checked.
    if ex_index < 5:
        tf.logging.info("*** Example ***")
        tf.logging.info("guid: %s" % example.guid)
        tf.logging.info("tokens: %s" % " ".join(
            [tokenization.printable_text(x) for x in tokens]))
        tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
        tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
        tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
        tf.logging.info("label: {}".format(label_id))

    feature = InputFeatures(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        label_id=label_id,
        is_real_example=True)
    return feature
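# --- Usage sketch (not part of the original file) ---
# A minimal illustration of how convert_single_example might be driven when
# serializing a dataset to TFRecord. `processor`, `examples`, the output path,
# and the tf.train.Feature plumbing below are assumptions for illustration;
# the repo's actual writer (if any) may differ. Assumes a scalar label_id.
import collections
import tensorflow as tf


def write_features_to_tfrecord(processor, examples, output_file):
    """Sketch: convert each InputExample and serialize it as a tf.train.Example."""
    with tf.python_io.TFRecordWriter(output_file) as writer:
        for ex_index, example in enumerate(examples):
            feature = processor.convert_single_example(ex_index, example)

            def create_int_feature(values):
                return tf.train.Feature(
                    int64_list=tf.train.Int64List(value=list(values)))

            features = collections.OrderedDict()
            features["input_ids"] = create_int_feature(feature.input_ids)
            features["input_mask"] = create_int_feature(feature.input_mask)
            features["segment_ids"] = create_int_feature(feature.segment_ids)
            # Assumption: label_id is a single integer; a span-style label
            # (start, end) would need two entries here instead.
            features["label_ids"] = create_int_feature([feature.label_id])

            tf_example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())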
def extract_generated_target(output_tokens, tokenizer):
    """
    Given some tokens that were generated, extract the target.

    :param output_tokens: [num_tokens] 1-D array of generated token ids
    :param tokenizer: the tokenizer that was used to encode them
    :return: dict with the decoded text and the (start, end) indices it spans
    """
    assert output_tokens.ndim == 1

    # Currently the whole generated sequence is extracted; narrowing to the
    # span following a start token would adjust start_ind/end_ind here.
    start_ind = 0
    end_ind = output_tokens.shape[0]

    return {
        'extraction': tokenization.printable_text(
            ''.join(tokenizer.convert_ids_to_tokens(output_tokens))),
        'start_ind': start_ind,
        'end_ind': end_ind,
    }
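# --- Usage sketch (not part of the original file) ---
# A minimal illustration of calling extract_generated_target on sampled ids.
# `FullTokenizer` and the vocab path are assumptions borrowed from a BERT-style
# tokenization module (which provides printable_text and convert_ids_to_tokens);
# the tokenizer actually used in this repo may differ.
import numpy as np
import tokenization

tokenizer = tokenization.FullTokenizer(
    vocab_file="vocab.txt", do_lower_case=True)  # hypothetical vocab path

# Pretend these ids came out of a generation loop (1-D, as the assert requires).
sampled_ids = np.array(tokenizer.convert_tokens_to_ids(
    tokenizer.tokenize("a generated sentence")))

result = extract_generated_target(sampled_ids, tokenizer)
print(result['extraction'], result['start_ind'], result['end_ind'])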