Ejemplo n.º 1
0
    def rollout_to_protobuf(document_words, document_tags):
        """Tokenize one document and write it out as a serialized Example.

        The document is wrapped into a batch of one for ``tokenize``; the
        resulting ids, masks and labels are stored as int64 features and the
        first row of the returned text is stored as UTF-8 bytes.  The record
        is written through ``writer``.

        ``tokenize`` and ``writer`` are resolved from an enclosing scope that
        is not visible here.  The inputs and the tokenized text are printed
        for debugging.
        """
        for title, payload in (("document_words", document_words),
                               ("document_tags", document_tags)):
            print(title)
            print(payload)

        input_ids, input_masks, y_masks, text, ys = tokenize(
            [document_words], [document_tags])

        print("text")
        print(text)

        def int64_feature(values):
            # Small local helper to avoid repeating the wrapper boilerplate.
            return Feature(int64_list=tf.train.Int64List(value=values))

        encoded_words = [w.encode('utf-8') for w in text[0]]
        feature_map = {
            'text': Feature(bytes_list=tf.train.BytesList(value=encoded_words)),
            'input_ids': int64_feature(input_ids),
            'input_masks': int64_feature(input_masks),
            'y_masks': int64_feature(y_masks),
            'labels': int64_feature(ys),
        }
        record = Example(features=Features(feature=feature_map))
        writer.write(record.SerializeToString())
Ejemplo n.º 2
0
def make_tf_examples(string_features, int_features, labels):
    """Build one ``Example`` per sample from string/int features and labels.

    Args:
        string_features: iterable of byte-string value lists, each wrapped
            in a ``BytesList`` feature.
        int_features: iterable of integer value lists, each wrapped in an
            ``Int64List`` feature.  The caller's object is NOT modified.
        labels: raw labels; they are passed through ``zero_norm_labels`` and
            each result is appended as a single-element int feature.

    Returns:
        A list of ``Example`` protos with the keys ``left``, ``target``,
        ``right``, ``left_ids``, ``target_ids``, ``right_ids`` and ``labels``.
        The grouping comes from ``split_list(all_features, parts=7)``.
    """
    # Build a fresh list: the previous `int_features += ...` appended the
    # label entries to the caller's list in place.
    int_values = list(int_features) + [
        [label] for label in zero_norm_labels(labels)
    ]
    wrapped_strings = [
        Feature(bytes_list=BytesList(value=val)) for val in string_features
    ]
    wrapped_ints = [
        Feature(int64_list=Int64List(value=val)) for val in int_values
    ]
    all_features = wrapped_strings + wrapped_ints
    return [
        Example(features=Features(
            feature={
                "left": left,
                "target": target,
                "right": right,
                "left_ids": left_ids,
                "target_ids": target_ids,
                "right_ids": right_ids,
                "labels": label,
            })) for (
                left,
                target,
                right,
                left_ids,
                target_ids,
                right_ids,
                label,
            ) in zip(*split_list(all_features, parts=7))
    ]
Ejemplo n.º 3
0
def create_examples(data,
                    bert_client,
                    training=True,
                    label2int=None,
                    class_weight=None):
    """
    Yield one ``Example`` per row of ``data``.

    data: pd.DataFrame with columns 'title1_en' and 'title2_en', plus
        'label' when ``training`` is true or 'id' otherwise
    bert_client: object whose ``encode(list_of_str)`` returns one vector
        per input string
    training: when true, attach 'label' and 'class_weight' features;
        otherwise attach the row 'id'
    label2int: dict mapping raw label values to integer classes
        (required when ``training`` is true)
    class_weight: sequence indexed by integer class
        (required when ``training`` is true)

    yield examples
    """
    # Nothing to encode; yield nothing rather than failing on an empty frame.
    if len(data) == 0:
        return

    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())

    # Read the per-row value positionally.  The earlier
    # `data.loc[idx_start + i, ...]` lookup assumed a contiguous integer
    # index and mis-aligned rows (or raised KeyError) on filtered or
    # shuffled frames.
    row_values = (data['label'] if training else data['id']).tolist()

    for i, row_value in enumerate(row_values):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i]))
        }
        if training:
            label = label2int[row_value]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(float_list=FloatList(
                value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(value=[row_value]))

        yield Example(features=Features(feature=feature))
Ejemplo n.º 4
0
def serialise_traj(data):
    """Serialize a trajectory dict into a ``tf.train.Example`` byte string.

    Every entry except ``'seq_lens'`` is serialized with
    ``tf.io.serialize_tensor`` and stored as a single-element bytes feature.
    ``'seq_lens'`` is stored as a single-element int64 feature.
    """
    feature_map = {}
    for key, tensor in data.items():
        if key == 'seq_lens':
            continue  # stored below as a plain int64 feature
        payload = tf.io.serialize_tensor(tensor).numpy()
        feature_map[key] = Feature(bytes_list=BytesList(value=[payload]))

    feature_map['seq_lens'] = Feature(
        int64_list=Int64List(value=[data['seq_lens']]))

    record = Example(features=Features(feature=feature_map))
    return record.SerializeToString()
Ejemplo n.º 5
0
def serialise_vid(data):
    """Serialize a video-sample dict into a ``tf.train.Example`` byte string.

    Every entry except ``'seq_lens'`` is serialized with
    ``tf.io.serialize_tensor`` and stored as a single-element bytes feature.
    ``'seq_lens'`` is stored as a single-element int64 feature.

    Args:
        data: mapping of feature name to tensor; must contain ``'seq_lens'``.

    Returns:
        The serialized ``Example`` as bytes.
    """
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k not in ['seq_lens']
    }
    features['seq_lens'] = Feature(
        int64_list=Int64List(value=[data['seq_lens']]))

    # Protobuf message constructors accept keyword arguments only; passing
    # the map positionally (`Features(features)`) raises TypeError.
    example = Example(features=Features(feature=features))

    return example.SerializeToString()
Ejemplo n.º 6
0
def save(mask, img, save_dir, count=0):
    """JPEG-encode an image/mask pair and write them as one TFRecord file.

    The record is written to ``<save_dir>/Data-<count>.tfrecord`` and holds
    two bytes features, ``"Image"`` and ``"Mask"``.
    """
    encoded_mask = tf.io.encode_jpeg(mask)
    encoded_img = tf.io.encode_jpeg(img)

    feature_map = {
        "Image": Feature(bytes_list=BytesList(value=[encoded_img.numpy()])),
        "Mask": Feature(bytes_list=BytesList(value=[encoded_mask.numpy()])),
    }
    record = Example(features=Features(feature=feature_map))

    target_path = os.path.join(save_dir, f"Data-{count}.tfrecord")
    with tf.io.TFRecordWriter(target_path) as record_writer:
        record_writer.write(record.SerializeToString())
Ejemplo n.º 7
0
 def create_example(features: np.ndarray, label: np.int32):
     """Return a serialized Example holding a feature tensor and its label.

     ``features`` is serialized with ``tf.io.serialize_tensor`` and stored
     under ``"features"`` as bytes; ``label`` is stored under ``"label"`` as
     a single-element int64 list.
     """
     serialized_features = tf.io.serialize_tensor(features)
     feature_map = {
         "features":
         NumpyToRecordConverter._bytes_feature(serialized_features),
         "label":
         Feature(int64_list=Int64List(value=[label])),
     }
     record = Example(features=Features(feature=feature_map))
     return record.SerializeToString()
Ejemplo n.º 8
0
    def __encode_input(self, mr, input_encoder):
        """Encode ``mr`` and return it as a serialized TF Example record.

        The ids produced by ``input_encoder.encode`` are terminated with
        ``text_encoder.EOS_ID`` and stored under the ``'inputs'`` key.
        """
        token_ids = input_encoder.encode(mr)
        token_ids.append(text_encoder.EOS_ID)

        inputs_feature = Feature(int64_list=Int64List(value=token_ids))
        record = Example(features=Features(feature={'inputs': inputs_feature}))
        return record.SerializeToString()
Ejemplo n.º 9
0
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """
    Convert the raw data of one charging cycle into an "Example" object.

    Summary values are read at "summary_idx" and detail arrays at
    "cycle_idx"; each is divided by its entry in "scaling_factors" before
    being stored as a float feature.
    """
    summary = cell_value["summary"]
    cycle = cell_value["cycles"][cycle_idx]

    def scaled_summary(name):
        # Summary values are scalars, so wrap them in a one-element list.
        return [summary[name][summary_idx] / scaling_factors[name]]

    def float_feature(values):
        return Feature(float_list=FloatList(value=values))

    internal_resistance = scaled_summary(cst.INTERNAL_RESISTANCE_NAME)
    discharge_capacity = scaled_summary(cst.QD_NAME)
    remaining_cycles = scaled_summary(cst.REMAINING_CYCLES_NAME)
    discharge_time = scaled_summary(cst.DISCHARGE_TIME_NAME)
    # The current cycle shares its scale with the remaining-cycle count,
    # hence the same scaling factor.
    current_cycle = [
        float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]
    ]

    # Detail values are arrays and are scaled as a whole.
    qdlin = cycle[cst.QDLIN_NAME] / scaling_factors[cst.QDLIN_NAME]
    tdlin = cycle[cst.TDLIN_NAME] / scaling_factors[cst.TDLIN_NAME]

    feature_map = {
        cst.INTERNAL_RESISTANCE_NAME: float_feature(internal_resistance),
        cst.QD_NAME: float_feature(discharge_capacity),
        cst.REMAINING_CYCLES_NAME: float_feature(remaining_cycles),
        cst.DISCHARGE_TIME_NAME: float_feature(discharge_time),
        cst.QDLIN_NAME: float_feature(qdlin),
        cst.TDLIN_NAME: float_feature(tdlin),
        cst.CURRENT_CYCLE_NAME: float_feature(current_cycle),
    }
    return Example(features=Features(feature=feature_map))
Ejemplo n.º 10
0
def main():
    """Send sentence/target pairs to a model server and print the results.

    Inputs come either from a CSV batch file with ``target,sentence``
    columns or from a single sentence/target pair, as returned by
    ``parse_args``.  Each pair is split into left context, target and right
    context, encoded, and wrapped in an ``Example``.  All examples are then
    sent to the server at ``127.0.0.1:8500`` twice: once as a classification
    request against the model named ``"lg"`` and once as a prediction request
    against the model/signature given on the command line.
    """
    model, signature, batch_file_path, sentence, target = parse_args()

    feat_dict = {"sentences": [], "targets": []}

    if batch_file_path is not None:
        # newline="" is what the csv module expects for correct handling of
        # quoted fields containing line breaks.
        with open(batch_file_path, "r", newline="") as batch_file:
            fieldnames = ["target", "sentence"]
            csvreader = DictReader(batch_file, fieldnames=fieldnames)
            for row in csvreader:
                feat_dict["targets"].append(row["target"].strip())
                feat_dict["sentences"].append(row["sentence"].strip())
    else:
        feat_dict["targets"].append(target)
        feat_dict["sentences"].append(sentence)

    l_ctxts, trgs, r_ctxts = FeatureProvider.partition_sentences(
        sentences=feat_dict["sentences"],
        targets=feat_dict["targets"],
        offsets=FeatureProvider.get_target_offset_array(feat_dict),
    )
    l_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(l_ctxts)
    ]
    trg_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(trgs)
    ]
    r_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(r_ctxts)
    ]

    # Keep the Example protos themselves (needed by the classification
    # request) alongside their serialized form (needed by the prediction
    # request).  The loop variable is deliberately not called `target`, so
    # the parsed command-line value is not shadowed.
    examples = [
        Example(features=Features(
            feature={
                "left": Feature(bytes_list=BytesList(value=left)),
                "target": Feature(bytes_list=BytesList(value=trg)),
                "right": Feature(bytes_list=BytesList(value=right)),
            }
        ))
        for left, trg, right in zip(l_enc, trg_enc, r_enc)
    ]

    if not examples:
        print("No input sentences provided; nothing to send.")
        return

    tf_examples = [example.SerializeToString() for example in examples]

    tensor_proto = make_tensor_proto(
        tf_examples, dtype=tf_string, shape=[len(tf_examples)]
    )

    # The context manager closes the channel even if a request fails.
    with insecure_channel("127.0.0.1:8500") as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        # CLASSIFICATION
        # Send every example; previously only the last loop variable was
        # sent, so batch inputs were classified for their final row only.
        classification_req = ClassificationRequest()
        inputs = Input(example_list=ExampleList(examples=examples))
        classification_req.input.CopyFrom(inputs)  # pylint: disable=E1101
        classification_req.model_spec.name = "lg"  # pylint: disable=E1101
        classification = stub.Classify(classification_req, 60.0)
        print(classification)

        # PREDICTION
        prediction_req = PredictRequest()
        prediction_req.inputs["instances"].CopyFrom(  # pylint: disable=E1101
            tensor_proto
        )
        prediction_req.model_spec.signature_name = (  # pylint: disable=E1101
            signature
        )
        prediction_req.model_spec.name = model  # pylint: disable=E1101
        prediction = stub.Predict(prediction_req, 60.0)
        print(prediction)
0
 def _bytes_feature(value):
     """Wrap a single string / bytes value in a bytes_list Feature."""
     eager_tensor_type = type(tf.constant(0))
     # BytesList won't unpack a string from an EagerTensor, so pull the raw
     # value out first.
     payload = value.numpy() if isinstance(value, eager_tensor_type) else value
     return Feature(bytes_list=BytesList(value=[payload]))
Ejemplo n.º 12
0
def convert_to_example(
    adj,
    feature,
    label_data=None,
    label_mask=None,
):
    """
    Serializes graph related data into an Example byte string.

    The adjacency and feature matrices are stored in sparse form (row
    indices, column indices and non-zero values).  Nothing is written to
    disk; the caller receives the serialized record.

    Args:
        adj: 2-D numpy adjacency matrix.
        feature: 2-D array-like node feature matrix.
        label_data: optional numpy array of labels; NaNs are replaced via
            ``np.nan_to_num`` and the result is cast to int.
        label_mask: numpy array cast to int and stored as ``'mask_label'``.
            Required whenever ``label_data`` is given.

    Returns:
        The serialized ``Example`` as bytes.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    degrees = np.sum(adj, 0)
    # Self-loop entries get degree 0; every other entry carries the degree
    # looked up at its row index.
    adj_degrees = [
        0 if ar == ac else int(degrees[ar])
        for ar, ac in zip(adj_row, adj_col)
    ]

    # Use distinct names for the dense matrix and the feature map instead of
    # rebinding the `feature` parameter.
    feature_arr = np.array(feature)
    feature_row, feature_col = np.nonzero(feature_arr)
    feature_values = feature_arr[feature_row, feature_col]
    feature_elem_len = len(feature_row)

    feature_map = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column':
        Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values':
        Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len':
        Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size': Feature(int64_list=Int64List(value=list(feature_arr.shape)))
    }
    if label_data is not None:
        label_data = np.nan_to_num(label_data)
        feature_map['label'] = Feature(int64_list=Int64List(
            value=label_data.astype(int)))
        # No trailing comma here: it previously turned this entry into a
        # one-element tuple, which Features(feature=...) rejects.
        feature_map['mask_label'] = Feature(int64_list=Int64List(
            value=label_mask.astype(int)))
    features = Features(feature=feature_map)
    ex = Example(features=features)
    return ex.SerializeToString()
Ejemplo n.º 13
0
 def _create_int_feature(self, values):
     """Return an int64_list Feature built from the items of ``values``."""
     int_values = Int64List(value=list(values))
     return Feature(int64_list=int_values)
Ejemplo n.º 14
0
def serialise(data):
    """Serialize one sample dict into a ``tf.train.Example`` byte string.

    ``ID`` and ``seq_mask`` are serialized as-is; ``pos``, ``dimensions``,
    ``color``, ``border``, ``fill`` and ``text`` are cast to float32 first.
    ``img`` is stored directly from ``.numpy()`` and ``seq_len`` as an int64.
    """

    def tensor_feature(tensor):
        payload = tf.io.serialize_tensor(tensor).numpy()
        return Feature(bytes_list=BytesList(value=[payload]))

    def float_tensor_feature(tensor):
        return tensor_feature(tf.cast(tensor, tf.float32))

    # Look every field up before building anything, so a missing key fails
    # before any tensor work is done.
    raw = {
        key: data[key]
        for key in ('ID', 'pos', 'dimensions', 'color', 'border', 'fill',
                    'text', 'img')
    }
    sequence_length = int(data['seq_len'])
    sequence_mask = data['seq_mask']

    feature_map = {'ID': tensor_feature(raw['ID'])}
    for key in ('pos', 'dimensions', 'color', 'border', 'fill', 'text'):
        feature_map[key] = float_tensor_feature(raw[key])
    # img is already serialised because we never decode it!
    feature_map['img'] = Feature(
        bytes_list=BytesList(value=[raw['img'].numpy()]))
    feature_map['seq_len'] = Feature(
        int64_list=Int64List(value=[sequence_length]))
    feature_map['seq_mask'] = tensor_feature(sequence_mask)

    record = Example(features=Features(feature=feature_map))
    return record.SerializeToString()
Ejemplo n.º 15
0
def serialise(data):
    """Serialize one play-data sample into a ``tf.train.Example`` byte string.

    Shapes noted by the original author for a batch of one:
        obs (1, 40, 18), acts (1, 40, 7), goals (1, 40, 11), seq_lens (1,),
        masks (1, 40), dataset_path (1, 40), tstep_idxs (1, 40),
        imgs (1, 40, 200, 200, 3), goal_imgs (1, 40, 200, 200, 3),
        proprioceptive_features (1, 40, 7)

    Tensors are squeezed and serialized into bytes features; ``seq_lens`` is
    stored as an int64 feature.
    """
    field_names = ('obs', 'acts', 'goals', 'seq_lens', 'masks',
                   'dataset_path', 'tstep_idxs', 'imgs', 'goal_imgs',
                   'proprioceptive_features')
    # dataset_path and tstep_idxs are looked up (so they must be present)
    # but are not written to the record.
    fields = {name: data[name] for name in field_names}

    # Keep only the first timestep of the goal images, shaped (:, 1, :, :, :),
    # for less file IO.
    first_goal_img = tf.expand_dims(fields['goal_imgs'][:, 0, :, :, :], 1)

    def squeezed_feature(tensor):
        payload = tf.io.serialize_tensor(tf.squeeze(tensor)).numpy()
        return Feature(bytes_list=BytesList(value=[payload]))

    feature_map = {
        'obs': squeezed_feature(fields['obs']),
        'acts': squeezed_feature(fields['acts']),
        'goals': squeezed_feature(fields['goals']),
        'seq_lens': Feature(int64_list=Int64List(value=[fields['seq_lens']])),
        'masks': squeezed_feature(fields['masks']),
        'imgs': squeezed_feature(fields['imgs']),
        'goal_imgs': squeezed_feature(first_goal_img),
        'proprioceptive_features':
        squeezed_feature(fields['proprioceptive_features']),
    }

    record = Example(features=Features(feature=feature_map))
    return record.SerializeToString()


# Sample Usage
# r = lfp.data.PlayDataloader(include_imgs = args.images, batch_size=1,  window_size=args.window_size_max, min_window_size=args.window_size_min)
# rd = r.extract(TRAIN_DATA_PATHS, from_tfrecords=args.from_tfrecords)
# rd = r.load(rd)
# r_it = iter(rd)

# @tf.function
# def sample():
#   return r_it.next()

# data_paths = [str(STORAGE_PATH/'precompute')+f"/{x}.tfrecords" for x in range(0,8)]
# #@title write to gcs
# from tqdm import tqdm
# for path in data_paths:
#   with tf.io.TFRecordWriter(path) as file_writer:
#     print(path)
#     for i in tqdm(range(0,200)):
#         byte_stream = serialise(sample())
#         file_writer.write(byte_stream)
Ejemplo n.º 16
0
Archivo: prep.py Proyecto: wibrow/kGCN
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """
    Writes graph related data to disk as a single-record TFRecord file.

    The adjacency and feature matrices are stored in sparse form (row
    indices, column indices and non-zero values) together with the labels,
    the label mask and the feature matrix shape.  Output goes to ``tfrname``.
    """

    def int64_feature(values):
        return Feature(int64_list=Int64List(value=values))

    def float_feature(values):
        return Feature(float_list=FloatList(value=values))

    rows, cols = np.nonzero(adj)
    nonzero_adj = adj[rows, cols]

    dense_feature = np.array(feature)
    feat_rows, feat_cols = np.nonzero(dense_feature)
    nonzero_feature = dense_feature[feat_rows, feat_cols]

    feature_map = {
        'label': int64_feature(label_data),
        'mask_label': int64_feature(label_mask),
        'adj_row': int64_feature(list(rows)),
        'adj_column': int64_feature(list(cols)),
        'adj_values': float_feature(list(nonzero_adj)),
        'adj_elem_len': int64_feature([len(rows)]),
        'feature_row': int64_feature(list(feat_rows)),
        'feature_column': int64_feature(list(feat_cols)),
        'feature_values': float_feature(list(nonzero_feature)),
        'feature_elem_len': int64_feature([len(feat_rows)]),
        'size': int64_feature(list(dense_feature.shape)),
    }
    record = Example(features=Features(feature=feature_map))
    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(record.SerializeToString())