Code Example #1
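All of the snippets on this page build tf.train.Example protocol buffers and write them to TFRecord files. Each original file declares its own imports, but a preamble that resolves the short names used throughout would look roughly like this (a sketch; some snippets additionally rely on numpy as np, os, and project-specific helpers):

import tensorflow as tf
from tensorflow.train import (Example, Features, Feature, BytesList,
                              Int64List, FloatList)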
def rollout_to_protobuf(document_words, document_tags):
    # `tokenize` (a project helper assumed to produce BERT-style inputs) and
    # `writer` (an open tf.io.TFRecordWriter) come from the enclosing scope.
    input_ids, input_masks, y_masks, text, ys = tokenize([document_words],
                                                         [document_tags])
    example_proto = Example(features=Features(
        feature={
            'text':
            Feature(bytes_list=tf.train.BytesList(
                value=[w.encode('utf-8') for w in text[0]])),
            'input_ids':
            Feature(int64_list=tf.train.Int64List(value=input_ids)),
            'input_masks':
            Feature(int64_list=tf.train.Int64List(value=input_masks)),
            'y_masks':
            Feature(int64_list=tf.train.Int64List(value=y_masks)),
            'labels':
            Feature(int64_list=tf.train.Int64List(value=ys))
        }))
    writer.write(example_proto.SerializeToString())
Code Example #2
def make_tf_examples(string_features, int_features, labels):
    # `zero_norm_labels` and `split_list` are helpers defined elsewhere in
    # the project; each label is wrapped in a list so it fits an Int64List.
    int_features += [[label] for label in zero_norm_labels(labels)]
    string_features = [
        Feature(bytes_list=BytesList(value=val)) for val in string_features
    ]
    int_features = [
        Feature(int64_list=Int64List(value=val)) for val in int_features
    ]
    all_features = string_features + int_features
    return [
        Example(features=Features(
            feature={
                "left": left,
                "target": target,
                "right": right,
                "left_ids": left_ids,
                "target_ids": target_ids,
                "right_ids": right_ids,
                "labels": label,
            })) for (
                left,
                target,
                right,
                left_ids,
                target_ids,
                right_ids,
                label,
            ) in zip(*split_list(all_features, parts=7))
    ]
Code Example #3
File: preprocessing.py Project: George0828Zhang/NLP
def create_examples(data,
                    bert_client,
                    training=True,
                    label2int=None,
                    class_weight=None):
    """
    data: pd.DataFrame
    label2int: dict
    class_weight: list

    yield examples
    """
    idx_start = data.index[0]

    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())

    for i in range(len(data)):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i]))
        }
        if training:
            label = label2int[data.loc[idx_start + i, 'label']]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(float_list=FloatList(
                value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(
                value=[data.loc[idx_start + i, 'id']]))

        yield Example(features=Features(feature=feature))
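create_examples yields Example protos rather than writing them, so the caller owns the TFRecord writer. A minimal consumer sketch (df, bc, label2int and class_weight are hypothetical placeholders for the caller's DataFrame, bert-serving client, and label config):

with tf.io.TFRecordWriter('train.tfrecord') as writer:
    for ex in create_examples(df, bc, training=True,
                              label2int=label2int,
                              class_weight=class_weight):
        writer.write(ex.SerializeToString())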
Code Example #4
def serialise_traj(data):
    # Serialize every tensor-valued entry to a bytes feature;
    # 'seq_lens' is stored separately as a scalar int64.
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k not in ['seq_lens']
    }
    features['seq_lens'] = Feature(int64_list=Int64List(
        value=[data['seq_lens']]))

    example = Example(features=Features(feature=features))
    return example.SerializeToString()
Code Example #5
def serialise_vid(data):
    # Expected keys: seq_lens, masks, imgs, goal_imgs, label,
    # label_embedding, tag.
    features = {
        k: Feature(bytes_list=BytesList(
            value=[tf.io.serialize_tensor(v).numpy()]))
        for k, v in data.items() if k not in ['seq_lens']
    }
    features['seq_lens'] = Feature(int64_list=Int64List(
        value=[data['seq_lens']]))

    example = Example(features=Features(feature=features))
    return example.SerializeToString()
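Reading these records back reverses both encodings: tf.io.parse_single_example recovers the raw features, and tf.io.parse_tensor undoes tf.io.serialize_tensor. A minimal sketch, assuming float32 tensors (the out_type must match whatever dtype was actually serialized, e.g. uint8 for images):

def parse_serialised(serialized_example, tensor_keys):
    # tensor_keys: names of the bytes-encoded tensor features.
    spec = {k: tf.io.FixedLenFeature([], tf.string) for k in tensor_keys}
    spec['seq_lens'] = tf.io.FixedLenFeature([], tf.int64)
    parsed = tf.io.parse_single_example(serialized_example, spec)
    out = {k: tf.io.parse_tensor(parsed[k], out_type=tf.float32)
           for k in tensor_keys}
    out['seq_lens'] = parsed['seq_lens']
    return out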
Code Example #6
File: Data.py Project: Mr-Anyone/UNet
def save(mask, img, save_dir, count=0):
    mask = tf.io.encode_jpeg(mask)
    img = tf.io.encode_jpeg(img)

    Image_Buffer = Example(features=Features(
        feature={
            "Image": Feature(bytes_list=BytesList(value=[img.numpy()])),
            "Mask": Feature(bytes_list=BytesList(value=[mask.numpy()]))
        }))

    with tf.io.TFRecordWriter(os.path.join(save_dir,
                                           f"Data-{count}.tfrecord")) as f:
        f.write(Image_Buffer.SerializeToString())
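A matching reader parses the two bytes features and decodes the JPEGs; a sketch under the same key names used by save():

def load_examples(tfrecord_path):
    spec = {
        "Image": tf.io.FixedLenFeature([], tf.string),
        "Mask": tf.io.FixedLenFeature([], tf.string),
    }
    for record in tf.data.TFRecordDataset(tfrecord_path):
        parsed = tf.io.parse_single_example(record, spec)
        # decode_jpeg reverses the encode_jpeg calls in save()
        yield (tf.io.decode_jpeg(parsed["Image"]),
               tf.io.decode_jpeg(parsed["Mask"]))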
Code Example #7
 def create_example(features: np.ndarray, label: np.int32):
     return Example(features=Features(
         feature={
             "features":
             NumpyToRecordConverter._bytes_feature(
                 tf.io.serialize_tensor(features)),
             "label":
             Feature(int64_list=Int64List(value=[label]))
         })).SerializeToString()
Code Example #8
File: client_t2t.py Project: jjuraska/slug2slug
    def __encode_input(self, mr, input_encoder):
        """Encodes the input, and creates a TF Example record out of it."""

        input_ids = input_encoder.encode(mr)
        input_ids.append(text_encoder.EOS_ID)

        features = {'inputs': Feature(int64_list=Int64List(value=input_ids))}

        example = Example(features=Features(feature=features))

        return example.SerializeToString()
Code Example #9
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """
    Define the columns that should be written to tfrecords and converts the raw data
    to "Example" objects. Every Example contains data from one charging cycle.
    The data is scaled (divided) by the corresponding values in "scaling_factors".
    """
    # Summary feature values (scalars --> have to be wrapped in lists)
    ir_value = [
        cell_value["summary"][cst.INTERNAL_RESISTANCE_NAME][summary_idx] /
        scaling_factors[cst.INTERNAL_RESISTANCE_NAME]
    ]
    qd_value = [
        cell_value["summary"][cst.QD_NAME][summary_idx] /
        scaling_factors[cst.QD_NAME]
    ]
    rc_value = [
        cell_value["summary"][cst.REMAINING_CYCLES_NAME][summary_idx] /
        scaling_factors[cst.REMAINING_CYCLES_NAME]
    ]
    dt_value = [
        cell_value["summary"][cst.DISCHARGE_TIME_NAME][summary_idx] /
        scaling_factors[cst.DISCHARGE_TIME_NAME]
    ]
    # Same scale --> same scaling factor
    cc_value = [float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]]

    # Detail feature values (arrays)
    qdlin_value = cell_value["cycles"][cycle_idx][
        cst.QDLIN_NAME] / scaling_factors[cst.QDLIN_NAME]
    tdlin_value = cell_value["cycles"][cycle_idx][
        cst.TDLIN_NAME] / scaling_factors[cst.TDLIN_NAME]

    # Wrapping as example
    cycle_example = Example(features=Features(
        feature={
            cst.INTERNAL_RESISTANCE_NAME:
            Feature(float_list=FloatList(value=ir_value)),
            cst.QD_NAME:
            Feature(float_list=FloatList(value=qd_value)),
            cst.REMAINING_CYCLES_NAME:
            Feature(float_list=FloatList(value=rc_value)),
            cst.DISCHARGE_TIME_NAME:
            Feature(float_list=FloatList(value=dt_value)),
            cst.QDLIN_NAME:
            Feature(float_list=FloatList(value=qdlin_value)),
            cst.TDLIN_NAME:
            Feature(float_list=FloatList(value=tdlin_value)),
            cst.CURRENT_CYCLE_NAME:
            Feature(float_list=FloatList(value=cc_value))
        }))
    return cycle_example
Code Example #10
def main():

    model, signature, batch_file_path, sentence, target = parse_args()

    feat_dict = {"sentences": [], "targets": []}

    if batch_file_path is not None:
        with open(batch_file_path, "r") as batch_file:
            fieldnames = ["target", "sentence"]
            csvreader = DictReader(batch_file, fieldnames=fieldnames)
            for row in csvreader:
                feat_dict["targets"].append(row["target"].strip())
                feat_dict["sentences"].append(row["sentence"].strip())
    else:
        feat_dict["targets"].append(target)
        feat_dict["sentences"].append(sentence)

    l_ctxts, trgs, r_ctxts = FeatureProvider.partition_sentences(
        sentences=feat_dict["sentences"],
        targets=feat_dict["targets"],
        offsets=FeatureProvider.get_target_offset_array(feat_dict),
    )
    l_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(l_ctxts)
    ]
    trg_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(trgs)
    ]
    r_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(r_ctxts)
    ]

    tf_examples = []
    example_protos = []

    for left, target, right in zip(l_enc, trg_enc, r_enc):
        features = Features(
            feature={
                "left": Feature(bytes_list=BytesList(value=left)),
                "target": Feature(bytes_list=BytesList(value=target)),
                "right": Feature(bytes_list=BytesList(value=right)),
            }
        )
        tf_example = Example(features=features)
        example_protos.append(tf_example)
        tf_examples.append(tf_example.SerializeToString())

    tensor_proto = make_tensor_proto(
        tf_examples, dtype=tf_string, shape=[len(tf_examples)]
    )

    channel = insecure_channel("127.0.0.1:8500")
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # CLASSIFICATION
    classification_req = ClassificationRequest()
    # Wrap every Example proto for the classification request.
    inputs = Input(example_list=ExampleList(examples=example_protos))
    classification_req.input.CopyFrom(inputs)  # pylint: disable=E1101
    classification_req.model_spec.name = "lg"  # pylint: disable=E1101
    classification = stub.Classify(classification_req, 60.0)
    print(classification)

    # PREDICTION
    prediction_req = PredictRequest()
    prediction_req.inputs["instances"].CopyFrom(  # pylint: disable=E1101
        tensor_proto
    )
    prediction_req.model_spec.signature_name = (  # pylint: disable=E1101
        signature
    )
    prediction_req.model_spec.name = model  # pylint: disable=E1101
    prediction = stub.Predict(prediction_req, 60.0)
    print(prediction)
Code Example #11
 def _bytes_feature(value):
     """Returns a bytes_list from a string / byte."""
     if isinstance(value, type(tf.constant(0))):
         # BytesList won't unpack a string from an EagerTensor.
         value = value.numpy()
     return Feature(bytes_list=BytesList(value=[value]))
Code Example #12
File: utils.py Project: celerinoue/kGCN
def convert_to_example(
    adj,
    feature,
    label_data=None,
    label_mask=None,
):
    """
    Writes graph related data to disk.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    degrees = np.sum(adj, 0)
    adj_degrees = []
    for ar, ac in zip(adj_row, adj_col):
        if ar == ac:
            adj_degrees.append(0)
        else:
            adj_degrees.append(int(degrees[ar]))
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    # A distinct name avoids shadowing the `feature` array, whose shape is
    # stored under 'size' below.
    feature_dict = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column':
        Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values':
        Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len':
        Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size': Feature(int64_list=Int64List(value=list(feature.shape)))
    }
    if label_data is not None:
        label_data = np.nan_to_num(label_data)
        feature_dict['label'] = Feature(int64_list=Int64List(
            value=label_data.astype(int)))
        feature_dict['mask_label'] = Feature(int64_list=Int64List(
            value=label_mask.astype(int)))
    features = Features(feature=feature_dict)
    ex = Example(features=features)
    return ex.SerializeToString()
Code Example #13
File: emotion_model.py Project: jakeane/emotion_api
 def _create_int_feature(self, values):
     return Feature(int64_list=Int64List(value=list(values)))
Code Example #14
def serialise(data):
    # Unpack the fields; seq_len must be a Python int for Int64List.
    ID = data['ID']
    pos = data['pos']
    dimensions = data['dimensions']
    color = data['color']
    border = data['border']
    fill = data['fill']
    text = data['text']
    img = data['img']
    seq_len = int(data['seq_len'])
    seq_mask = data['seq_mask']

    ID = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(ID).numpy()]))
    pos = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(pos, tf.float32)).numpy()]))
    dimensions = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(dimensions, tf.float32)).numpy()]))
    color = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(color, tf.float32)).numpy()]))
    border = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(border, tf.float32)).numpy()]))
    fill = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(fill, tf.float32)).numpy()]))
    text = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(tf.cast(text, tf.float32)).numpy()]))
    # img is already serialised because we never decode it!
    img = Feature(bytes_list=BytesList(value=[img.numpy()]))
    seq_len = Feature(int64_list=Int64List(value=[seq_len]))
    seq_mask = Feature(bytes_list=BytesList(
        value=[tf.io.serialize_tensor(seq_mask).numpy()]))

    features = Features(feature={
        'ID': ID,
        'pos': pos,
        'dimensions': dimensions,
        'color': color,
        'border': border,
        'fill': fill,
        'text': text,
        'img': img,
        'seq_len': seq_len,
        'seq_mask': seq_mask,
    })

    example = Example(features=features)

    return example.SerializeToString()
Code Example #15
def serialise(data):
    # Incoming field shapes for reference:
    # obs (1, 40, 18), acts (1, 40, 7), goals (1, 40, 11), seq_lens (1,),
    # masks (1, 40), dataset_path (1, 40), tstep_idxs (1, 40),
    # imgs (1, 40, 200, 200, 3), goal_imgs (1, 40, 200, 200, 3),
    # proprioceptive_features (1, 40, 7)
    obs = data['obs']
    acts = data['acts']
    goals = data['goals']
    seq_lens = data['seq_lens']
    masks = data['masks']
    imgs = data['imgs']
    goal_imgs = data['goal_imgs']
    proprioceptive_features = data['proprioceptive_features']

    # Create a :, 1, :, :, : shaped goal image tensor for less file IO.
    goal_imgs = tf.expand_dims(goal_imgs[:, 0, :, :, :], 1)

    obs = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(obs)).numpy(),
    ]))
    acts = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(acts)).numpy(),
    ]))
    goals = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(goals)).numpy(),
    ]))
    seq_lens = Feature(int64_list=Int64List(value=[
        seq_lens,
    ]))
    masks = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(masks)).numpy(),
    ]))

    imgs = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(imgs)).numpy(),
    ]))
    goal_imgs = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(goal_imgs)).numpy(),
    ]))
    proprioceptive_features = Feature(bytes_list=BytesList(value=[
        tf.io.serialize_tensor(tf.squeeze(proprioceptive_features)).numpy(),
    ]))

    features = Features(
        feature={
            'obs': obs,
            'acts': acts,
            'goals': goals,
            'seq_lens': seq_lens,
            'masks': masks,
            'imgs': imgs,
            'goal_imgs': goal_imgs,
            'proprioceptive_features': proprioceptive_features
        })

    example = Example(features=features)

    return example.SerializeToString()


# Sample Usage
# r = lfp.data.PlayDataloader(include_imgs = args.images, batch_size=1,  window_size=args.window_size_max, min_window_size=args.window_size_min)
# rd = r.extract(TRAIN_DATA_PATHS, from_tfrecords=args.from_tfrecords)
# rd = r.load(rd)
# r_it = iter(rd)

# @tf.function
# def sample():
#   return r_it.next()

# data_paths = [str(STORAGE_PATH/'precompute')+f"/{x}.tfrecords" for x in range(0,8)]
# #@title write to gcs
# from tqdm import tqdm
# for path in data_paths:
#   with tf.io.TFRecordWriter(path) as file_writer:
#     print(path)
#     for i in tqdm(range(0,200)):
#         byte_stream = serialise(sample())
#         file_writer.write(byte_stream)
Code Example #16
File: prep.py Project: wibrow/kGCN
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """
    Writes graph-related data to a single TFRecord file on disk.
    """
    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)
    feature = np.array(feature)
    feature_row, feature_col = np.nonzero(feature)
    feature_values = feature[feature_row, feature_col]
    feature_elem_len = len(feature_row)
    features = Features(
        feature={
            'label':
            Feature(int64_list=Int64List(value=label_data)),
            'mask_label':
            Feature(int64_list=Int64List(value=label_mask)),
            'adj_row':
            Feature(int64_list=Int64List(value=list(adj_row))),
            'adj_column':
            Feature(int64_list=Int64List(value=list(adj_col))),
            'adj_values':
            Feature(float_list=FloatList(value=list(adj_values))),
            'adj_elem_len':
            Feature(int64_list=Int64List(value=[adj_elem_len])),
            'feature_row':
            Feature(int64_list=Int64List(value=list(feature_row))),
            'feature_column':
            Feature(int64_list=Int64List(value=list(feature_col))),
            'feature_values':
            Feature(float_list=FloatList(value=list(feature_values))),
            'feature_elem_len':
            Feature(int64_list=Int64List(value=[feature_elem_len])),
            'size':
            Feature(int64_list=Int64List(value=list(feature.shape)))
        })
    ex = Example(features=features)
    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(ex.SerializeToString())
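Because the adjacency and feature lists above have per-graph lengths, a reader must declare them with tf.io.VarLenFeature, which yields sparse tensors. A minimal sketch covering a few of the keys:

def parse_graph(serialized_example):
    spec = {
        'adj_row': tf.io.VarLenFeature(tf.int64),
        'adj_column': tf.io.VarLenFeature(tf.int64),
        'adj_values': tf.io.VarLenFeature(tf.float32),
        'adj_elem_len': tf.io.FixedLenFeature([1], tf.int64),
        'size': tf.io.FixedLenFeature([2], tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized_example, spec)
    # VarLenFeature entries come back as tf.sparse.SparseTensor.
    return {k: tf.sparse.to_dense(v)
            if isinstance(v, tf.sparse.SparseTensor) else v
            for k, v in parsed.items()}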