def get_example_proto(row, features):
    """
    Build an Example protobuf out of one dataframe row

    Args:
        - row: pandas DataFrame row
        - features: configuration for all features; every entry is read for
          its "name" and "dtype", and for its "default_value" when the row
          holds a missing value for that feature

    Returns:
        train.Example with one entry per configured feature
    """
    # FIXME
    # When applying functions with axis=1, pandas performs upcasting,
    # so if we have a mix of floats/ints, converts everything to float
    # that breaks this part of the code. Example:
    # https://stackoverflow.com/questions/47143631/
    # how-do-i-preserve-datatype-when-using-apply-row-wise-in-pandas-dataframe
    proto_features = {}
    for info in features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])

        # Missing cells are replaced by the configured default value
        if pd.isna(row[name]):
            values = [info["default_value"]]
        else:
            values = [row[name]]

        proto_features[name] = to_feature(values)

    return train.Example(features=train.Features(feature=proto_features))
def get_sequence_example_proto(group, context_features, sequence_features):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Args:
        - group: pandas dataframe group
        - context_features: feature configuration for context
        - sequence_features: feature configuration for sequence

    Returns:
        train.SequenceExample whose context holds the first value of each
        context feature and whose feature lists hold the full column of each
        sequence feature typed as SEQUENCE
    """
    context_dict = {}
    for info in context_features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        # Context features are taken from the first record of the group
        first_value = group[name].tolist()[0]
        context_dict[name] = to_feature([first_value])

    feature_list_dict = {}
    for info in sequence_features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        if info["tfrecord_type"] == SequenceExampleTypeKey.SEQUENCE:
            # The whole column is packed into a single feature
            column_values = group[name].tolist()
            feature_list_dict[name] = train.FeatureList(feature=[to_feature(column_values)])

    context = train.Features(feature=context_dict)
    feature_lists = train.FeatureLists(feature_list=feature_list_dict)
    return train.SequenceExample(context=context, feature_lists=feature_lists)
def _get_sequence_example_proto(group, feature_config: FeatureConfig):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Args:
        - group: pandas dataframe group
        - feature_config: FeatureConfig queried for the context features
          and the sequence features to serialize

    Returns:
        train.SequenceExample built from the group
    """
    context_dict = {}
    for info in feature_config.get_context_features():
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        # Context features are taken from the first record of the group
        first_value = group[name].tolist()[0]
        context_dict[name] = to_feature([first_value])

    feature_list_dict = {}
    for info in feature_config.get_sequence_features():
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        if info["tfrecord_type"] == TFRecordTypeKey.SEQUENCE:
            # The whole column is packed into a single feature
            column_values = group[name].tolist()
            feature_list_dict[name] = train.FeatureList(feature=[to_feature(column_values)])

    return train.SequenceExample(
        context=train.Features(feature=context_dict),
        feature_lists=train.FeatureLists(feature_list=feature_list_dict),
    )
def process_folder(folder_path, folder_csv, img_label, test_writer, train_writer):
    """
    Serialize every usable ``.jpg`` of a folder into the train or test writer.

    :param folder_path: directory that is listed for ``.jpg`` files
    :param folder_csv: mapping from image file name to the per-image data
        handed to ``load_img``; files without an entry are reported and skipped
    :param img_label: label stored with every image of this folder
    :param test_writer: record writer receiving the test examples
    :param train_writer: record writer receiving the training examples
    :return: tuple ``(ex_test, ex_train)`` with the number of examples
        written to each writer
    """
    ex_test, ex_train = 0, 0
    for img_name in listdir(folder_path):
        if not img_name.endswith('.jpg'):
            continue

        img_data = folder_csv.get(img_name)
        if img_data is None:
            print('Unable to retrieve data for {}'.format(img_name))
            continue

        img = load_img(path.join(folder_path, img_name), img_data)
        if img is None:
            # load_img signalled that this image cannot be used
            continue

        assert img.shape == (300, 300, 3)
        feature = {
            'label': int64_feature(img_label),
            # img is presumably a NumPy array: tobytes() is the supported
            # spelling of the deprecated tostring() alias (same bytes)
            'image': bytes_feature(img.tobytes()),
        }
        example = train.Example(features=train.Features(feature=feature))

        if put_in_training_set():
            train_writer.write(example.SerializeToString())
            ex_train += 1
        else:
            test_writer.write(example.SerializeToString())
            ex_test += 1

    return ex_test, ex_train
def create_example(image_path):
    """
    Build an Example protobuf holding the raw bytes and class label of an image.

    The class label is resolved by ``_get_tf_class`` from the directory part
    of ``image_path``.

    :param image_path: path of the image file, as a ``str``
    :return: ``tft.Example`` with the features ``image/label`` and
        ``image/encoded``
    :raises AssertionError: if ``image_path`` is not a ``str``
    """
    # types.StringType only exists on Python 2; isinstance(..., str) is the
    # equivalent check that also works on Python 3
    assert isinstance(image_path, str), 'image_path: passed object of incorrect type'

    # Context manager so the file handle is closed deterministically
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()

    class_label, _ = _get_tf_class(os.path.split(image_path)[0])
    return tft.Example(features=tft.Features(feature={
        'image/label': _int64_feature(class_label),
        'image/encoded': _bytes_feature(tfc.as_bytes(image_data)),
    }))
def record_writer(self, label, name, writer):
    """
    Write a specific example on a writer tensor record

    The image is loaded with ``self.shape`` as target size, converted to a
    ``uint8`` array and stored as raw bytes next to its label.

    :param label: image label
    :param name: image filename
    :param writer: tensor record object
    :return: None
    """
    image = load_img(name, target_size=self.shape)
    pixels = img_to_array(image, dtype='uint8')
    feature = dict(
        # tobytes() replaces the deprecated ndarray.tostring() alias
        # (assumes img_to_array returns a NumPy array)
        image=_bytes_feature(pixels.tobytes()),
        label=_int64_feature(label),
    )
    features = train.Features(feature=feature)
    samples = train.Example(features=features)
    writer.write(samples.SerializeToString())
def get_example_proto(row, features):
    """
    Build an Example protobuf out of one dataframe row

    Args:
        - row: pandas DataFrame row
        - features: configuration for all features; every entry is read for
          its "name" and "dtype"

    Returns:
        train.Example with one single-valued entry per configured feature
    """
    proto_features = {}
    for info in features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        # Each cell is wrapped into a one-element list before conversion
        values = [row[name]]
        proto_features[name] = to_feature(values)

    example_features = train.Features(feature=proto_features)
    return train.Example(features=example_features)
def get_sequence_example_proto(group, context_features, sequence_features):
    """
    Build a SequenceExample protobuf out of a dataframe group

    Parameters
    ----------
    group : pandas dataframe group
    context_features : dict
        dictionary containing the configuration for all the context features
    sequence_features : dict
        dictionary containing the configuration for all the sequence features

    Returns
    -------
    `SequenceExample` object
        SequenceExample object loaded the dataframe group
    """
    context_dict = {}
    for info in context_features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        # Context features are taken from the first record of the group
        first_value = group[name].tolist()[0]
        if not isinstance(first_value, list):
            first_value = [first_value]
        context_dict[name] = to_feature(first_value)

    feature_list_dict = {}
    for info in sequence_features:
        name = info["name"]
        to_feature = _get_feature_fn(info["dtype"])
        if info["tfrecord_type"] == SequenceExampleTypeKey.SEQUENCE:
            # One feature per record; scalar values are wrapped into a list
            per_record = []
            for value in group[name].tolist():
                per_record.append(to_feature(value if isinstance(value, list) else [value]))
            feature_list_dict[name] = train.FeatureList(feature=per_record)

    context = train.Features(feature=context_dict)
    feature_lists = train.FeatureLists(feature_list=feature_list_dict)
    return train.SequenceExample(context=context, feature_lists=feature_lists)
def get_example_proto(row, features):
    """
    Get an Example protobuf from a pandas dataframe row

    Parameters
    ----------
    row : pandas DataFrame row
        pandas dataframe row to be converted to an example proto
    features : iterable of dict
        configuration for all features; every entry is read for its
        "name" and "dtype", and for its "default_value" when the row
        holds a missing scalar value for that feature

    Returns
    -------
    `Example` protobuffer object
        Example object loaded from the specified row

    Raises
    ------
    Exception
        If a configured feature name is not present in the row

    Notes
    -----
    List values are passed to the feature function unchanged and are never
    replaced by the default value; scalar values are wrapped into a
    one-element list.
    """
    features_dict = dict()
    for feature_info in features:
        feature_name = feature_info["name"]
        feature_fn = _get_feature_fn(feature_info["dtype"])

        # FIXME
        # When applying functions with axis=1, pandas performs upcasting,
        # so if we have a mix of floats/ints, converts everything to float
        # that breaks this part of the code. Example:
        # https://stackoverflow.com/questions/47143631/
        # how-do-i-preserve-datatype-when-using-apply-row-wise-in-pandas-dataframe
        if feature_name not in row:
            raise Exception(
                "Could not find column {} in record: {}".format(feature_name, str(row))
            )

        raw_val = row[feature_name]
        if isinstance(raw_val, list):
            # pd.isna on a list returns an element-wise boolean array, whose
            # truth value is ambiguous for more than one element; lists are
            # therefore handled before the missing-value check
            feature_val = raw_val
        elif pd.isna(raw_val):
            feature_val = feature_info["default_value"]
        else:
            feature_val = raw_val

        features_dict[feature_name] = feature_fn(
            feature_val if isinstance(feature_val, list) else [feature_val]
        )

    return train.Example(features=train.Features(feature=features_dict))