def _get_array_attributes(self, prefix=''):
    """Get the per-example attributes of the arrays written to tfrecord examples.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix. It is forwarded unchanged to ``self._pre``.

    Returns
    -------
    array_attributes : dict
        The result of ``self._pre(att_dict, prefix)``, where ``att_dict`` has
        the keys 'nums', 'amps', 'div', 'nats' and 'diff'. Each value is a
        dict describing a single example's array with the entries:

        - 'shape': list giving the array shape of one example
        - 'tf_type': the tensorflow dtype used when stored in a tfrecord
        - 'size': ``feat.size_from_shape`` applied to that shape
        - 'feature_func': the function that converts values to a tf feature
        - 'np_type': the numpy/python type of the original array

    """
    att_dict = {}

    # 'nums' and 'div' hold one value per tracked frequency.
    att_dict['nums'] = {
        'shape': [self.top_frequencies],
        'tf_type': feat.select_tf_dtype(self.dtype),
        'size': feat.size_from_shape([self.top_frequencies]),
        'feature_func': feat.select_feature_func(self.dtype),
        'np_type': self.dtype
    }
    # 'amps' is described with an empty shape (no per-example axes).
    att_dict['amps'] = {
        'shape': [],
        'tf_type': feat.select_tf_dtype(self.dtype),
        'size': feat.size_from_shape([]),
        'feature_func': feat.select_feature_func(self.dtype),
        'np_type': self.dtype
    }
    att_dict['div'] = {
        'shape': [self.top_frequencies],
        'tf_type': feat.select_tf_dtype(self.dtype),
        'size': feat.size_from_shape([self.top_frequencies]),
        'feature_func': feat.select_feature_func(self.dtype),
        'np_type': self.dtype
    }

    # 'nats' and 'diff' hold one value per column and are stored as int64.
    att_dict['nats'] = {
        'shape': [len(self.cols)],
        'tf_type': tf.int64,
        'size': feat.size_from_shape([len(self.cols)]),
        'feature_func': feat._int_feat,
        # Builtin bool instead of the ``np.bool`` alias: the alias was just
        # the builtin and no longer exists in several numpy releases.
        'np_type': bool
    }
    att_dict['diff'] = {
        'shape': [len(self.cols)],
        'tf_type': tf.int64,
        'size': feat.size_from_shape([len(self.cols)]),
        'feature_func': feat._int_feat,
        'np_type': np.int64
    }

    att_dict = self._pre(att_dict, prefix)
    return att_dict
def _get_feature_dicts(self, tap_dict): func_dict = {} feature_dict = {} for key in tap_dict: array = tap_dict[key] num_examples = array.shape[0] if array.shape[0] != num_examples: raise ValueError( "All arrays must have the same size first dimesion in order to split them up into individual examples" ) if array.dtype not in (np.int32, np.int64, np.bool, np.float32, np.float64) and array.dtype.type not in ( np.string_, np.unicode_): raise TypeError( "Only string and number types are supported. Got " + str(array.dtype)) feature_dict[key] = {} feature_dict[key]['np_dtype'] = array.dtype feature_dict[key]['tf_dtype'] = feat.select_tf_dtype(array.dtype) feature_dict[key]['shape'] = array.shape[1:] func_dict[key] = feat.select_feature_func(array.dtype) return feature_dict, func_dict
def write_examples(self, array, file_name):
    """Pours the array then writes the examples to tfrecords.

    It creates one example per 'row', i.e. axis=0 of the arrays. All arrays
    must have the same axis=0 dimension and must be of a type that can be
    written to a tfrecord. If pouring yields no arrays, no examples are
    written.

    Parameters
    ----------
    array : np.ndarray
        The array to transform to examples, then write to disk.
    file_name : str
        The name of the tfrecord file to write to.

    Raises
    ------
    ValueError
        If the poured arrays do not all share the same first dimension.
    TypeError
        If a poured array has an unsupported dtype.

    """
    # Builtin bool replaces the removed ``np.bool`` alias (same object in the
    # numpy versions that had it).
    supported_dtypes = (np.int32, np.int64, bool, np.float32, np.float64)

    writer = tf.python_io.TFRecordWriter(file_name)
    try:
        tap_dict = self.pour(array)

        att_dict = {}
        num_examples = None
        for key in tap_dict:
            tap_array = tap_dict[key]

            if num_examples is None:
                num_examples = tap_array.shape[0]

            if tap_array.shape[0] != num_examples:
                raise ValueError(
                    "All arrays must have the same size first dimesion in order to split them up into individual examples"
                )
            # dtype.kind 'S'/'U' identifies byte/unicode string arrays
            # without relying on the np.string_/np.unicode_ aliases.
            if tap_array.dtype not in supported_dtypes and tap_array.dtype.kind not in ('S', 'U'):
                raise TypeError(
                    "Only string and number types are supported. Got " + str(tap_array.dtype))

            # The per-key entry has to exist before its fields are filled in;
            # assigning into a missing key raises KeyError.
            example_shape = list(tap_array.shape[1:])
            att_dict[key] = {
                'dtype': str(tap_array.dtype),
                'shape': example_shape,
                'size': np.prod(example_shape),
                'feature_func': feat.select_feature_func(tap_array.dtype),
            }

        # num_examples stays None when tap_dict is empty; write nothing then.
        for row_num in range(num_examples or 0):
            example_dict = {}
            for key in tap_dict:
                flat = tap_dict[key][row_num].flatten()

                # NOTE(review): feature names are built with os.path.join, so
                # the separator follows the platform; readers presumably
                # build the names the same way - confirm.
                example_dict[os.path.join(
                    key, 'vals')] = att_dict[key]['feature_func'](flat)
                example_dict[os.path.join(key, 'shape')] = feat._int_feat(
                    att_dict[key]['shape'])

            example = tf.train.Example(features=tf.train.Features(
                feature=example_dict))
            writer.write(example.SerializeToString())
    finally:
        # Always release the file handle, including when validation or
        # pouring raises part-way through.
        writer.close()
def _get_array_attributes(self, prefix=''):
    """Describe the per-example arrays of this transform for tfrecord conversion.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix. It is forwarded unchanged to ``self._pre``.

    Returns
    -------
    dict
        The result of ``self._pre(att_dict, prefix)``, where ``att_dict`` has
        the keys 'missing_vals', 'one_hots' and 'indices'. Each value is a
        dict with the entries 'shape', 'tf_type', 'size', 'feature_func' and
        'np_type' describing the array of a single example.

    """
    # Shape of one example: the input shape without its leading axis.
    example_shape = self.input_shape[1:]
    # One-hots carry an extra trailing axis, one slot per entry of
    # index_to_cat_val.
    one_hots_shape = list(example_shape) + [len(self.index_to_cat_val)]

    att_dict = {
        'missing_vals': {
            'shape': list(example_shape),
            'tf_type': feat.select_tf_dtype(self.input_dtype),
            'size': feat.size_from_shape(example_shape),
            'feature_func': feat.select_feature_func(self.input_dtype),
            'np_type': self.input_dtype,
        },
        'one_hots': {
            'shape': one_hots_shape,
            'tf_type': feat.select_tf_dtype(self.dtype),
            'size': feat.size_from_shape(one_hots_shape),
            'feature_func': feat.select_feature_func(self.dtype),
            'np_type': self.dtype,
        },
        'indices': {
            'shape': list(example_shape),
            'tf_type': tf.int64,
            'size': feat.size_from_shape(example_shape),
            'feature_func': feat._int_feat,
            'np_type': np.int64,
        },
    }

    return self._pre(att_dict, prefix)
def _get_array_attributes(self, prefix=''):
    """Get the per-example attributes of the arrays written to tfrecord examples.

    Parameters
    ----------
    prefix : str
        Any additional prefix string/dictionary keys start with. Defaults to
        no additional prefix. It is forwarded unchanged to ``self._pre``.

    Returns
    -------
    dict
        The result of ``self._pre(att_dict, prefix)``, where ``att_dict`` has
        the keys 'nums' and 'nans'. Each value is a dict with the entries
        'shape', 'tf_type', 'size', 'feature_func' and 'np_type' describing
        the array of a single example.

    """
    att_dict = {}

    att_dict['nums'] = {
        'shape': list(self.input_shape[1:]),
        'tf_type': feat.select_tf_dtype(self.dtype),
        'size': feat.size_from_shape(self.input_shape[1:]),
        'feature_func': feat.select_feature_func(self.dtype),
        # NOTE(review): np_type follows the input dtype (int64 for integer
        # inputs, float32 otherwise) rather than self.dtype, unlike tf_type
        # and feature_func above - confirm this asymmetry is intended.
        'np_type': np.int64 if self.input_dtype in (int, np.int32, np.int64) else np.float32
    }
    att_dict['nans'] = {
        'shape': list(self.input_shape[1:]),
        'tf_type': tf.int64,
        'size': feat.size_from_shape(self.input_shape[1:]),
        'feature_func': feat._int_feat,
        # Builtin bool instead of the ``np.bool`` alias: the alias was just
        # the builtin and no longer exists in several numpy releases.
        'np_type': bool
    }

    att_dict = self._pre(att_dict, prefix)
    return att_dict