Esempio n. 1
0
  def _get_array_attributes(self, prefix=''):
    """Describe the arrays this transform stores in each tfrecord example.

    Parameters
    ----------
    prefix : str
      Forwarded to self._pre together with the attribute dictionary. Defaults to no additional prefix.

    Returns
    -------
    array_attributes : dict
      The result of self._pre applied to the attribute dictionary built here. Before that call the dictionary has the keys 'nums', 'amps', 'div', 'nats' and 'diff', each mapping to a dict with the entries 'shape', 'tf_type', 'size', 'feature_func' and 'np_type'.

    """
    att_dict = {}
    att_dict['nums'] = {
      'shape': [self.top_frequencies],
      'tf_type': feat.select_tf_dtype(self.dtype),
      'size': feat.size_from_shape([self.top_frequencies]),
      'feature_func': feat.select_feature_func(self.dtype),
      'np_type': self.dtype
    }
    # 'amps' is declared with an empty shape, i.e. no per-example dimensions.
    att_dict['amps'] = {
      'shape': [],
      'tf_type': feat.select_tf_dtype(self.dtype),
      'size': feat.size_from_shape([]),
      'feature_func': feat.select_feature_func(self.dtype),
      'np_type': self.dtype
    }
    att_dict['div'] = {
      'shape': [self.top_frequencies],
      'tf_type': feat.select_tf_dtype(self.dtype),
      'size': feat.size_from_shape([self.top_frequencies]),
      'feature_func': feat.select_feature_func(self.dtype),
      'np_type': self.dtype
    }
    # Written as int64 features but declared as numpy booleans.
    # np.bool_ is used instead of the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    att_dict['nats'] = {
      'shape': [len(self.cols)],
      'tf_type': tf.int64,
      'size': feat.size_from_shape([len(self.cols)]),
      'feature_func': feat._int_feat,
      'np_type': np.bool_
    }
    att_dict['diff'] = {
      'shape': [len(self.cols)],
      'tf_type': tf.int64,
      'size': feat.size_from_shape([len(self.cols)]),
      'feature_func': feat._int_feat,
      'np_type': np.int64
    }

    att_dict = self._pre(att_dict, prefix)
    return att_dict
Esempio n. 2
0
    def _get_feature_dicts(self, tap_dict):
        func_dict = {}
        feature_dict = {}
        for key in tap_dict:
            array = tap_dict[key]
            num_examples = array.shape[0]

            if array.shape[0] != num_examples:
                raise ValueError(
                    "All arrays must have the same size first dimesion in order to split them up into individual examples"
                )

            if array.dtype not in (np.int32, np.int64, np.bool, np.float32,
                                   np.float64) and array.dtype.type not in (
                                       np.string_, np.unicode_):
                raise TypeError(
                    "Only string and number types are supported. Got " +
                    str(array.dtype))
            feature_dict[key] = {}
            feature_dict[key]['np_dtype'] = array.dtype
            feature_dict[key]['tf_dtype'] = feat.select_tf_dtype(array.dtype)
            feature_dict[key]['shape'] = array.shape[1:]
            func_dict[key] = feat.select_feature_func(array.dtype)

        return feature_dict, func_dict
Esempio n. 3
0
    def write_examples(self, array, file_name):
        """Pour the array, then write the poured arrays to a tfrecord file.

        One example is written per 'row', i.e. per index along axis=0 of the poured arrays. For every key of the poured dict each example holds two features: '<key>/vals' (the flattened row, name built with os.path.join) and '<key>/shape' (the row's shape).

        Parameters
        ----------
        array : np.ndarray
          The array to transform to examples, then write to disk.
        file_name : str
          The name of the tfrecord file to write to.

        Raises
        ------
        ValueError
          If the poured arrays do not all share the same first dimension.
        TypeError
          If a poured array has a dtype other than int32, int64, bool, float32, float64 or a string type.

        """
        tap_dict = self.pour(array)

        # Validate and collect per-key attributes before the output file is
        # opened, so invalid input neither creates a file nor leaks a writer.
        att_dict = {}
        num_examples = None
        for key in tap_dict:
            poured = tap_dict[key]

            if num_examples is None:
                num_examples = poured.shape[0]

            if poured.shape[0] != num_examples:
                raise ValueError(
                    "All arrays must have the same size first dimesion in order to split them up into individual examples"
                )

            # np.bool_ and dtype.kind ('S' bytes, 'U' unicode) are available in
            # every NumPy release, unlike the np.bool, np.string_ and
            # np.unicode_ aliases that newer releases removed.
            if poured.dtype not in (np.int32, np.int64, np.bool_, np.float32,
                                    np.float64) and poured.dtype.kind not in (
                                        'S', 'U'):
                raise TypeError(
                    "Only string and number types are supported. Got " +
                    str(poured.dtype))

            # The per-key entry has to be created here; assigning into
            # att_dict[key][...] without it raises KeyError.
            att_dict[key] = {
                'shape': list(poured.shape[1:]),
                'feature_func': feat.select_feature_func(poured.dtype),
                'vals_name': os.path.join(key, 'vals'),
                'shape_name': os.path.join(key, 'shape'),
            }

        writer = tf.python_io.TFRecordWriter(file_name)
        try:
            # An empty tap_dict leaves num_examples as None; treat that as
            # zero examples. range works on Python 2 and 3 alike.
            for row_num in range(num_examples or 0):
                example_dict = {}
                for key in tap_dict:
                    atts = att_dict[key]
                    flat = tap_dict[key][row_num].flatten()

                    example_dict[atts['vals_name']] = atts['feature_func'](
                        flat)
                    example_dict[atts['shape_name']] = feat._int_feat(
                        atts['shape'])

                example = tf.train.Example(features=tf.train.Features(
                    feature=example_dict))
                writer.write(example.SerializeToString())
        finally:
            # Close the writer even when building or writing an example fails.
            writer.close()
Esempio n. 4
0
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays this transform stores in each tfrecord example.

        Parameters
        ----------
        prefix : str
          Forwarded to self._pre together with the attribute dictionary. Defaults to no additional prefix.

        Returns
        -------
        dict
          The result of self._pre applied to a dictionary with the keys 'missing_vals', 'one_hots' and 'indices', each mapping to a dict with the entries 'shape', 'tf_type', 'size', 'feature_func' and 'np_type'.

        """
        # Shape of a single example: the input shape without its first axis.
        example_shape = self.input_shape[1:]

        missing_vals = dict(
            shape=list(example_shape),
            tf_type=feat.select_tf_dtype(self.input_dtype),
            size=feat.size_from_shape(example_shape),
            feature_func=feat.select_feature_func(self.input_dtype),
            np_type=self.input_dtype
        )

        # One extra trailing axis with one slot per entry of index_to_cat_val.
        one_hots_shape = list(example_shape) + [len(self.index_to_cat_val)]
        one_hots = dict(
            shape=one_hots_shape,
            tf_type=feat.select_tf_dtype(self.dtype),
            size=feat.size_from_shape(one_hots_shape),
            feature_func=feat.select_feature_func(self.dtype),
            np_type=self.dtype
        )

        indices = dict(
            shape=list(example_shape),
            tf_type=tf.int64,
            size=feat.size_from_shape(example_shape),
            feature_func=feat._int_feat,
            np_type=np.int64
        )

        att_dict = {}
        att_dict['missing_vals'] = missing_vals
        att_dict['one_hots'] = one_hots
        att_dict['indices'] = indices
        return self._pre(att_dict, prefix)
Esempio n. 5
0
    def _get_array_attributes(self, prefix=''):
        """Describe the arrays this transform stores in each tfrecord example.

        Parameters
        ----------
        prefix : str
          Forwarded to self._pre together with the attribute dictionary. Defaults to no additional prefix.

        Returns
        -------
        dict
          The result of self._pre applied to a dictionary with the keys 'nums' and 'nans', each mapping to a dict with the entries 'shape', 'tf_type', 'size', 'feature_func' and 'np_type'.

        """
        # The numpy type of 'nums' follows the input dtype (integer inputs map
        # to int64, everything else to float32) rather than self.dtype.
        if self.input_dtype in (int, np.int32, np.int64):
            nums_np_type = np.int64
        else:
            nums_np_type = np.float32

        att_dict = {}
        att_dict['nums'] = {
            'shape': list(self.input_shape[1:]),
            'tf_type': feat.select_tf_dtype(self.dtype),
            'size': feat.size_from_shape(self.input_shape[1:]),
            'feature_func': feat.select_feature_func(self.dtype),
            'np_type': nums_np_type
        }
        # Written as int64 features but declared as numpy booleans.
        # np.bool_ is used instead of the np.bool alias, which was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        att_dict['nans'] = {
            'shape': list(self.input_shape[1:]),
            'tf_type': tf.int64,
            'size': feat.size_from_shape(self.input_shape[1:]),
            'feature_func': feat._int_feat,
            'np_type': np.bool_
        }

        att_dict = self._pre(att_dict, prefix)
        return att_dict