def _preprocess(self, hp, x, y, fit=False):
    """Push the input data through the preprocessors that feed the model.

    Starting from `self.inputs`, data is propagated first-in-first-out
    through every `processor.HyperPreprocessor` found in a node's
    `out_hypermodels`. Data arriving at a node for which
    `self._is_model_inputs` is true is collected for the return value.

    Args:
        hp: Forwarded unchanged to each preprocessor's `fit`/`transform`.
        x: Input data; flattened with `nest.flatten` and paired
            positionally with `self.inputs`.
        y: Returned untouched.
        fit: If true, each preprocessor is fitted on its input data before
            that data is transformed.

    Returns:
        A tuple `(data_list, y)`, where `data_list` contains the collected
        data ordered by the id of the node it arrived at.
    """
    pending = queue.Queue()
    for start_node, start_data in zip(self.inputs, nest.flatten(x)):
        pending.put((start_node, start_data))

    collected = []
    while not pending.empty():
        current, payload = pending.get()

        if self._is_model_inputs(current):
            collected.append((self._node_to_id[current], payload))
            if fit:
                current.shape = utils.dataset_shape(payload)

        for consumer in current.out_hypermodels:
            # Only preprocessors take part in this pass.
            if not isinstance(consumer, processor.HyperPreprocessor):
                continue
            if fit:
                consumer.fit(hp, payload)
            target = consumer.outputs[0]
            pending.put((target, consumer.transform(hp, payload)))

    # Order the (id, data) pairs by id, then strip the ids while recording
    # each node's shape.
    collected.sort(key=lambda pair: pair[0])
    ordered_data = []
    for node_id, payload in collected:
        self._nodes[node_id].shape = utils.dataset_shape(payload)
        ordered_data.append(payload)
    return ordered_data, y
def fit_before_convert(self, dataset):
    """Infer `num_classes` and fit a label encoder on raw target data.

    Args:
        dataset: The target data. A `tf.data.Dataset` is treated as already
            encoded; a `pd.DataFrame` or `pd.Series` is converted to its
            underlying values; anything else must offer `flatten()`,
            `shape` and `len()` (presumably a numpy array).

    Raises:
        ValueError: If fewer than 2 classes are found or configured.
    """
    # A tf.data.Dataset must be encoded already, so only the class count
    # is derived from it (and only when none is set yet).
    if isinstance(dataset, tf.data.Dataset):
        if not self.num_classes:
            width = utils.dataset_shape(dataset)[0]
            # A single column holds 0s and 1s, i.e. two classes.
            self.num_classes = 2 if width == 1 else width
        return

    if isinstance(dataset, pd.DataFrame):
        dataset = dataset.values
    elif isinstance(dataset, pd.Series):
        dataset = dataset.values.reshape(-1, 1)

    flat = dataset.flatten()
    # More values than rows: not raw labels, so the column count is
    # taken as the class count and no encoder is needed.
    if len(flat) != len(dataset):
        self.num_classes = dataset.shape[1]
        return

    labels = set(flat)
    if self.num_classes is None:
        self.num_classes = len(labels)

    if self.num_classes < 2:
        raise ValueError(
            'Expect the target data for {name} to have '
            'at least 2 classes, but got {num_classes}.'.format(
                name=self.name, num_classes=self.num_classes))
    if self.num_classes == 2:
        self.label_encoder = encoders.LabelEncoder()
    elif self.num_classes > 2:
        self.label_encoder = encoders.OneHotEncoder()
    self.label_encoder.fit(dataset)
def set_node_shapes(self, dataset):
    """Copy the shapes found in `dataset` onto the graph's nodes.

    The model input nodes receive the x shapes, the output nodes receive
    the y shapes, and each output node's first incoming hypermodel gets
    the same shape as its `output_shape`.

    Args:
        dataset: Passed to `utils.dataset_shape`, which must yield a pair
            `(x_shapes, y_shapes)` of objects exposing `as_list()`.
    """
    # TODO: Set the shapes only if they are not provided by the user when
    # initiating the HyperHead or Block.
    shapes_in, shapes_out = utils.dataset_shape(dataset)

    for tensor_shape, node in zip(shapes_in, self._model_inputs):
        node.shape = tuple(tensor_shape.as_list())

    for tensor_shape, node in zip(shapes_out, self.outputs):
        node.shape = tuple(tensor_shape.as_list())
        # Keep the producing hypermodel in sync with its output node.
        node.in_hypermodels[0].output_shape = node.shape
def set_io_shapes(self, dataset):
    """Set the input and output shapes to the nodes.

    Every input node is given its x shape and every output node its
    y shape; the first incoming block of each output node gets the same
    shape as its `output_shape`. Existing shapes are overwritten.

    Args:
        dataset: tf.data.Dataset. The input dataset before preprocessing.
    """
    # TODO: Set the shapes only if they are not provided by the user when
    # initiating the HyperHead or Block.
    feature_shapes, target_shapes = utils.dataset_shape(dataset)

    for feature_shape, node in zip(feature_shapes, self.inputs):
        node.shape = tuple(feature_shape.as_list())

    for target_shape, node in zip(target_shapes, self.outputs):
        node.shape = tuple(target_shape.as_list())
        # Mirror the node's shape onto the block that produces it.
        node.in_blocks[0].output_shape = node.shape
def set_io_shapes(self, dataset):
    """Set the input and output shapes to the nodes.

    A node's shape is only filled in from the dataset when it is still
    `None`. The first incoming block of every output node then receives
    that node's shape as its `output_shape`.

    # Arguments
        dataset: tf.data.Dataset. The input dataset before preprocessing.
    """
    feature_shapes, target_shapes = utils.dataset_shape(dataset)

    for feature_shape, node in zip(feature_shapes, self.inputs):
        # A shape that is already set is left alone.
        if node.shape is not None:
            continue
        node.shape = tuple(feature_shape.as_list())

    for target_shape, node in zip(target_shapes, self.outputs):
        if node.shape is None:
            node.shape = tuple(target_shape.as_list())
        # Propagate whichever shape the node ends up with.
        node.in_blocks[0].output_shape = node.shape
def fit(self, hp, data):
    """Compute the mean and standard deviation of `data`.

    The statistics are averaged over all instances and over every axis of
    an instance except the last one, and stored on `self.mean` and
    `self.std`.

    Args:
        hp: Unused here.
        data: The data to gather statistics from. It is consumed through
            `data.reduce(initial_state, fn)` (presumably a
            `tf.data.Dataset` of float64-compatible tensors -- confirm
            against the caller).
    """
    shape = utils.dataset_shape(data)
    # Every axis but the last one is averaged away.
    axis = tuple(range(len(shape) - 1))

    def sum_up(old_state, new_elem):
        return old_state + new_elem

    def sum_up_square(old_state, new_elem):
        return old_state + tf.square(new_elem)

    num_instance = data.reduce(np.float64(0), lambda x, _: x + 1)
    total_sum = data.reduce(np.float64(0), sum_up) / num_instance
    self.mean = tf.reduce_mean(total_sum, axis=axis)

    total_sum_square = data.reduce(np.float64(0), sum_up_square) / num_instance
    square_mean = tf.reduce_mean(total_sum_square, axis=axis)

    # E[x^2] - E[x]^2 is non-negative mathematically, but floating-point
    # cancellation can push it slightly below zero for (near-)constant
    # features, which would make the square root NaN. Clamp at zero first.
    variance = square_mean - tf.square(self.mean)
    self.std = tf.sqrt(tf.maximum(variance, tf.zeros_like(variance)))
def _record_dataset_shape(self, dataset):
    """Store the shape reported by `utils.dataset_shape` as `output_shape`.

    Args:
        dataset: The dataset whose shape is recorded.
    """
    observed_shape = utils.dataset_shape(dataset)
    self.output_shape = observed_shape
def record_dataset_shape(self, dataset):
    """Store the shape reported by `utils.dataset_shape` as `self.shape`.

    Args:
        dataset: The dataset whose shape is recorded.
    """
    observed_shape = utils.dataset_shape(dataset)
    self.shape = observed_shape