def _feed_dict_fn():
    """Build a feed dict from a random split of ``self.df``.

    Takes the first partition returned by ``self.df.random_split`` (sized by
    ``self.sample_fraction``), materialises the X and y columns with
    ``compute()`` and converts both to nested Python lists.

    Returns:
        Dict keyed by ``input_placeholder.name`` and
        ``output_placeholder.name`` holding the sampled X and y rows.
    """
    # TODO: option for with/without replacement (dev version of dask)
    fractions = [self.sample_fraction, 1 - self.sample_fraction]
    partitions = self.df.random_split(fractions, random_state=self.random_state)
    sampled = partitions[0]

    x_frame = sampled[self.X_columns].compute()
    x_rows = extract_pandas_matrix(x_frame).tolist()

    y_frame = sampled[self.y_columns].compute()
    y_rows = extract_pandas_matrix(y_frame).tolist()

    return {input_placeholder.name: x_rows, output_placeholder.name: y_rows}
def _feed_dict_fn():
    """Build a feed dict with one-hot encoded outputs from a random split.

    Takes the first partition returned by ``self.df.random_split`` (sized by
    ``self.sample_fraction``), converts the X columns to an array of
    ``self.input_dtype`` and one-hot encodes the y column.

    Returns:
        Dict keyed by ``input_placeholder.name`` (the X array) and
        ``output_placeholder.name`` (a zero matrix of ``self.output_dtype``
        with one row per sampled label, ``max(self.y) + 1`` columns, and a
        1 in the column given by each label).
    """
    # TODO: option for with/without replacement (dev version of dask)
    fractions = [self.sample_fraction, 1 - self.sample_fraction]
    sampled = self.df.random_split(fractions, random_state=self.random_state)[0]

    x_rows = extract_pandas_matrix(sampled[self.X_columns].compute()).tolist()
    labels = extract_pandas_matrix(sampled[self.y_columns].compute())

    # Convert the inputs to the correct dtype.
    features = np.array(x_rows, dtype=self.input_dtype)

    # One-hot encode the labels, one column per class, for cross entropy loss.
    if HAS_PANDAS:
        import pandas as pd
        # NOTE(review): the Series check is kept inside the HAS_PANDAS branch
        # because `pd` is only bound here -- confirm against the intended
        # layout of the original code.
        if not isinstance(labels, pd.Series):
            labels = labels.flatten()

    # The largest label in self.y fixes the width of the one-hot matrix.
    n_classes = self.y.max().compute().values[0] + 1
    n_rows = labels.size
    one_hot = np.zeros((n_rows, n_classes), dtype=self.output_dtype)
    # Row i gets a 1 in the column named by labels[i].
    one_hot[np.arange(n_rows), labels] = 1

    return {input_placeholder.name: features, output_placeholder.name: one_hot}
def setup_processor_data_feeder(X):
    """Prepare the iterable that a processor will consume.

    Args:
        X: numpy, pandas or iterable.

    Returns:
        Iterable of data to process: the result of
        ``extract_pandas_matrix(X)`` when ``HAS_PANDAS`` is truthy,
        otherwise ``X`` itself, unchanged.
    """
    if not HAS_PANDAS:
        return X
    return extract_pandas_matrix(X)