def getdataset(datasetname, onehot_encode_strings=True):
    """Fetch a dataset from mldata.org and return it as ``(X, y)``.

    Parameters
    ----------
    datasetname : str
        Name of the dataset passed to ``fetch_mldata``.
    onehot_encode_strings : bool, default True
        When True, string-valued columns are mapped to integers via
        ``tonumeric`` and then one-hot encoded.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Dense float feature matrix (NaNs replaced with 0) and an
        integer-encoded target vector.
    """
    # load
    dataset = fetch_mldata(datasetname)
    # get X and y
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except (AttributeError, KeyError):
        # Narrowed from a bare ``except:`` — only a missing ``target``
        # attribute/key on the Bunch should trigger the fallback, not
        # e.g. KeyboardInterrupt.
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]
    if len(target.shape) > 1 and target.shape[1] > X.shape[1]:
        # some mldata sets are mixed up...
        X = target
        target = dshape(dataset.data)
    if len(X.shape) == 1 or X.shape[1] <= 1:
        # NOTE(review): when X is 1-D, ``X.shape[1]`` below raises
        # IndexError — the comparison may have been meant to use
        # ``X.shape[0]``; confirm against the original mldata layouts.
        for k in dataset.keys():
            if k != 'data' and k != 'target' and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))
    # one-hot encode categorical (string-valued) columns
    if onehot_encode_strings:
        cat_ft = [i for i in range(X.shape[1])
                  if 'str' in str(type(unpack(X[0, i])))
                  or 'unicode' in str(type(unpack(X[0, i])))]
        if len(cat_ft):
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)
    # if sparse, make dense (only sparse matrices expose ``toarray``,
    # so the narrowed AttributeError is the expected no-op path for
    # dense input)
    try:
        X = X.toarray()
    except AttributeError:
        pass
    # convert y to monotonically increasing ints
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
def single_output_multiclass_one_hot():
    """Yield test cases for a 3-class problem with one-hot encoded targets.

    Keras accepts one-hot targets together with the
    ``categorical_crossentropy`` loss, so the integer labels are expanded
    to a dense one-hot matrix before being cast to each dtype under test.
    """
    labels = np.random.randint(low=0, high=3, size=(1000,))
    X = labels.reshape(-1, 1)
    # For compatibility with Keras, accept one-hot-encoded inputs
    # with categorical_crossentropy loss
    y = OneHotEncoder(sparse=False).fit_transform(labels.reshape(-1, 1))
    sklearn_est = MLPClassifier(**mlp_kwargs)
    scikeras_est = KerasClassifier(
        create_model("softmax", [3]),
        **scikeras_kwargs,
        loss="categorical_crossentropy",
    )
    dtypes = ("float32", "float64", "int64", "int32", "uint8", "uint16")
    for target_dtype in dtypes:
        yield TestParams(
            sklearn_est=sklearn_est,
            scikeras_est=scikeras_est,
            X=X,
            y=y.astype(target_dtype),
            X_expected_dtype_keras=X.dtype,
            y_expected_dtype_keras=tf.keras.backend.floatx(),
            min_score=0.95,
            scorer=accuracy_score,
        )
# one-hot encoding of the dependent variable y_eval = OneHotEncoder().fit_transform(y_eval.reshape(-1,1)) # it's a scipy.sparse.csr.csr_mat y_eval = y_eval.toarray() # converting to ndarray # accuracy on eval dataset with tf.Session() as sess: # as 46th step gives the best result saver = tf.train.import_meta_graph('./model_ae_fc_1_2/ae_99.8_fc-46.meta') saver.restore(sess,'./model_ae_fc_1_2/ae_99.8_fc-46') graph = tf.get_default_graph() y_ph = graph.get_tensor_by_name('Placeholders/Y_ph:0') x_ph = graph.get_tensor_by_name('Placeholders/X_ph:0') logits = graph.get_tensor_by_name('output_layer/Add:0') correct_pred = tf.equal(tf.math.argmax(logits,axis=1),tf.math.argmax(y_ph,axis=1)) acc = tf.reduce_mean(tf.cast(correct_pred,'float')) feed_eval = {} X_eval_scaled = sc.transform(x_eval) #scaling the eval set feed_eval[x_ph] = X_eval_scaled.astype(np.float32) feed_eval[y_ph] = y_eval.astype(np.float32) print("Accuracy for the 40% unseen data is : ", acc.eval(feed_eval))
# In[1]:

import numpy as np
from sklearn.datasets import load_iris

# Load the iris dataset: 150 samples, 4 float features, 3 classes.
iris = load_iris()
X = iris.data.astype(np.float32)
y_true = iris.target.astype(np.int32)


# In[2]:

from sklearn.preprocessing import OneHotEncoder

# One-hot encode the class labels; the sparse result is densified for
# use as a training target below.
y_onehot = OneHotEncoder().fit_transform(y_true.reshape(-1, 1))
y_onehot = y_onehot.astype(np.int64).todense()


# In[4]:

# divide data into test set and training set
# FIX: ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``train_test_split`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, random_state=14)


# In[5]:

# Network dimensions: 4 inputs (features), 6 hidden units, 3 outputs (classes).
input_layer_size, hidden_layer_size, output_layer_size = 4, 6, 3


# In[7]:
import os
import numpy as np

# Load the five CIFAR-10 training batches and stack them into one matrix.
batches = []
for i in range(1, 6):
    # load batches 1-5
    batch_filename = os.path.join(data_folder, 'data_batch_{}'.format(i))
    batches.append(unpickle(batch_filename))
X = np.vstack([batch['data'] for batch in batches])
# Scale pixel values into [0, 1].
X = np.array(X) / X.max()
X = X.astype(np.float32)

from sklearn.preprocessing import OneHotEncoder
# NOTE(review): passing a generator expression to ``np.hstack`` is rejected by
# newer NumPy versions (it expects a sequence) — confirm the NumPy version in
# use; a list comprehension would be the safe form.
y = np.hstack(batch['labels'] for batch in batches).flatten()
y = OneHotEncoder().fit_transform(y.reshape(y.shape[0], 1)).todense()
y = y.astype(np.float32)

# NOTE(review): ``sklearn.cross_validation`` was removed in scikit-learn 0.20
# in favour of ``sklearn.model_selection`` — this import fails on modern
# scikit-learn.
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# reshape(-1, ...): the leading dimension is inferred automatically once the
# remaining dimensions are fixed (here: 3 channels x 32 x 32 pixels).
X_train = X_train.reshape(-1, 3, 32, 32)
X_test = X_test.reshape(-1, 3, 32, 32)

# build the network layer specification
# (rebinding ``layers`` shadows the lasagne module after this statement;
# the list literal is truncated at the end of this view)
from lasagne import layers
layers = [
    ('input', layers.InputLayer),
    ('conv1', layers.Conv2DLayer),
    ('pool1', layers.MaxPool2DLayer),
    ('conv2', layers.Conv2DLayer),
    ('pool2', layers.MaxPool2DLayer),