def getdataset(datasetname, onehot_encode_strings=True):
    """Fetch an mldata dataset and return it as a numeric ``(X, y)`` pair.

    Args:
        datasetname: Name handed to ``fetch_mldata``.
        onehot_encode_strings: When true, every column whose first-row value
            is a string is mapped through ``tonumeric`` and then one-hot
            encoded.

    Returns:
        A tuple ``(X, y)``. ``X`` is cast to float and passed through
        ``np.nan_to_num``; ``y`` is ``tonumeric(target)`` cast to int.
    """
    dataset = fetch_mldata(datasetname)

    # Split into feature matrix and target.
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except Exception:
        # Best-effort fallback, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]

    # Some mldata sets have data and target swapped: if the "target" is wider
    # than the data, exchange the two.
    if len(target.shape) > 1 and target.shape[1] > X.shape[1]:
        X = target
        target = dshape(dataset.data)

    # With at most one feature column, pull extra columns from the other
    # entries of the dataset.
    # NOTE(review): the length check compares against X.shape[1] (columns),
    # while np.hstack needs matching row counts -- confirm whether
    # X.shape[0] was intended.
    if len(X.shape) == 1 or X.shape[1] <= 1:
        for k in dataset.keys():
            if k != 'data' and k != 'target' and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))

    # One-hot encode categorical (string-valued) columns.
    if onehot_encode_strings:
        cat_ft = []
        for i in range(X.shape[1]):
            # Only the first row is inspected to decide whether a column
            # holds strings.
            type_name = str(type(unpack(X[0, i])))
            if 'str' in type_name or 'unicode' in type_name:
                cat_ft.append(i)
        if cat_ft:
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)

    # Densify sparse results; dense arrays have no ``toarray`` and pass
    # through unchanged.
    if hasattr(X, "toarray"):
        X = X.toarray()

    # Convert the target through ``tonumeric`` and cast it to int.
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
Ejemplo n.º 2
0
def single_output_multiclass_one_hot():
    """Yield ``TestParams`` cases with a one-hot encoded 3-class target.

    The same estimators and feature matrix are reused for every case; only
    the dtype of the one-hot target changes between yielded cases.
    """
    labels = np.random.randint(low=0, high=3, size=(1000,))
    X = labels.reshape(-1, 1)
    # Keras compatibility: one-hot encoded targets are accepted together
    # with the categorical_crossentropy loss.
    y_onehot = OneHotEncoder(sparse=False).fit_transform(X)

    sklearn_est = MLPClassifier(**mlp_kwargs)
    keras_model = create_model("softmax", [3])
    scikeras_est = KerasClassifier(
        keras_model, **scikeras_kwargs, loss="categorical_crossentropy"
    )

    target_dtypes = ("float32", "float64", "int64", "int32", "uint8", "uint16")
    for dtype_name in target_dtypes:
        yield TestParams(
            sklearn_est=sklearn_est,
            scikeras_est=scikeras_est,
            X=X,
            y=y_onehot.astype(dtype_name),
            X_expected_dtype_keras=X.dtype,
            y_expected_dtype_keras=tf.keras.backend.floatx(),
            min_score=0.95,
            scorer=accuracy_score,
        )
Ejemplo n.º 3
0
    
    # One-hot encode the dependent variable of the evaluation set.
    # y_eval is reshaped to a single column before encoding.
    y_eval = OneHotEncoder().fit_transform(y_eval.reshape(-1,1))  # result is a SciPy sparse matrix
    y_eval = y_eval.toarray()  # convert the sparse result to a dense ndarray

    # Compute the accuracy on the evaluation dataset.
    with tf.Session() as sess:

        # Restore the checkpoint from step 46, which gave the best result.
        saver = tf.train.import_meta_graph('./model_ae_fc_1_2/ae_99.8_fc-46.meta')
        saver.restore(sess,'./model_ae_fc_1_2/ae_99.8_fc-46')

        graph = tf.get_default_graph()

        # Look up the input placeholders and the output tensor by name in the
        # restored graph.
        y_ph = graph.get_tensor_by_name('Placeholders/Y_ph:0')
        x_ph = graph.get_tensor_by_name('Placeholders/X_ph:0')
        logits = graph.get_tensor_by_name('output_layer/Add:0')

        # A prediction is correct when the argmax of the logits equals the
        # argmax of the one-hot label; accuracy is the mean of those matches.
        correct_pred = tf.equal(tf.math.argmax(logits,axis=1),tf.math.argmax(y_ph,axis=1))
        acc = tf.reduce_mean(tf.cast(correct_pred,'float'))

        # Build the feed dict: scaled features and one-hot labels, both as
        # float32.
        feed_eval = {}
        X_eval_scaled = sc.transform(x_eval)  # scale the eval set with the existing scaler `sc`
        feed_eval[x_ph] = X_eval_scaled.astype(np.float32)
        feed_eval[y_ph] = y_eval.astype(np.float32)

        print("Accuracy for the 40% unseen data is : ", acc.eval(feed_eval))
        
    
Ejemplo n.º 4
0
# In[1]:

import numpy as np
from sklearn.datasets import load_iris

# Load the iris data set: features as float32, class labels as int32.
iris = load_iris()
X = iris.data.astype(np.float32)
y_true = iris.target.astype(np.int32)


# In[2]:

from sklearn.preprocessing import OneHotEncoder

# One-hot encode the class labels. todense() yields a dense numpy.matrix.
y_onehot = OneHotEncoder().fit_transform(y_true.reshape(-1, 1))
y_onehot = y_onehot.astype(np.int64).todense()


# In[4]:
# Divide the data into a training set and a test set.
# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection. Fall back to the old module only on
# very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, random_state=14)


# In[5]:

# Sizes of the input, hidden and output layers.
input_layer_size, hidden_layer_size, output_layer_size = 4, 6, 3


# In[7]:
Ejemplo n.º 5
0
import os
import numpy as np

# Load data batches 1 through 5 (range(1, 6) excludes 6).
batches = [
    unpickle(os.path.join(data_folder, 'data_batch_{}'.format(i)))
    for i in range(1, 6)
]

# Stack the batch data, scale by the global maximum so values lie in [0, 1]
# (assuming non-negative input), and store as float32.
X = np.vstack([batch['data'] for batch in batches])
X = (X / X.max()).astype(np.float32)

from sklearn.preprocessing import OneHotEncoder

# np.hstack needs a real sequence; passing a generator is deprecated and
# rejected by current NumPy, so build a list first.
y = np.hstack([batch['labels'] for batch in batches]).flatten()
# One-hot encode the labels (as a single column) into a dense float32 matrix.
y = OneHotEncoder().fit_transform(y.reshape(y.shape[0], 1)).todense()
y = y.astype(np.float32)

# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection. Fall back to the old module only on
# very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# reshape(-1, ...) lets NumPy infer the leading dimension from the others.
# Presumably the layout is (samples, channels, height, width) -- confirm.
X_train = X_train.reshape(-1, 3, 32, 32)
X_test = X_test.reshape(-1, 3, 32, 32)

#bulid
from lasagne import layers
layers = [
    ('input', layers.InputLayer),
    ('conv1', layers.Conv2DLayer),
    ('pool1', layers.MaxPool2DLayer),
    ('conv2', layers.Conv2DLayer),
    ('pool2', layers.MaxPool2DLayer),