def __init__( self, use_mnist=False ):
   self.use_mnist = use_mnist
   if self.use_mnist:
      # fetch_mldata returns the full 70,000-sample MNIST set and has no
      # train/test subset argument; split manually if needed (see the note
      # at the end of this method).
      self.digits = fetch_mldata('MNIST original')
   else:
      self.digits = load_digits()
   self.X = self.digits.data
   self.y = self.digits.target
   self.best_f1_score = 0
   self.best_score = 0
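   # A minimal split sketch (not part of the original class), assuming the
   # conventional 60,000/10,000 MNIST train/test ordering:
   #
   #   X_train, y_train = self.X[:60000], self.y[:60000]
   #   X_test,  y_test  = self.X[60000:], self.y[60000:]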
   """
def testScript():
	print "\n---> Started Logistic Regression - Iris dataset - Own function - k class...\n"
	attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
	min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1,1))
	attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
	#attributes, outcomes = np.array(attributes), np.array(outcomes)

	accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=True)
	for itr in range(10):
		print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" %(itr+1,accrValues[itr],presValues[itr],recallValues[itr],fMeasValues[itr])
	print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues),np.mean(presValues),\
																												np.mean(recallValues),np.mean(fMeasValues))
															
																												
	print "---> Started Logistic Regression - Iris dataset - Inbuilt function - k class...\n"
	attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
	min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1,1))
	attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
	#attributes, outcomes = np.array(attributes), np.array(outcomes)

	accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=False)
	for itr in range(10):
		print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" %(itr+1,accrValues[itr],presValues[itr],recallValues[itr],fMeasValues[itr])
	print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues),np.mean(presValues),\
																												np.mean(recallValues),np.mean(fMeasValues))


	print "---> Started Logistic Regression - Digits dataset - Own function - k class...\n"																									
	mnist = datasets.fetch_mldata('MNIST original')
	X, y = mnist.data / 255., mnist.target
	attributes = X[:20000]
	outcomes = y[:20000]
	#print list(set(outcomes))
	accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=True)
	for itr in range(10):
		print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" %(itr+1,accrValues[itr],presValues[itr],recallValues[itr],fMeasValues[itr])
	print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues),np.mean(presValues),\
																												np.mean(recallValues),np.mean(fMeasValues))


	print "---> Started Logistic Regression - Digits dataset - Inbuilt function - k class...\n"																									
	mnist = datasets.fetch_mldata('MNIST original')
	X, y = mnist.data / 255., mnist.target
	attributes = X[:20000]
	outcomes = y[:20000]
	#print list(set(outcomes))
	accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=False)
	for itr in range(10):
		print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" %(itr+1,accrValues[itr],presValues[itr],recallValues[itr],fMeasValues[itr])
	print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues),np.mean(presValues),\
																												np.mean(recallValues),np.mean(fMeasValues))
Example #3
def get_data():
    """
    Get MNIST data ready to learn with.

    Returns
    -------
    dict
        With keys 'train' and 'test'. Both have the keys 'X' (features)
        and 'y' (labels).
    """
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')

    x = mnist.data
    y = mnist.target

    # Scale data to [-1, 1] - This is of major importance!!!
    x = x/255.0*2 - 1

    from sklearn.cross_validation import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                        test_size=0.33,
                                                        random_state=42)
    data = {'train': {'X': x_train,
                      'y': y_train},
            'test': {'X': x_test,
                     'y': y_test}}
    return data
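
# Illustrative use of get_data() (a sketch, not part of the original snippet):
#
#   data = get_data()
#   print(data['train']['X'].shape, data['train']['y'].shape)
#   print(data['test']['X'].shape, data['test']['y'].shape)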
Example #4
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.scale(data_set.data)

    print(db_name)
    print('ECOBELM', hid_num)
    e = ECOBELM(hid_num, c=2**5)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))

    print('ELM', hid_num)
    e = ELM(hid_num)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))
Example #5
def get_data(downsample=10,plshow=False):
	global data, target, mnist

	print("get_data ...")

	# Fetch the MNIST database from the Internet, or from the local cache if
	# it has already been downloaded (note by the author, renyuan)
	custom_data_home = '.' # the current directory
	mnist = datasets.fetch_mldata('MNIST original', data_home= custom_data_home)

	# Downsample MNIST for the training set: the database holds 70,000
	# images, and only a small fraction of them is sampled here.
	data =   mnist.data[0:60000:downsample]
	target = mnist.target[0:60000:downsample]

	if plshow:
		n_sample = len(data)
		data_image = data.reshape(n_sample,28,28)
		image_and_target = list(zip(data_image, target))

		pl.figure()
		for i, (im, tg) in enumerate(image_and_target):
			if i>=100: break
			pl.subplot(10, 10, i+ 1)
			pl.axis('off')
			pl.imshow(im, cmap=pl.cm.gray_r)
			pl.title('tg=%d'%tg, color='blue')

		pl.show()

	return data, target
Example #6
def main():
    files = [
        join(SCRIPT_DIR, "train_x.npy"),
        join(SCRIPT_DIR, "train_y.npy"),
        join(SCRIPT_DIR, "validate_x.npy"),
        join(SCRIPT_DIR, "validate_y.npy"),
        join(SCRIPT_DIR, "test_x.npy"),
        join(SCRIPT_DIR, "test_y.npy")
    ]
    if all([exists(fname) and stat(fname).st_size > 100 for fname in files]):
        print("Already downloaded. Skipping")
    else:
        mnist = fetch_mldata('MNIST original')
        np.random.seed(1234)

        data = mnist.data
        target = mnist.target
        indices = np.arange(len(data))
        np.random.shuffle(indices)

        data = data[indices]
        target = target[indices]

        train_x, train_y = (data[:-10000].astype(np.float32) / 255.0).astype(np.float32), target[:-10000].astype(np.int32)
        test_x, test_y = (data[-10000:].astype(np.float32) / 255.0).astype(np.float32), target[-10000:].astype(np.int32)

        np.save(join(SCRIPT_DIR, "train_x.npy"), train_x[:int(0.9 * train_x.shape[0])])
        np.save(join(SCRIPT_DIR, "train_y.npy"), train_y[:int(0.9 * train_y.shape[0])])
        np.save(join(SCRIPT_DIR, "validate_x.npy"), train_x[int(0.9 * train_x.shape[0]):])
        np.save(join(SCRIPT_DIR, "validate_y.npy"), train_y[int(0.9 * train_y.shape[0]):])

        np.save(join(SCRIPT_DIR, "test_x.npy"), test_x)
        np.save(join(SCRIPT_DIR, "test_y.npy"), test_y)
        print("Done.")
Example #7
def load(config, test=False):
    """Load MNIST dataset using scikit-learn.  Returns a dict with the
    following entries:

      - images:  n x 28 x 28 array
      - data:    n x 784  array
      - target:  n  array
    """
    dataset = fetch_mldata('mnist-original')
    X, y = dataset.data, dataset.target
    X = X.astype(np.float32) / 255.0

    if test:
        idx_start, idx_end = config['test_set']
    else:
        idx_start, idx_end = config['train_set']

    X, y = shuffle(X, y, random_state=42)
    X = X[idx_start:idx_end]
    y = y[idx_start:idx_end]

    return {
        'images': X.reshape(-1, 28, 28),
        'data': X,
        'target': y,
        }
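
# Illustrative use of load() (a sketch): the config maps 'train_set' and
# 'test_set' to (start, end) index pairs; the values below assume the
# conventional 60,000/10,000 MNIST split and are not from the original code.
#
#   config = {'train_set': (0, 60000), 'test_set': (60000, 70000)}
#   train = load(config)             # first 60,000 shuffled samples
#   test = load(config, test=True)   # remaining 10,000
#   print(train['images'].shape, train['target'].shape)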
def run(data_path):
    print "Reading the dataset:", data_path
    mnist = fetch_mldata('MNIST original')
    mnist.data, mnist.target = shuffle(mnist.data, mnist.target)

    # Trunk the data
    n_train = 600
    n_test = 400

    # Define training and testing sets
    indices = arange(len(mnist.data))
    random.seed(0)
    train_idx = random.sample(indices, n_train)
    test_idx = random.sample(indices, n_test)
    X_train, y_train = mnist.data[train_idx], mnist.target[train_idx]
    X_test, y_test = mnist.data[test_idx], mnist.target[test_idx]

    # Apply a learning algorithm
    print "Applying a learning algorithm..."
    clf = RandomForestClassifier(n_estimators=10, n_jobs=1)
    clf.fit(X_train, y_train)

    # Make a prediction
    print "Making predictions..."
    y_pred = clf.predict(X_test)

    print y_pred

    # Evaluate the prediction
    print "Evaluating results..."
    print "Precision: \t", metrics.precision_score(y_test, y_pred)
    print "Recall: \t", metrics.recall_score(y_test, y_pred)
    print "F1 score: \t", metrics.f1_score(y_test, y_pred)
    print "Mean accuracy: \t", clf.score(X_test, y_test)
Example #9
def get_mnist(start=None, end=None, random=False, num=None):
    mnist = fetch_mldata('MNIST original', data_home='~/diss/mnist')
    if random and num is not None:
        idx = np.random.choice(range(mnist.data.shape[0]), num)
    elif start is not None and end is not None:
        idx = range(start, end)
    else:
        idx = range(mnist.data.shape[0])
    return mnist.data[idx], mnist.target[idx]
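
# Illustrative calls (a sketch; argument values are examples only):
#
#   X, y = get_mnist(start=0, end=1000)       # a contiguous slice
#   X, y = get_mnist(random=True, num=1000)   # a random sample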
def getdataset(datasetname, onehot_encode_strings=True):
    # load
    dataset = fetch_mldata(datasetname)
    # get X and y
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except:
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]
    if len(target.shape) > 1 and target.shape[1] > X.shape[1]:  # some mldata sets are mixed up...
        X = target
        target = dshape(dataset.data)
    if len(X.shape) == 1 or X.shape[1] <= 1:
        for k in dataset.keys():
            if k != 'data' and k != 'target' and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))
    # one-hot for categorical values
    if onehot_encode_strings:
        cat_ft = [i for i in range(X.shape[1]) if 'str' in str(
            type(unpack(X[0, i]))) or 'unicode' in str(type(unpack(X[0, i])))]
        if len(cat_ft):
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)
    # if sparse, make dense
    try:
        X = X.toarray()
    except:
        pass
    # convert y to monotonically increasing ints
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
Example #11
def main(description, gpu, output):
	logging.basicConfig(level=logging.INFO)

	logging.info('fetch MNIST dataset')
	mnist = fetch_mldata(description)
	mnist.data = mnist.data.astype(numpy.float32)
	mnist.data /= 255
	mnist.target = mnist.target.astype(numpy.int32)

	data_train, data_test, target_train, target_test = train_test_split(mnist.data, mnist.target)

	data = data_train, data_test
	target = target_train, target_test

	start_time = time.time()

	if gpu >= 0:
		cuda.check_cuda_available()
		cuda.get_device(gpu).use()
		logging.info("Using gpu device {}".format(gpu))
	else:
		logging.info("Not using gpu device")

	mlp = MLP(data=data, target=target, gpu=gpu)
	mlp.train_and_test(n_epoch=1)

	end_time = time.time()

	logging.info("time = {} min".format((end_time - start_time) / 60.0))
	logging.info('saving trained mlp into {}'.format(output))
	with open(output, 'wb') as fp:
		pickle.dump(mlp, fp)
Example #12
def get_mnist():
    np.random.seed(1234) # set seed for deterministic ordering
    mnist = fetch_mldata('MNIST original', data_home='../../data')
    p = np.random.permutation(mnist.data.shape[0])
    X = mnist.data[p].astype(np.float32)*0.02
    Y = mnist.target[p]
    return X, Y
Example #13
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    yeast = fetch_mldata('yeast')
    X = yeast['data']
    Y = yeast['target'].transpose().toarray()
    X_train = X[:2000, :]
    X_test = X[2000:, :]
    Y_train = Y[:2000, :]
    Y_test = Y[2000:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression(),
                            order=np.array([0, 2, 4, 6, 8, 10,
                                            12, 1, 3, 5, 7, 9,
                                            11, 13]))
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
Example #14
File: utils.py  Project: pajkossy/nn
def get_datasets():

    mnist = fetch_mldata('MNIST original')
    data = mnist['data']
    target = mnist['target']

    data = (data - data.mean(axis=0))
    std = data.std(axis=0)
    data[:, std > 0] /= std[std > 0]

    train_split = 60000
    output_size = 10

    train_ordered = data[:train_split]
    train_labels_ordered = target[:train_split]
    training_data = list(zip(train_ordered, train_labels_ordered))
    random.shuffle(training_data)
    train = np.array([p[0] for p in training_data])
    train_labels = np.array([p[1] for p in training_data])

    train_outs = np.array([one_hot(i, output_size)
                           for i in train_labels])
    test = data[train_split:]
    test_labels = target[train_split:]
    test_outs = np.array([one_hot(i, output_size)
                          for i in test_labels])
    return train, train_outs, test, test_outs
Example #15
def load_script(script_vars):
  def define(var_name, fun, overwrite=False):
    if var_name in script_vars and not overwrite:
      print('%s is already defined' % var_name)
      return script_vars[var_name]
    else:
      print('computing variables %s' % var_name)
      value = fun()
      script_vars[var_name] = value
      globals()[var_name] = value      
      return value
  
  custom_data_home="/home/stefan2/mnistdata"

  define('mnist', lambda: fetch_mldata('MNIST original', data_home=custom_data_home))
  data = mnist.data.astype(float) #[0:100,:] #convert to float
  labels = mnist.target #[0:100]
  n,m = data.shape
  print("num data points %s" % n)
  #run the method after successive othogonalization
  for j in range(0, 50):
    print("iteration: " + str(j))
    res = find_dominant_directions(data)
    plot_vector_png("pattern_" + str(j), res)
    for i in range(0, n):
      v = data[i,:]
      proj = np.reshape(v, (1, m)).dot(np.reshape(res, (m,1)))[0,0]
      data[i,:] = v - proj*res
Example #16
def load_dataset(randomize = False, overfit = False):
    mnist = fetch_mldata('mnist-original', data_home=DATA_DIR)
    data = mnist.data
    target = mnist.target
    data = data.reshape((-1, 28, 28))
    target = target.astype(np.uint8)

    operator_train = pickle.load(open(DATA_DIR + 'four_operators.pickle', 'rb'))
    operator_data = np.array([x[0].reshape((28, 28)) for x in operator_train])
    operator_target = np.array([y[1] for y in operator_train])

    # # # Overfitting meme
    if overfit == True:
        temp_data = operator_data
        temp_target = operator_target
        while operator_data.shape[0] < 20000:
            operator_data = np.concatenate((operator_data, temp_data))
            operator_target = np.concatenate((operator_target, temp_target))
    # # # Overfitting meme

    data = np.concatenate((data, operator_data))
    target = np.concatenate((target, operator_target))

    if randomize:
        print 'shuffling data'
        data, target = shuffle(data, target, random_state=0)

    target = target.astype(np.uint8)

    return data, target
Example #17
def get_mnist_data(data_home=None):
    """
    load data on your directry ~/scikit_learn_data/mldata/
    if data doesn't exist, it downloads the data from site.
    """
    mnist = fetch_mldata('MNIST original')
    return mnist
Example #18
def run():
    logging.info("Starting test")
    # cone_pipeline = Pipeline([('feature selection', SelectKBest(k=100)),
    #                           ('classification', ConeEstimator())])
    # cone_pipeline = Pipeline([('random PCA', RandomizedPCA(n_components=50)),
    #                           ('classification', ConeEstimator(3))])
    # classifiers = [DecisionTreeClassifier(),
    #                MultinomialNB(),
    #                LinearSVC(),
    #                ConeEstimator(10)]
    classifiers = [ConeEstimator(10)]
        #cone_pipeline]

    dataset = fetch_mldata('mnist-original')
    #dataset = fetch_mldata('sonar')
    print "Dataset size: ", len(dataset.data)
    print "Features: ", len(dataset.data[0])

    binary_map = np.vectorize(lambda x : 1 if x == 1 else 0)
    dataset.target = binary_map(dataset.target)

    for classifier in classifiers:
        method = ShuffleSplit(len(dataset.data), n_iterations = 1, train_size=400, test_size=400)
        result = cross_val_score(
            classifier, dataset.data,
            dataset.target,
            cv = method,
            score_func = f1_score)
        print classifier, result
    logging.info("Test complete")
Example #19
def load_pure_mnist():
    mnist = fetch_mldata('mnist-original', data_home=DATA_DIR)
    data = mnist.data
    target = mnist.target
    data = data.reshape((-1, 28, 28))
    target = target.astype(np.uint8)
    return data, target
Example #20
def MNIST():
    add_fit_and_score(RegularizedNet)
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')
    X = numpy.asarray(mnist.data, dtype='float32')
    #X = numpy.asarray(mnist.data, dtype='float64')
    if SCALE:
        #X = preprocessing.scale(X)
        X /= 255.
    y = numpy.asarray(mnist.target, dtype='int32')
    #y = numpy.asarray(mnist.target, dtype='int64')
    print("Total dataset size:")
    print("n samples: %d" % X.shape[0])
    print("n features: %d" % X.shape[1])
    print("n classes: %d" % len(set(y)))
    from sklearn import cross_validation, preprocessing
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=42)
    dnn=RegularizedNet(numpy_rng=numpy.random.RandomState(123), theano_rng=None, 
        n_ins=x_train.shape[1],
        layers_types=[ReLU, ReLU, LogisticRegression],
        layers_sizes=[200, 200],
        n_outs=10,
        rho=0.95, 
        eps=1.E-6,
        max_norm=0.,
        debugprint=False,
        L1_reg=0.,
        L2_reg=1./x_train.shape[0])#,
    dnn.fit(x_train, y_train, max_epochs=60, method='adadelta', verbose=True, plot=False)
    test_error = dnn.score(x_test, y_test)
    print("score: %f" % (1. - test_error))
Example #21
def load_banana():
    data = da.fetch_mldata("Banana IDA")
    x = data.data
    y = data.target
    x_test = x
    y_test = y
    return x, x_test, y, y_test
Example #22
def main():
    """TODO: Docstring for main.
    :returns: TODO

    """
    alpha = 1.
    decay = 0.0006
    iter_num = 600
    finetune_iter = 220
    hyper_params = {
            'hidden_layers_sizes':[196,], 'iter_nums':[400,],
            'alphas':[1.,], 'decays':[0.003,],
            'betas':[3,], 'rhos':[0.1,]
            }

    enc = OneHotEncoder(sparse=False)
    mnist = fetch_mldata('MNIST original', data_home='./')
    x_train, x_test, y_train, y_test = \
            train_test_split(scale(mnist.data.astype(float)).astype('float32'),
                             mnist.target.astype('float32'),
                             test_size=0.5, random_state=0)
    x_unlabeled = scale(mnist.data[mnist.target>=5,:].astype(float)).astype('float32')
    y_train = enc.fit_transform(y_train.reshape(y_train.shape[0],1)).astype('float32')

    t_x = T.matrix()
    params, extracted = pretrain_sae(x_unlabeled, hyper_params)
    extracted = function(inputs=[t_x], outputs=[sae_extract(t_x, params)])(x_train)[0]
    params.append(train_softmax(extracted, y_train, iter_num, alpha, decay))
    weights = finetune_sae(x_train, y_train, params, finetune_iter, alpha, decay)

    all_label = np.array(range(0, 10))
    pred = all_label[softmax2class_max(sae_predict(x_test, weights))]
    print accuracy_score(y_test, pred)
    print classification_report(y_test, pred)
    print confusion_matrix(y_test, pred)
Example #23
def run(data_path):
    print "Reading the dataset:", data_path

    ## http://continuum.io/blog/wiserf-use-cases-and-benchmarks

    mnist = fetch_mldata('MNIST original')

    # Define training and testing sets
    inds = arange(len(mnist.data))
    test_i = random.sample(xrange(len(inds)), int(0.1 * len(inds)))
    train_i = numpy.delete(inds, test_i)

    X_train = mnist.data[train_i].astype(numpy.double)
    y_train = mnist.target[train_i].astype(numpy.double)

    X_test = mnist.data[test_i].astype(numpy.double)
    y_test = mnist.target[test_i].astype(numpy.double)

    # Trunk the data
    X_digits, y_digits = shuffle(X_train, y_train)

    X_digits_train = X_digits[:1000]
    y_digits_train = y_digits[:1000]
    X_digits_valid = X_digits[1000:2000]
    y_digits_valid = y_digits[1000:2000]
    X_digits_test = X_digits[2000:3000]
    y_digits_test = y_digits[2000:3000]

    knn_digits = KNeighborsClassifier(n_neighbors=10)
    knn_digits.fit(X_digits_train, y_digits_train)
    print "KNN validation accuracy on MNIST digits: ",
    print knn_digits.score(X_digits_valid, y_digits_valid)
Example #24
def load(train_n, test_n):
    mnist = fetch_mldata('MNIST original', data_home='.')
    mnist.data = mnist.data.astype(np.float32) / 256.0
    mnist.target = mnist.target.astype(np.int32)
    N = len(mnist.data)

    order = np.random.permutation(N)

    train = {i: [] for i in range(10)}
    test = {i: [] for i in range(10)}

    train_m = math.ceil(train_n / 10)
    train_sum = 0

    test_m = math.ceil(test_n / 10)
    test_sum = 0

    for i in range(N):
        x = mnist.data[order[i]]
        y = mnist.target[order[i]]

        if train_sum < train_n and len(train[y]) < train_m:
            train[y].append(x)
            train_sum += 1

        if test_sum < test_n and len(test[y]) < test_m:
            test[y].append(x)
            test_sum += 1

    return train, test
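
# Illustrative use of load() (a sketch, not from the original snippet): the
# returned dicts map each digit class 0-9 to a list of images.
#
#   train, test = load(1000, 500)
#   print(len(train[0]), len(test[0]))   # samples collected for digit 0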
Example #25
def main():
    print '... get mnist data'
    mnist = fetch_mldata('MNIST original', data_home='.')

    fig, axes = plt.subplots(5, 3, figsize=(6, 8))

    data = mnist.data[[0, 7000, 14000, 21000, 28000]]

    print '... start training'
    for i, (axrow, img) in enumerate(zip(axes, data)):
        img = img.reshape(28, 28)
        img = (img >= 128).astype(int)

        corrupted = get_corrupted_input(img, 0.05)
        mrf = MRF(corrupted)

        if i == 0:
            axes[i][0].set_title('Original')
            axes[i][1].set_title('Noisy')
            axes[i][2].set_title('Denoised')
        axes[i][0].imshow(img, cmap=cm.Greys_r)
        axes[i][1].imshow(corrupted, cmap=cm.Greys_r)
        axes[i][2].imshow(mrf.denoised, cmap=cm.Greys_r)
        for ax in axrow:
            ax.xaxis.set_visible(False)
            ax.yaxis.set_visible(False)
    plt.show()
Example #26
File: dbn.py  Project: kirInFPGA/DBN
def make_data(N):
    print("fetch MNIST dataset")

    mnist = fetch_mldata('MNIST original',data_home='.')

    mnist.data = mnist.data.astype(np.float32)
    mnist.data /= 255

    mnist.target = mnist.target.astype(np.int32)

    # make y label
    mnist_target = np.zeros((mnist.target.shape[0],10))

    for index, num in enumerate(mnist.target):
        mnist_target[index][num] = 1.

    # print(mnist_target)
    # shuffle the samples
    index = random.sample(range(mnist.target.shape[0]), (mnist.target.shape[0]))
    tmp_target = [mnist_target[i] for i in index]
    tmp_data = [mnist.data[i] for i in index]
    # print("N : ", len(tmp_target))
    # print("tmp_target : ", tmp_target)

    x_train, x_test = np.split(tmp_data, [N])
    y_train, y_test = np.split(tmp_target, [N])

    return [x_train, x_test, y_train, y_test]
Example #27
def download__by_category():
    # mnist.data (70000, 784), mnist.target (70000, 1)
    mnist = fetch_mldata('MNIST original')
    # mnist.data = random.sample(mnist.data, 1000)
    # mnist.target = random.sample(mnist.target, 1000)
    trainX, trainY = mnist.data[:-10000], mnist.target[:-10000]
    testX, testY = mnist.data[-10000:], mnist.target[-10000:]
    if not exists('train'):
        os.makedirs('train')
    x = {i: [] for i in range(10)}
    for i in range(len(trainY)):
        x[trainY[i]].append(trainX[i])
    # pickle files must be opened in binary mode ('wb'), not 'w+'
    for i in range(10):
        cPickle.dump(x[i], open(join('train', '{}.pkl'.format(i)), 'wb'))
    if not exists('test'):
        os.makedirs('test')
    x = {i: [] for i in range(10)}
    for i in range(len(testY)):
        x[testY[i]].append(testX[i])
    for i in range(10):
        cPickle.dump(x[i], open(join('test', '{}.pkl'.format(i)), 'wb'))
Example #28
def prepare_dataset():
    print('load MNIST dataset')
    mnist = fetch_mldata('MNIST original')
    mnist['data'] = mnist['data'].astype(np.float32)
    mnist['data'] /= 255
    mnist['target'] = mnist['target'].astype(np.int32)
    return mnist
Example #29
def iris_binary():
    iris = fetch_mldata('iris')
    X = iris.data
    y = iris.target
    idx = y < 3  # only binary
    y[y == 2] = -1
    return X[idx, :], y[idx]
Example #30
def test_configs():    
    from sklearn import datasets
    from datetime import datetime
    import sys
    import os    
    import logging
    log = logging.getLogger()
    handler = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter('%(asctime)s %(levelname)s: %(message)s','%Y-%m-%d %H:%M:%S')
    handler.setFormatter(fmt)
    log.addHandler(handler)
    log.setLevel(logging.DEBUG)

    custom_data_home = os.getcwd() + '/sk_data'
    digits = datasets.fetch_mldata('MNIST original', data_home=custom_data_home)
    X = np.asarray(digits.data, 'float32')
    # images = [imresize(im.reshape(28, 28), (32, 32)) for im in X]
    # X = np.vstack([im.flatten() for im in images])
    # Binarize the pixel values; the original also divided by 256 afterwards,
    # which would undo the binarization, so that step is dropped here.
    X[X < 128] = 0
    X[X >= 128] = 1
    models = []
    for w_sigma in [.1, .5, 1, 2, 5]:
        for sparsity in [.001, .01, .05, .1, .5]:
            log.info('Building RBM_dl:\n  w_sigma=%s\n  sparsity=%s' %(w_sigma,sparsity,))
            model = ConvRBM((28, 28), 40, w_size=11, n_iter=3, verbose=True, w_sigma=w_sigma, sparsity=sparsity)
            model.fit(X)
            models.append({
                'model' : model,
                'w_sigma' : w_sigma,
                'sparsity' : sparsity,
            })
    log.info('Done')
    return models
Example #31
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fetch Data
mnist = fetch_mldata('MNIST original', data_home='data/mnist')

# Show parts of Image
# counter = 1
# for i in range(0,10):
#     for j in range(1,11):
#         plt.subplot(10,10,counter)
#         plt.imshow(mnist.data[i*7000+j].reshape(28,28),cmap=plt.cm.gray)
#         plt.axis('off')
#         counter += 1
# plt.show()

# Data
X, y = mnist.data, mnist.target
X = X / 255.0 * 2 - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11)
print(y_train)
# SVC
clf = SVC(kernel='rbf', C=3, gamma=0.01)
clf.fit(X_train[:10000], y_train[:10000])

# Prediction
predictions = clf.predict(X_test)
Example #32
random.seed(1)
np.random.seed(1)
NUM_USERS = 50

# Setup directory for train/test data
train_path = './data/train/all_data_0_niid_0_keep_10_train_9.json'
test_path = './data/test/all_data_0_niid_0_keep_10_test_9.json'
dir_path = os.path.dirname(train_path)
if not os.path.exists(dir_path):
    os.makedirs(dir_path)
dir_path = os.path.dirname(test_path)
if not os.path.exists(dir_path):
    os.makedirs(dir_path)

# Get MNIST data, normalize, and divide by level
mnist = fetch_mldata('MNIST original', data_home='./data')
mu = np.mean(mnist.data.astype(np.float32), 0)
sigma = np.std(mnist.data.astype(np.float32), 0)
mnist.data = (mnist.data.astype(np.float32) - mu) / (sigma + 0.001)
mnist_data = []
for i in trange(10):
    idx = mnist.target == i
    mnist_data.append(mnist.data[idx])

print("\nNumb samples of each label:\n", [len(v) for v in mnist_data])

###### CREATE USER DATA SPLIT #######
# Assign 100 samples to each user
X = [[] for _ in range(NUM_USERS)]
y = [[] for _ in range(NUM_USERS)]
idx = np.zeros(10, dtype=np.int64)
Example #33
    n_labels = y_.shape[1]
    mi = np.zeros((n_labels, n_labels))
    for i in xrange(n_labels):
        for j in xrange(n_labels):
            mi[i, j] = mutual_info_score(y_[:, i], y_[:, j])
    mst = minimum_spanning_tree(sparse.csr_matrix(-mi))
    edges = np.vstack(mst.nonzero()).T
    edges.sort(axis=1)
    return edges


dataset = "scene"
#dataset = "yeast"

if dataset == "yeast":
    yeast = fetch_mldata("yeast")

    X = yeast.data
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    y = yeast.target.toarray().astype(np.int).T

    X_train, X_test = X[:1500], X[1500:]
    y_train, y_test = y[:1500], y[1500:]

else:
    scene = load_scene()
    X_train, X_test = scene['X_train'], scene['X_test']
    y_train, y_test = scene['y_train'], scene['y_test']

n_labels = y_train.shape[1]
full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
                                       beta2=0.999)
    train_step = optimizer.minimize(loss)
    return train_step


def accuracy(y, t):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(t, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy


if __name__ == '__main__':
    '''
    Generate the data
    '''
    mnist = datasets.fetch_mldata('MNIST original', data_home='.')

    n = len(mnist.data)
    N = 30000  # use only a subset of MNIST
    N_train = 20000
    N_validation = 4000
    indices = np.random.permutation(range(n))[:N]  # randomly pick N samples

    X = mnist.data[indices]
    X = X / 255.0
    X = X - X.mean(axis=1).reshape(len(X), 1)
    y = mnist.target[indices]
    Y = np.eye(10)[y.astype(int)]  # convert to one-hot (1-of-K) encoding

    X_train, X_test, Y_train, Y_test = \
        train_test_split(X, Y, train_size=N_train)
Example #35
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import pydot, io
import time

#######################End imports###################################

####################Do not change anything below
# Load MNIST data. fetch_mldata will download the dataset and put it in a folder called mldata.
# Some things to be aware of:
#   The mldata folder is created in the directory from which you started the notebook,
#   so to make your life easy, always start the IPython notebook from the same folder;
#   otherwise the following code will re-download the MNIST data each time.
#   (A data_home workaround is sketched after this example.)
try:
    mnist = fetch_mldata("MNIST original")

except Exception as ex:
    import tensorflow.examples.tutorials.mnist.input_data as input_data

    m = input_data.read_data_sets("MNIST")
    data = np.concatenate((m.train.images, m.test.images))
    target = np.concatenate((m.train.labels, m.test.labels))

    class dataFrame:
        def __init__(self, data, target):
            self.data = data
            self.target = target

    mnist = dataFrame(data, target)
# mnist = fetch_mldata("MNIST original")
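
# One way to avoid repeated downloads (a sketch, not part of the original
# snippet) is to pin the cache directory explicitly via the data_home
# argument that fetch_mldata accepts:
#
#   mnist = fetch_mldata("MNIST original", data_home="./mldata_cache")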
Example #36
################################
# Set hyperparameters
################################

no_of_hidden_units = 200
learning_rate = 1
batch_size = 100

################################
# Prepare train and test sets
################################

# Fetching the dataset and performing minor normalization
# to help with training
print('Fetching MNIST dataset. Please wait...\n')
dataset = fetch_mldata('MNIST original', data_home='datasets')
dataset.data = dataset.data / 255

# Shuffling the ids to prepare for creation of train and test sets
ids = np.arange(len(dataset.data))
np.random.shuffle(ids)

# The full dataset consists of 70000 labelled examples.
# We will use 60000 examples for training and 10000 for our test set.
n_rows_train = 60000
n_rows_test = len(dataset.target) - n_rows_train
data_train = np.c_[np.ones((n_rows_train, 1)),
                   dataset.data[ids[:n_rows_train], :]]
targets_train = np.zeros((n_rows_train, 10))
targets_train[np.arange(n_rows_train),
              dataset.target[ids[:n_rows_train]].astype(int)] = 1
#9.	Load the MNIST dataset (introduced in Chapter 3) and split it into a training set and a test set (take the first 60,000
#instances for training, and the remaining 10,000 for testing). Train a Random Forest classifier on the dataset and time how long it
#takes, then evaluate the resulting model on the test set. Next, use PCA to reduce the dataset’s dimensionality, with an explained
#variance ratio of 95%. Train a new Random Forest classifier on the reduced dataset and see how long it takes. Was training much
#faster? Next evaluate the classifier on the test set: how does it compare to the previous classifier?
import time
start = time.time()

from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')  #70,000 numbers between 0-9

X, y = mnist['data'], mnist[
    'target']  #Rows are intances, Columns are features (784 features=28x28pixeles). Each pixel (0:white -> 255:black)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=60000,
                                                    random_state=42)

from sklearn.ensemble import RandomForestClassifier
rnd_clf = RandomForestClassifier(random_state=42)  # baseline Random Forest classifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
reduced_clf = Pipeline([('pca', PCA(n_components=0.95)),
                        ('rnd_reduced_clf',
                         RandomForestClassifier(random_state=42))])
from sklearn.metrics import accuracy_score

for clf in (rnd_clf, reduced_clf):  #Final performance on the test set
    start = time.time()
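    # The snippet is truncated here; a plausible continuation (a sketch, not
    # the original code) would fit each classifier, time the training, and
    # report test-set accuracy:
    clf.fit(X_train, y_train)
    elapsed = time.time() - start
    y_pred = clf.predict(X_test)
    print("%s: training time %.1fs, accuracy %.4f"
          % (clf.__class__.__name__, elapsed, accuracy_score(y_test, y_pred)))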
Example #38
from sklearn import datasets
from neupy import algorithms, layers, environment

environment.reproducible()
theano.config.floatX = 'float32'


def reduce_dimension(network, data):
    """ Function minimize input data dimention using
    pre-trained autoencoder.
    """
    minimized_data = network.input_layer.output(data)
    return minimized_data.eval()


mnist = datasets.fetch_mldata('MNIST original')

data = mnist.data / 255.
features_mean = data.mean(axis=0)
data = (data - features_mean).astype(np.float32)

np.random.shuffle(data)
x_train, x_test = data[:60000], data[60000:]

autoencoder = algorithms.Momentum(
    [
        layers.Dropout(proba=0.5),
        layers.Sigmoid(784),
        layers.Sigmoid(100),
        layers.Output(784),
    ],
from sklearn.datasets import fetch_mldata
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,
                              AdaBoostClassifier)
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.utils import shuffle

import time

import matplotlib.pyplot as plt
import numpy as np

mnist = fetch_mldata(
    'MNIST original',
    data_home='/Users/henryliu/mspc/ml_dev/ml_quantitative/data')

markers = ['o', '*', 's', 'd', 'D', '8', '.']
start0 = time.time()
data, target = shuffle(mnist.data / 255, mnist.target, random_state=0)
n = data.shape[0]
#n = 10000
X, y = data[0:n], target[0:n]

classifiers = [("ada_boost_10",
                AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy',
                                                          splitter='best'),
                                   n_estimators=10)),
               ("ada_boost_50",
                AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy',
Example #40
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 17:56:23 2018

@author: TIM
"""

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.neural_network import MLPClassifier
mnist = fetch_mldata("MNIST")
# rescale the data, use the traditional train/test split
X, y = mnist.data / 255., mnist.target
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

mlp = MLPClassifier(hidden_layer_sizes=(100, 100),
                    max_iter=400,
                    alpha=1e-4,
                    solver='sgd',
                    verbose=10,
                    tol=1e-4,
                    random_state=1)
#mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
#solver='sgd', verbose=10, tol=1e-4, random_state=1,
#learning_rate_init=.1)
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))
fig, axes = plt.subplots(4, 4)
# use global min / max to ensure all weights are shown on the same scale
def download():
    mnist = fetch_mldata('MNIST original')
    X = mnist.data.astype('float64')
    y = mnist.target
    print ('MNIST:', X.shape, y.shape)
    return (X, y)
Example #42
from os import path

import numpy as np
import numpy.random as rand
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.cross_validation import train_test_split

__doc__ = "See newcomparison.m"

l1_ratio = 0.5
k_fold = 10
test_frac = 0.5
data_root = path.expanduser('~/data')

# Load MNIST data
mnist = fetch_mldata('MNIST original', data_home=data_root)
X = mnist.data
y = mnist.target

# Split into train/test_frac
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=test_frac,
                                                    random_state=0)

# Construct and fit model
en = ElasticNetCV(cv=k_fold, n_jobs=-1, random_state=0)
en.fit(X_train, y_train)

# Evaluate performance
y_pred = np.round(en.predict(X_test))
Example #43
def set_datasets(data="mnist", is_one_hot=True, is_normalize=True, **kwarg):
    data_home = "/".join(__file__.split("/")[:-1]) + "/data_dir_for_optimizer"
    if data == "mnist":
        data_dic = fetch_mldata('MNIST original', data_home=data_home)
        if is_one_hot == True:
            idx = data_dic["target"]
            num = int(idx.max() + 1)
            arr = np.zeros((idx.shape[0], num)).flatten()
            arr[idx.flatten().astype(int) + np.arange(idx.shape[0]) * num] = 1
            data_dic["target"] = arr.reshape(idx.size, num)
        if is_normalize == True:
            data_dic["data"] = data_dic["data"] / 255
    elif data == "boston":
        data_dic = load_boston()
        if is_normalize == True:
            data_dic["data"] = data_dic["data"] / data_dic["data"].max(axis=0)
    elif data == "digits":
        data_dic = load_digits()
    elif data == "iris":
        data_dic = load_iris()
        if is_one_hot == True:
            data_dic["target"] = gen_one_hot(data_dic["target"])
        if is_normalize == True:
            data_dic["data"] = data_dic["data"] / data_dic["data"].max(axis=0)
    elif data == "linnerud":
        data_dic = load_linnerud()
    elif data == "wine":
        arr = np.loadtxt(data_home + "/wine.csv", delimiter=",", skiprows=1)
        data_dic = {"data": arr[:, :-1], "target": arr[:, -1]}
        if is_one_hot == True:
            data_dic["target"] = gen_one_hot(data_dic["target"])
        if is_normalize == True:
            data_dic["data"] = data_dic["data"] / data_dic["data"].max(axis=0)
    elif data == "xor":
        data_dic = {
            "data": np.array([[0, 0], [0, 1], [1, 0],
                              [1, 1]]),  ##.repeat(20, axis=0),
            "target": np.array([0, 1, 1, 0])
        }  #.repeat(20, axis=0)}
    elif data == "serial":
        data_dic = {
            "data": np.array(np.arange(20).reshape(5, 4)).repeat(20, axis=0),
            "target": np.arange(5).repeat(20, axis=0)
        }
# elif data == "sin":
#     data_dic = {"data": np.arange(0,10,0.01)[:,None],
#                     "target": np.sin(np.arange(0,10,0.01) * np.pi)}
#
# elif data == "sin":
#
#     data_dic = {"data": np.arange(0,10,0.01)[:,None],
#                     "target": np.sin(np.arange(0,10,0.01) * np.pi)}
    elif data == "sin":
        v = np.sin(np.pi * np.arange(1000) / 100)
        if not "data_length" in kwarg:
            data_length = 100
        else:
            data_length = kwarg["data_length"]

        if not "predict_length" in kwarg:
            predict_length = 1
        else:
            data_length = kwarg["data_length"]

        x, y, xidx, yidx = gen_time_series(v, data_length, predict_length)
        data_dic = {"data": x, "target": y}
    elif data == "decay":
        v = np.sin(np.pi * np.arange(10000) / np.arange(1, 10001)[::-1] *
                   10) * np.arange(10000)[::-1]
        v = v[:-1000]
        x, y, xidx, yidx = gen_time_series(v, 10, 1)
        data_dic = {"data": x, "target": y}

    if "data_only" in kwarg:
        data_dic["target"] = data_dic["data"]

    return data_dic["data"], data_dic["target"]
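
# Illustrative calls (a sketch; argument values are assumptions, not taken
# from the original project):
#
#   X, y = set_datasets("mnist")                   # one-hot targets, data scaled to [0, 1]
#   X, y = set_datasets("iris", is_one_hot=False)
#   X, y = set_datasets("sin", data_length=50)     # simple time-series windows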
Example #44
import numpy
from sklearn.utils import shuffle
from sklearn.datasets import fetch_mldata
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB

from sklearn.preprocessing import StandardScaler

from machine_learning import MyGaussian

print("Loading ... ")

mnist = fetch_mldata('MNIST original')  #, data_home='data' )

X = mnist.data
Y = mnist.target

print("Transforming  ... ")

new_X = X
#new_X = numpy.ndarray( [ len(X), 56 ] )
##for n in range(len(X)):
##    sample = X[n].reshape(28,28)
##    for i in range(28): new_X[n,i] = sample[i,:].sum()
##    for j in range(28): new_X[n,28+j] = sample[:,j].sum()
###    new_X[n] = new_X[n] / new_X[n].sum()
#X2=X.reshape( len(X), 28, 28 )
#new_X[:,:28] = X2.sum(axis=1)
#new_X[:,28:] = X2.sum(axis=2)
#new_X[:,:] = new_X[:,:] / new_X.sum(axis=1)[:,numpy.newaxis]
Example #45
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy as np
import argparse

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-o", "--output", required=True,
        help="path to the output loss/accuracy plot")
args = vars(ap.parse_args())

# grab the MNIST dataset (if this is your first time running this
# script, the download may take a minute -- the 55MB MNIST dataset
# will be downloaded)
print("[INFO] loading MNIST (full) dataset...")
dataset = datasets.fetch_mldata("MNIST Original")

# scale the raw pixel intensities to the range [0, 1.0], then 
# construct the training and testing splits
data = dataset.data.astype("float") / 255.0
(trainX, testX, trainY, testY) = train_test_split(data,
        dataset.target, test_size=0.25)

# convert the labels from integers to vectors
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.fit_transform(testY)

# define the 784-256-128-10 architecture using Keras
model = Sequential()
model.add(Dense(256, input_shape=(784,), activation='sigmoid'))
Example #46
File: q2.py  Project: Magneseus/4107
import numpy as np
import random
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import scikitplot as skplt

# Get the MNIST data, if not already there
DATA_DIR = "./"
mnist = fetch_mldata('MNIST original', data_home=DATA_DIR)

# Initiate the random generator
rnd = random.Random()

# Separate the mnist data into two arrays
mnist_train_1 = np.array(
    [mnist.data[i] for i in range(60000) if mnist.target[i] == 1.0])
mnist_train_5 = np.array(
    [mnist.data[i] for i in range(60000) if mnist.target[i] == 5.0])
mnist_test_1 = np.array(
    [mnist.data[i] for i in range(60000, 70000) if mnist.target[i] == 1.0])
mnist_test_5 = np.array(
    [mnist.data[i] for i in range(60000, 70000) if mnist.target[i] == 5.0])

x = np.concatenate((mnist_train_1, mnist_train_5))


def kmeans(data, k, lam=0.001, lam_dec=0.00005):
    # Pick some centroids
    centroids = np.array([np.copy(data[rnd.randint(0, data.shape[0] - 1)])])
Example #47
from sklearn import datasets, svm, metrics
from sklearn.datasets import fetch_mldata
from sklearn.externals import joblib

MODEL_PATH = 'mnist_svm_model_full.pkl'

mnist = fetch_mldata('MNIST original', data_home='./scikit_learn_data')
X_data = mnist.data / 255.0
Y = mnist.target
# print('svm')
classifier = svm.SVC(C=5, gamma=0.05)
classifier.fit(X_data, Y)
joblib.dump(classifier, MODEL_PATH, compress=3)
Example #48
    parameters, losses, test_losses = \
        StochasticMLP(X, y, layer_dims, 'multiclass', X_test, y_test, optimizer, lr, batch_size,
                  beta1, beta2, eps, num_epochs, print_loss, add_del, reg_param,
                  delta,prob,epsilon,max_hidden_size,tau)
    
    return parameters, losses, test_losses

if __name__ == '__main__':
#    data_size = 7
#    num_features = 10
#    num_classes = 3
#    
#    X_train = 10.*np.random.rand(num_features,data_size)
#    y_train = np.array([[1,0,0],[0,1,0],[0,0,1],[1,0,0],[0,1,0],[0,0,1],[1,0,0]]).T

    mnist = fetch_mldata('MNIST original', data_home=os.getcwd())    
    X = mnist.data.astype(np.float32) / 255.
    y_orig = mnist.target
    # one-hot encode the labels y_orig: i=0,...,9 --> [0,...,1,...,0]
    y = pd.get_dummies(y_orig).values.astype(np.float32)
    
#    pca = PCA(n_components=324)
#    pca.fit(X)
#    X_pca = pca.transform(X)
    
    X,y = shuffle(X,y)
    
    down_sample = 5000
    X_ds = X[:down_sample,:]
    y_ds = y[:down_sample,:]
    
Example #49
wavelet = 'db5'
level = 1
psi = np.load('./wavelet_mat/{}_{}.npz'.format(wavelet, level))['psi']
def_params = dict(rho = rho,
                  psi = psi)


# L-inf attack budget, corresponding to images in the range [-1, 1]
epsilon = 0.2
proj_iter = False # Change to True to run attack with iterated projections

# Read MNIST data
digit_1 = 3
digit_2 = 7
fetch_mnist()
mnist = datasets.fetch_mldata("MNIST original")
digit_1_data = 2.0*mnist.data[mnist.target==digit_1]/255.0 - 1.0
digit_2_data = 2.0*mnist.data[mnist.target==digit_2]/255.0 - 1.0
data = np.vstack([digit_1_data, digit_2_data])  
labels = np.hstack([np.repeat(digit_1, digit_1_data.shape[0]), np.repeat(digit_2, digit_2_data.shape[0])])  
data, labels = utils.shuffle(data, labels, random_state=1234)
x_train, x_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=0.25, random_state=1234, stratify=labels)
num_test = x_test.shape[0]

print("\n*************************************")
print("{:} vs. {:} classification via linear SVM".format(digit_1,digit_2))
print("*************************************")
print("Attacks use epsilon = {:.2f} \nImages are in the range [-1, 1]\n".format(epsilon))
print("**********")
print("No defense")
print("**********")
Example #50
if len(sys.argv) == 1:
    print(
        "ERROR: Please specify implementation to benchmark, 'sknn' or 'nolearn'."
    )
    sys.exit(-1)

np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)

from sklearn.base import clone
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.metrics import classification_report

mnist = fetch_mldata('mnist-original')
X_train, X_test, y_train, y_test = train_test_split(
    (mnist.data / 255.0).astype(np.float32),
    mnist.target.astype(np.int32),
    test_size=1.0 / 7.0,
    random_state=1234)

classifiers = []

if 'sknn' in sys.argv:
    from sknn.platform import gpu32
    from sknn.mlp import Classifier, Layer, Convolution

    clf = Classifier(
        layers=[
            # Convolution("Rectifier", channels=10, pool_shape=(2,2), kernel_shape=(3, 3)),
Example #51
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix, classification_report

import renom as rm
from renom.cuda import cuda
from renom.optimizer import Sgd, Adam
from renom.core import DEBUG_NODE_STAT, DEBUG_GRAPH_INIT, DEBUG_NODE_GRAPH
from renom.operation import sum

DEBUG_GRAPH_INIT(True)

np.random.seed(10)

cuda.set_cuda_active(True)

mnist = fetch_mldata('MNIST original', data_home="dataset")

X = mnist.data
y = mnist.target

X = X.astype(np.float32)
X /= X.max()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
labels_train = LabelBinarizer().fit_transform(y_train).astype(np.float32)
labels_test = LabelBinarizer().fit_transform(y_test).astype(np.float32)


class MNist(rm.Model):
    def __init__(self):
        super(MNist, self).__init__()
Example #52
# Import the modules
import os
from sklearn.externals import joblib
from sklearn import datasets
from skimage.feature import hog
from sklearn.svm import LinearSVC
import numpy as np
from collections import Counter
from sklearn_porter import Porter
from sklearn.model_selection import train_test_split
import cv2

# Load the dataset
custom_data_home = r'D:\Christian-Data\Proyectos\Python\data'
dataset = datasets.fetch_mldata('MNIST original', data_home=custom_data_home)
# Extract the features and labels
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')
list_hog_fd = []
for feature in features:
    fd = hog(feature.reshape((28, 28)),
             orientations=9,
             pixels_per_cell=(14, 14),
             cells_per_block=(1, 1),
             visualise=False)
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')
print("Features inicial:" + str(len(hog_features)))
print("Elementos inicial :" + str(labels.size))

Example #53
# Import the modules
from sklearn.externals import joblib
from sklearn import datasets
from skimage.feature import hog
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from collections import Counter
from sklearn.model_selection import GridSearchCV
# Load the dataset
dataset = datasets.fetch_mldata("MNIST Original",
                                data_home='/home/sahil/virtualenvs/ALPR/')

# Extract the features and labels
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')

# Extract the hog features
list_hog_fd = []
for feature in features:
    fd = hog(feature.reshape((28, 28)),
             orientations=9,
             pixels_per_cell=(14, 14),
             cells_per_block=(1, 1),
             visualise=False)
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')

print("Count of digits in dataset", Counter(labels))

# Create a linear SVM object
Example #54
def get_MNIST():
    """Returns a (name, data, target) tuple of the MNIST dataset (70 000 items)"""
    mnist = fetch_mldata('MNIST original', data_home='./data/')
    return ('MNIST', pd.DataFrame(mnist.data / 255.), pd.DataFrame(mnist.target))
Example #55
def get_mnist_data():
    mnist = fetch_mldata('MNIST Original')
    X = mnist['data']
    y = mnist['target']
    return random_split(X, y, ratio=0.2)
Example #56
from sklearn import datasets
import numpy as np
from sklearn.svm import LinearSVC
from skimage.feature import hog
from sklearn.externals import joblib
from collections import Counter

dataset = datasets.fetch_mldata('MNIST Original')
features = np.array(dataset.data, 'int16')
labels = np.array(dataset.target, 'int')

list_hog_fd = []
for feature in features:
    fd = hog(feature.reshape((28, 28)),
             orientations=9,
             pixels_per_cell=(14, 14),
             cells_per_block=(1, 1))
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')
print('done with count', Counter(labels))

for i in range(1, 2, 1):
    clf = LinearSVC(C=3.0, max_iter=5000, random_state=1, tol=1e-5)
    clf.fit(hog_features, labels)

    joblib.dump(clf,
                'C:\\Users\\tusha\\Desktop\\ocrclf{}.pkl'.format(i),
                compress=3)
    print('{} done'.format(i))
Example #57
import numpy as np

from matplotlib import pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels \
    import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
from sklearn.datasets import fetch_mldata

data = fetch_mldata('mauna-loa-atmospheric-co2').data
X = data[:, [1]]
y = data[:, 0]

# Kernel with parameters given in GPML book
k1 = 66.0**2 * RBF(length_scale=67.0)  # long term smooth rising trend
k2 = 2.4**2 * RBF(length_scale=90.0) \
    * ExpSineSquared(length_scale=1.3, periodicity=1.0)  # seasonal component
# medium term irregularity
k3 = 0.66**2 \
    * RationalQuadratic(length_scale=1.2, alpha=0.78)
k4 = 0.18**2 * RBF(length_scale=0.134) \
    + WhiteKernel(noise_level=0.19**2)  # noise terms
kernel_gpml = k1 + k2 + k3 + k4

gp = GaussianProcessRegressor(kernel=kernel_gpml,
                              alpha=0,
                              optimizer=None,
                              normalize_y=True)
gp.fit(X, y)

print("GPML kernel: %s" % gp.kernel_)
Example #58
    def train(self, n_epochs, batch_size=128, save_interval=50):

        mnist = fetch_mldata('MNIST original')

        X = mnist.data
        y = mnist.target

        # Rescale [-1, 1]
        X = (X.astype(np.float32) - 127.5) / 127.5

        half_batch = int(batch_size / 2)

        for epoch in range(n_epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            self.discriminator.set_trainable(True)

            # Select a random half batch of images
            idx = np.random.randint(0, X.shape[0], half_batch)
            imgs = X[idx]

            # Sample noise to use as generator input
            noise = np.random.normal(0, 1, (half_batch, self.latent_dim))

            # Generate a half batch of images
            gen_imgs = self.generator.predict(noise)

            # Valid = [1, 0], Fake = [0, 1]
            valid = np.concatenate((np.ones(
                (half_batch, 1)), np.zeros((half_batch, 1))),
                                   axis=1)
            fake = np.concatenate((np.zeros(
                (half_batch, 1)), np.ones((half_batch, 1))),
                                  axis=1)

            # Train the discriminator
            d_loss_real, d_acc_real = self.discriminator.train_on_batch(
                imgs, valid)
            d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(
                gen_imgs, fake)
            d_loss = 0.5 * (d_loss_real + d_loss_fake)
            d_acc = 0.5 * (d_acc_real + d_acc_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # We only want to train the generator for the combined model
            self.discriminator.set_trainable(False)

            # Sample noise and use as generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # The generator wants the discriminator to label the generated samples as valid
            valid = np.concatenate((np.ones(
                (batch_size, 1)), np.zeros((batch_size, 1))),
                                   axis=1)

            # Train the generator
            g_loss, g_acc = self.combined.train_on_batch(noise, valid)

            # Display the progress
            print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" %
                  (epoch, d_loss, 100 * d_acc, g_loss, 100 * g_acc))

            # If at save interval => save generated image samples
            if epoch % save_interval == 0:
                self.save_imgs(epoch)
Example #59
# -*- coding: utf-8 -*-
from sklearn.datasets import fetch_mldata
from matplotlib.pyplot import *
from numpy import *

mnist = fetch_mldata('MNIST original')
data = array(mnist.data != 0, dtype=bool)  # binarize

# Display 15 randomly chosen samples
N = len(data)
choice = random.choice(arange(N), 15)
figure(figsize=(18, 8))
gray()
for i in range(15):
    subplot(3, 5, i + 1)
    imshow(data[choice[i]].reshape(28, 28), interpolation='none')
savefig('fig19-2.png')
Example #60
import argparse

import numpy as np

from cnn.neural_network import CNN
from keras.utils import np_utils
from keras.optimizers import SGD
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split

# Parse the Arguments
ap = argparse.ArgumentParser()
ap.add_argument("-s", "--save_model", type=int, default=-1)
ap.add_argument("-l", "--load_model", type=int, default=-1)
ap.add_argument("-w", "--save_weights", type=str)
args = vars(ap.parse_args())

# Read/Download MNIST Dataset
print('Loading MNIST Dataset...')
dataset = fetch_mldata('MNIST Original')

# Read the MNIST data as array of 784 pixels and convert to 28x28 image matrix
mnist_data = dataset.data.reshape((dataset.data.shape[0], 28, 28))
mnist_data = mnist_data[:, np.newaxis, :, :]

# Divide data into testing and training sets.
train_img, test_img, train_labels, test_labels = train_test_split(
    mnist_data / 255.0, dataset.target.astype("int"), test_size=0.1)

# Now each image rows and columns are of 28x28 matrix type.
img_rows, img_columns = 28, 28

# Transform training and testing data to 10 classes in range [0,classes] ; num. of classes = 0 to 9 = 10 classes
total_classes = 10  # 0 to 9 labels
train_labels = np_utils.to_categorical(train_labels, 10)