Example #1
import numpy as np

def read_table(path, header=0, index_col=0, delimiter=','):
    # names=True makes genfromtxt read the header row into data.dtype.names
    data = np.genfromtxt(path,
                         delimiter=delimiter,
                         names=header is not None,
                         dtype=object)

    columns = data.dtype.names
Example #2
import requests

def predict_price(area) -> float:
    """
    This method must accept as input an array `area` (represents a list of areas sizes in sq feet) and must return the respective predicted prices (price per sq foot) using the linear regression model that you build.

    You can run this program from the command line using `python3 regression.py`.
    """
    response = requests.get(TRAIN_DATA_URL)
    # YOUR IMPLEMENTATION HERE
    import numpy as np
    train_data = np.genfromtxt("linreg_train.csv", delimiter=",")
    X = np.array(train_data[0, 1:])  # areas
    Y = np.array(train_data[1, 1:])  # prices
    x_mean = np.mean(X)
    y_mean = np.mean(Y)

    size = np.size(X)
    # least-squares slope (a) and intercept (b)
    cross = np.sum(Y * X) - size * y_mean * x_mean
    T = np.sum(X * X) - size * x_mean * x_mean

    a = cross / T
    b = y_mean - a * x_mean
    print(a, b)

    for i in range(len(X)):
        print(X[i], a * X[i] + b, Y[i])

    return a * area + b
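# Hypothetical usage of predict_price (the areas below are made-up values):
# predicted = predict_price(np.array([1000.0, 1500.0, 2000.0]))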
Example #3
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

def load_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(3, activation='relu'))
    model.add(layers.Dense(3, activation='relu'))
    model.add(layers.Dense(2, activation='softmax'))
    training = np.genfromtxt('training.csv', delimiter=",")
    labels = np.genfromtxt('labels.csv', delimiter=",")
    model.compile(optimizer=tf.train.AdamOptimizer(0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # epochs=0 builds the model's weights without training, so the saved
    # weights can be loaded into a matching architecture
    model.fit(training, labels, epochs=0)
    model.load_weights('pubg_model_weights.h5')
    return model
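# Hypothetical usage (assumes feature rows shaped like training.csv):
# model = load_model()
# probabilities = model.predict(features)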
Example #4
import numpy as np
import scanpy as sc

def myexample():
    """
    Template for preprocessing function. Use copy and paste.

    Returns
    -------
    adata : AnnData
        Stores data matrix and sample and variable annotations as well
        as an arbitrary amount of unstructured annotation. For the latter
        it behaves like a Python dictionary.
    """
    # Generate an AnnData object, which is similar
    # to R's ExpressionSet (Huber et al., Nat. Meth. 2015)
    # AnnData allows annotation of samples/cells and variables/genes via
    # the attributes "smp" and "var"
    path_to_data = 'data/myexample/'
    adata = sc.read(path_to_data + 'myexample.csv')
    # other data reading examples
    #adata = sc.read(path_to_data + 'myexample.txt')
    #adata = sc.read(path_to_data + 'myexample.h5', sheet='mysheet')
    #adata = sc.read(path_to_data + 'myexample.xlsx', sheet='mysheet')
    #adata = sc.read(path_to_data + 'myexample.txt.gz')
    #adata = sc.read(path_to_data + 'myexample.soft.gz')
    # if the first column does not store strings, rownames are not detected
    # automatically, hence
    #adata = sc.read(path_to_data + 'myexample.csv', first_column_names=True)

    # transpose if needed to match the convention that rows store samples/cells
    # and columns variables/genes
    # adata = adata.transpose() # rows = samples/cells & columns = variables/genes

    # read some annotation from a file; now we want strings, not a numerical
    # data matrix. The following reads from the first column of the file.
    groups = np.genfromtxt(path_to_data + 'mygroups.csv', dtype=str)
    adata.smp['groups'] = groups[:, 0]
    # or alternatively, when you want to be smart about row and column annotation
    # dgroups = sc.read(path_to_data + 'mygroups.csv', as_strings=True, return_dict=True)
    # adata.smp['groups'] = dgroups['X'][:, 0]

    # as with a dict, you can add arbitrary additional data to an AnnData object
    # for example, DPT needs the expression vector of a root cell
    adata['xroot'] = adata.X[336]
    return adata
Example #5
import subprocess
import shlex
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # enables the 3d projection

# Create the figure
fig = plt.figure()
point1 = np.array([1, 1, 0])
normal = np.array([1, -6, 2])
point2 = np.array([1, 2, 3])
point3 = np.array([3, 1, 1])
# a plane is a*x+b*y+c*z+d=0
# [a,b,c] is the normal. Thus, we have to calculate
# d and we're set
d = -point1.dot(normal)
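# Sanity check (not in the original): point1 lies on the plane, so it
# satisfies a*x + b*y + c*z + d = 0.
assert np.isclose(point1.dot(normal) + d, 0)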
# create x,y
xx = np.genfromtxt("meshX.dat")
yy = np.genfromtxt("meshY.dat")
z = np.genfromtxt("meshZ.dat")
# plot the surface
plt3d = fig.add_subplot(111, projection='3d')
plt3d.plot_surface(xx, yy, z, alpha=0.6)
#defining lines : x(k) = A + k*l
A1 = np.array([1, 1, 0]).reshape((3, 1))
l1 = np.array([2, 1, 4]).reshape((3, 1))
A2 = np.array([1, 1, 0]).reshape((3, 1))
l2 = np.array([0, 1, 3]).reshape((3, 1))
#defining point of intersection
P = np.array([1, 1, 0])
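# line_dir_pt is not defined in this snippet; a minimal sketch consistent with
# the comment "x(k) = A + k*l" above, returning one point per column:
def line_dir_pt(l, A, k=np.linspace(0, 1, 10)):
    return A + k * l  # (3,1) + (10,)*(3,1) broadcasts to a (3,10) array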
#generating points in line
l1_p = line_dir_pt(l1, A1)
l2_p = line_dir_pt(l2, A2)
Example #6
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 29 02:50:53 2018

@author: Student
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn import tree
import pydot
from io import StringIO
import os

input_labels = [["buying", ["vhigh", "high", "med", "low"]],
                ["maint", ["vhigh", "high", "med", "low"]],
                ["doors", ["2", "3", "4", "5more"]],
                ["persons", ["2", "4", "more"]],
                ["lug_boot", ["small", "med", "big"]],
                ["safety", ["low", "med", "high"]]]

class_names = ["unacc", "acc", "good", "vgood"]

data = np.genfromtxt(os.path.join('data', 'car.data'),
                     delimiter=',',
                     dtype='U')
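# A minimal sketch, not in the original snippet: encode the string-valued
# feature columns of `data` with LabelEncoder, one encoder per feature
# declared in input_labels.
encoded = np.empty((data.shape[0], len(input_labels)), dtype=int)
for col, (name, levels) in enumerate(input_labels):
    encoded[:, col] = LabelEncoder().fit(levels).transform(data[:, col])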
Example #7
# Fragment of a PySpark MLlib script; it assumes a SparkContext `sc`, a
# `dataPath` variable, and MLUtils/DecisionTree from pyspark.mllib in scope.
        sc.stop()
        usage()
    points = MLUtils.loadLibSVMFile(sc, dataPath)

    # Re-index class labels if needed.
    (reindexedData, origToNewLabels) = reindexClassLabels(points)
    numClasses = len(origToNewLabels)
    # Train a classifier.
    categoricalFeaturesInfo = {}  # no categorical features
    model = DecisionTree.trainClassifier(reindexedData, numClasses=numClasses,
                                         categoricalFeaturesInfo=categoricalFeaturesInfo)
    # Print learned tree and stats.
    print(origToNewLabels)
    print("Trained DecisionTree for classification:")
    print("  Model numNodes: %d" % model.numNodes())
    print("  Model depth: %d" % model.depth())
    print("  Training accuracy: %g" % getAccuracy(model, reindexedData))
    if model.numNodes() < 20:
        print(model.toDebugString())
    else:
        print(model)
#    testdata = MLUtils.loadLibSVMFile(sc, 'test_svm2')
    data = numpy.genfromtxt('test_svm2', delimiter=',')
    rdd = sc.parallelize(data)
    model.predict(rdd).collect()
    #predictions =  model.predict(testdata.map(lambda x: x.features))
    #origToNewLabels.saveAsTextFile('dictionarymap')
    #print predictions
    sc.stop()
Example #8
# You can initialize your algorithms arbitrarily. We recommend that you initialize the K-means centroids by
# randomly selecting 5 data points. For the EM-GMM, we also recommend you initialize the mean vectors in the
# same way, and initialize pi to be the uniform distribution and each Sigma_k to be the identity matrix
# (a sketch of this initialization follows the KMeans stub below).
##############################################################################################################


import sys
import numpy as np

def load_data(input_file):
    """
    Loads the dataset. It assumes a *.csv file without a header, with the output
    variable in the last column.
    """
    data = np.genfromtxt(input_file, delimiter=',', skip_header=0, names=None)
    return data

def KMeans(X, K=5, maxit=10, saveLog=True):
    """
    Apply KMeans for clustering a dataset given as input, with the number of clusters (K).
    Input: x1, ..., xn where x in R^d, and K
    Output: Vector c of cluster assignments, and K mean vectors mu
    """
    # sample size
    N = X.shape[0]

    # Initialize output variables
    c = np.zeros(N)
    # centroids: K randomly selected data points, as recommended above
    mu = X[np.random.choice(N, K, replace=False), :]
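# A minimal sketch, not in the original snippet, of the EM-GMM initialization
# recommended in the header comment: means drawn like the K-means centroids,
# pi uniform, and each Sigma_k the identity matrix.
def init_gmm(X, K=5):
    N, d = X.shape
    mu = X[np.random.choice(N, K, replace=False), :]  # K random data points
    pi = np.full(K, 1.0 / K)                          # uniform mixing weights
    Sigma = np.array([np.eye(d) for _ in range(K)])   # identity covariances
    return mu, pi, Sigma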
Example #10
import tensorflow as tf
import cv2
import numpy as np
 
hr_data = np.genfromtxt('../SVM/data/HR.csv', delimiter=' ')
eda_data = np.genfromtxt('../SVM/data/EDA.csv', delimiter=' ')
stress_data = np.genfromtxt('../SVM/data/STRESS.csv', delimiter=' ')

# trainStressX =

trainX = np.linspace(-1, 1, 101)
trainY = 3 * trainX + np.random.randn(*trainX.shape) * 0.33

print(trainX)
print(trainY)

X = tf.placeholder("float")
Y = tf.placeholder("float")

w = tf.Variable(0.0, name="weights")
y_model = tf.multiply(X, w)

cost = (tf.pow(Y - y_model, 2))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(200):
        for (x,y) in zip(trainX, trainY):
            sess.run(train_op, feed_dict={X:x, Y:y})
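    # After training, w should be close to the true slope of 3 used to
    # generate trainY above.
    print(sess.run(w))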
Example #11
import numpy as np

def parse_rest(rest_file, data_file):

    val = np.genfromtxt(rest_file, delimiter=',', dtype=int)
    word = np.genfromtxt(data_file, delimiter=',', dtype=str, max_rows=1)

    return dict(zip(word, val))
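# Hypothetical usage (file names are placeholders): maps each word in the
# header row of data_file to the corresponding integer in rest_file.
# mapping = parse_rest('rest.csv', 'data.csv')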