import numpy as np

def read_table(path, header=0, index_col=0, delimiter=','):
    # names=True makes genfromtxt take the column names from the header row
    # (index_col is accepted for API parity but unused here)
    data = np.genfromtxt(path, delimiter=delimiter,
                         names=header is not None, dtype=object)
    columns = data.dtype.names
    return data, columns
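# Usage sketch for read_table; the file name 'prices.csv' and the 'area'
# column are assumptions for illustration, not part of the original code:
data, columns = read_table('prices.csv')
print(columns)        # header names detected by genfromtxt
print(data['area'])   # columns are addressable by name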
import numpy as np
import requests

def predict_price(area) -> float:
    """
    Accepts an array `area` (a list of area sizes in sq ft) and returns the
    respective predicted prices (price per sq ft) using the linear regression
    model built below. You can run this program from the command line using
    `python3 regression.py`.
    """
    response = requests.get(TRAIN_DATA_URL)  # TRAIN_DATA_URL comes from the challenge scaffolding
    # YOUR IMPLEMENTATION HERE
    train_data = np.genfromtxt("linreg_train.csv", delimiter=",")
    X = np.array(train_data[0, 1:])  # first row: areas
    Y = np.array(train_data[1, 1:])  # second row: prices
    # ordinary least squares for y = a*x + b
    x_mean = np.mean(X)
    y_mean = np.mean(Y)
    size = np.size(X)
    cross = np.sum(Y * X) - size * y_mean * x_mean
    T = np.sum(X * X) - size * x_mean * x_mean
    a = cross / T            # slope
    b = y_mean - a * x_mean  # intercept
    print(a, b)
    for i in range(len(X)):
        print(X[i], a * X[i] + b, Y[i])  # input, prediction, actual
    return a * area + b
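# Usage sketch; the areas below are illustrative values, not from the dataset:
if __name__ == "__main__":
    areas = np.array([1200, 1500, 2000])
    print(predict_price(areas))  # vectorised: one predicted price per area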
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

def load_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(3, activation='relu'))
    model.add(layers.Dense(3, activation='relu'))
    model.add(layers.Dense(2, activation='softmax'))
    training = np.genfromtxt('training.csv', delimiter=",")
    labels = np.genfromtxt('labels.csv', delimiter=",")
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # epochs=0: no training passes are run here; the pre-trained weights
    # are restored from disk below
    model.fit(training, labels, epochs=0)
    model.load_weights('pubg_model_weights.h5')
    return model
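# Usage sketch: score a few rows from the same training file (illustrative
# only; the model's input width is fixed by the columns of training.csv):
model = load_model()
sample = np.genfromtxt('training.csv', delimiter=',')[:5]
print(model.predict(sample))  # softmax probabilities over the two classes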
import numpy as np
import scanpy as sc

def myexample():
    """
    Template for preprocessing function. Use copy and paste.

    Returns
    -------
    adata : AnnData
        Stores data matrix and sample and variable annotations as well as an
        arbitrary amount of unstructured annotation. For the latter it behaves
        like a Python dictionary.
    """
    # Generate an AnnData object, which is similar
    # to R's ExpressionSet (Huber et al., Nat. Meth. 2015).
    # AnnData allows annotation of samples/cells and variables/genes via
    # the attributes "smp" and "var".
    path_to_data = 'data/myexample/'
    adata = sc.read(path_to_data + 'myexample.csv')
    # other data reading examples
    #adata = sc.read(path_to_data + 'myexample.txt')
    #adata = sc.read(path_to_data + 'myexample.h5', sheet='mysheet')
    #adata = sc.read(path_to_data + 'myexample.xlsx', sheet='mysheet')
    #adata = sc.read(path_to_data + 'myexample.txt.gz')
    #adata = sc.read(path_to_data + 'myexample.soft.gz')
    # if the first column does not store strings, rownames are not detected
    # automatically, hence
    #adata = sc.read(path_to_data + 'myexample.csv', first_column_names=True)

    # transpose if needed to match the convention that rows store samples/cells
    # and columns variables/genes
    # adata = adata.transpose()  # rows = samples/cells & columns = variables/genes

    # read some annotation from a file; here we want strings, not a numerical
    # data matrix, and the following reads from the first column of the file
    groups = np.genfromtxt(path_to_data + 'mygroups.csv', dtype=str)
    adata.smp['groups'] = groups[:, 0]
    # or alternatively, when you want to be smart about row and column annotation
    # dgroups = sc.read(path_to_data + 'mygroups.csv', as_strings=True, return_dict=True)
    # adata.smp['groups'] = dgroups['X'][:, 0]

    # as with a dict, you can add arbitrary additional data to an AnnData object;
    # for example, DPT needs the expression vector of a root cell
    adata['xroot'] = adata.X[336]
    return adata
import subprocess
import shlex

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- enables the 3d projection

# Create the figure
fig = plt.figure()

point1 = np.array([1, 1, 0])
normal = np.array([1, -6, 2])
point2 = np.array([1, 2, 3])
point3 = np.array([3, 1, 1])

# a plane is a*x + b*y + c*z + d = 0;
# [a, b, c] is the normal, so we only have to calculate d and we're set
d = -point1.dot(normal)

# load the x, y, z mesh
xx = np.genfromtxt("meshX.dat")
yy = np.genfromtxt("meshY.dat")
z = np.genfromtxt("meshZ.dat")

# plot the surface on the figure created above
plt3d = fig.gca(projection='3d')
plt3d.plot_surface(xx, yy, z, alpha=0.6)

# defining lines: x(k) = A + k*l
A1 = np.array([1, 1, 0]).reshape((3, 1))
l1 = np.array([2, 1, 4]).reshape((3, 1))
A2 = np.array([1, 1, 0]).reshape((3, 1))
l2 = np.array([0, 1, 3]).reshape((3, 1))

# defining point of intersection
P = np.array([1, 1, 0])

# generating points on each line
l1_p = line_dir_pt(l1, A1)
l2_p = line_dir_pt(l2, A2)
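# line_dir_pt is called above but never defined in the snippet. A minimal
# sketch of such a helper, assuming it samples points x(k) = A + k*l along
# the line (the signature and k-range are guesses; in a runnable script this
# definition would have to precede the two calls above):
def line_dir_pt(l, A, k_range=np.linspace(0, 10, 10)):
    # one column per sampled k; each column is the point A + k*l
    return np.hstack([A + k * l for k in k_range])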
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 29 02:50:53 2018

@author: Student
"""
import os
from io import StringIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pydot
from sklearn import tree
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# each entry: feature name and its ordered categories (car evaluation dataset)
input_labels = [["buying", ["vhigh", "high", "med", "low"]],
                ["maint", ["vhigh", "high", "med", "low"]],
                ["doors", ["2", "3", "4", "5more"]],
                ["persons", ["2", "4", "more"]],
                ["lug_boot", ["small", "med", "big"]],
                ["safety", ["low", "med", "high"]]]
class_names = ["unacc", "acc", "good", "vgood"]

data = np.genfromtxt(os.path.join('data', 'car.data'), delimiter=',', dtype='U')
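# A sketch of how the loaded string matrix could be label-encoded column by
# column before being handed to the tree (the per-column encoder list and the
# assumption that the class label sits in the last column are mine, not from
# the original code):
encoders = []
encoded = np.empty(data.shape, dtype=int)
for col in range(data.shape[1]):
    enc = LabelEncoder()
    encoded[:, col] = enc.fit_transform(data[:, col])
    encoders.append(enc)
X, y = encoded[:, :-1], encoded[:, -1]  # last column assumed to be the class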
import numpy
from pyspark.mllib.tree import DecisionTree
from pyspark.mllib.util import MLUtils

# (tail of the script's argument check: shut down Spark and print usage)
sc.stop()
usage()

points = MLUtils.loadLibSVMFile(sc, dataPath)

# Re-index class labels if needed.
(reindexedData, origToNewLabels) = reindexClassLabels(points)
numClasses = len(origToNewLabels)

# Train a classifier.
categoricalFeaturesInfo = {}  # no categorical features
model = DecisionTree.trainClassifier(reindexedData, numClasses=numClasses,
                                     categoricalFeaturesInfo=categoricalFeaturesInfo)

# Print learned tree and stats.
print(origToNewLabels)
print("Trained DecisionTree for classification:")
print("  Model numNodes: %d" % model.numNodes())
print("  Model depth: %d" % model.depth())
print("  Training accuracy: %g" % getAccuracy(model, reindexedData))
if model.numNodes() < 20:
    print(model.toDebugString())
else:
    print(model)

# testdata = MLUtils.loadLibSVMFile(sc, 'test_svm2')
data = numpy.genfromtxt('test_svm2', delimiter=',')  # reuben
rdd = sc.parallelize(data)
model.predict(rdd).collect()
# predictions = model.predict(testdata.map(lambda x: x.features))
# origToNewLabels.saveAsTextFile('dictionarymap')
# print(predictions)

sc.stop()
# You can initialize your algorithms arbitrarily. We recommend that you
# initialize the K-means centroids by randomly selecting 5 data points. For
# the EM-GMM, we also recommend you initialize the mean vectors in the same
# way, and initialize pi to be the uniform distribution and each Sigma_k to
# be the identity matrix.
##############################################################################
import sys

import numpy as np

def load_data(input_file):
    """
    Loads the dataset. It assumes a *.csv file without header, with the
    output variable in the last column.
    """
    data = np.genfromtxt(input_file, delimiter=',', skip_header=0, names=None)
    return data

def KMeans(X, K=5, maxit=10, saveLog=True):
    """
    Apply K-means to cluster a dataset given as input, with K clusters.
    Input: x1, ..., xn where x in R^d, and K
    Output: vector c of cluster assignments, and K mean vectors mu
    """
    # sample size
    N = X.shape[0]
    # Initialize output variables
    c = np.zeros(N)
    mu = X[np.random.choice(N, K, replace=False), :]
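    # The snippet ends after initialization. A minimal sketch of the remaining
    # Lloyd iterations, assuming the docstring's conventions (c holds the
    # assignments, mu the K centroids; maxit bounds the iterations):
    for it in range(maxit):
        # assignment step: index of the nearest centroid (squared Euclidean)
        dist = ((X[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)
        c = dist.argmin(axis=1)
        # update step: move each centroid to the mean of its assigned points
        for k in range(K):
            if np.any(c == k):
                mu[k] = X[c == k].mean(axis=0)
    return c, mu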
import tensorflow as tf
import cv2
import numpy as np

hr_data = np.genfromtxt('../SVM/data/HR.csv', delimiter=' ')
eda_data = np.genfromtxt('../SVM/data/EDA.csv', delimiter=' ')
stress_data = np.genfromtxt('../SVM/data/STRESS.csv', delimiter=' ')

# synthetic training data: y = 3x plus Gaussian noise
trainStressX = trainX = np.linspace(-1, 1, 101)
trainY = 3 * trainX + np.random.randn(*trainX.shape) * 0.33
print(trainX)
print(trainY)

# TF1-style graph: fit a single weight w in y = w*x by gradient descent
X = tf.placeholder("float")
Y = tf.placeholder("float")
w = tf.Variable(0.0, name="weights")
y_model = tf.multiply(X, w)
cost = tf.pow(Y - y_model, 2)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(200):
        for (x, y) in zip(trainX, trainY):
            sess.run(train_op, feed_dict={X: x, Y: y})
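    # Added readout (not in the original): after training, w should converge
    # near the true slope 3.0 used to generate trainY.
    print(sess.run(w))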
import numpy as np

def parse_rest(rest_file, data_file):
    # values: one row of ints; words: the header row of the data file
    val = np.genfromtxt(rest_file, delimiter=',', dtype=int)
    word = np.genfromtxt(data_file, delimiter=',', dtype=str, max_rows=1)
    return dict(zip(word, val))
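# Usage sketch; 'rest.csv' and 'data.csv' are hypothetical file names, with
# rest.csv holding one row of ints and data.csv's first row the matching names:
mapping = parse_rest('rest.csv', 'data.csv')
print(mapping)  # e.g. {'colA': 1, 'colB': 2}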