def test(data):
    """Print the result of each column-statistics helper for all headers of ``data``.

    For ``data_range``, ``mean``, ``stdev`` and ``normalize_columns_together``
    (in that order) a label line spelling out the call is printed, followed by
    the value the call returns.

    Args:
        data: object exposing ``get_headers()``; it is passed unchanged to
            every helper together with the result of ``get_headers()``.
    """
    # Single-argument print(...) emits the same text under Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print('data_range(data, data.get_headers())')
    print(data_range(data, data.get_headers()))

    print('mean(data, data.get_headers())')
    print(mean(data, data.get_headers()))

    print('stdev(data, data.get_headers())')
    print(stdev(data, data.get_headers()))

    print('normalize_columns_together(data, data.get_headers())')
    print(normalize_columns_together(data, data.get_headers()))
def __init__(self, parent, data):
    """Run the ``Dialog`` constructor, then set this dialog's default state.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    # The base constructor is invoked before any attribute below exists.
    Dialog.__init__(self, parent)

    self.result = []
    self.size = ["6"]
    self.color = ["black"]
    self.headers = data.get_headers()
    # Both start unset; presumably filled in later by the dialog body -- TODO confirm.
    self.dep_list = self.ind_list = None
def stdev(data, headers=None):
    """Return the sample standard deviation (``ddof=1``) of each requested column.

    Args:
        data: object exposing ``get_headers()``, a ``header2matrix`` mapping
            from header to column index, and a 2-D ``matrix_data`` that
            supports ``[:, col]`` slicing and ``.std(ddof=1)``.
        headers: headers of the columns to process. ``None`` or an empty list
            selects every header reported by ``data.get_headers()``.

    Returns:
        list: one standard deviation per header, in the order of ``headers``.

    Raises:
        KeyError: if a header is missing from ``data.header2matrix``.
    """
    # The previous version appended into the shared ``headers=[]`` default,
    # so headers leaked from one call into the next (and into a caller's own
    # empty list). Rebinding the local name avoids any mutation.
    if headers is None or headers == []:
        headers = data.get_headers()

    return [
        data.matrix_data[:, data.header2matrix[header]].std(ddof=1)
        for header in headers
    ]
def mean(data, headers=None):
    """Return the mean of each requested column.

    Args:
        data: object exposing ``get_headers()``, a ``header2matrix`` mapping
            from header to column index, and ``matrix_data``. The
            ``.mean(0).tolist()[0][0]`` access requires the column slice to
            stay two-dimensional (e.g. a ``numpy.matrix``).
        headers: headers of the columns to process. ``None`` or an empty list
            selects every header reported by ``data.get_headers()``.

    Returns:
        list: one mean per header, in the order of ``headers``.

    Raises:
        KeyError: if a header is missing from ``data.header2matrix``.
    """
    # The previous version appended into the shared ``headers=[]`` default,
    # so headers leaked from one call into the next (and into a caller's own
    # empty list). Rebinding the local name avoids any mutation.
    if headers is None or headers == []:
        headers = data.get_headers()

    means = []
    for header in headers:
        column = data.matrix_data[:, data.header2matrix[header]]
        # mean(0) is indexed as a 1x1 nested list to unwrap the scalar.
        means.append(column.mean(0).tolist()[0][0])
    return means
def __init__(self, parent, data):
    """Set the dialog's default state, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    # Single-argument print(...) emits the same text under Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print("within dialog box")

    self.selectVal = 0
    self.shapeVal = 0
    self.numDataPoints = 10
    self.datacols = []
    self.headerXVal = 0
    self.headerYVal = 0
    self.headerZVal = 0
    self.headers = data.get_headers()
    self.colorSelect = None

    # Called last so every attribute above exists before the base constructor
    # runs (presumably it builds the dialog body from them -- TODO confirm).
    Dialog.__init__(self, parent)
def normalize_columns_separately(data, headers=[]):
    """Rescale each selected column using that column's own minimum and maximum.

    Every entry becomes ``(value - column_min) / (column_max - column_min)``,
    with the bounds taken from ``data_range(data, headers)``.

    Args:
        data: object exposing ``get_headers()`` and ``get_data(headers)``.
        headers: headers of the columns to normalize; an empty list selects
            every header reported by ``data.get_headers()``.

    Returns:
        A copy of ``data.get_data(headers)`` holding the rescaled values.
    """
    if headers == []:
        headers = data.get_headers()

    bounds = data_range(data, headers)  # one [min, max] pair per column
    source = data.get_data(headers)
    scaled = source.copy()

    n_rows = source.shape[0]
    n_cols = source.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            low = bounds[col][0]
            high = bounds[col][1]
            scaled[row, col] = (source[row, col] - low) / (high - low)
    return scaled
def data_range(data, headers=None):
    """Return the ``[min, max]`` pair of each requested column.

    Args:
        data: object exposing ``get_headers()``, a ``header2matrix`` mapping
            from header to column index, and ``matrix_data``. The
            ``.min(0).tolist()[0][0]`` access requires the column slice to
            stay two-dimensional (e.g. a ``numpy.matrix``).
        headers: headers of the columns to process. ``None`` or an empty list
            selects every header reported by ``data.get_headers()``.

    Returns:
        list: one ``[min, max]`` list per header, in the order of ``headers``.

    Raises:
        KeyError: if a header is missing from ``data.header2matrix``.
    """
    # The previous version appended into the shared ``headers=[]`` default,
    # so headers leaked from one call into the next (and into a caller's own
    # empty list). Rebinding the local name avoids any mutation.
    if headers is None or headers == []:
        headers = data.get_headers()

    ranges = []
    for header in headers:
        column = data.matrix_data[:, data.header2matrix[header]]
        ranges.append([
            column.min(0).tolist()[0][0],
            column.max(0).tolist()[0][0],
        ])
    return ranges
def normalize_columns_together(data, headers=[]):
    """Rescale the selected columns using one global minimum and maximum.

    Every entry becomes ``(value - global_min) / (global_max - global_min)``,
    where the bounds are taken over the whole selected matrix.

    Args:
        data: object exposing ``get_headers()`` and ``get_data(headers)``.
        headers: headers of the columns to normalize; an empty list selects
            every header reported by ``data.get_headers()``.

    Returns:
        A copy of ``data.get_data(headers)`` holding the rescaled values.
    """
    if headers == []:
        headers = data.get_headers()

    source = data.get_data(headers)
    low = source.min()
    high = source.max()

    scaled = source.copy()
    n_rows = source.shape[0]
    n_cols = source.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            scaled[row, col] = (source[row, col] - low) / (high - low)
    return scaled
def write(self, filename):
    '''Write header names, a row of "numeric" tags and the data rows to a file.

    Every field is followed by a comma and every row ends with a newline.

    Args:
        filename: path of the file to create or overwrite.

    NOTE(review): the original summary read "Writes the Bayes classifier to a
    file", but the body only writes values read from ``data``.
    NOTE(review): ``data`` is neither a parameter nor a local here, so it must
    be resolved from an enclosing scope -- confirm this is intended.
    '''
    # The with-block closes the handle even when one of the lookups below
    # raises; the previous version never closed the file.
    with open(filename, 'w') as fp:
        for header in data.get_headers():
            fp.write(header + ",")
        fp.write("\n")

        # One "numeric" tag per entry in self.headers.
        fp.write("numeric," * len(self.headers))
        fp.write("\n")

        # NOTE(review): ``data.self.headers.shape[0]`` looks like a typo for
        # a row count on ``data``; left unchanged because the intended
        # attribute cannot be determined from this method.
        for k in range(data.self.headers.shape[0]):
            for i in range(len(self.headers)):
                fp.write(str(data[k, i]) + ",")
            fp.write("\n")
    return
def __init__(self, parent, data):
    """Store the dialog's initial state, then run the ``Dialog`` constructor.

    Args:
        parent: owner passed to ``tk.StringVar`` and to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    available_headers = data.get_headers()
    self.headers = available_headers

    # number of clusters
    self.name = tk.StringVar(parent, value="")
    self.Cluster_box = None

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "K-Means Clustering")
def __init__(self, parent, data):
    """Store the dialog's initial state, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.eheaders``.
    """
    available_headers = data.get_headers()
    self.eheaders = available_headers
    self.PCA_box = None

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "PCA")
def __init__(self, parent, data):
    """Cache the PCA results of ``data``, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object providing ``get_eigenvectors()``, ``get_eigenvalues()``,
            ``get_headers()`` and ``get_original_headers()``.
    """
    self.evectors = data.get_eigenvectors()
    # NOTE(review): the original comment here read "convert from np array to
    # python list", but no conversion is performed in this method.
    self.evalues = data.get_eigenvalues()

    self.headers = data.get_headers()
    self.eheaders = data.get_original_headers()

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "PCA")
def __init__(self, parent, data):
    """Store the dialog's initial state, then run the ``Dialog`` constructor.

    Args:
        parent: owner passed to the ``tk`` variables and to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    available_headers = data.get_headers()
    self.headers = available_headers

    # whether data should be normalized
    self.check = tk.IntVar(parent, value=0)
    self.name = tk.StringVar(parent, value="")
    self.PCA_box = None

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "PCA")
def __init__(self, parent, data):
    """Store the dialog's initial state, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    available_headers = data.get_headers()
    self.headers = available_headers

    # Both start unset; presumably assigned by the dialog body -- TODO confirm.
    self.DV_list = self.IV_list = None

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "Choose variables for Regression")
def __init__(self, parent, data):
    """Remember the headers of ``data``, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.headers``.
    """
    available_headers = data.get_headers()
    self.headers = available_headers

    # Called last so ``self.headers`` exists before the base constructor runs.
    Dialog.__init__(self, parent, "Choose Axes")
# CS 251 Project 6
#
# PCA test function
#
# Reads the data file named on the command line, runs PCA over all of its
# headers through an analysis.Analysis object, and prints the original data
# together with the PCA results.

import numpy as np
import data
import analysis
import sys

if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Usage: python %s <data file>' % (sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit()

    # Note: from here on the name ``data`` refers to the loaded Data object,
    # no longer to the imported module.
    data = data.Data(sys.argv[1])
    analysisObj = analysis.Analysis()
    # The meaning of the third argument (False) is defined by Analysis.pca.
    pcadata = analysisObj.pca(data, data.get_headers(), False)

    print("\nOriginal Data Headers")
    print(pcadata.get_data_headers())

    # The original statement ended in a stray trailing comma, which glued the
    # first matrix row onto the heading line; every other heading sits on its
    # own line, so this one now does too.
    print("\nOriginal Data")
    print(data.get_data(data.get_headers(), data.get_num_rows()))

    print("\nOriginal Data Means")
    print(pcadata.get_data_means())

    print("\nEigenvalues")
    print(pcadata.get_eigenvalues())

    print("\nEigenvectors")
    print(pcadata.get_eigenvectors())

    print("\nProjected Data")
    print(pcadata.get_data(pcadata.get_headers(), data.get_num_rows()))
# Spring 2015
# CS 251 Project 6
#
# PCA test function
#
# Reads the data file named on the command line, runs analysis.pca over all
# of its headers, and prints the original data together with the PCA results.

import numpy as np
import data
import analysis
import sys

if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Usage: python %s <data file>' % (sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit()

    # Note: from here on the name ``data`` refers to the loaded Data object,
    # no longer to the imported module.
    data = data.Data(sys.argv[1])
    # The meaning of the third argument (False) is defined by analysis.pca.
    pcadata = analysis.pca(data, data.get_headers(), False)

    print("\nOriginal Data Headers")
    print(pcadata.get_data_headers())

    # The original statement ended in a stray trailing comma, which glued the
    # first matrix row onto the heading line; every other heading sits on its
    # own line, so this one now does too.
    print("\nOriginal Data")
    print(data.get_data(data.get_headers()))

    print("\nOriginal Data Means")
    print(pcadata.get_data_means())

    print("\nEigenvalues")
    print(pcadata.get_eigenvalues())

    print("\nEigenvectors")
    print(pcadata.get_eigenvectors())

    print("\nProjected Data")
    print(pcadata.get_data(pcadata.get_headers()))
# Updated for python3
# CS 251 Project 6
#
# PCA test function
#
# Loads the data file named on the command line, runs analysis.pca over all
# of its headers, and prints the original data next to the PCA results.

import numpy as np
import data
import analysis
import sys


def main():
    """Run PCA on the file given as the first command-line argument and print the results."""
    if len(sys.argv) < 2:
        print('Usage: python %s <data file>' % (sys.argv[0]))
        exit()

    dataset = data.Data(sys.argv[1])
    # The meaning of the third argument (False) is defined by analysis.pca.
    pca_result = analysis.pca(dataset, dataset.get_headers(), False)

    print("\nOriginal Data Headers")
    original_headers = pca_result.get_original_headers()
    print(original_headers)

    print("\nOriginal Data")
    original_matrix = dataset.get_matrix(dataset.get_headers())
    print(original_matrix)

    print("\nOriginal Data Means")
    original_means = pca_result.get_original_means()
    print(original_means)

    print("\nEigenvalues")
    eigenvalues = pca_result.get_eigenvalues()
    print(eigenvalues)

    print("\nEigenvectors")
    eigenvectors = pca_result.get_eigenvectors()
    print(eigenvectors)

    print("\nProjected Data")
    projected = pca_result.get_matrix(pca_result.get_headers())
    print(projected)


if __name__ == "__main__":
    main()
import os

from data import TRAINING_DATA_1, get_headers
from matplotlib import pyplot

# Get all headers and drop the first one (the datetime column).
columns = get_headers()
columns = columns[1:]

# savefig does not create missing directories, so make sure the output
# directory exists before the first figure is written.
if not os.path.isdir("cyclic"):
    os.makedirs("cyclic")

# Plot the first 300 values of every remaining column, one image per column.
for column in columns:
    column_data = TRAINING_DATA_1[column]
    values = column_data.values
    plot_values = list(values[:300])
    pyplot.plot(plot_values)
    pyplot.ylabel(column)
    pyplot.xlabel("index")
    pyplot.savefig("cyclic/%s" % column)
    # Clear the axes so the next column starts from an empty plot.
    pyplot.cla()
def __init__(self, parent, data):
    """Store the dialog's initial state, then run the ``Dialog`` constructor.

    Args:
        parent: forwarded to ``Dialog.__init__``.
        data: object whose ``get_headers()`` result is kept on ``self.cheaders``.
    """
    available_headers = data.get_headers()
    self.cheaders = available_headers
    self.Cluster_box = None

    # Called last so the attributes above exist before the base constructor runs.
    Dialog.__init__(self, parent, "Clustering")
# Spring 2015
# CS 251 Project 6
#
# PCA test function
#
# Loads the data file named on the command line, runs analysis.pca over all
# of its headers, and prints the original data next to the PCA results.

import numpy as np
import data
import analysis
import sys


def main():
    """Run PCA on the file given as the first command-line argument and print the results."""
    if len(sys.argv) < 2:
        print('Usage: python %s <data file>' % (sys.argv[0]))
        exit()

    dataset = data.Data(sys.argv[1])
    # The meaning of the third argument (False) is defined by analysis.pca.
    pca_result = analysis.pca(dataset, dataset.get_headers(), False)

    print("\nOriginal Data Headers")
    original_headers = pca_result.get_data_headers()
    print(original_headers)

    print("\nOriginal Data")
    original_values = dataset.get_data(dataset.get_headers())
    print(original_values)

    print("\nOriginal Data Means")
    original_means = pca_result.get_data_means()
    print(original_means)

    print("\nEigenvalues")
    eigenvalues = pca_result.get_eigenvalues()
    print(eigenvalues)

    print("\nEigenvectors")
    eigenvectors = pca_result.get_eigenvectors()
    print(eigenvectors)

    print("\nProjected Data")
    projected = pca_result.get_data(pca_result.get_headers())
    print(projected)


if __name__ == "__main__":
    main()
# CS 251 Project 6
#
# PCA test function
#
# Loads the data file named on the command line, runs analysis.pca over all
# of its headers, and prints the original data next to the PCA results.

import numpy as np
import data
import analysis
import sys
# import pcadata


def main():
    """Run PCA on the file given as the first command-line argument and print the results."""
    if len(sys.argv) < 2:
        print('Usage: python %s <data file>' % (sys.argv[0]))
        exit()

    dataset = data.Data(sys.argv[1])
    # The meaning of the third argument (False) is defined by analysis.pca.
    pca_result = analysis.pca(dataset, dataset.get_headers(), False)

    print("\nOriginal Data Headers")
    original_headers = pca_result.get_original_headers()
    print(original_headers)

    print("\nOriginal Data")
    original_values = dataset.limit_columns(dataset.get_headers())
    print(original_values)

    print("\nOriginal Data Means")
    original_means = pca_result.get_original_means()
    print(original_means)

    print("\nEigenvalues")
    eigenvalues = pca_result.get_eigenvalues()
    print(eigenvalues)

    print("\nEigenvectors")
    eigenvectors = pca_result.get_eigenvectors()
    print(eigenvectors)

    print("\nProjected Data")
    projected = pca_result.limit_columns(pca_result.get_headers())
    print(projected)


if __name__ == "__main__":
    main()