def handleLinearRegression(self):
    """Ask the user for regression variables and plot the regression.

    Opens the regression-variable dialog and the colour/size dialog, stores
    the optional colour and size columns (run through
    ``analysis.normalize_columns_separately``), resets the data and the view,
    sizes the view's screen to the current canvas and finally calls
    ``self.buildLinearRegression()``.

    Returns early without touching any state when no data is loaded or when
    the regression dialog produced an empty selection.
    """
    # Identity check: ``== None`` would be routed through the data object's
    # own ``__eq__``. The parenthesised print works on Python 2 and 3.
    if self.data is None:
        print("you don't have data")
        return

    variables = Dialogs.LinRegressDialog(self.root, self.data.get_headers())
    colorbox = Dialogs.ColorDialog(self.root, self.data.get_headers())
    if variables.result == []:
        return

    # Optional colour dimension.
    if colorbox.resultc != []:
        self.colorMatrix = analysis.normalize_columns_separately(
            self.data, (colorbox.resultc,))
        self.colorResult = colorbox.resultc
    else:
        self.colorResult = None
        self.colorMatrix = None

    # Optional size dimension.
    if colorbox.results != []:
        self.sizeMatrix = analysis.normalize_columns_separately(
            self.data, (colorbox.results,))
        self.sizeResult = colorbox.results
    else:
        self.sizeResult = None
        self.sizeMatrix = None

    self.dataheaders = variables.result
    self.resetData()
    self.view.reset()
    # Match the view's screen extent to the canvas as currently displayed.
    self.view.screen[0, 0] = float(self.canvas.winfo_width())
    self.view.screen[0, 1] = float(self.canvas.winfo_height())
    self.buildLinearRegression()
def buildPCA(self, headers, data):
    """Plot the given columns of ``data`` as black ovals on the canvas.

    The first three entries of ``headers`` are normalized and used as point
    coordinates. When ``data`` reports more than three (respectively four)
    data headers, ``headers[3]`` and ``headers[4]`` are additionally
    normalized into ``self.size`` and ``self.color``.

    Every created canvas item is appended to ``self.objects``.
    """
    self.totalReset()
    points = analysis.normalize_columns_separately(headers[0:3], data)

    # Queried once; the count decides which optional columns are available.
    num_columns = len(data.get_data_headers())
    if num_columns > 3:
        self.size = analysis.normalize_columns_separately([headers[3]], data)
    if num_columns > 4:
        self.color = analysis.normalize_columns_together([headers[4]], data)

    vtm = self.view.build()

    # Start from all ones so the last column already is the homogeneous
    # coordinate; the leading columns are then overwritten with the data.
    num_points = points.shape[0]
    self.data2matrix = np.ones((num_points, 4))
    if num_columns < 3:
        self.data2matrix[:, :-2] = points
        self.data2matrix[:, -2] = np.zeros((num_points))
    else:
        self.data2matrix[:, :-1] = points

    # NOTE(review): presumably vtm is an np.matrix, so ``*`` is a matrix
    # product here rather than an element-wise one -- confirm.
    tend = self.data2matrix * vtm.T

    # The same radius is used horizontally and vertically.
    radius = int(self.sizeOption.get())
    for i in range(tend.shape[0]):
        tx = tend[i, 0]
        ty = tend[i, 1]
        pt = self.canvas.create_oval(tx - radius, ty - radius,
                                     tx + radius, ty + radius,
                                     fill='black', outline='')
        self.objects.append(pt)
def buildPoints(self, plot_cols):
    """Plot the columns named in ``plot_cols`` on the canvas.

    One column is handed off to ``self.buildHistogram()``. Two columns get a
    zero z column, and two or three columns get a homogeneous coordinate,
    before being transformed by the view matrix and drawn as ovals.

    Point size comes from ``self.selected_size`` (``1`` means a fixed size)
    and point colour from ``self.selected_color`` (``"blue"`` means a fixed
    colour); any other value is treated as a column header to normalize.
    """
    # Delete any existing canvas objects used for plotting data.
    self.clearData()

    self.datamatrix = analysis.normalize_columns_separately(
        plot_cols, self.data_obj)

    if self.selected_size == 1:
        self.size_list = 1
    else:
        self.size_list = analysis.normalize_columns_separately(
            [self.selected_size], self.data_obj)

    if self.selected_color == "blue":
        self.color_list = "blue"
    else:
        self.color_list = analysis.normalize_columns_separately(
            [self.selected_color], self.data_obj)

    num_rows = self.datamatrix.shape[0]
    ones = np.ones((num_rows, 1))
    zeros = np.zeros((num_rows, 1))
    num_cols = len(plot_cols)

    if num_cols == 1:
        # A single column is shown as a histogram, not as points.
        self.hasHistogram = True
        self.datamatrix = np.hstack((self.datamatrix, zeros, zeros, ones))
        self.buildHistogram()
        return
    if num_cols == 2:
        self.hasHistogram = False
        self.datamatrix = np.hstack((self.datamatrix, zeros, ones))
    if num_cols == 3:
        self.hasHistogram = False
        self.datamatrix = np.hstack((self.datamatrix, ones))

    self.build_miniwin()
    vtm = self.vobj.build()
    pts = (vtm * self.datamatrix.T).T

    # None of these change while the points are drawn, so look them up once.
    fixed_size = isinstance(self.size_list, int)
    fixed_color = isinstance(self.color_list, str)
    outlined = self.texture_selection.get() == "Outline"

    for i in range(pts.shape[0]):
        x = pts[i, 0]
        y = pts[i, 1]

        dx = 1 if fixed_size else float(self.size_list[i]) * 2 + 1

        if fixed_color:
            color = "blue"
        else:
            # Green-to-blue gradient driven by the normalized colour value.
            level = float(self.color_list[i])
            green = int((1 - level) * 255)
            blue = int(level * 255)
            color = f'#00{green:02x}{blue:02x}'

        # "Outline" draws hollow ovals; every other texture draws them solid.
        if outlined:
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx,
                                         fill="", outline=color)
        else:
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx,
                                         fill=color, outline="")
        self.objects.append(pt)
def main():
    """Exercise the Data class and the analysis functions on ``cars.csv``.

    Prints each result under a short label. Single-argument, parenthesised
    ``print`` calls are used so the script runs unchanged on Python 2 and 3.
    """
    d = Data('cars.csv')

    # Raw (string) view of the file.
    print("Raw Headers")
    print(d.get_raw_headers())
    print("\n\n")
    print("Raw number of columns")
    print(d.get_raw_num_columns())
    print("\n\n")
    print("Raw number of rows")
    print(d.get_raw_num_rows())
    print("\n\n")
    print("13th row")
    print(d.get_raw_row(13))
    print("\n\n")
    print("Value at row 6, header 'Car'")
    print(d.get_raw_value(6, 'Car'))
    print("\n\n")

    # Numeric view of the file.
    print("Matrix data")
    print(d.matrix_data)
    print("\n\n")
    print("Headers")
    print(d.get_headers())
    print("\n\n")
    print("Number of cols")
    print(d.get_num_columns())
    print("\n\n")
    print("5th row")
    print(d.get_row(5))
    print("\n\n")
    print("Get value")
    print(d.get_value(5, 'Horsepower'))
    print("\n\n")
    print("get_data function")
    print(d.get_data(['Origin', 'Horsepower']))
    print("\n\n")

    # Analysis functions.
    print("data range")
    print(analysis.data_range(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("mean of horsepower and origin")
    print(analysis.mean(d, ['Horsepower', 'Origin']))
    print("\n\n")
    print("standard deviation for horsepower and origin")
    print(analysis.stdev(d, ['Horsepower', 'Origin']))
    print("\n")
    print("normalized columns origin and horsepower")
    print(analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("normalized together origin and horsepower")
    print(analysis.normalize_columns_together(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("median of columns origin, horspower and weight")
    print(analysis.median(d, ['Origin', 'Horsepower', 'Weight']))
    print(d.get_data(['Origin', 'Horsepower']).shape)
def buildLinearRegression(self):
    """Run the regression selected in the dialog and display its result.

    With one independent variable a simple linear regression is computed,
    its best-fit line is drawn in orange (and recorded in
    ``self.reg_objects``) and ``self.buildRegressionLegend`` is called.
    With a second independent variable a multiple regression is computed
    and only ``self.buildMultipleRegressionLegend`` is called.
    """
    dialog = self.dialogWindow

    if dialog.getIV2() is None:
        ind_header = dialog.getIV()
        dep_header = dialog.getDV()

        # The previous version also normalized both columns and transformed
        # self.data2plot here, but never used either result; that dead work
        # (and its dependency on self.data2plot existing) has been removed.
        vtm = self.view.build()
        (m, b, rVal, _pVal, _stdErr, indRange, depRange) = \
            analysis.single_linear_regression(
                self.dataObj, [ind_header], [dep_header])

        # Evaluate the fit at both ends of the independent range and rescale
        # the results by the dependent range, to pair with x = 0 and x = 1.
        dep_min = depRange[0][0]
        dep_span = depRange[0][1] - depRange[0][0]
        self.endpoint1 = ((indRange[0][0] * m + b) - dep_min) / dep_span
        self.endpoint2 = ((indRange[0][1] * m + b) - dep_min) / dep_span

        self.endpoints = np.matrix([[0., self.endpoint1, 0., 1.],
                                    [1., self.endpoint2, 0., 1.]])
        # After the transform each column holds one endpoint.
        self.endpoints = (vtm * self.endpoints.T)
        self.line = self.canvas.create_line(
            self.endpoints[0, 0], self.endpoints[1, 0],
            self.endpoints[0, 1], self.endpoints[1, 1],
            fill="orange")
        self.reg_objects.append(self.line)
        self.buildRegressionLegend(m, b, rVal)
        return

    indHeaders = [dialog.getIV(), dialog.getIV2()]
    (b, sse, r2, t, p) = analysis.linear_regression(
        self.dataObj, indHeaders, dialog.getDV())
    self.buildMultipleRegressionLegend(b, sse, r2, t, p)
def buildPoints(self, cols):
    """Draw the columns in ``cols`` as canvas items.

    The normalized columns are padded with a zero column when only two were
    requested, then with a column of ones, transformed by the view matrix and
    drawn with the shape selected in ``self.dataShape`` using the per-point
    ``self.sizes`` and ``self.colors``. Created items go into
    ``self.objects``.
    """
    self.clear()

    # Assemble the point matrix column by column.
    self.dataMatrix = analysis.normalize_columns_separately(self.data, cols)
    fillers = [np.zeros, np.ones] if len(cols) == 2 else [np.ones]
    for make_column in fillers:
        column = np.matrix(make_column(self.data.get_raw_num_rows())).T
        self.dataMatrix = np.hstack((self.dataMatrix, column))

    # Project into screen space.
    transform = self.view.build()
    projected = (transform * self.dataMatrix.T).T

    marker = self.dataShape.get()
    for idx in range(projected.shape[0]):
        cx = projected[idx, 0]
        cy = projected[idx, 1]
        radius = self.sizes[idx]
        tint = self.colors[idx]
        box = (cx - radius, cy - radius, cx + radius, cy + radius)

        if marker == "square":
            item = self.canvas.create_rectangle(*box, fill=tint, outline='')
        elif marker == "circle":
            # Circles are hollow: only the outline is coloured.
            item = self.canvas.create_oval(*box, outline=tint)
        else:
            item = self.canvas.create_oval(*box, fill=tint, outline='')
        self.objects.append(item)
def buildHistogram(self):
    """Redraw the ten-bin histogram of the first selected column.

    Existing bars are deleted, the labels are refreshed, and the normalized
    values of the first column are counted into ten equal-width bins. One
    rectangle per bin is drawn along the transformed x axis and stored in
    ``self.bars``. Nothing is drawn when there are no rows.
    """
    for rec in self.bars:
        self.canvas.delete(rec)
    self.bars = []
    self.updateLabels()

    vtm = self.vobj.build()
    selected = analysis.normalize_columns_separately(
        self.headernames, self.dobj)

    num_rows = selected.shape[0]
    if num_rows == 0:
        # The bar height is scaled by 1 / num_rows below; with no data there
        # is nothing to draw and the division would fail.
        return

    axes = (vtm * self.axes.T).T
    binw = int((axes[1, 0] - axes[0, 0]) / 10)
    one_h = (axes[2, 1] - axes[3, 1]) / num_rows

    # Lower edges of bins 1..9. A value's bin is the number of edges it has
    # reached, so anything below 0.1 lands in bin 0 and [0.9, 1] in bin 9.
    edges = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    counts = [0] * 10
    for row in range(num_rows):
        value = selected[row, 0]
        # Values above 1 (and NaN, which fails every comparison) are skipped.
        if value <= 1:
            counts[sum(1 for edge in edges if value >= edge)] += 1

    left = axes[0, 0]
    baseline = axes[2, 1]
    for index, count in enumerate(counts):
        x0 = left + index * binw
        y0 = baseline - count * one_h
        x1 = left + (index + 1) * binw
        rec = self.canvas.create_rectangle(x0, y0, x1, baseline,
                                           fill="dark orange")
        self.bars.append(rec)
def buildPoints(self, headers):
    """Plot the first two selected columns as red ovals.

    ``headers`` is the list of headers chosen in the dialog box; the first
    two entries are the x and y dimensions. The normalized columns are padded
    with a zero z column and a homogeneous coordinate, transformed by the
    view matrix and drawn with a fixed radius of 5. Created canvas items are
    appended to ``self.objects``.
    """
    self.clearData()
    norm = an.normalize_columns_separately(self.data, headers[0:2])
    num_rows = norm.shape[0]

    # x and y are automatically the first two dimensions.
    xdata = headers[0]
    ydata = headers[1]
    if xdata is not None and ydata is not None:
        padding = np.matrix((np.zeros(num_rows), np.ones(num_rows))).T
        self.dataPointMatrix = np.hstack((np.matrix(norm), padding))
    # NOTE(review): when either header is None the matrix from an earlier
    # call is reused below -- confirm that this is intended.

    vtm = self.v.build()
    pts = (vtm * self.dataPointMatrix.T).T

    dx = 5
    for i in range(pts.shape[0]):
        x = pts[i, 0]
        y = pts[i, 1]
        pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx,
                                     fill='red', outline='')
        self.objects.append(pt)
def main(argv):
    """Read the CSV file named in ``argv[1]`` and print a tour of its data.

    Prints the dimensions, headers, types, one row and the full table, then
    the selected columns ``thing1``/``thing3`` together with the results of
    the analysis functions for those columns.

    Exits with status 0 after printing a usage message when no file name is
    supplied.
    """
    # Imported locally so the block does not rely on a module-level import;
    # the bare ``exit`` helper only exists when the ``site`` module is loaded.
    import sys

    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        sys.exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(argv[1])

    # print out information about the data
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of columns: ', dobj.get_num_dimensions())

    # print out the headers
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the types
    print("\nTypes")
    print(", ".join(dobj.get_types()))

    # print out a single row
    print("\nPrinting row index 2")
    print(dobj.get_row(2))

    # print out all of the data
    print("\nData")
    print("headers:", headers)
    for i in range(dobj.get_num_points()):
        # First column as-is, remaining columns right-aligned in 10 chars
        # and truncated to 3.
        first = str(dobj.get_value(headers[0], i))
        rest = "".join("%10.3s" % (dobj.get_value(header, i))
                       for header in headers[1:])
        print(first + rest)

    print("\n\n\n\nselect_columns")
    columns = ['thing1', 'thing3']
    s = dobj.select_columns(columns)
    print("Selected columns:", s)
    print("Data range:", analysis.data_range(columns, dobj))
    print("Mean:", analysis.mean(columns, dobj))
    print("Standard deviation:", analysis.stdev(columns, dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(columns, dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(columns, dobj))
def buildLinearRegression(self, headers):
    """Plot two columns and overlay their least-squares best-fit line.

    ``headers`` holds the independent and dependent column headers. The
    normalized points are drawn with the shape and colour currently selected
    in ``self.shapeOption`` / ``self.colorOption``, the regression of the
    raw data is computed with ``scipy.stats.linregress``, the fit line is
    drawn in gold and ``self.label`` is updated with the equation and R^2.
    """
    normalized = analysis.normalize_columns_separately(headers, self.data)

    # Append a zero z column and the homogeneous coordinate.
    num_rows = normalized.shape[0]
    self.points = np.matrix(np.hstack(
        (normalized, np.zeros((num_rows, 1)), np.ones((num_rows, 1)))))

    vtm = self.view.build()
    pts = (vtm * self.points.T).T

    # Convert once; rebuilding the list for every point made the loop
    # quadratic in the number of points.
    rows = pts.tolist()
    shape = self.shapeOption.get()
    color = self.colorOption.get()
    if shape == "Dot":
        draw = self.canvas.create_oval
    elif shape == "Square":
        draw = self.canvas.create_rectangle
    else:
        # Any other shape selection draws no points at all.
        draw = None

    dx = 3
    if draw is not None:
        for row in rows:
            pt = draw(row[0] - dx, row[1] - dx, row[0] + dx, row[1] + dx,
                      fill=color, outline='', tags="data")
            self.dataObjects.append(pt)
            self.objects.append(pt)

    unnormalized = self.data.get_data(headers).T.tolist()
    regress_output = scipy.stats.linregress(unnormalized[0], unnormalized[1])
    slope = regress_output[0]
    intercept = regress_output[1]

    # Rounded values are for display only.
    m = round(slope, 3)
    b = round(intercept, 3)
    r = round(regress_output[2] * regress_output[2], 3)

    ranges = analysis.data_range(headers, self.data)
    xmin = ranges[0][0]
    xmax = ranges[0][1]
    ymin = ranges[1][0]
    ymax = ranges[1][1]

    # Use the unrounded fit for the geometry: rounding a small slope to three
    # decimals first can visibly flatten or shift the drawn line.
    pt1 = [0.0, ((xmin * slope + intercept) - ymin) / (ymax - ymin), 0, 1]
    pt2 = [1.0, ((xmax * slope + intercept) - ymin) / (ymax - ymin), 0, 1]
    print("point1")
    print(pt1)
    print("point2")
    print(pt2)

    self.regressionMatrix = np.matrix([pt1, pt2])
    pts = (vtm * self.regressionMatrix.T).T
    print(pts)
    best_fit = self.canvas.create_line(pts[0, 0], pts[0, 1],
                                       pts[1, 0], pts[1, 1],
                                       width=3, fill='gold', tags="data")
    self.regressionLines.append(best_fit)
    self.label['text'] = ("The best fit line equation:\n y = " + str(m)
                          + "x + " + str(b) + "\n\nR^2 value: " + str(r))
def handlePlotData(self, event=None):
    """Ask the user which columns to plot and rebuild the plot state.

    Opens the axes dialog and the colour/size dialog. When axes were chosen,
    stores the selected headers, builds ``self.dataMatrix`` (normalized
    columns, a zero z column for 2-D selections, and a homogeneous
    coordinate), stores the optional colour and size matrices and calls
    ``self.buildAxes()``. ``event`` is accepted so the method can be used as
    a Tk binding; it is not read.

    Nothing is changed when no data is loaded or when the axes dialog
    returned an empty selection, so a cancelled dialog keeps the old plot.
    """
    if self.data is None:
        print('you don\'t have any data')
        return

    headerbox = Dialogs.AxesDialog(self.root, self.data.get_headers())
    colorbox = Dialogs.ColorDialog(self.root, self.data.get_headers())

    # NOTE(review): buildAxes() is only reached on this non-cancelled path;
    # the nesting was ambiguous in the original layout -- confirm.
    if headerbox.result == []:
        return

    self.dataheaders = headerbox.result
    temp_matrix = analysis.normalize_columns_separately(
        self.data, headerbox.result)

    # Optional colour column: raw values when the colour checkbox variable
    # equals 1, normalized values otherwise.
    if colorbox.resultc != []:
        if self.colorVar.get() == 1:
            color_matrix = self.data.get_data((colorbox.resultc,))
        else:
            color_matrix = analysis.normalize_columns_separately(
                self.data, (colorbox.resultc,))
        color_result = colorbox.resultc
    else:
        color_matrix = None
        color_result = None

    # Optional size column.
    if colorbox.results != []:
        size_matrix = analysis.normalize_columns_separately(
            self.data, (colorbox.results,))
        size_result = colorbox.results
    else:
        size_matrix = None
        size_result = None

    self.rows = len(temp_matrix)
    if len(headerbox.result) == 2:
        # 2-D selection: pad with a zero z column.
        temp_matrix = np.hstack(
            (temp_matrix, np.zeros(shape=(self.rows, 1))))
    homogenous_coordinates = np.ones(shape=(self.rows, 1))
    self.dataMatrix = np.hstack((temp_matrix, homogenous_coordinates))

    self.colorMatrix = color_matrix
    self.colorResult = color_result
    self.sizeMatrix = size_matrix
    self.sizeResult = size_result

    self.buildAxes()
def main(argv):
    """Demonstrate the analysis functions on the CSV file in ``argv[1]``.

    Uses the first and third headers of the file for every analysis call and
    prints each result under a label, followed by the extension demos.

    Exits with status 0 after printing a usage message when no file name is
    supplied.
    """
    # Imported locally so the block does not rely on a module-level import;
    # the bare ``exit`` helper only exists when the ``site`` module is loaded.
    import sys

    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        sys.exit(0)

    # create a data object, which reads in the data
    dobj = Data(argv[1])
    headers = dobj.get_headers()

    # The same two columns are used for every call below.
    cols = [headers[0], headers[2]]

    # test the five analysis functions
    print(cols)
    print("Data range by column:", analysis.data_range(cols, dobj))
    print("Mean:", analysis.mean(cols, dobj))
    print("Standard deviation:", analysis.stdev(cols, dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(cols, dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(cols, dobj))

    # Extension 1
    print("Median:", analysis.median(cols, dobj))
    # Extension 2
    print("Median Separately:", analysis.median_separately(cols, dobj))
    # Extension 3
    print("just few rows:", dobj.limit_rows())
    # Extension 4
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        dobj.limit_columns())
    # Extension 5
    print("Data range overall:", analysis.data_range(cols, dobj, True))
    # Extension 6
    print(
        "The next two print statements get the last row of data. I add a row of data in between,"
        "so they are different.")
    print(dobj.get_row(-1))
    dobj.add_point([1, 2, 3])
    print(dobj.get_row(-1))
def buildPoints(self, headers):
    """Plot the selected data columns as ovals on the canvas.

    ``headers`` is the list of column headers chosen in the dialog box, in
    the fixed order x, y, z, color, size. Its length (2 to 5) decides which
    of the optional dimensions are used.

    Created canvas items are appended to ``self.objects``; the per-point
    colours and radii are kept in ``self.colorlist`` and ``self.factorlist``.
    """
    self.clearData()
    # When PCA results exist, plot the one that is active in the analysis
    # list box instead of the current data.
    if len(self.pcaList)>0:
        idx = self.AnalysisWindow.index(tk.ACTIVE)
        self.data=self.pcaList[idx]
    norm = an.normalize_columns_separately(self.data, headers[0:3])
    zeromatrix = np.zeros(norm.shape[0])
    onesmatrix = np.ones(norm.shape[0])
    # x and y are automatically the first two dimensions
    xdata = headers[0]
    ydata = headers[1]
    # NOTE(review): zdata / colorpt / sizept are only bound for 2 to 5
    # headers; any other length raises NameError further down.
    # Two headers: plot on the x and y axes only.
    if len(headers) == 2:
        zdata = None
        colorpt = None
        sizept = None
    # Three headers: the third one is the z axis.
    if len(headers) == 3:
        zdata = headers[2]
        colorpt = None
        sizept = None
    # Four headers: the third is the z axis, the fourth the color dimension.
    if len(headers) == 4:
        zdata = headers[2]
        colorpt = headers[3]
        sizept = None
    # Five headers: z axis, color dimension and size dimension, in that order.
    if len(headers) == 5:
        zdata = headers[2]
        colorpt = headers[3]
        sizept = headers[4]
    # 2-D layout: pad with a zero z column and the homogeneous coordinate.
    if xdata != None and ydata != None:
        dmatrix = np.matrix(norm)
        nmatrix = np.matrix((zeromatrix, onesmatrix)).T
        self.dataPointMatrix = np.hstack((dmatrix, nmatrix))
    # 3-D layout: overrides the matrix built above with data plus the
    # homogeneous coordinate only.
    if xdata != None and ydata != None and zdata != None:
        dmatrix = np.matrix(norm)
        nmatrix = np.matrix((onesmatrix)).T
        self.dataPointMatrix = np.hstack((dmatrix, nmatrix))
    vtm = self.v.build()
    pts = (vtm * self.dataPointMatrix.T).T
    self.factorlist = []
    # If the color list is not empty, redraw every point with radius 5 and
    # the color already stored for it, then stop.
    # NOTE(review): the branches below store RGB tuples in self.colorlist,
    # but this branch passes the entry straight to ``fill`` -- confirm that
    # tuples can never reach this point.
    if len(self.colorlist) > 0:
        for i in range(pts.shape[0]):
            self.factorlist.append(5)
            dx = self.factorlist[i]
            x = pts[i, 0]
            y = pts[i, 1]
            self.color = self.colorlist[i]
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill=self.color, outline='')
            self.objects.append(pt)
        return
    self.colorlist = []
    # Color and size dimensions chosen: the normalized color value is mapped
    # onto an RGB triple of (255 - a, 255 - a, a) with a = value * 255, and
    # the normalized size value is scaled by 7 to give the point radius.
    if colorpt != None and sizept != None:
        self.clearData()
        colornorm = an.normalize_columns_separately(self.data, [colorpt])
        self.colorlist = []
        for i in range(colornorm.shape[0]):
            alpha = colornorm[i, 0] * 255
            self.colorlist.append((int(255 - alpha), int(255 - alpha), int(alpha)))
        sizenorm = an.normalize_columns_separately(self.data, [sizept])
        for i in range(sizenorm.shape[0]):
            self.factorlist.append(sizenorm[i, 0] * 7)
        for i in range(pts.shape[0]):
            dx = self.factorlist[i]
            x = pts[i, 0]
            y = pts[i, 1]
            self.color = self.colorlist[i]
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill="#%02X%02X%02X" % self.color, outline='')
            self.objects.append(pt)
    # Only the color dimension chosen: same color mapping, every point is
    # drawn with radius 5.
    elif colorpt != None and sizept == None:
        self.clearData()
        colornorm = an.normalize_columns_separately(self.data, [colorpt])
        for i in range(colornorm.shape[0]):
            alpha = colornorm[i, 0] * 255
            self.colorlist.append((int(255 - alpha), int(255 - alpha), int(alpha)))
        for i in range(pts.shape[0]):
            self.factorlist.append(5)
            dx = self.factorlist[i]
            x = pts[i, 0]
            y = pts[i, 1]
            self.color = self.colorlist[i]
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill="#%02X%02X%02X" % self.color, outline='')
            self.objects.append(pt)
    # Neither color nor size chosen: red points with radius 5.
    elif colorpt == None and sizept == None:
        self.clearData()
        for i in range(pts.shape[0]):
            self.factorlist.append(5)
            self.colorlist.append('red')
            self.color = self.colorlist[i]
            x = pts[i, 0]
            y = pts[i, 1]
            dx = self.factorlist[i]
            pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx, fill=self.color, outline='')
            self.objects.append(pt)
    # If the most recent cluster entry has codes, redraw all points colored
    # by cluster, reusing the radii computed above.
    # NOTE(review): indexes self.clusterList[-1] unconditionally, so an empty
    # clusterList raises IndexError -- confirm it is always populated.
    if len(self.clusterList[-1][1])>0:
        self.clearData()
        for i in range(pts.shape[0]):
            codeVal=self.clusterList[-1][1][i]
            self.color=self.clustColorList[codeVal]
            x = pts[i, 0]
            y = pts[i, 1]
            dx = self.factorlist[i]
            pt = self.canvas.create_oval(x-dx, y-dx, x+dx, y+dx, fill=self.color, outline='')
            self.objects.append(pt)
# print out the types
print("\nTypes:")
types = dobj.get_types()
# join() avoids the manual accumulation loop, whose loop variable shadowed
# the builtin ``type``.
s = ", ".join(types)
print(s)

# summary statistics for the selected headers
r = analysis.data_range(headers, dobj)
print("Data Range:\n ", r)

mean = analysis.mean(headers, dobj)
print("Mean: \n", mean)

std = analysis.stdev(headers, dobj)
print("Standard Deviation: \n", std)

nor_m1 = analysis.normalize_columns_separately(headers, dobj)
print("Normalized Columns Separately: \n", nor_m1)

nor_m2 = analysis.normalize_columns_together(headers, dobj)
print("Normalized Columns Together: \n", nor_m2)

#dobj.add_colummn('new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14])
#print("\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]")
#print("----- New Matrix: -----")
#print(dobj.get_whole_matrix())
print("---------------------------------")
def buildPoints(self, headers):
    """Plot the columns named in ``headers`` with the selected marker shape.

    The first two normalized columns are x and y. ``self.menuFlags[2]``,
    ``[3]`` and ``[4]`` say whether a z, size and colour column follow (in
    that order, each taking the next column of the normalized data). The
    data source is the PCA selected in the list box when ``self.pcaplot`` is
    set, otherwise ``self.data``.

    Points are drawn with the shape chosen in ``self.vshape`` and stored in
    ``self.points``. ``self.plot``, ``self.size``, ``self.red`` and
    ``self.green`` are updated as side effects.
    """
    print("\n\nheaders for buildPoints:", headers)

    # delete existing canvas objects used for plotting data
    for point in self.points:
        self.canvas.delete(point)
    self.points = []

    if self.pcaplot:
        source = self.PCAs[self.PCAlistbox.curselection()[0]]
    else:
        source = self.data
    self.plot_data = analysis.normalize_columns_separately(headers, source)

    self.plot = self.plot_data[:, :2]
    num_rows = len(self.plot)

    # Optional columns are packed in order after x and y, so each one that
    # is present consumes the next column index.
    next_col = 2
    if self.menuFlags[2]:
        self.plot = np.hstack((self.plot, self.plot_data[:, next_col]))
        next_col += 1
    else:
        self.plot = np.hstack((self.plot, np.zeros((num_rows, 1))))

    if self.menuFlags[3]:
        self.size = self.plot_data[:, next_col]
        next_col += 1
    else:
        self.size = np.ones((num_rows, 1))
    self.size = 3 * self.size + 1

    if self.menuFlags[4]:
        color = self.plot_data[:, next_col]
        self.green = -255 * color + 255
        self.red = 255 * color
    else:
        self.green = np.ones((num_rows, 1))
        self.red = np.ones((num_rows, 1))

    # homogeneous coordinate
    self.plot = np.hstack((self.plot, np.ones((self.plot.shape[0], 1))))

    # put the points through the view transformation
    vtm = self.view.build()
    pts = (vtm * self.plot.T).T

    # Flatten once to plain 1-D arrays so each element is a true scalar;
    # calling int() on a 1x1 matrix is deprecated in recent NumPy releases.
    sizes = np.asarray(self.size).ravel()
    reds = np.asarray(self.red).ravel()
    greens = np.asarray(self.green).ravel()
    shape = self.vshape.get()

    for i in range(len(pts)):
        x = pts[i, 0]
        y = pts[i, 1]
        radius = sizes[i]
        left, right = int(x - radius), int(x + radius)
        top, bottom = int(y - radius), int(y + radius)
        fill = "#%02x%02x%02x" % (int(reds[i]), int(greens[i]), 0)

        # Extension 1 gives the user the capability to use different shapes.
        if shape == 'Circle':
            pt = self.canvas.create_oval(left, top, right, bottom,
                                         fill=fill, outline='')
        elif shape == 'Rectangle':
            pt = self.canvas.create_rectangle(left, top, right, bottom,
                                              fill=fill, outline='')
        elif shape == 'Triangle':
            # Polygons previously ignored the colour dimension and were
            # always drawn in Tk's default fill.
            pt = self.canvas.create_polygon(left, bottom, right, bottom,
                                            int(x), top,
                                            fill=fill, outline='')
        elif shape == 'Pentagon':
            pt = self.canvas.create_polygon(left, int(y), int(x), bottom,
                                            right, int(y), right, top,
                                            left, top,
                                            fill=fill, outline='')
        else:
            pt = self.canvas.create_arc(left, top, right, bottom,
                                        fill=fill, outline='')

        # put the point object into self.points
        self.points.append(pt)
def build_linear_regression(self, independent_variable, dependent_variable):
    """Plot two columns and the line of their simple linear regression.

    The normalized columns are padded with a zero z column and a homogeneous
    coordinate and drawn with the marker chosen in ``self.vshape``. The
    regression is computed by ``analysis.single_linear_regression``; its line
    is created in red, a label with slope, intercept and R-value is placed
    on the canvas, and the axes, fits and points are refreshed.
    """
    # initialize the matrix of data we want to do a linear regression on
    self.plot = analysis.normalize_columns_separately(
        [independent_variable, dependent_variable], self.data)
    num_rows = self.plot.shape[0]
    self.plot = np.hstack((self.plot, np.zeros((num_rows, 1))))
    self.plot = np.hstack((self.plot, np.ones((num_rows, 1))))

    # Transform with the view matrix on the left; multiplying the row-major
    # points by the untransposed matrix gave wrong initial positions.
    vtm = self.view.build()
    pts = (vtm * self.plot.T).T

    # initialize self.size so that our movement functions don't break
    self.size = []
    shape = self.vshape.get()
    radius = 3

    # make a graphical point for each data point
    for i in range(len(pts)):
        self.size.append(radius)
        x = pts[i, 0]
        y = pts[i, 1]
        left, right = int(x - radius), int(x + radius)
        top, bottom = int(y - radius), int(y + radius)

        if shape == 'Triangle':
            pt = self.canvas.create_polygon(left, bottom, right, bottom,
                                            int(x), top,
                                            fill='black', outline='')
        elif shape == 'Pentagon':
            pt = self.canvas.create_polygon(
                (left, int(y), int(x), bottom, right, int(y),
                 right, top, left, top))
        elif shape == 'Circle':
            pt = self.canvas.create_oval(left, top, right, bottom,
                                         fill='black', outline='')
        elif shape == 'Rectangle':
            pt = self.canvas.create_rectangle(left, top, right, bottom,
                                              fill='black', outline='')
        else:
            pt = self.canvas.create_arc(left, top, right, bottom,
                                        fill='black', outline='')
        self.points.append(pt)

    linres = analysis.single_linear_regression(
        self.data, independent_variable, dependent_variable)
    slope = linres[0]
    intercept = linres[1]
    rvalue = linres[2]
    xmin = linres[5]
    xmax = linres[6]
    ymin = linres[7]
    ymax = linres[8]

    # Fit evaluated at both ends of the x range, rescaled by the y range.
    yends = [((xmin * slope + intercept) - ymin) / (ymax - ymin),
             ((xmax * slope + intercept) - ymin) / (ymax - ymin)]

    # Columns are the two endpoints in (x, y, z, 1) form.
    self.regression_endpoints = np.matrix([[0.0, 1.0],
                                           [yends[0], yends[1]],
                                           [0, 0],
                                           [1, 1]])

    # NOTE(review): the line and label are created from untransformed
    # coordinates; presumably updateFits() below repositions them -- confirm.
    self.line_of_fit = (self.canvas.create_line(
        self.regression_endpoints[0, 0], self.regression_endpoints[1, 0],
        self.regression_endpoints[0, 1], self.regression_endpoints[1, 1],
        fill="red"))
    self.regression_lines.append(self.line_of_fit)

    self.fit_label = tk.Label(self.canvas,
                              text="slope: " + str(slope)
                              + "\nIntercept: " + str(intercept)
                              + "\nR-value: " + str(rvalue))
    self.fit_label.place(x=self.regression_endpoints[0, 1],
                         y=self.regression_endpoints[1, 1])

    self.updateAxes()
    self.updateFits()
    self.updatePoints()
def build_3d_linear_regression(self, independent_variables,
                               dependent_variable):
    """Plot two independent columns against a dependent one with a fit line.

    The three normalized columns plus a homogeneous coordinate are drawn as
    small black ovals. The regression is computed by
    ``analysis.linear_regression``; a red line and a label with both slopes,
    the intercept and the R-value are created, and the axes, fits and points
    are refreshed.
    """
    self.plot = analysis.normalize_columns_separately([
        independent_variables[0], independent_variables[1],
        dependent_variable
    ], self.data)
    self.plot = np.hstack((self.plot, np.ones((self.plot.shape[0], 1))))

    # Transform with the view matrix on the left; multiplying the row-major
    # points by the untransposed matrix gave wrong initial positions.
    vtm = self.view.build()
    pts = (vtm * self.plot.T).T

    # initialize self.size so that our movement functions don't break
    self.size = []

    # make a graphical point for each data point
    for i in range(len(pts)):
        self.size.append(1)
        x = pts[i, 0]
        y = pts[i, 1]
        pt = self.canvas.create_oval(int(x - 1), int(y - 1),
                                     int(x + 1), int(y + 1),
                                     fill="black", outline='')
        self.points.append(pt)

    linres = analysis.linear_regression(self.data, independent_variables,
                                        dependent_variable)
    slope0 = linres[0]
    slope1 = linres[1]
    intercept = linres[2]
    rvalue = linres[4]

    # Each range is looked up once instead of once per bound.
    x_range = analysis.data_range([independent_variables[0]], self.data)[0]
    y_range = analysis.data_range([independent_variables[1]], self.data)[0]
    z_range = analysis.data_range([dependent_variable], self.data)[0]
    xmin, xmax = x_range[0], x_range[1]
    ymin, ymax = y_range[0], y_range[1]
    zmin, zmax = z_range[0], z_range[1]

    yends = [
        ((xmin * slope0[0, 0] + intercept[0, 0]) - ymin) / (ymax - ymin),
        ((xmax * slope0[0, 0] + intercept[0, 0]) - ymin) / (ymax - ymin)
    ]
    zends = [
        ((xmin * slope1[0, 0] + intercept[0, 0]) - zmin) / (zmax - zmin),
        ((xmax * slope1[0, 0] + intercept[0, 0]) - zmin) / (zmax - zmin)
    ]

    # Columns are the two endpoints in (x, y, z, 1) form.
    self.regression_endpoints = np.matrix([[0.0, 1.0],
                                           [yends[0], yends[1]],
                                           [zends[0], zends[1]],
                                           [1, 1]])
    print("self.regression_endpoints", self.regression_endpoints)

    # NOTE(review): the line and label are created from untransformed
    # coordinates; presumably updateFits() below repositions them -- confirm.
    self.line_of_fit = (self.canvas.create_line(
        self.regression_endpoints[0, 0], self.regression_endpoints[1, 0],
        self.regression_endpoints[0, 1], self.regression_endpoints[1, 1],
        fill="red"))
    self.regression_lines.append(self.line_of_fit)

    self.fit_label = tk.Label(self.canvas,
                              text="slope0: " + str(slope0[0, 0])
                              + "\nslope1: " + str(slope1[0, 0])
                              + "\nIntercept: " + str(intercept[0, 0])
                              + "\nR-value: " + str(rvalue))
    self.fit_label.place(x=self.regression_endpoints[0, 1],
                         y=self.regression_endpoints[1, 1])

    self.updateAxes()
    self.updateFits()
    self.updatePoints()
def buildPoints(self, inputHeaders):
    '''Takes list of headers, deletes existing objects representing data,
    builds new set of data points.

    ``inputHeaders`` is indexed as [x, y, z, color, size]; the z, color and
    size entries may be ``None``. Colors and sizes are stored per point in
    ``self.clist`` and ``self.slist``, the padded point matrix in
    ``self.data2plot``, and the created canvas items in ``self.objects``.
    '''
    # Plot the active PCA / cluster result when any exist.
    # NOTE(review): both lookups read the selection from self.pcaBoxA --
    # confirm the cluster objects are listed in that same box.
    if len(self.PCAObjects) > 0:
        active = self.pcaBoxA.index(tk.ACTIVE)
        self.dataObj = self.PCAObjects[active]
    if len(self.ClusterObjects) > 0:
        active = self.pcaBoxA.index(tk.ACTIVE)
        self.dataObj = self.ClusterObjects[active]

    # clear the canvas
    for obj in self.objects:
        self.canvas.delete(obj)
    self.objects = []

    # Start the per-point lists afresh: they are indexed from 0 when drawing,
    # so entries left over from a previous plot would otherwise be reused.
    self.clist = []
    self.slist = []

    has_z = inputHeaders[2] is not None

    # first two variables are given; include z when it has been selected
    if has_z:
        self.data2plot = analysis.normalize_columns_separately(
            inputHeaders[0:3], self.dataObj)
    else:
        self.data2plot = analysis.normalize_columns_separately(
            inputHeaders[0:2], self.dataObj)
    num_rows = self.data2plot.shape[0]

    # color: black by default, otherwise a blue-to-red gradient
    if inputHeaders[3] is None:
        for _ in range(num_rows):
            self.clist.append((0, 0, 0))
    else:
        cmatrix = analysis.normalize_columns_separately(
            [inputHeaders[3]], self.dataObj)
        for i in range(cmatrix.shape[0]):
            level = float(cmatrix[i, 0])
            self.clist.append((int(level * 255), 0, int(255 * (1 - level))))

    # size: radius 3 by default, otherwise the normalized value scaled by 5
    if inputHeaders[4] is None:
        for _ in range(num_rows):
            self.slist.append(3.0)
    else:
        smatrix = analysis.normalize_columns_separately(
            [inputHeaders[4]], self.dataObj)
        for i in range(smatrix.shape[0]):
            self.slist.append(smatrix[i, 0] * 5)

    zeros = np.zeros((num_rows, 1))
    ones = np.ones((num_rows, 1))
    if has_z:
        self.data2plot = np.hstack((self.data2plot, ones))
    else:
        self.data2plot = np.hstack((self.data2plot, zeros, ones))
    # The ``np.float`` alias was removed in NumPy 1.24; the builtin is the
    # documented replacement and selects the same dtype.
    self.data2plot = self.data2plot.astype(float)

    # build vtm and use it to transform the data
    vtm = self.view.build()
    pts = (vtm * self.data2plot.T).T

    # create the canvas graphics objects
    for i in range(pts.shape[0]):
        radius = self.slist[i]
        self.objects.append(self.canvas.create_oval(
            pts[i, 0] - radius, pts[i, 1] - radius,
            pts[i, 0] + radius, pts[i, 1] + radius,
            fill='#%02x%02x%02x' % self.clist[i]))

    self.updatePoints()
def buildPoints(self):
    """Discard the current plot and redraw every point in a fresh view.

    Reads self.headernames (2 or 3 spatial columns), self.color_axis and
    self.size_axis; writes self.raw, self.data, self.coords, self.colors,
    self.sizes and refills self.objects with the new canvas ovals.
    """
    # drop whatever was drawn before
    for item in self.objects:
        self.canvas.delete(item)
    self.objects = []

    # start again from the default orientation
    self.vobj = view.View().clone()
    self.updateAxes()
    self.updateLabels()

    self.raw = self.dobj.getNumCol(self.rawheaders)
    self.data = analysis.normalize_columns_separately(self.headernames,
                                                      self.dobj)

    dims = len(self.headernames)
    if dims == 2:
        # 2-D selection: pad with a zero z column, then the homogeneous 1
        zero_col = self.data.shape[0] * [[0]]
        one_col = self.data.shape[0] * [[1]]
        self.coords = np.hstack((self.data, zero_col, one_col))
        self.y_axis_label.set(self.headernames[1])
        self.z_axis_label.set(None)
    elif dims == 3:
        # 3-D selection: only the homogeneous coordinate is missing
        one_col = self.data.shape[0] * [[1]]
        self.coords = np.hstack((self.data, one_col))
        self.y_axis_label.set(self.headernames[1])
        self.z_axis_label.set(self.headernames[2])

    if self.color_axis is None:
        # no color column chosen: every point gets the value 1
        self.colors = np.matrix([[1]] * self.data.shape[0])
    else:
        self.colors = analysis.normalize_columns_separately(
            [self.color_axis], self.dobj)

    if self.size_axis is None:
        # no size column chosen: every point gets radius 3
        self.sizes = np.matrix([[3]] * self.data.shape[0])
    else:
        self.sizes = analysis.normalize_columns_separately(
            [self.size_axis], self.dobj)

    # project into view coordinates; columns of `projected` are points
    projected = self.vobj.build() * self.coords.T
    for col in range(projected.shape[1]):
        radius = float(self.sizes[col, 0])
        cx = projected[0, col]
        cy = projected[1, col]
        shade = float(self.colors[col, 0])
        # shade fades from green (0) to red (1)
        rgb = (int(shade * 255), int((1 - shade) * 255), 0)
        self.objects.append(
            self.canvas.create_oval(cx - radius, cy - radius,
                                    cx + radius, cy + radius,
                                    fill='#%02x%02x%02x' % rgb,
                                    outline=''))
def alignPCA(self):
    """Plot a user-selected mix of PCA columns and original data columns.

    The dialog returns indices into the concatenation
    ``self.PCA.get_headers() + self.data.get_headers()``. An index below the
    number of PCA headers selects a PCA column; any other index selects an
    original data column. The chosen columns are normalized and stored in
    self.dataMatrix in homogeneous form, optional color and size columns are
    resolved the same way, and self.buildAxes() is called at the end.
    """
    if self.PCAanalysis != [] and self.PCAListbox.curselection() != ():
        self.PCA = self.PCAanalysis[self.PCAListbox.curselection()[0]]
    if self.PCA is None:
        print("you don't have any data")
        return

    pca_headers = self.PCA.get_headers()
    raw_headers = self.data.get_headers()
    pca_count = len(pca_headers)

    def resolve(index):
        # Map a dialog index onto (data object, header name, came-from-PCA).
        if index < pca_count:
            return self.PCA, pca_headers[index], True
        return self.data, raw_headers[index - pca_count], False

    variables = Dialogs.selectPCAData(self.root, pca_headers + raw_headers)
    if variables.result == []:
        print("you didn't pick anything")
        return

    # Spatial columns: normalize each pick and stack them side by side.
    self.dataheaders = []
    columns = []
    for position, index in enumerate(variables.result):
        source, header, _ = resolve(index)
        columns.append(analysis.normalize_columns_separately(source, (header,)))
        if position > 0:
            # the original echoed every header after the first
            print(header)
        self.dataheaders.append(header)
    self.dataMatrix = np.hstack(columns)

    # A 2-D selection gets a zero z column before the homogeneous column.
    if len(variables.result) == 2:
        self.dataMatrix = np.hstack(
            (self.dataMatrix, np.zeros(shape=(len(self.dataMatrix), 1))))
    homogenous_coordinates = np.ones(shape=(len(self.dataMatrix), 1))
    self.dataMatrix = np.hstack((self.dataMatrix, homogenous_coordinates))

    # Optional color column.
    if variables.resultc is not None:
        source, header, from_pca = resolve(variables.resultc)
        if from_pca and header[:7] == 'cluster':
            # cluster columns are used raw rather than normalized
            self.colorMatrix = self.PCA.get_data((header,))
        else:
            self.colorMatrix = analysis.normalize_columns_separately(source, (header,))
        self.colorResult = header
    else:
        self.colorResult = None
        self.colorMatrix = None

    # Optional size column. The original tested variables.resultc here, so
    # the size column was looked up in the wrong header list whenever the
    # color and size picks fell on different sides of the PCA/raw boundary.
    if variables.results is not None:
        source, header, _ = resolve(variables.results)
        self.sizeMatrix = analysis.normalize_columns_separately(source, (header,))
        self.sizeResult = header
    else:
        self.sizeResult = None
        self.sizeMatrix = None

    self.rows = len(self.dataMatrix)
    self.buildAxes()
def main():
    """Command-line smoke test for the data and analysis modules.

    Reads the CSV named by sys.argv[1], prints its headers, types and a
    series of summary statistics, appends a numeric column, and writes the
    resulting matrix to 'foo.csv' in the current directory.
    """
    numpy.set_printoptions(suppress=True)
    print("\n----- Database Info -----")
    if len(sys.argv) < 2:
        print('Usage: python %s <csv filename>' % (sys.argv[0]))
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist; the exit status is left at 0 as before.
        sys.exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(sys.argv[1])
    print("\nName: ", dobj.get_filename())

    # print out information about the data
    print('Number of rows: ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())

    # str.join also copes with an empty list, where indexing [0] raised
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    print("\nNumeric Headers:")
    nheaders = dobj.get_numericheaders()
    print(", ".join(nheaders))

    print("\nTypes:")
    col_types = dobj.get_types()
    print(", ".join(col_types))

    r = analysis.data_range(headers, dobj)
    print("Data Range:\n ", r)
    mean = analysis.mean(headers, dobj)
    print("Mean: \n", mean)
    std = analysis.stdev(headers, dobj)
    print("Standard Deviation: \n", std)

    # normalization is only attempted when every column is numeric
    if headers == nheaders:
        nor_m1 = analysis.normalize_columns_separately(headers, dobj)
        print("Normalized Columns Separately: \n", nor_m1)
        nor_m2 = analysis.normalize_columns_together(headers, dobj)
        print("Normalized Columns Together: \n", nor_m2)

    total = analysis.sumup(headers, dobj)
    print("Sum:\n", total)
    print("Variance:\n", analysis.variance(headers, dobj))

    # EXTENSION5 ADD COLUMN
    # NOTE(review): the 14 hard-coded values presumably match the row count
    # of the intended test file -- confirm before using other inputs.
    dobj.add_colummn('new col', 'numeric',
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    print(
        "\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]"
    )
    print("----- New Matrix: -----")
    m = dobj.get_whole_matrix()
    print(m)
    print('Number of rows: ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())
    print("---------------------------------")

    # EXTENSION6 WRITE TO A CSV file
    a = numpy.asarray(m)
    # newline='' lets the csv writer control line endings itself; without
    # it, platforms that translate '\n' emit an extra blank line per row.
    with open('foo.csv', 'w', newline='') as outputfile:
        wr = csv.writer(outputfile, delimiter=',')
        wr.writerow(dobj.get_headers())
        wr.writerow(dobj.get_types())
        wr.writerows(a)
def drawClustering(self):
    """Plot the clustered data with user-chosen axes, sizes and colors.

    Opens a DataDialog over the headers of self.clusterData. x and y are
    required; z, size and color are optional (empty string means unset).
    Stores the homogeneous point matrix in self.dataMatrix, the per-point
    radii in self.sizes and fill colors in self.colors, then appends one
    canvas oval per row to self.objects.
    """
    if self.clusterData is None:
        print("Run a cluster analysis")
        return

    dialog = DataDialog(self.root, self.clusterData.get_headers(), "Choose columns")
    if dialog.x == "NaN" or dialog.y == "NaN":
        print("You must choose columns for the x and y axes")
        return

    self.clear()
    self.reset()

    if dialog.z != "":
        matrix = analysis.normalize_columns_separately(
            self.clusterData, [dialog.x, dialog.y, dialog.z])
    else:
        matrix = analysis.normalize_columns_separately(
            self.clusterData, [dialog.x, dialog.y])
        # 2-D selection: pad with a zero z column
        matrix = np.hstack((matrix, np.matrix(np.zeros(matrix.shape[0])).T))

    # Every row count below is taken from the matrix actually being plotted.
    # The original asked self.data for it, which only works while self.data
    # and self.clusterData happen to have the same number of rows.
    rows = matrix.shape[0]
    self.dataMatrix = np.hstack((matrix, np.matrix(np.ones(rows)).T))

    # calculate view coordinates
    vtm = self.view.build()
    pts = (vtm * self.dataMatrix.T).T

    if dialog.size != '':
        size = analysis.normalize_columns_separately(self.clusterData, [dialog.size])
        # square root of the normalized value, scaled to an integer radius >= 1
        self.sizes = [int(math.sqrt(value) * 3 + 1) for value in size.T.tolist()[0]]
    else:
        self.sizes = [2] * rows

    # handle color data
    if dialog.color != "":
        self.colorMatrix = self.clusterData.get_data([dialog.color])
        if dialog.pre.get() == 1:
            self.colors = self.preselectColors(self.colorMatrix)
        else:
            # normalize column with mean and standard deviation
            mean = np.mean(self.colorMatrix, axis=0)
            std = np.std(self.colorMatrix, axis=0)
            if std == 0:
                # constant column: every z-score is zero
                color = np.matrix(np.zeros(
                    (self.colorMatrix.shape[0], self.colorMatrix.shape[1])))
            else:
                color = (self.colorMatrix - mean) / np.matrix(std, dtype=float)
            # map z-scores in [-2.5, 2.5] onto [0, 1]; outliers are clamped
            color = (color + 2.5) / 5

            self.colors = []
            for i in range(color.shape[0]):
                level = min(max(float(color[i, 0]), 0.0), 1.0)
                # %02x needs integers; Python 3 rejects floats here
                rgb = (0, int(level * 255), int((1 - level) * 255))
                self.colors.append('#%02x%02x%02x' % rgb)
    else:
        self.colors = ['#000000'] * rows

    for i in range(len(pts)):
        radius = self.sizes[i]
        pt = self.canvas.create_oval(pts[i, 0] - radius, pts[i, 1] - radius,
                                     pts[i, 0] + radius, pts[i, 1] + radius,
                                     fill=self.colors[i], outline='')
        self.objects.append(pt)
def main(argv):
    """Exercise the Data class and the analysis normalizers from the CLI.

    argv -- command-line argument list; argv[1] must name a CSV file.

    Prints the file's dimensions, headers, types, one row, two columns and
    the full table, then adds a column and prints the normalized first and
    third numeric columns.
    """
    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        exit(0)

    # create a data object, which reads in the data
    dobj = Data(argv[1])

    # print out information about the data
    print('Number of rows: ', dobj.get_num_points())
    print('Number of columns: ', dobj.get_num_dimensions())

    # print out the headers
    print("\nHeaders:")
    headers = dobj.get_raw_headers()
    print(",".join(headers))

    # print out the types
    print("\nTypes:")
    col_types = dobj.get_raw_types()
    print(", ".join(col_types))

    # print out a single row
    print("\nPrinting row index 2:")
    print(dobj.get_row(2))

    # print out cols
    c = dobj.getCol([headers[0], headers[1]])
    # was "\Select": an invalid escape that printed a literal backslash
    print("\nSelect the 1st and 2nd col:")
    print(c)

    # print out all of the data, one row per line; values are delimited so
    # adjacent numbers no longer run together
    print("\n All Data:")
    for i in range(dobj.get_num_points()):
        print(", ".join(str(dobj.get_value(header, i)) for header in headers))

    # EXTENSION
    print("\nAdd a Column")
    dobj.addColumn("new col", "numeric", [[0], [1], [2]])
    print('Number of columns: ', dobj.get_num_dimensions())

    print("--- testing manipulations on the 1st and the 3rd NUMERIC columns:---")
    num_headers = dobj.get_num_headers()
    picked = [num_headers[0], num_headers[2]]
    d = dobj.getCol(picked)
    print(d)

    # test normalized columns
    print("\n Normalize by columns")
    sep_norm = analysis.normalize_columns_separately(picked, dobj)
    print(sep_norm)

    # test normalized matrix
    print("\n Normalize the whole matrix")
    tog_norm = analysis.normalize_columns_together(picked, dobj)
    print(tog_norm)
def buildLinearRegression(self, indx, indz, dep, export, filename):
    """Plot the data, fit a linear regression and draw the fitted line.

    indx     -- header of the independent variable shown on the x axis
    indz     -- header of a second independent variable, '' for none
    dep      -- header of the dependent variable shown on the y axis
    export   -- when equal to 1, write the fit summary to a text file
    filename -- base name of the export file; ".txt" is appended

    Points go to self.objects, the line to self.regressionObjects and the
    equation label to self.labels.
    """
    rows = self.data.get_raw_num_rows()

    if indz != '':
        matrix = analysis.normalize_columns_separately(self.data, [indx, dep, indz])
    else:
        matrix = analysis.normalize_columns_separately(self.data, [indx, dep])
        # single-variable case: pad with a zero z column
        matrix = np.hstack((matrix, np.matrix(np.zeros(rows)).T))
    self.dataMatrix = np.hstack((matrix, np.matrix(np.ones(rows)).T))

    # calculate view coordinates
    vtm = self.view.build()
    pts = (vtm * self.dataMatrix.T).T

    # use points with default size and color
    self.sizes = [2] * rows
    self.colors = ['#000000'] * rows
    for i in range(len(pts)):
        radius = self.sizes[i]
        pt = self.canvas.create_oval(pts[i, 0] - radius, pts[i, 1] - radius,
                                     pts[i, 0] + radius, pts[i, 1] + radius,
                                     fill=self.colors[i], outline='')
        self.objects.append(pt)

    if indz == '':
        # calculate single variable linear regression
        slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
            self.data.get_data([indx, dep]))
        ranges = analysis.data_range(self.data, [indx, dep])
        # predicted y at both x extremes, rescaled into the normalized y range
        y_span = ranges[1][1] - ranges[1][0]
        end1y = ((ranges[0][0] * slope + intercept) - ranges[1][0]) / y_span
        end2y = ((ranges[0][1] * slope + intercept) - ranges[1][0]) / y_span
        self.regressionMatrix = np.matrix([[0.0, end1y, 0.0, 1.0],
                                           [1.0, end2y, 0.0, 1.0]])
        eqn = "y = %.3fx + %.3f \nR = %.3f" % (slope, intercept, r_value)
        stats_text = "p = %.3f \nStandard error = %.3f" % (p_value, std_err)
        out = eqn + "\n" + stats_text
    else:
        # calculate multiple variable linear regression
        b, sse, r2, t, p = analysis.linear_regression(self.data, [indx, indz], dep)
        ranges = analysis.data_range(self.data, [indx, indz, dep])
        dep_span = ranges[2][1] - ranges[2][0]
        end1y = ranges[0][0] * b[0] + ranges[1][0] * b[1] + b[2]
        end1y = (end1y - ranges[2][0]) / dep_span
        end2y = ranges[0][1] * b[0] + ranges[1][1] * b[1] + b[2]
        end2y = (end2y - ranges[2][0]) / dep_span
        self.regressionMatrix = np.matrix([[0.0, end1y, 0.0, 1.0],
                                           [1.0, end2y, 1.0, 1.0]])
        eqn = "y = %.3fx + %.3fz + %.3f \nR^2 = %.3f" % (b[0], b[1], b[2], r2)
        sse_data = "Sum-squared error = %.3f" % (sse)
        p_data = "p = [%.3f, %.3f, %.3f]" % (p[0, 0], p[0, 1], p[0, 2])
        t_data = "t-statistic = [%.3f, %.3f, %.3f]" % (t[0, 0], t[0, 1], t[0, 2])
        out = eqn + "\n" + sse_data + "\n" + p_data + "\n" + t_data

    # display regression onscreen
    self.canvas.itemconfig(self.labels[0], text="x")
    self.canvas.itemconfig(self.labels[1], text="y")
    self.canvas.itemconfig(self.labels[2], text="z")
    endpts = (vtm * self.regressionMatrix.T).T
    line = self.canvas.create_line(endpts[0, 0], endpts[0, 1],
                                   endpts[1, 0], endpts[1, 1], fill="red")
    self.regressionObjects.append(line)
    regLabel = self.canvas.create_text(endpts[1, 0] + 120, endpts[1, 1] + 20,
                                       text=eqn)
    self.labels.append(regLabel)

    # write linear regression function to file
    if export == 1:
        title = "Linear regression for " + str(self.fn)
        # the context manager closes the file even if the write fails
        with open(filename + ".txt", 'w') as outfile:
            outfile.write(title + "\n" + out)
def buildLinearRegression(self, headers):
    """Plot headers[0] against headers[1] and overlay the fitted line.

    headers -- list whose first two entries name the x and y columns.
               If either entry is None the method returns without drawing.

    Points are appended to self.objects, the line id to self.linRegLines,
    and the slope / intercept / r^2 summary replaces the text in
    self.statslabel.
    """
    # x and y are automatically first two dimensions
    xdatahead = headers[0]
    ydatahead = headers[1]
    # Validate before touching the data: the original normalized first, so
    # a missing header reached the analysis code before this check.
    if xdatahead is None or ydatahead is None:
        return

    norm = an.normalize_columns_separately(self.data, headers)
    rows = norm.shape[0]
    dmatrix = np.matrix(norm)
    # zero z column plus the homogeneous coordinate
    nmatrix = np.matrix((np.zeros(rows), np.ones(rows))).T
    self.dataPointMatrix = np.hstack((dmatrix, nmatrix))

    vtm = self.v.build()
    pts = (vtm * self.dataPointMatrix.T).T
    dx = 5
    for i in range(pts.shape[0]):
        x = pts[i, 0]
        y = pts[i, 1]
        pt = self.canvas.create_oval(x - dx, y - dx, x + dx, y + dx,
                                     fill='blue', outline='')
        self.objects.append(pt)

    xdata = np.array(self.data.get_data([xdatahead]).T)[0]
    ydata = np.array(self.data.get_data([ydatahead]).T)[0]
    slope, intercept, r_value, p_value, slope_std_error = st.linregress(xdata, ydata)
    r2_value = r_value * r_value

    rangex = an.data_range(self.data, [xdatahead])
    rangey = an.data_range(self.data, [ydatahead])
    # predicted y at both x extremes, rescaled into the normalized y range
    yspan = rangey[0, 1] - rangey[0, 0]
    yend0 = ((rangex[0, 0] * slope + intercept) - rangey[0, 0]) / yspan
    yend1 = ((rangex[0, 1] * slope + intercept) - rangey[0, 0]) / yspan

    # Diagnostic output, written so it prints the same text under the
    # print statement and the print function.
    print("minx %s" % (rangex[0, 0],))
    print("maxx %s" % (rangex[0, 1],))
    print("miny %s" % (rangey[0, 0],))
    print("maxy %s" % (rangey[0, 1],))

    linemtrxcol1 = np.matrix([[0.0], [yend0], [0.0], [1.0]])
    linemtrxcol2 = np.matrix([[1.0], [yend1], [0.0], [1.0]])
    # endpoints are stored as columns, so no transpose is needed below
    self.linRegEndpoints = np.hstack((linemtrxcol1, linemtrxcol2))
    print("vtm %s" % (vtm,))
    print("linRegEndpoints %s" % (self.linRegEndpoints,))
    le = vtm * self.linRegEndpoints
    print("le %s" % (le,))

    self.linRegLines.append(
        self.canvas.create_line(le[0, 0], le[1, 0], le[0, 1], le[1, 1],
                                fill="red", tags="X"))
    self.statslabel.delete('1.0', tk.END)
    self.statslabel.insert(
        tk.END,
        "Slope: " + str(slope) + " " + "Intercept: " + str(intercept)
        + " " + "r^2 value: " + str(r2_value))
def buildLinearRegression(self, independent, dependent):
    """Plot independent vs. dependent and draw the least-squares line.

    independent -- header of the column plotted on the x axis
    dependent   -- header of the column plotted on the y axis

    Points are appended to self.objects, the line id to self.LRobjects, and
    a tk.Label showing the slope is placed at the line's second endpoint.
    """
    dx = 5
    dy = 5

    # normalize the two columns separately and place them side by side
    x_norm = analysis.normalize_columns_separately([independent], self.data)
    y_norm = analysis.normalize_columns_separately([dependent], self.data)
    rows = self.data.get_num_rows()

    # add a third column of zeros (z) and a fourth column of ones
    # (homogeneous coordinate)
    self.data2matrix = np.hstack(
        (x_norm, y_norm, np.zeros((rows, 1)), np.ones((rows, 1))))

    # build the VTM and multiply it by the data points
    vtm = self.view.build()
    tp = (vtm * self.data2matrix.T).T

    # build points
    for i in range(tp.shape[0]):
        tx = tp[i, 0]
        ty = tp[i, 1]
        pt = self.canvas.create_oval(tx - dx, ty - dy, tx + dx, ty + dy,
                                     fill="black", outline='')
        self.objects.append(pt)

    # calculate linear regression
    xy = self.data.get_data([independent, dependent])
    # linregress returns the standard error fifth, not r^2; the original
    # bound that value to `r2` and reported it under that name.
    slope, intercept, r_value, p_value, std_err = sc.linregress(xy)
    r2 = r_value ** 2
    print("%s %s %s" % (slope, intercept, r2))

    # get ranges (local names chosen to avoid shadowing the xrange builtin)
    x_range = analysis.data_range([independent], self.data)
    y_range = analysis.data_range([dependent], self.data)

    # make endpoints: predicted y at both x extremes, rescaled into the
    # normalized y range
    y_span = y_range[0][1] - y_range[0][0]
    value1 = ((x_range[0][0] * slope + intercept) - y_range[0][0]) / y_span
    value2 = ((x_range[0][1] * slope + intercept) - y_range[0][0]) / y_span
    self.LRendpoints = np.matrix([[0, value1, 0, 1],
                                  [1, value2, 0, 1]])

    # multiply the line endpoints by the vtm, then make tk obj out of endpoints
    points = (vtm * self.LRendpoints.T).T
    self.regLine = self.canvas.create_line(points[0, 0], points[0, 1],
                                           points[1, 0], points[1, 1],
                                           fill="Red", width=3)
    self.LRobjects.append(self.regLine)

    self.lineLabel = tk.Label(self.canvas,
                              text="Linear Regression:" + str(slope))
    self.lineLabel.place(x=points[1, 0], y=points[1, 1])
# Summary statistics: each entry pairs the printed section title with the
# name of the analysis function that produces the section. The function is
# looked up only when its section is reached, and the header list is
# fetched again for every call.
for _title, _func_name in (
        ("Range of Numeric Data", "data_range"),
        ("IQR of the Numeric Columns", "data_iqr"),
        ("Mean of the Numeric Columns", "data_mean"),
        ("Median of the Numeric Columns", "data_median"),
        ("StDev of the Numeric Columns", "data_stdev"),
        ("Variance of the Numeric Columns", "data_variance"),
        ("Normalized Numeric Columns", "normalize_columns_separately"),
        ("Normalized Numeric Array", "normalize_columns_together"),
):
    print(_title)
    print(getattr(analysis, _func_name)(data, data.get_headers()), "\n")

# Append a copy of column 0 as a new numeric column, then show the data.
print("Data with first row added to end")
data.add_col("added data", "numeric", data.subset(cols=[0]))
# NOTE(review): the return value is discarded here -- presumably __str__
# prints as a side effect; confirm against the Data class.
data.__str__()
print()
def buildPoints(self, headers, color, size, shapes, event=None):
    """Plot the selected columns with one color, size and shape.

    headers -- [x, y] or [x, y, z] header names; a None entry aborts with a
               message
    color   -- Tk color string used to fill every point
    size    -- point radius as a string convertible with int()
    shapes  -- "oval", "rectangle" or "arc"; other values draw nothing
    event   -- unused; accepted so the method can serve as a Tk callback

    Created canvas items are appended to self.objects.
    """
    selected_headers = headers
    if selected_headers[0] is None:
        print("Please Select an X Plot")
        return
    if selected_headers[1] is None:
        print("Please Select a Y Plot")
        return
    # Only look at the z entry when it exists, so a two-header selection
    # reaches the 2-D path below instead of raising IndexError.
    if len(selected_headers) > 2 and selected_headers[2] is None:
        print("Please Select a Z Plot")
        return
    # The original tested color[0] == None (and likewise for size and
    # shapes), which inspects the first character and raises on None.
    if not color:
        print("Please Select a Color")
        return
    if not size:
        print("Please Select a Size")
        return
    if not shapes:
        print("Please Select a Shape")
        return

    self.totalReset()
    dx = int(size)
    dy = int(size)
    a = analysis.normalize_columns_separately(selected_headers, self.data)

    # Legends do not depend on the row, so set them once rather than on
    # every loop iteration.
    is_3d = len(selected_headers) > 2
    self.xaxisLegend.set("X-axis:" + selected_headers[0])
    self.yaxisLegend.set("Y-axis:" + selected_headers[1])
    if is_3d:
        self.zaxisLegend.set("Z-axis:" + selected_headers[2])
    self.colorLegend.set("Color:" + color)
    self.sizeLegend.set("Size:" + size)
    self.shapeLegend.set("Shape:" + shapes)

    # homogeneous rows (x, y, z, 1); z is 0 for a 2-D selection
    temp = [[a[i, 0], a[i, 1], a[i, 2] if is_3d else 0, 1]
            for i in range(a.shape[0])]

    # Make VTM
    vtm = self.view.build()
    # Convert Data to Matrix
    self.data2matrix = np.matrix(temp)
    print(shapes)
    tp = (vtm * self.data2matrix.T).T

    # pick the canvas constructor for the requested shape
    creators = {
        "oval": self.canvas.create_oval,
        "rectangle": self.canvas.create_rectangle,
        "arc": self.canvas.create_arc,
    }
    create = creators.get(shapes)
    if create is None:
        # unknown shape: nothing is drawn, as before
        return

    for i in range(tp.shape[0]):
        tx = tp[i, 0]
        ty = tp[i, 1]
        pt = create(tx - dx, ty - dy, tx + dx, ty + dy,
                    fill=color, outline='')
        self.objects.append(pt)
def buildLinearRegression(self):
    """Plot self.dataheaders and overlay a linear-regression visualisation.

    With two headers a single red line is fitted via scipy.stats.linregress.
    With three, analysis.linear_regression is run with the first two headers
    as independents and the third as dependent, and four coloured lines are
    drawn from the fit.

    Results are stored in self.slope (list), self.intercept, self.r_squared,
    self.p_value and self.std_err; canvas ids go to self.gRegressLine or
    self.gRegressLines, and to self.glinText.
    """
    # Remove everything a previous fit left on the canvas. The original
    # only removed the single 2-D line, so the four lines of a 3-D fit and
    # their caption stayed behind on a refit.
    if self.gRegressLine is not None:
        self.canvas.delete(self.gRegressLine)
        self.gRegressLine = None
    for old_line in getattr(self, 'gRegressLines', []):
        self.canvas.delete(old_line)
    self.gRegressLines = []
    if getattr(self, 'glinText', None) is not None:
        self.canvas.delete(self.glinText)
        self.glinText = None

    temp_matrix = analysis.normalize_columns_separately(self.data, self.dataheaders)
    self.rows = len(temp_matrix)
    if len(self.dataheaders) == 2:
        # 2-D selection: pad with a zero z column
        temp_matrix = np.hstack((temp_matrix, np.zeros(shape=(self.rows, 1))))
    self.dataMatrix = np.hstack((temp_matrix, np.ones(shape=(self.rows, 1))))
    self.buildAxes()

    def to_unit(value, bounds):
        # Rescale with the index order the original code used: bounds[1]
        # as the offset and bounds[0] - bounds[1] as the span.
        return (value - bounds[1]) / (bounds[0] - bounds[1])

    if len(self.dataheaders) == 2:
        slope, self.intercept, r_value, self.p_value, self.std_err = \
            scipy.stats.linregress(self.data.get_data(self.dataheaders))
        # Replace rather than append: self.slope[0] is read below, and
        # appending would leave the first-ever slope there on a refit.
        self.slope = [slope]
        self.r_squared = r_value ** 2

        data_range = analysis.data_range(self.data, self.dataheaders)
        high = to_unit(data_range[0][0] * self.slope[0] + self.intercept, data_range[1])
        low = to_unit(data_range[0][1] * self.slope[0] + self.intercept, data_range[1])
        self.endpoints = np.matrix([[0, low, 0, 1],
                                    [1, high, 0, 1]])
        vtm = self.view.build()
        pts = (vtm * self.endpoints.T).T
        self.gRegressLine = self.canvas.create_line(
            pts[0, 0], pts[0, 1], pts[1, 0], pts[1, 1], fill="red")
        linText = ("Slope: %.3f, Intercept: %.3f, R Squared: %.3f"
                   % (slope, self.intercept, r_value ** 2))
        self.glinText = self.canvas.create_text(pts[1, 0], pts[1, 1], text=linText)
    else:
        fit = analysis.linear_regression(
            self.data, self.dataheaders[:2], [self.dataheaders[2], ])
        # NOTE(review): the indexing assumes fit[0] is (intercept, slope0,
        # slope1) -- confirm against analysis.linear_regression.
        self.intercept = fit[0][0]
        self.slope = [fit[0][1], fit[0][2]]
        self.std_err = fit[1]
        self.r_squared = fit[2]
        self.p_value = fit[4]

        data_range = analysis.data_range(self.data, self.dataheaders)
        dep_bounds = data_range[2]
        highx0 = to_unit(data_range[0][0] * self.slope[0] + self.intercept, dep_bounds)
        lowx0 = to_unit(data_range[0][1] * self.slope[0] + self.intercept, dep_bounds)
        highx1 = to_unit(data_range[1][0] * self.slope[1] + self.intercept, dep_bounds)
        lowx1 = to_unit(data_range[1][1] * self.slope[1] + self.intercept, dep_bounds)

        # x1 goes in the x direction, x2 in y, dep goes in Z
        self.endpoints = np.matrix([[0, 0, lowx0, 1],
                                    [1, 0, highx0, 1],
                                    [0, 0, lowx1, 1],
                                    [0, 1, highx1, 1]])
        vtm = self.view.build()
        pts = (vtm * self.endpoints.T).T

        # Each edge gets its own color so the four lines can be told apart.
        for first, second, edge_color in ((0, 1, "red"), (2, 3, "green"),
                                          (0, 2, "blue"), (1, 3, "black")):
            self.gRegressLines.append(self.canvas.create_line(
                pts[first, 0], pts[first, 1],
                pts[second, 0], pts[second, 1], fill=edge_color))

        linText = ("X0 Slope: %.3f, X1 Slope: %.3f, Intercept: %.3f, R Squared: %.3f"
                   % (self.slope[0], self.slope[1], self.intercept, self.r_squared))
        self.glinText = self.canvas.create_text(pts[1, 0], pts[1, 1], text=linText)