def computeCentroid(self):
    """Return the mean of ``self.samples`` as a new Sample named 'centroid'.

    The accumulator starts as a zero vector whose length is taken from the
    first sample's ``dimensionality()``; every sample is added to it and
    the total is divided by the number of samples.

    Note: the first sample is read unconditionally, so ``self.samples``
    must not be empty.
    """
    width = self.samples[0].dimensionality()
    total = sample.Sample('centroid', [0.0] * width)
    for member in self.samples:
        total += member
    total /= len(self.samples)
    return total
def genDistribution(xMean=0, xSD=1, yMean=0, ySD=1, n=50, namePrefix='',
                    label='a'):
    """Generate ``n`` two-feature Samples drawn from independent Gaussians.

    Each sample's x comes from ``random.gauss(xMean, xSD)`` and its y from
    ``random.gauss(yMean, ySD)``.  Sample ``i`` is named
    ``namePrefix + str(i)`` and carries ``label``.

    Returns a list of ``sample.Sample`` objects in generation order.
    """
    def _draw(index):
        # x is drawn before y so the random stream is consumed in the
        # same order for every sample.
        x_coord = random.gauss(xMean, xSD)
        y_coord = random.gauss(yMean, ySD)
        return sample.Sample(namePrefix + str(index), [x_coord, y_coord],
                             label)

    return [_draw(index) for index in range(n)]
def __mul__(self, other):
    """Return the element-wise product of ``self`` and ``other``.

    Feature ``i`` of the result is feature ``i`` of ``self`` times feature
    ``i`` of ``other``, for every index below ``self.dimensionality()``.
    The result is a new Sample named ``self.name + '*'`` that keeps
    ``self.label``.
    """
    products = [
        self.getFeatures()[idx] * other.getFeatures()[idx]
        for idx in range(self.dimensionality())
    ]
    return sample.Sample(self.name + '*', products, self.label)
def computeCentroid(self):
    """Return a Sample named "center" holding the mean of the cluster.

    All samples in ``self.samples`` are summed into an all-zero Sample and
    the total is divided by the number of samples.

    The accumulator's length is taken from the first sample's
    ``dimensionality()`` instead of being fixed at two, so clusters of
    samples with any number of features are averaged over all features.
    For an empty cluster the previous 2-D origin is still used, so that
    path behaves as before (the division by ``len(self.samples) == 0`` is
    left to the Sample division operator).
    """
    members = self.samples
    # Fall back to the historical 2-D origin when there is no sample to
    # read the dimensionality from.
    width = members[0].dimensionality() if members else 2
    # Integer zeros, as before, so the arithmetic type of the result is
    # still decided by the samples' own features.
    center = sample.Sample("center", [0] * width)
    for member in members:
        center += member
    center /= len(members)
    return center
def make_data(n, scale=1):
    """Build ``n`` samples along the line y = x, displaced by noise.

    Point ``i`` starts at ``(i / scale, i / scale)`` (float division) with
    an empty name and label, and is then added to the ``i``-th sample
    returned by ``util.genDistribution(xSD=0.3, ySD=0.3, n=n)``, which
    displaces it on both the x and y axes.  Increase ``scale`` to shrink
    the range covered by the points.

    Returns the list of summed samples.
    """
    on_line = []
    for step in range(n):
        coord = float(step) / scale
        on_line.append(sample.Sample('', [coord, coord], ''))

    jitter = util.genDistribution(xSD=0.3, ySD=0.3, n=n)

    noisy = []
    for idx in range(n):
        noisy.append(on_line[idx] + jitter[idx])
    return noisy
def onclick(event):
    """Mouse-click callback: classify the clicked position and plot it.

    A new unnamed, unlabelled Sample is created at the click's data
    coordinates and passed to ``knn(new, data, K)``.  The sample is then
    appended to the module-level ``data`` list and drawn with the marker
    and colour found at the position of its label in ``LABELS``.

    Clicks that land outside the axes are ignored: Matplotlib reports
    ``event.xdata`` / ``event.ydata`` as ``None`` there, so there is no
    data-space position to classify.
    """
    if event.xdata is None or event.ydata is None:
        return

    # Create the new point and find its k nearest neighbours.
    # NOTE(review): knn presumably assigns the label read back below --
    # confirm against its definition.
    new = sample.Sample('', [event.xdata, event.ydata], '')
    knn(new, data, K)

    # Draw the new point.
    data.append(new)
    features = new.getFeatures()
    label = new.getLabel()
    # Looked up once; marker and colour are both selected by it.
    label_index = LABELS.index(label)
    pylab.scatter([features[0]],
                  [features[1]],
                  label=label,
                  marker=util.make_cmarkers()[label_index],
                  color=util.make_cmap()[label_index])
    pylab.draw()
def __sub__(self, other):
    """Return the element-wise difference ``self - other``.

    Feature ``i`` of the result is feature ``i`` of ``self`` minus feature
    ``i`` of ``other``, for every index below ``self.dimensionality()``.
    The result is a new Sample named ``self.name + '-' + other.name``; no
    label is passed to its constructor.
    """
    differences = [
        self.getFeatures()[idx] - other.getFeatures()[idx]
        for idx in range(self.dimensionality())
    ]
    return sample.Sample(self.name + '-' + other.name, differences)
def vec_div(self, other):
    """Return the element-wise quotient of ``self`` by ``other``.

    Feature ``i`` of ``self`` is divided by feature ``i`` of ``other``
    after the divisor has been converted with ``float()``.  The result is
    a new Sample named ``self.name + '/'`` that keeps ``self.label``.
    """
    quotients = [
        self.getFeatures()[idx] / float(other.getFeatures()[idx])
        for idx in range(self.dimensionality())
    ]
    return sample.Sample(self.name + '/', quotients, self.label)
def power(self, x):
    """Return a new Sample with every feature raised to the power ``x``.

    The result is named ``self.name + '-power(' + str(x) + ')'`` and keeps
    ``self.label``.
    """
    raised = [
        self.getFeatures()[idx] ** (x)
        for idx in range(self.dimensionality())
    ]
    result_name = self.name + '-power(' + str(x) + ')'
    return sample.Sample(result_name, raised, self.label)
def __mul__(self, other):
    """Multiply two samples feature by feature.

    For every index below ``self.dimensionality()`` the matching features
    of ``self`` and ``other`` are multiplied.  Returns a new Sample named
    ``self.name + '*'`` that keeps ``self.label``.
    """
    scaled = [
        self.getFeatures()[pos] * other.getFeatures()[pos]
        for pos in range(self.dimensionality())
    ]
    return sample.Sample(self.name + '*', scaled, self.label)