import numpy as np
from numpy import shape
from functools import partial   # used by the k-medoids trainer below
from sys import maxint          # Python 2; use sys.maxsize on Python 3


def __init__(self, data, target, itercount=2120, theta=0.25):
    self.dm, self.dn = shape(data)
    self.tm, self.tn = shape(target)
    assert self.dm == self.tm
    # one weight column per output unit, plus a row for the bias input
    self.weight = np.random.rand(self.dn+1, self.tn)
    # append the constant -1 bias column to the inputs
    self.data = np.hstack((data, -np.ones((self.dm, 1))))
    self.target = target
    self.itercount = itercount
    self.theta = theta
def total_part(self, data, feature):
    m, n = shape(data)
    featureunique = np.unique(data[:, feature])
    resultuniques = np.unique(data[:, self.RESULT])
    allsum = 0
    for fu in featureunique:
        # subset of rows taking this feature value
        partdata = data[data[:, feature] == fu]
        pdm, _ = shape(partdata)
        partsum = 0
        for u in resultuniques:
            rum, _ = shape(partdata[partdata[:, self.RESULT] == u])
            # float() guards against Python 2 integer division
            partsum = partsum - self.entropy(float(rum) / pdm)
        # weight each subset's entropy by its share of the rows
        allsum = allsum - (float(pdm) / m * partsum)
    return allsum
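# The entropy() helper is referenced above but not shown in this section.
# A minimal sketch of what it presumably computes -- the single -p*log2(p)
# term that total_entropy() and total_part() accumulate over class
# probabilities (name and signature are assumptions, not the original code):
def entropy(self, p):
    if p == 0:
        return 0.0
    return -p * np.log2(p)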
def infogain(self, data):
    m, n = shape(data)
    s = self.total_entropy(data)
    # information gain of feature i = total entropy minus the entropy
    # remaining after splitting on feature i
    ds = [s - self.total_part(data, i) for i in range(n - 1)]
    maxfeature = np.array(ds).argmax()
    featurevalues = np.unique(data[:, maxfeature])
    return maxfeature, featurevalues
def __init__(self, inputdata):
    self.root = TreeNode()
    self.inputdata = inputdata
    # first row holds the feature names, the rest is the data proper
    self.features = inputdata[0]
    self.data = inputdata[1:]
    self.m, self.n = shape(inputdata)
    # the class label lives in the last column
    self.RESULT = self.n - 1
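# A tiny worked check of the gain computation, assuming these methods live
# on a decision-tree class, here called DTree for illustration, and that
# TreeNode is available (both names are assumptions -- only the methods
# appear in this section). Feature 0 separates the classes perfectly, so
# it should win:
rows = np.array([['f0', 'f1', 'class'],
                 ['0', '1', '0'],
                 ['0', '0', '0'],
                 ['1', '1', '1'],
                 ['1', '0', '1']])
tree = DTree(rows)
print tree.infogain(tree.data)   # -> (0, array(['0', '1'], ...))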
def convert_target(target):
    # one-hot encode a column vector of class ids {0, 1, 2};
    # the output must have m rows (one per sample), not n
    m, n = shape(target)
    t = np.zeros((m, 3))
    target = target.flatten()   # keep np.where() from leaking column indices
    t[np.where(target == 0), 0] = 1
    t[np.where(target == 1), 1] = 1
    t[np.where(target == 2), 2] = 1
    return t
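# Quick check of the one-hot conversion on a made-up label column:
labels = np.array([[0], [2], [1], [2]])
print convert_target(labels)
# -> rows [1,0,0], [0,0,1], [0,1,0], [0,0,1]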
def total_entropy(self, data):
    m, n = shape(data)
    uniques = np.unique(data[:, self.RESULT])
    e = 0
    for u in uniques:
        count = len(data[data[:, self.RESULT] == u])
        # float() guards against Python 2 integer division
        e += self.entropy(float(count) / m)
    return e
def __init__(self, data, k=3, itercount=2, eta=0.25):
    self.m, self.n = shape(data)
    self.data = data
    self.k = k
    self.weight = np.random.rand(self.n, k)
    self.itercount = itercount
    self.normalised_data = self.normalise(self.data)
    self.eta = eta
def __init__(self, data, target, hidden_node=5, itercount=1000, theta=0.5,
             beta=1, momentum=0.2, mode='logistic'):
    self.dm, self.dn = shape(data)
    self.tm, self.tn = shape(target)
    # append the constant -1 bias column to the inputs
    self.data = np.hstack((data, -np.ones((self.dm, 1))))
    self.target = target
    self.itercount = itercount
    self.theta = theta
    self.beta = beta
    self.momentum = momentum
    self.mode = mode
    # initialise both layers with small weights scaled by fan-in
    self.weight1 = (np.random.rand(self.dn+1, hidden_node)-0.5)*2/np.sqrt(self.dn)
    self.weight2 = (np.random.rand(hidden_node+1, self.tn)-0.5)*2/np.sqrt(hidden_node)
def fwd(self, data):
    hidden_output = np.dot(data, self.weight1)
    tmp_output = sigmod(hidden_output, self.beta)
    # append the -1 bias column to the hidden activations
    self.hidden_output = np.hstack((tmp_output, -np.ones((shape(tmp_output)[0], 1))))
    output = np.dot(self.hidden_output, self.weight2)
    if self.mode == 'linear':
        return output
    elif self.mode == 'logistic':
        # pass beta here too, matching the hidden layer
        return sigmod(output, self.beta)
    elif self.mode == 'softmax':
        return softmax(output)
    else:
        return None
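# sigmod() and softmax() are called above but defined elsewhere in the
# project. Plausible definitions consistent with their call sites (these
# are assumptions about the helpers, not the original code):
def sigmod(x, beta=1):
    # logistic sigmoid with optional steepness beta
    return 1.0 / (1.0 + np.exp(-beta * x))

def softmax(x):
    # row-wise softmax, shifted by the row max for numerical stability
    e = np.exp(x - x.max(axis=1)[:, np.newaxis])
    return e / e.sum(axis=1)[:, np.newaxis]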
def fwd(self, data=None):
    if data is None:
        data = self.data   # default to the training data
    self.dm, self.dn = shape(data)
    hidden = np.zeros((self.dm, self.count+1))
    for i in range(self.count):
        # Gaussian activation of centre i; the square belongs on the
        # whole difference, not only on the weight vector
        hidden[:, i] = np.exp(-np.sum((data - np.ones((1, self.dn)) * self.weight[:, i])**2,
                                      axis=1) / (2*self.sigma**2))
    if self.normalise:
        # normalised-RBF variant (one standard choice, assumed here):
        # scale each row of activations to sum to one
        hidden[:, :-1] = hidden[:, :-1] / hidden[:, :-1].sum(axis=1)[:, np.newaxis]
    output = self.pcn.fwd(hidden[:, :-1])
    return output
def train(self):
    change = range(self.dm)
    updatew1 = np.zeros((shape(self.weight1)))
    updatew2 = np.zeros((shape(self.weight2)))
    for n in range(self.itercount):
        fwd_output = self.fwd(self.data)
        error = 0.5 * np.sum((self.target - fwd_output) ** 2)
        if n % 200 == 0:
            print '++++++++++++++++++'
            print "Iteration: ", n, "\tError: ", error
        # output-layer deltas depend on the output activation
        if self.mode == 'linear':
            delta_o = (self.target - fwd_output) / self.dm
        elif self.mode == 'logistic':
            delta_o = (self.target - fwd_output) * fwd_output * (1 - fwd_output)
        elif self.mode == 'softmax':
            delta_o = (self.target - fwd_output) / self.dm
        else:
            delta_o = 0
        # backpropagate through the hidden layer (bias column dropped below)
        delta_h = self.hidden_output * (1.0 - self.hidden_output) * \
            (np.dot(delta_o, np.transpose(self.weight2)))
        updatew1 = self.theta * (np.dot(np.transpose(self.data), delta_h[:, :-1])) + \
            self.momentum * updatew1
        updatew2 = self.theta * (np.dot(np.transpose(self.hidden_output), delta_o)) + \
            self.momentum * updatew2
        self.weight1 = self.weight1 + updatew1
        self.weight2 = self.weight2 + updatew2
        # present the training data in a new random order each epoch
        np.random.shuffle(change)
        self.data = self.data[change, :]
        self.target = self.target[change, :]
    print 'WEIGHTS =================='
    print self.weight1, self.weight2
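# Smoke test on XOR, assuming the class is named MLP (the class name is
# an assumption -- only the methods appear in this section):
xor_in = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
xor_out = np.array([[0], [1], [1], [0]])
net = MLP(xor_in, xor_out, hidden_node=3, itercount=5000, theta=0.25)
net.train()
test = np.hstack((xor_in, -np.ones((4, 1))))   # fwd() expects the bias column
print net.fwd(test)   # outputs should approach [0, 1, 1, 0]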
def assigngroup(cls, data, k=0, centroid=None):
    m, n = shape(data)
    category = np.zeros((m, 1))
    # `if centroid:` is ambiguous for arrays, so test against None explicitly
    if centroid is not None:
        cls.centroid = centroid
    if k:
        cls.k = k
    # size the score table from the resolved cluster count, not the raw argument
    scoretable = np.zeros((m, cls.k))
    cdata = np.hstack((data, category))
    for c in range(cls.k):
        scoretable[:, c] = distance(cdata[:, :-1], cls.centroid[c, :])
    # each point joins the cluster of its nearest centroid
    cdata[:, -1] = scoretable.argmin(axis=1)
    return cdata
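# distance() is called throughout this section but not defined in it. A
# sketch consistent with its call sites -- row-wise distance from every
# point to one reference vector, with a 'manhatton' mode matching the
# spelling the k-medoids trainer uses (signature and behaviour are
# assumptions, not the original code):
def distance(data, point, mode='euclidean'):
    diff = data - point
    if mode == 'manhatton':   # Manhattan / L1 distance
        return np.abs(diff).sum(axis=1)
    return np.sqrt((diff ** 2).sum(axis=1))   # Euclidean / L2 distance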
def __init__(self, data, target, count=5, sigma=0, normalise=False):
    self.dm, self.dn = shape(data)
    self.data = data
    self.target = target
    self.count = count
    self.normalise = normalise
    self.hidden = np.zeros((self.dm, self.count+1))
    if sigma == 0:
        # default RBF width from the data spread and the number of centres
        d = (data.max(axis=0) - data.min(axis=0)).max()
        self.sigma = d / np.sqrt(2*self.count)
    else:
        self.sigma = sigma
    # a perceptron maps the RBF activations to the targets
    self.pcn = Perceptron(self.hidden[:, :-1], target, itercount=3000)
    self.weight = np.zeros((self.dn, self.count))
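# No train() method for the RBF layer appears in this section. A sketch of
# one common way self.weight could be filled before fwd() is called --
# use randomly chosen training points as the centres (an assumption about
# the missing step, not the original code):
def pick_centres(self):
    idx = np.random.randint(0, self.dm, self.count)
    # one centre per column, matching self.weight's (dn, count) shape
    self.weight = self.data[idx, :].T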
def train(cls, data, k=3, itercount=100):
    m, n = shape(data)
    #medoids = data[np.random.choice(m, k)] for numpy>=1.7
    medoids = data[np.random.randint(0, m, k)]
    old_medoids = None
    min_score = maxint
    # the last column of scoretable holds each point's distance to its medoid
    scoretable = np.zeros((m, k+1))
    category = np.zeros((m, 1))
    cdata = np.hstack((data, category))
    manhatton = partial(distance, mode='manhatton')
    count = 0
    # old_medoids is never reassigned, so in practice the loop runs until
    # the iteration cap
    while not np.array_equal(old_medoids, medoids) and count < itercount:
        count += 1
        for c in range(k):
            scoretable[:, c] = manhatton(data, medoids[c])
        mi = scoretable[:, :-1].argmin(axis=1)
        cdata[:, -1] = mi
        for i in range(m):
            scoretable[i, -1] = scoretable[i, mi[i]]
        score = scoretable[:, -1].sum()
        if min_score > score:
            # keep the best assignment found so far
            min_score = score
            cls.assigneddata = cdata.copy()
        # random-restart search: draw a fresh candidate medoid set
        medoids = data[np.random.randint(0, m, k)]
    print cdata
    print min_score
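# Quick smoke test on made-up two-blob data, assuming a class named
# KMedoids wraps train() as a classmethod (the class name is an assumption):
blob1 = np.random.rand(20, 2)
blob2 = np.random.rand(20, 2) + 5
KMedoids.train(np.vstack((blob1, blob2)), k=2, itercount=50)
print KMedoids.assigneddata[:, -1]   # cluster id per row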
def __init__(self, x, y, data, itercount=2000, size=0.5,
             eta_bfinal=0.03, eta_nfinal=0.01, sizefinal=0.05):
    self.m, self.n = shape(data)
    self.x, self.y = x, y
    self.xy = x * y
    self.itercount = itercount
    self.size = size
    self.eta_bfinal = eta_bfinal
    self.eta_nfinal = eta_nfinal
    self.sizefinal = sizefinal
    # lay the map nodes out on a regular grid over the unit square,
    # flattened to a (2, x*y) coordinate table so it can be indexed below
    self.gridmap = np.mgrid[0:1:np.complex(0, x), 0:1:np.complex(0, y)]
    self.gridmap = self.gridmap.reshape(2, self.xy)
    self.weight = (np.random.rand(self.n, x*y) - 0.5) * 2
    # precompute the pairwise distances between map nodes
    self.grid = np.zeros((self.xy, self.xy))
    for i in range(self.xy):
        for j in range(self.xy):
            self.grid[i, j] = np.sqrt((self.gridmap[0, i] - self.gridmap[0, j])**2 +
                                      (self.gridmap[1, i] - self.gridmap[1, j])**2)
            self.grid[j, i] = self.grid[i, j]
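# The SOM training loop is not shown in this section. Its central step is
# the best-matching-unit lookup against these weights; a minimal sketch
# (the method name is an assumption, not the original code):
def best_match(self, point):
    # squared Euclidean distance from one input to every map node
    d = ((self.weight - point.reshape(self.n, 1)) ** 2).sum(axis=0)
    return d.argmin()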
def train(self, data=None, target=None):
    if data is not None:
        self.dm, self.dn = shape(data)
        self.data = np.hstack((data, -np.ones((self.dm, 1))))
    if target is not None:
        self.target = target
    change = range(self.dm)
    for n in range(self.itercount):
        error = 0.5 * np.sum((self.target - self.fwd()) ** 2)
        if n % 200 == 0:
            print '++++++++++++++++++'
            print "Iteration: ", n, "\tError: ", error
        # standard delta-rule weight update
        self.weight += self.theta * np.dot(np.transpose(self.data),
                                           self.target - self.fwd())
        # present the training data in a new random order each pass
        np.random.shuffle(change)
        self.data = self.data[change, :]
        self.target = self.target[change, :]
    print 'WEIGHT ==========================='
    print self.weight
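# Smoke test on the OR gate; the class name Perceptron is taken from the
# RBF constructor above, which instantiates one directly:
gate_in = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
gate_out = np.array([[0], [1], [1], [1]])
p = Perceptron(gate_in, gate_out, itercount=20, theta=0.25)
p.train()
print p.predict(gate_in)   # should reproduce gate_out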
def train(cls, data, k=3, itercount=60):
    m, n = shape(data)
    category = np.zeros((m, 1))
    # fixed starting centroids; float dtype so the means below are not truncated
    cls.centroid = np.array([[5, 3, 5, 1], [4, 3, 1, 1], [5, 3, 1, 0]], dtype=float)
    #cls.centroid = get_randomseed(data, (k, n))
    cls.k = k
    cdata = np.hstack((data, category))
    scoretable = np.zeros((m, k))
    old_centroid = None
    count = 0
    while not np.array_equal(old_centroid, cls.centroid) and count < itercount:
        old_centroid = cls.centroid.copy()
        count += 1
        for c in range(cls.k):
            scoretable[:, c] = distance(data, cls.centroid[c, :])
        # if a cluster went empty, reseed and try again
        group = scoretable.argmin(axis=1)
        if len(np.unique(group)) != cls.k:
            cls.centroid = get_randomseed(data, (k, n))
            continue
        cdata[:, -1] = group
        # move each centroid to the mean of its assigned points
        for c in range(k):
            cls.centroid[c, :] = cdata[np.where(cdata[:, -1]==c)].mean(axis=0)[:-1]
    cls.assigneddata = cdata
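# get_randomseed() is referenced above but not defined in this section.
# A sketch of a helper matching its call site: k random seed points drawn
# uniformly from each feature's observed range (name and behaviour are
# assumptions, not the original code):
def get_randomseed(data, shape_):
    lo = data.min(axis=0)
    hi = data.max(axis=0)
    return lo + np.random.rand(*shape_) * (hi - lo)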
def score(ar):
    m, n = shape(ar)
    scoretable = np.zeros((m, 3))
    groupids = set(ar[:, -1])
    A, B, S = range(3)
    for i in range(m):
        item = ar[i, :]
        # a(i): mean distance to the members of item's own cluster
        # (this includes item itself, which biases a(i) slightly low)
        subgroup = ar[np.where(ar[:, -1]==item[-1])]
        scoretable[i, A] = np.mean(distance(subgroup[:, :-1], item[:-1]))
        # b(i): smallest mean distance to any *other* cluster -- the
        # silhouette definition takes the mean per cluster, not the min
        scoretable[i, B] = np.min([np.mean(distance(ar[np.where(ar[:, -1]==gid)][:, :-1],
                                                    item[:-1]))
                                   for gid in groupids - {item[-1]}])
        # s(i) = (b(i) - a(i)) / max(a(i), b(i))
        scoretable[i, S] = (scoretable[i, B] - scoretable[i, A]) / \
            max(scoretable[i, B], scoretable[i, A])
    return scoretable[:, S].mean()
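# Usage sketch: score() expects points with the cluster id in the last
# column, which is exactly the assigneddata layout the k-means and
# k-medoids trainers above produce (class names here are assumptions):
#   KMeans.train(iris_data, k=3)
#   print score(KMeans.assigneddata)   # closer to 1.0 means tighter clusters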
def __init__(self, data, eps, minpts):
    self.m, self.n = shape(data)
    self.data = data
    # two bookkeeping columns are appended to each point, presumably the
    # cluster id and a visited/noise flag used during the scan
    self.markeddata = np.hstack((data, np.zeros((self.m, 2))))
    self.eps = eps
    self.minpts = minpts
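# The DBSCAN scan itself is not shown in this section. Its central
# primitive is the eps-neighbourhood query; a sketch against these
# attributes (the method name is an assumption, not the original code):
def region_query(self, i):
    # indices of every point within eps of point i (including i itself)
    d = np.sqrt(((self.data - self.data[i]) ** 2).sum(axis=1))
    return np.where(d <= self.eps)[0]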
def fwd(self, data=None, func=threshold):
    if data is not None:
        self.dm, self.dn = shape(data)
        self.data = np.hstack((data, -np.ones((self.dm, 1))))
    return self._fwd(self.data)
def predict(self, test_input):
    data = np.hstack((test_input, -np.ones((shape(test_input)[0], 1))))
    return threshold(self.fwd(data))
def predict(self, test_data, func=threshold):
    data = np.hstack((test_data, -np.ones((shape(test_data)[0], 1))))
    return self._fwd(data)
def score(self, input_data, target):
    data = np.hstack((input_data, -np.ones((shape(input_data)[0], 1))))
    output = self._fwd(data)
    m = data.shape[0]
    # count rows where every output matches the target, as a percentage
    s = np.sum([(output[i]==target[i]).all() for i in range(m)])
    return float(s) / float(m) * 100.0
def train(cls, data, model, m=5):
    # stub: draws random feature indices but does not use them yet
    _, n = shape(data)
    np.random.randint(0, n, (m, n))
    pass
def initialise(self, data, target):
    self.m, self.n = shape(data)
    self.data = data
    self.group = np.unique(target)
    self.target = target
    self.gaussians = None
def score(cls, target):
    # naive accuracy: assumes the cluster ids happen to align with the
    # target labels (no label-permutation matching is done)
    m, n = shape(cls.assigneddata)
    return float(np.sum(cls.assigneddata[:, -1]==target)) / float(m) * 100