def calc_error(self, data, follow_theta=False):
    if self._style == LayerTypes.OUTPUT:
        self._delta = self._nodes - data
    else:
        self._delta = np.multiply(
            data * np.transpose(follow_theta)[:, 1:],
            np.multiply(sigmoid(self._z_val), 1 - sigmoid(self._z_val)))
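# The snippets in this file all assume a `sigmoid` helper (sometimes accessed as
# `ut.sigmoid` or `utility.sigmoid`). A minimal, numerically stable sketch of such
# a helper is given here for reference; the exact definition in each original
# module may differ.
import numpy as np

def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    # Clip the argument so exp() does not overflow for large |z|.
    z = np.clip(z, -500, 500)
    return 1.0 / (1.0 + np.exp(-z))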
def indexed_weighted_logistic_regression(x_train, x_test, y_train, y_test,
                                         num_iter=10000, Regulization_lamda=0.0001,
                                         lr=0.001, weight_Sigma=0.8):
    weights = Calculate_logistic_weights(x_train, x_test, weight_Sigma)
    weights = np.identity(weights.shape[0]) * weights
    theta = np.reshape(np.zeros(x_train.shape[1]), (x_train.shape[1], 1))
    for i in range(num_iter):
        z = np.dot(x_train, theta)
        h = sigmoid(z)
        h = np.reshape(h, (h.shape[0], 1))
        X = np.matmul(weights, x_train)
        gradient = -np.dot(X.T, (h - y_train)) + theta * Regulization_lamda
        theta = theta + lr * gradient
    y_pred = sigmoid(np.dot(x_test.T, theta))
    return y_pred
def reconstruction_likelihood(net, t_image=0.250):
    spikes = np.zeros((len(x), n_outputs))
    pi = ut.sigmoid(net._V)
    likelihoods = []
    estimation_net = deepcopy(net)
    estimation_net._current_time = 0
    estimation_net._trace = deque([])
    pbar = tqdm(total=len(x) * t_image, unit='Time [s]', position=1,
                desc="Reconstruction")
    while estimation_net._current_time < len(x) * t_image:
        pbar.n = int(estimation_net._current_time * 1000) / 1000
        pbar.update(0)
        z = estimation_net.step(
            lambda t: x[int(min(t, (len(x) - 1) * t_image) / t_image)],
            update_weights=False)
        sample = x[int(min(estimation_net._current_time, (len(x) - 1) * t_image) / t_image)]
        pi = ut.sigmoid(np.dot(z.reshape((1, -1)), net._V))
        likelihoods.append(
            np.sum(np.log(sample * pi + (1 - sample) * (1 - pi)), axis=-1))
    pbar.close()
    return np.mean(likelihoods)
def forward_prop(self, inputs):
    """ Calculate output from given inputs through the neural network """
    layers = [inputs]
    for i in range(len(self.h_layers) + 1):
        # Calculate the input * weights + bias
        z = np.dot(layers[i], self.weights[i]) + self.biases[i]
        # Apply activation function
        out = []
        if d.sensor_mode:
            # Outputs are numbers
            for j in range(len(z)):
                o = sigmoid(clamp(-20, 20, z[j]))
                out.append(o)
        else:
            # Outputs are vectors
            for j in range(len(z)):
                sigm_x = sigmoid(clamp(-20, 20, z[j].x))
                sigm_y = sigmoid(clamp(-20, 20, z[j].y))
                out.append(Vector2(sigm_x, sigm_y))
        layers.append(out)
    # Return the output
    final_output = layers[len(layers) - 1][0]  # Last layer only has 1 output neuron
    return final_output
def logistic_regression(x_train, x_test, y_train, y_test, num_iter=10000,
                        Regulization_lamda=0.0001, lr=0.001):
    theta = np.reshape(np.zeros(x_train.shape[1]), (x_train.shape[1], 1))
    for i in range(num_iter):
        z = np.dot(x_train, theta)
        h = sigmoid(z)
        h = np.reshape(h, (h.shape[0], 1))
        gradient = -np.dot(x_train.T, (h - y_train)) + theta * Regulization_lamda
        theta = theta + lr * gradient
    z = np.dot(x_test, theta)
    h = sigmoid(z)
    h = np.reshape(h, (h.shape[0], 1))
    acc = 0
    for i in range(len(y_test)):
        if (h[i] > 0.5 and y_test[i] == 1) or (h[i] < 0.5 and y_test[i] == 0):
            acc += 1
    acc = acc / y_test.shape[0]
    # Simple accuracy on the test set, as a percentage
    return 100 * acc
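# A minimal usage sketch for logistic_regression above, on a tiny synthetic
# dataset. The shapes (y as an (n, 1) column vector, x with a leading bias
# column) are assumptions inferred from how the function indexes its arguments.
import numpy as np

rng = np.random.default_rng(0)
n, d = 200, 3
X = np.hstack([np.ones((n, 1)), rng.normal(size=(n, d - 1))])  # bias column + features
true_theta = np.array([[0.5], [2.0], [-1.0]])
y = (sigmoid(X @ true_theta) > rng.uniform(size=(n, 1))).astype(float)

# Train on the first 150 rows, evaluate accuracy on the remaining 50.
acc = logistic_regression(X[:150], X[150:], y[:150], y[150:], num_iter=2000, lr=0.01)
print("accuracy (%):", acc)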
def reconstruct_from_input(self, input):
    output = numpy.dot(input, self.W.T) + self.hbias
    hidden_possible = sigmoid(output)
    input_after = numpy.dot(hidden_possible, self.W) + self.vbias
    input_possible = sigmoid(input_after)
    assert input.shape == input_possible.shape
    return input_possible
def contrast_divergence_binomial(self, epoch):
    dw = numpy.zeros((self.output_size, self.input_size))
    # dvb = numpy.zeros(self.input_size)
    # dhb = numpy.zeros(self.output_size)

    # Positive phase
    output = numpy.dot(self.input, self.W.T) + self.hbias
    hidden_possible = sigmoid(output)
    hidden_state = numpy_rng.binomial(n=1, p=hidden_possible)
    dw += self.learning_rate * numpy.dot(hidden_state.T, self.input)
    # dvb += self.learning_rate * numpy.mean(self.input, axis=0)
    # dhb += self.learning_rate * numpy.mean(hidden_possible, axis=0)

    # Negative phase (one Gibbs step)
    visible_output = numpy.dot(hidden_state, self.W) + self.vbias
    visible_possible = sigmoid(visible_output)
    visible_state = numpy_rng.binomial(n=1, p=visible_possible)
    hidden_output = numpy.dot(visible_state, self.W.T) + self.hbias
    hidden_possible_after = sigmoid(hidden_output)
    dw -= self.learning_rate * numpy.dot(hidden_possible_after.T, visible_state)
    # dvb -= self.learning_rate * numpy.mean(visible_state, axis=0)
    # dhb -= self.learning_rate * numpy.mean(hidden_possible_after, axis=0)

    ####################
    # parameter update
    ####################
    self.W += dw / self.data_size
def get_reconstruction_cross_entropy(self):
    pre_sigmoid_activation_h = numpy.dot(self.input, self.W.T) + self.hbias
    sigmoid_activation_h = sigmoid(pre_sigmoid_activation_h)
    pre_sigmoid_activation_v = numpy.dot(sigmoid_activation_h, self.W) + self.vbias
    sigmoid_activation_v = sigmoid(pre_sigmoid_activation_v)
    cross_entropy = -numpy.mean(
        numpy.sum(self.input * numpy.log(sigmoid_activation_v) +
                  (1 - self.input) * numpy.log(1 - sigmoid_activation_v),
                  axis=1))
    return cross_entropy
def predict(self, DAG, predictors):
    if not isinstance(predictors, dict):
        predictors = util.array2dict(predictors)
    Z = DAG.get_latent_nodes()
    Y = DAG.get_response_nodes()
    X = DAG.get_input_nodes()
    non_input_nodes = Z.union(Y)
    non_input_nodes_by_rep = sorted(non_input_nodes, key=DAG.get_reputation,
                                    reverse=True)
    x = next(iter(X))
    n = len(predictors[x])
    node_values = dict()
    for node in non_input_nodes_by_rep:
        if node in DAG.parents:
            parents = DAG.parents[node]
            weighted_parent_values = np.zeros((len(parents), n))
            for ix, parent in enumerate(parents):
                if parent in X:
                    parvals = predictors[parent]
                else:
                    parvals = node_values[parent]
                w = DAG.get_weight(parent, node)
                weighted_parent_values[ix, :] += self.boolean_NOT(parvals, w)
            node_values[node] = self.boolean_MEDIAN(weighted_parent_values)
        else:
            node_values[node] = np.zeros((n))
    y = next(iter(Y))
    parents_y = DAG.parents[y]
    parent_y_values = np.zeros((len(parents_y), n))
    for ix, i in enumerate(parents_y):
        w = DAG.get_weight(i, y)
        if i in X:
            parent_y_values[ix, :] = self.boolean_NOT(predictors[i], w)
        elif i in Z:
            parent_y_values[ix, :] = self.boolean_NOT(node_values[i], w)
    if len(parents_y) == 1:  # single parent: no averaging needed
        theta = util.sigmoid(2 * parent_y_values - 1, gain=self.gain)
    else:
        theta = util.sigmoid(2 * np.mean(parent_y_values, axis=0) - 1, gain=self.gain)
    response = 1 * (np.random.rand(n) < theta)
    return response
def log_likelihood(self, DAG, predictors, response, epsilon=1e-3):
    if not isinstance(predictors, dict):
        predictors = util.array2dict(predictors)
    X = DAG.get_input_nodes()
    Z = DAG.get_latent_nodes()
    Y = DAG.get_response_nodes()
    zvalues = dict()
    # We assume only 1 response node
    y = next(iter(Y))
    n = len(response)
    # highest reputation first
    Z_by_rep = sorted(Z, key=DAG.get_reputation, reverse=True)
    for z in Z_by_rep:
        if z not in DAG.parents:
            zvalues[z] = np.zeros(shape=(n,))
        else:
            parents = DAG.parents[z]
            parent_values = np.zeros((len(parents), n))
            for ix, i in enumerate(parents):
                w = DAG.get_weight(i, z)
                if i in X:
                    parent_values[ix, :] = self.boolean_NOT(predictors[i], w)
                else:
                    parent_values[ix, :] = self.boolean_NOT(zvalues[i], w)
            parent_values = np.array(parent_values)
            zvalue = self.boolean_MEDIAN(parent_values)
            zvalues[z] = zvalue
    parents_y = DAG.parents[y]
    parent_y_values = np.zeros((len(parents_y), n))
    for ix, i in enumerate(parents_y):
        w = DAG.get_weight(i, y)
        if i in X:
            parent_y_values[ix, :] = self.boolean_NOT(predictors[i], w)
        elif i in Z:
            parent_y_values[ix, :] = self.boolean_NOT(zvalues[i], w)
    if len(parents_y) == 1:  # single parent: no averaging needed
        theta = util.sigmoid(2 * parent_y_values - 1, gain=self.gain)
    else:
        theta = util.sigmoid(2 * np.mean(parent_y_values, axis=0) - 1, gain=self.gain)
    L = np.sum(response * np.log(theta) + (1 - response) * np.log(1 - theta))
    return L
def get_accuracy(x, y, theta):
    length = len(x)
    correct = 0
    for i in range(length):
        prediction = 1 if sigmoid(x[i].dot(theta)) >= 0.5 else 0
        correct = correct + 1 if y[i] == prediction else correct
    return (correct / length) * 100
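# For reference, get_accuracy above can be written without the Python loop. This
# vectorized sketch assumes x is an (n, d) array, theta a length-d vector, and y
# a length-n vector of 0/1 labels.
def get_accuracy_vectorized(x, y, theta):
    predictions = (sigmoid(x.dot(theta)) >= 0.5).astype(int)
    return np.mean(predictions == y) * 100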
def step(self, inputs):
    inputs = inputs.reshape((-1, 1))
    assert len(inputs) == self._n_inputs, "Input length does not match"
    # u = V * input + b
    u = np.dot(self._V, inputs) + self._b
    z = np.zeros((self._n_outputs, 1))
    # find out if network is spiking
    if np.random.uniform(0, 1, 1) < self._delta_t * self._r_net:
        # p = softmax(u)
        p_z = np.exp(u) / np.sum(np.exp(u) + 1e-8)
        # sample from softmax distribution
        sum_p_z = np.cumsum(p_z)
        diff = sum_p_z - np.random.uniform(0, 1, 1) > 0
        k = np.argmax(diff)
        z[k] = 1.0
        self._b += self._delta_t * self._eta_b * (
            self._delta_t * self._r_net * self._m_k - ut.dirac(z - 1))
        self._V += self._delta_t * self._eta_v * ut.dirac(z - 1) * (
            inputs.T - ut.sigmoid(self._V))
    return z
def predict(self, X, Y):
    m = X.shape[-1]
    Z = np.dot(self.w, X) + self.b
    A = sigmoid(Z)
    self.Y_p = (A > 0.5)
    correct = (self.Y_p == Y)
    self.accuracy = np.sum(correct) / m
    return self.accuracy
def linear_forward_block(a_prev, w, b, activation):
    h = np.dot(w, a_prev) + b
    if activation == 'sigmoid':
        a = sigmoid(h)
    else:
        a = tanh(h)
    cache = {'a_prev': a_prev, 'w': w, 'b': b}
    return a, cache
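# A minimal sketch of chaining linear_forward_block for a two-layer forward pass.
# The layer sizes and parameter names (w1, b1, w2, b2) are illustrative
# assumptions; a module-level `tanh` helper is assumed to exist, as the else
# branch above already relies on it.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 5))                       # 4 features, batch of 5 examples
w1, b1 = rng.normal(size=(3, 4)), np.zeros((3, 1))
w2, b2 = rng.normal(size=(1, 3)), np.zeros((1, 1))

a1, cache1 = linear_forward_block(x, w1, b1, activation='tanh')
a2, cache2 = linear_forward_block(a1, w2, b2, activation='sigmoid')  # output in (0, 1)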
def predict(self, x):
    """
    Predicts the value for a given datapoint x.

    variables:
    x - datapoint for which a value gets predicted by the model
    """
    x = np.insert(x, 0, 1)
    prob = sigmoid(x.dot(self.theta))
    if self.verbose:
        print(f'Prediction for {x[1:]} is: {100*prob:.1f}%')
    return prob
def compute_cost(X, Y, w, b, lambd, regularized):
    m = Y.shape[-1]
    Z = np.dot(w, X) + b
    A = sigmoid(Z)
    cost = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m
    if regularized == 1:
        cost += lambd * np.linalg.norm(w, 1) / m
    elif regularized == 2:
        cost += lambd * np.linalg.norm(w, 2) / m
    return cost
def forward_prop(self, inputs):
    layers = [inputs]
    for i in range(len(self.h_layers) + 1):
        # Calculate the input * weights + bias
        z = sum_matrix_float(np.dot(layers[i], self.weights[i]), self.biases[i])
        # Apply activation function
        o = [sigmoid(clamp(-20, 20, z[j])) for j in range(len(z))]
        layers.append(o)
    # Return the output
    final_output = layers[len(layers) - 1][0]
    return final_output
def reconstruct(net, input, t_image=0.250):
    estimation_net = deepcopy(net)
    estimation_net._current_time = 0
    estimation_net._trace = deque([])
    reconstruction = np.zeros_like(input)
    while estimation_net._current_time < t_image:
        z = estimation_net.step(lambda t: input, update_weights=False)
        reconstruction += z.dot(ut.sigmoid(net._V))
    return reconstruction
def costFun(self, theta):
    """
    Returns objective value for measuring fitness of model.

    variables:
    theta - current model parameters
    """
    if self.verbose:
        print("Iter: {} | theta: {}".format(self.iter, theta))
    J = 0
    m = len(self.y)
    # Using np.finfo(float).eps to avoid dividing by zero errors/warnings
    cost = -self.y * np.log(sigmoid(self.X.dot(theta)) + np.finfo(float).eps) \
        - (np.ones([m, ]) - self.y) * np.log(1 - sigmoid(self.X.dot(theta)) + np.finfo(float).eps)
    J = 1 / m * sum(cost)
    self.costHist.append(J)
    self.theta = theta
    self.iter += 1
    return J
def compute_dd_loss(self, fst, snd, third):
    x = self.predict_score(fst, snd, "dd") - \
        self.predict_score(fst, third, "dd")
    ranking_loss = -np.log(sigmoid(x))
    complexity = 0.0
    complexity += self.matrix_reg * np.dot(self.paper_latent_matrix[fst],
                                           self.paper_latent_matrix[fst])
    complexity += self.matrix_reg * np.dot(self.paper_latent_matrix[snd],
                                           self.paper_latent_matrix[snd])
    complexity += self.matrix_reg * np.dot(self.paper_latent_matrix[third],
                                           self.paper_latent_matrix[third])
    return ranking_loss + complexity
def generate_R_PR(self):
    self.Pr = np.zeros((self.num_user, self.num_item))
    self.R = np.zeros((self.num_user, self.num_item))
    rel_list = []
    exp_list = []
    for m in range(self.C):
        P, Q, c, d, a, b, e, f = self.sess.run([
            self.P_list[m], self.Q_list[m], self.c_list[m], self.d_list[m],
            self.a_list[m], self.b_list[m], self.e_list[m], self.f_list[m]
        ])
        rel = np.matmul(P, Q.T)
        rel = np.exp(rel)
        rel /= np.sum(rel, axis=1, keepdims=True)
        rel *= (self.num_item / 2.)
        w = utility.sigmoid(np.matmul(Q, a) + b)
        pop = np.power(w * utility.sigmoid(np.matmul(Q, c) + d)
                       + (1 - w) * self.item_pop,
                       utility.sigmoid(np.matmul(Q, e) + f))
        exp = np.zeros((self.num_user, self.num_item)) + pop.T
        user_ids = self.df_list[m]['userId']
        item_ids = self.df_list[m]['itemId']
        rel_list.append(rel[user_ids, item_ids])
        exp_list.append(exp[user_ids, item_ids])
        self.R += rel
        self.Pr += exp
    self.R /= self.C
    self.Pr /= self.C
    for m in range(self.C):
        user_ids = self.df_list[m]['userId']
        item_ids = self.df_list[m]['itemId']
        self.R[user_ids, item_ids] = rel_list[m]
        self.Pr[user_ids, item_ids] = exp_list[m]
    self.Pr[np.where(self.Pr < 0.01)] = 0.01
    self.Pr[np.where(self.Pr > 0.99)] = 0.99
def costFunGrad(self, theta):
    """
    Returns gradient of the objective.

    variables:
    theta - current model parameters
    """
    Grad = np.zeros(theta.shape)
    m = len(self.y)
    for i in range(len(theta)):
        Grad[i] = 1 / m * sum(
            (sigmoid(self.X.dot(theta)) - self.y) * self.X[:, i])
    return Grad
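# costFunGrad computes, component by component, the standard logistic-regression
# gradient (1/m) * X^T (sigmoid(X theta) - y). A vectorized sketch of the same
# quantity (assuming self.X is (m, d) and self.y has length m) could look like:
def costFunGradVectorized(self, theta):
    m = len(self.y)
    return self.X.T.dot(sigmoid(self.X.dot(theta)) - self.y) / m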
def step(self, data_generator_fn, update_weights=True):
    # sample isi
    isi = -np.log(np.random.uniform()) / self._r_net
    new_time = self._current_time + isi
    # now go back from T + isi - tau to T + isi, calculate input data
    # calculate the activations
    # update the weights
    time_start = max(0, new_time - 2 * self._tau)
    total_inputs = data_generator_fn(new_time).reshape((-1, 1))
    for time in np.arange(time_start, new_time, self._delta_t):
        inputs = data_generator_fn(time)
        inputs = inputs.reshape((-1, 1))
        inputs *= np.exp(-(new_time - time) / self._tau)
        assert len(inputs) == self._n_inputs, "Input length does not match"
        total_inputs += inputs
    # u = V * input + b
    u = np.dot(self._V, inputs) + self._b
    z = np.zeros((self._n_outputs, 1))
    # p = softmax(u)
    p_z = np.exp(u) / np.sum(np.exp(u) + 1e-8)
    # sample from softmax distribution
    sum_p_z = np.cumsum(p_z)
    diff = sum_p_z - np.random.uniform(0, 1, 1) > 0
    k = np.argmax(diff)
    z[k] = 1.0
    if update_weights:
        self._b += self._eta_b * (isi * self._r_net * self._m_k - ut.dirac(z - 1))
        self._V += self._eta_v * ut.dirac(z - 1) * (inputs.T - ut.sigmoid(self._V))
    self._current_time += isi
    self._trace.append(
        (self._current_time, inputs, z, u, self._V, self._b))
    if len(self._trace) > self._max_trace_length:
        self._trace.popleft()  # drop the oldest entry so the trace stays bounded
    return z
def compute_output(self):
    """
    Returns the output of this Neuron node, using a sigmoid as the
    threshold function.

    returns: number (float or int)
    """
    inputs_ = np.array([x.output() for x in self.my_inputs])
    weights_ = np.array([x.my_value for x in self.my_weights])
    out = sigmoid(np.sum(inputs_ * weights_))
    return out
def compute_pp_loss(self, fst, snd, third):
    """ loss includes ranking loss and model complexity """
    x = self.predict_score(fst, snd, "pp") - \
        self.predict_score(fst, third, "pp")
    ranking_loss = -np.log(sigmoid(x))
    complexity = 0.0
    complexity += self.matrix_reg * np.dot(self.author_latent_matrix[fst],
                                           self.author_latent_matrix[fst])
    complexity += self.matrix_reg * np.dot(self.author_latent_matrix[snd],
                                           self.author_latent_matrix[snd])
    complexity += self.matrix_reg * np.dot(self.author_latent_matrix[third],
                                           self.author_latent_matrix[third])
    return ranking_loss + complexity
def accuracy(self, threshold=0.5):
    """
    Computes the accuracy of the trained model for the training set.

    variables:
    threshold - threshold for accepting a value as 1
    """
    p = np.zeros([len(self.y), ])
    for i in range(len(self.y)):
        if sigmoid(self.X[i].dot(self.theta)) >= threshold:
            p[i] = 1
        else:
            p[i] = 0
    self.acc = np.mean(p == self.y) * 100
def logistic_propagate(X, Y, w, b, lambd, regularized):
    m = Y.shape[-1]
    Z = np.dot(w, X) + b
    A = sigmoid(Z)
    cost = compute_cost(X, Y, w, b, lambd, regularized)
    dw = np.dot((A - Y), X.T) / m
    db = np.sum((A - Y), axis=1, keepdims=True) / m
    if regularized == 2:
        dw += 2 * lambd * w / m
    elif regularized == 1:
        dw += lambd * np.sign(w) / m
    grad = {'dw': dw, 'db': db}
    return grad, cost
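# A quick finite-difference check of the analytic gradient returned by
# logistic_propagate (unregularized case). The toy shapes, step size h, and the
# way the result is reported are illustrative assumptions.
import numpy as np

rng = np.random.default_rng(0)
d, m = 3, 10
X = rng.normal(size=(d, m))                      # features are rows, examples are columns
Y = rng.integers(0, 2, size=(1, m)).astype(float)
w = rng.normal(size=(1, d)) * 0.01
b = np.zeros((1, 1))

grad, _ = logistic_propagate(X, Y, w, b, lambd=0.0, regularized=0)

# Numerically approximate d(cost)/d(w_j) with central differences.
h = 1e-6
num_dw = np.zeros_like(w)
for j in range(d):
    w_plus, w_minus = w.copy(), w.copy()
    w_plus[0, j] += h
    w_minus[0, j] -= h
    num_dw[0, j] = (compute_cost(X, Y, w_plus, b, 0.0, 0)
                    - compute_cost(X, Y, w_minus, b, 0.0, 0)) / (2 * h)

print("max abs difference:", np.max(np.abs(grad['dw'] - num_dw)))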
def reconstruction_l2_loss(net, t_image=0.250):
    estimation_net = deepcopy(net)
    estimation_net._current_time = 0
    estimation_net._trace = deque([])
    spikes = np.zeros((len(x), n_outputs))
    pbar = tqdm(total=len(x) * t_image, unit='Time [s]', position=1,
                desc="Reconstruction")
    while estimation_net._current_time < len(x) * t_image:
        pbar.n = int(estimation_net._current_time * 1000) / 1000
        pbar.update(0)
        z = estimation_net.step(
            lambda t: x[int(min(t, (len(x) - 1) * t_image) / t_image)],
            update_weights=False)
        spikes[min(len(x) - 1, int(estimation_net._current_time / t_image))] += z.flatten()
    reconstructions = np.dot(spikes, ut.sigmoid(estimation_net._V)) \
        / np.sum(spikes, axis=-1).reshape(-1, 1)
    difference = np.mean((reconstructions - x) ** 2)
    return difference
def update_dd_gradient(self, fst, snd, third):
    x = self.predict_score(fst, snd, "dd") - \
        self.predict_score(fst, third, "dd")
    common_term = sigmoid(x) - 1
    grad_fst = common_term * (self.paper_latent_matrix[snd] -
                              self.paper_latent_matrix[third]) + \
        2 * self.matrix_reg * self.paper_latent_matrix[fst]
    self.paper_latent_matrix[fst] = self.paper_latent_matrix[fst] - \
        self.alpha * grad_fst
    grad_snd = common_term * self.paper_latent_matrix[fst] + \
        2 * self.matrix_reg * self.paper_latent_matrix[snd]
    self.paper_latent_matrix[snd] = self.paper_latent_matrix[snd] - \
        self.alpha * grad_snd
    grad_third = -common_term * self.paper_latent_matrix[fst] + \
        2 * self.matrix_reg * self.paper_latent_matrix[third]
    self.paper_latent_matrix[third] = self.paper_latent_matrix[third] - \
        self.alpha * grad_third
def estimate_likelihood(estimation_duration=10.0):
    log_likelihoods = deque([])
    estimation_net = deepcopy(net)
    estimation_net._current_time = 0
    estimation_net._trace = deque([])
    while estimation_net._current_time < estimation_duration:
        estimation_net.step(lambda t: data_generator[t], update_weights=False)
        pbar.n = int(net._current_time * 1000) / 1000
        pbar.update(0)
        # log likelihood
        y = estimation_net._trace[-1][1].reshape((1, -1))
        pi = ut.sigmoid(net._V)
        log_likelihoods.append(
            np.log(1.0 / n_outputs) +
            np.log(np.sum(np.prod(y * pi + (1 - y) * (1 - pi), axis=-1))))
    return np.mean(log_likelihoods), np.std(log_likelihoods)
def __init__(self, weights):
    self._fig = plt.figure(figsize=(3.5, 1.16), dpi=300)
    # choose the number of grid rows as the smallest divisor (>= 2) of num_weights
    i = 2
    num_weights = len(weights)
    while i < len(weights):
        if num_weights % i == 0:
            break
        else:
            i += 1
    axes = add_axes_as_grid(self._fig, i, int(num_weights / i), m_xc=0.01, m_yc=0.01)
    self._weight_shape = weights.shape[1:]
    self._imshows = []
    for i, ax in enumerate(list(axes.flatten())):
        # disable legends
        ax.set_yticks([])
        ax.set_xticks([])
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        if i >= len(weights):
            self._imshows.append(
                ax.imshow(np.zeros(self._weight_shape), vmin=0, vmax=1))
        else:
            self._imshows.append(
                ax.imshow(ut.sigmoid(weights[i].reshape(self._weight_shape)),
                          vmin=0, vmax=1))
    plt.show(block=False)
    self._fig.canvas.draw()
    self._fig.canvas.flush_events()
def reconstruction(nets, Xs, t_image=0.250):
    reconstructions = np.zeros_like(x)
    w = W // Xs.shape[0]
    h = H // Xs.shape[0]
    for n in range(Xs.shape[0]):
        for m in range(Xs.shape[1]):
            net = nets[n][m]
            estimation_net = deepcopy(net)
            estimation_net._current_time = 0
            estimation_net._trace = deque([])
            data = Xs[n][m]
            spikes = np.zeros((len(data), n_outputs))
            pbar = tqdm(total=len(data) * t_image, unit='Time [s]', position=1,
                        desc="Reconstruction")
            while estimation_net._current_time < len(X) * t_image:
                pbar.n = int(estimation_net._current_time * 1000) / 1000
                pbar.update(0)
                z = estimation_net.step(
                    lambda t: data[int(min(t, (len(x) - 1) * t_image) / t_image)],
                    update_weights=False)
                spikes[min(len(data) - 1,
                           int(estimation_net._current_time / t_image))] += z.flatten()
            reconstructions[:, n*h:(n+1)*h, m*w:(m+1)*w] = (
                np.dot(spikes, ut.sigmoid(estimation_net._V))
                / np.sum(spikes, axis=-1).reshape(-1, 1)
            ).reshape(-1, h, w)
    return reconstructions
def step(self, data_generator_fn, update_weights=True):
    # sample isi
    isi = -np.log(np.random.uniform()) / self._r_net
    inputs = data_generator_fn(self._current_time + isi)
    inputs = inputs.reshape((-1, 1))
    assert len(inputs) == self._n_inputs, "Input length does not match"
    # u = V * input + b
    u = np.dot(self._V, inputs) + self._b
    z = np.zeros((self._n_outputs, 1))
    # p = softmax(u)
    p_z = np.exp(u) / np.sum(np.exp(u))
    # sample from softmax distribution
    sum_p_z = np.cumsum(p_z)
    diff = sum_p_z - np.random.uniform(0, 1, 1) > 0
    k = np.argmax(diff)
    z[k] = 1.0
    if update_weights:
        self._b += self._eta_b * (isi * self._r_net * self._m_k - ut.dirac(z - 1))
        self._V += self._eta_v * ut.dirac(z - 1) * (inputs.T - ut.sigmoid(self._V))
    self._current_time += isi
    self._trace.append(
        (self._current_time, inputs, z, u, self._V, self._b))
    if len(self._trace) > self._max_trace_length:
        self._trace.popleft()  # drop the oldest entry so the trace stays bounded
    return z
def output(self):
    output_rgblist = []
    if self.isRGB:
        for i_rgb in xrange(3):
            output_list = []
            data_input = self.input[i_rgb]
            for i in xrange(data_input.shape[0]):
                if i % 100 == 0:
                    print 'output image:' + str(i), data_input.shape[0]
                output_row = []
                input_vector = data_input[i]
                # Reshape the 1-D input vector into a 2-D image
                input = []
                for j in xrange(self.prev_shape[1]):
                    input.append(input_vector[j*self.prev_shape[0]:(j+1)*self.prev_shape[0]])
                input = numpy.array(input)
                # Slide the filter over the image and apply the sigmoid
                for y in xrange(self.post_shape[1]):
                    for x in xrange(self.post_shape[0]):
                        input_dot = input[y*self.filter_shift[0]:y*self.filter_shift[0]+self.filter_shape[1],
                                          x*self.filter_shift[1]:x*self.filter_shift[1]+self.filter_shape[0]]
                        now_W = self.W
                        output = [a*b for (a, b) in zip(input_dot, now_W)]
                        output = numpy.array(output)
                        output = output.sum() + self.bias
                        output_possible = sigmoid(output)
                        output_row.append(output_possible)
                output_list.append(output_row)
            output_rgblist.append(output_list)
        return numpy.array(output_rgblist)
    else:
        output_list = []
        for i in xrange(self.input.shape[0]):
            print 'output image:' + str(i)
            output_row = []
            input_vector = self.input[i]
            # Reshape the 1-D input vector into a 2-D image
            input = []
            for j in xrange(self.prev_shape[1]):
                input.append(input_vector[j*self.prev_shape[0]:(j+1)*self.prev_shape[0]])
            input = numpy.array(input)
            # Slide the filter over the image and apply the sigmoid
            for y in xrange(self.post_shape[1]):
                for x in xrange(self.post_shape[0]):
                    input_dot = input[y*self.filter_shift[0]:y*self.filter_shift[0]+self.filter_shape[1],
                                      x*self.filter_shift[1]:x*self.filter_shift[1]+self.filter_shape[0]]
                    output = [a*b for (a, b) in zip(input_dot, self.W)]
                    output = numpy.array(output)
                    output = output.sum() + self.bias
                    output_possible = sigmoid(output)
                    output_row.append(output_possible)
            output_list.append(output_row)
        return numpy.array(output_list)
def output_hr(self):
    h_list = []
    r_list = []
    for i in xrange(self.data_size):
        input = self.input_v[i]
        if i == 0:
            r = numpy.dot(input, self.W.T) + self.hbias
            h = numpy.dot(input, self.W.T) + self.hbias
            # rescale to the range [-3, 3]
            r = (r - numpy.min(r)) / numpy.max(r - numpy.min(r)) * 6 - 3
            h = (h - numpy.min(h)) / numpy.max(h - numpy.min(h)) * 6 - 3
        else:
            tmp_v = numpy.dot(input, self.W.T)
            tmp_r = numpy.dot(r_list[i-1], self.U.T)
            # rescale both terms to the range [-3, 3] before summing
            tmp_v = (tmp_v - numpy.min(tmp_v)) / numpy.max(tmp_v - numpy.min(tmp_v)) * 6 - 3
            tmp_r = (tmp_r - numpy.min(tmp_r)) / numpy.max(tmp_r - numpy.min(tmp_r)) * 6 - 3
            h = tmp_v + tmp_r
            r = tmp_v + tmp_r
            # dump the first 30 components of v, r and h for debugging
            f = open('check_vrh.txt', 'a+')
            str_v = ''
            str_r = ''
            str_h = ''
            for i_check in xrange(30):
                str_v += str(tmp_v[i_check]) + ','
                str_r += str(tmp_r[i_check]) + ','
                str_h += str(h[i_check]) + ','
            f.write(str_v + '\n')
            f.write(str_r + '\n')
            f.write(str_h + '\n')
            f.write('\n')
            f.close()
            print numpy.max(h)
            print numpy.min(h)
        r = sigmoid(r)
        h = sigmoid(h)
        h_list.append(h)
        r_list.append(r)
    h_list = numpy.array(h_list)
    r_list = numpy.array(r_list)
    return h_list, r_list
def reconstruct_from_output(self, output):
    input = numpy.dot(output, self.W) + self.vbias
    input_possible = sigmoid(input)
    # assert self.input.shape == input_possible.shape
    return input_possible
def output_from_input(self, input):
    output = numpy.dot(input, self.W.T) + self.hbias
    hidden_possible = sigmoid(output)
    return hidden_possible
def contrast_divergence(self, epoch):
    v_list = self.input_v
    h_list = []
    r_list = []
    d_list = []
    v_iteration_list = []
    h_iteration_list = []
    # For t = 0 ... T, compute the hidden output h and the recurrent output r
    for i in xrange(self.data_size):
        input = v_list[i]
        if i == 0:
            r = numpy.dot(input, self.W.T) + self.hbias
            h = numpy.dot(input, self.W.T) + self.hbias
        else:
            r = numpy.dot(input, self.W.T) + self.hbias + numpy.dot(r_list[i-1], self.U.T)
            h = numpy.dot(input, self.W.T) + self.hbias + numpy.dot(r_list[i-1], self.U.T)
        r = sigmoid(r)
        h = sigmoid(h)
        h_list.append(h)
        r_list.append(r)
        # CD: use a single Gibbs iteration for now
        v_iteration = numpy.dot(h, self.W) + self.vbias
        v_iteration = sigmoid(v_iteration)
        if i == 0:
            h_iteration = numpy.dot(v_iteration, self.W.T) + self.hbias
        else:
            h_iteration = numpy.dot(v_iteration, self.W.T) + self.hbias + numpy.dot(r_list[i-1], self.U.T)
        h_iteration = sigmoid(h_iteration)
        v_iteration_list.append(v_iteration)
        h_iteration_list.append(h_iteration)
    v_list = numpy.array(v_list)
    h_list = numpy.array(h_list)
    r_list = numpy.array(r_list)
    d_list = numpy.array(d_list)
    v_iteration_list = numpy.array(v_iteration_list)
    h_iteration_list = numpy.array(h_iteration_list)

    # Back-propagate the recurrent error term d from t = T down to t = 0
    d_reverse_list = []
    for i in reversed(xrange(self.data_size)):
        h_diff = h_list[i] - h_iteration_list[i]
        if i == self.data_size - 1:
            d = numpy.dot(self.U, h_diff)
        else:
            d = numpy.dot(self.U, d_reverse_list[-1] * r_list[i] * (1 - r_list[i]) + h_diff)
        d_reverse_list.append(d)
    d_reverse_list = numpy.array(d_reverse_list)
    d_list = []
    for i in xrange(len(d_reverse_list)):
        d_list.append(d_reverse_list[len(d_reverse_list) - i - 1])
    d_list = numpy.array(d_list)

    # calculate W H
    delta_H_W = [numpy.dot(h[numpy.newaxis, :].T, v[numpy.newaxis, :])
                 - numpy.dot(ha[numpy.newaxis, :].T, va[numpy.newaxis, :])
                 for v, h, va, ha
                 in zip(v_list, h_list, v_iteration_list, h_iteration_list)]
    delta_H_W = numpy.array(delta_H_W)
    delta_H_W = numpy.average(delta_H_W, axis=0)

    # calculate W Q2
    _delta_Q2_W = [d * r * (1 - r)
                   for d, r, v
                   in zip(d_list, r_list, v_list)]
    _delta_Q2_W = numpy.array(_delta_Q2_W)
    delta_Q2_W = [numpy.dot(dr[numpy.newaxis, :].T, v[numpy.newaxis, :])
                  for dr, v
                  in zip(_delta_Q2_W, v_list)]
    delta_Q2_W = numpy.average(delta_Q2_W, axis=0)

    # calculate W delta
    # delta_W = delta_H_W + delta_Q2_W
    delta_W = delta_H_W

    # calculate U delta
    _delta_Q2_U = _delta_Q2_W + (h_list - h_iteration_list)
    delta_Q2_U = numpy.dot(_delta_Q2_U.T, r_list)
    # U is (output_size, output_size); even if this runs, check that the math is what we intend
    delta_U = delta_Q2_U

    # calculate vbias
    delta_vbias = v_list - v_iteration_list
    delta_vbias = numpy.average(delta_vbias, axis=0)

    # calculate hbias
    delta_hbias = h_list - h_iteration_list + _delta_Q2_W
    delta_hbias = numpy.average(delta_hbias, axis=0)

    ####################
    # gradient check
    ####################
    f = open('gradient_check_U_10000.txt', 'a+')
    f.write(str(numpy.sum(numpy.fabs(delta_U))) + '\n')
    print numpy.sum(numpy.fabs(delta_U))
    f.close()

    ####################
    # parameter update
    ####################
    self.W += self.lr * delta_W
    self.U += self.lr * delta_U - 0.01 * self.U
def contrast_divergence_eachdata(self, epoch):
    total_delta = numpy.zeros((self.output_size, self.input_size))
    for i in xrange(self.input.shape[0]):
        train_input = self.input[i]
        dw = numpy.zeros((self.output_size, self.input_size))
        dvb = numpy.zeros(self.input_size)
        dhb = numpy.zeros(self.output_size)

        # Positive phase
        output = numpy.dot(train_input, self.W.T) + self.hbias
        hidden_possible = sigmoid(output)
        delta_W = []
        for j in xrange(hidden_possible.shape[0]):
            delta_W_elem = train_input * hidden_possible[j]
            delta_W.append(delta_W_elem)
        delta_W = numpy.array(delta_W)
        dw += delta_W
        dvb += numpy.mean(train_input, axis=0)
        dhb += numpy.mean(hidden_possible, axis=0)

        # Negative phase (one reconstruction step)
        visible_output = numpy.dot(hidden_possible, self.W) + self.vbias
        visible_possible = sigmoid(visible_output)
        hidden_output = numpy.dot(visible_possible, self.W.T) + self.hbias
        hidden_possible_after = sigmoid(hidden_output)
        delta_W = []
        for j in xrange(hidden_possible_after.shape[0]):
            delta_W_elem = visible_possible * hidden_possible_after[j]
            delta_W.append(delta_W_elem)
        delta_W = numpy.array(delta_W)
        dw -= delta_W
        dvb -= numpy.mean(visible_possible, axis=0)
        dhb -= numpy.mean(hidden_possible_after, axis=0)

        # Is dividing by data_size actually necessary in the end?
        total_delta += dw
        error = numpy.sum(numpy.abs(dw))
        print error
        os.chdir('result/rbm1_train' + str(epoch))
        f = open('error.txt', 'a')
        f.write(str(error) + ',')
        f.close()
        os.chdir('../../')

    # total_delta: roughly -700~700 / 7000
    self.W += self.learning_rate * total_delta / self.data_size
    os.chdir('result/rbm1_train' + str(epoch))
    f = open('error.txt', 'a')
    f.write('\n')
    f.close()
    os.chdir('../../')
def predict_sigmoid(self, input):
    output = numpy.dot(input, self.W.T) + self.b
    hidden_possible = sigmoid(output)
    return hidden_possible
def pre_train(self):
    for ep in xrange(self.epoch):
        print 'pretrain epoch:' + str(ep+1)
        loss = 0.0
        if self.isRGB:
            for i_rgb in xrange(3):
                data_input = self.input[i_rgb]
                for i in xrange(data_input.shape[0]):
                    input_vector = data_input[i]
                    # Reshape the 1-D input vector into a 2-D image
                    input = []
                    for j in xrange(self.prev_shape[1]):
                        input.append(input_vector[j*self.prev_shape[0]:(j+1)*self.prev_shape[0]])
                    input = numpy.array(input)
                    for y in xrange(self.post_shape[1]):
                        for x in xrange(self.post_shape[0]):
                            input_dot = input[y*self.filter_shift[0]:y*self.filter_shift[0]+self.filter_shape[1],
                                              x*self.filter_shift[1]:x*self.filter_shift[1]+self.filter_shape[0]]
                            now_W = self.W
                            output = [a*b for (a, b) in zip(input_dot, now_W)]
                            output = numpy.array(output)
                            output = output.sum() + self.bias
                            output_possible = sigmoid(output)
                            # Unclear whether a binary {0, 1} state is needed here
                            # output_state = numpy_rng.binomial(n=1, p=output_possible)
                            visible = output_possible * now_W
                            visible_possible = sigmoid(visible)
                            hidden_output = [a*b for (a, b) in zip(visible_possible, now_W)]
                            hidden_output = numpy.array(hidden_output)
                            hidden_output = hidden_output.sum() + self.bias
                            hidden_possible = sigmoid(hidden_output)
                            dw = input_dot*output_possible - visible_possible*hidden_possible
                            loss += numpy.average(dw * dw)
                            self.W += self.lr * dw
            print loss
        else:
            for i in xrange(self.input.shape[0]):
                input_vector = self.input[i]
                # Reshape the 1-D input vector into a 2-D image
                input = []
                for j in xrange(self.prev_shape[1]):
                    input.append(input_vector[j*self.prev_shape[0]:(j+1)*self.prev_shape[0]])
                input = numpy.array(input)
                for y in xrange(self.post_shape[1]):
                    for x in xrange(self.post_shape[0]):
                        input_dot = input[y*self.filter_shift[0]:y*self.filter_shift[0]+self.filter_shape[1],
                                          x*self.filter_shift[1]:x*self.filter_shift[1]+self.filter_shape[0]]
                        output = [a*b for (a, b) in zip(input_dot, self.W)]
                        output = numpy.array(output)
                        output = output.sum() + self.bias
                        output_possible = sigmoid(output)
                        # Unclear whether a binary {0, 1} state is needed here
                        # output_state = numpy_rng.binomial(n=1, p=output_possible)
                        visible = output_possible * self.W
                        visible_possible = sigmoid(visible)
                        print 'check!!'
                        print visible_possible.shape
                        print self.W
                        hidden_output = [a*b for (a, b) in zip(visible_possible, self.W)]
                        hidden_output = numpy.array(hidden_output)
                        hidden_output = hidden_output.sum() + self.bias
                        hidden_possible = sigmoid(hidden_output)
                        dw = input_dot*output_possible - visible_possible*hidden_possible
                        self.W += self.lr * dw