class NormLin(AbstractTransformation):
    """
    A normalization layer followed by a linear projection layer.

    Currently just accepts most defaults for the two layers, but this could be
    changed in the future if more customization is needed.
    """

    def __init__(self, num_dims, num_factors=2, name="Norm Lin"):
        self.name = name
        self.num_dims = num_dims
        self._norm = Normalization(num_dims)
        self._proj = Linear(num_dims, num_factors=num_factors)

    @property
    def hypers(self):
        return self._proj.hypers

    def output_num_dims(self):
        return self._proj.output_num_dims()

    def forward_pass(self, inputs):
        norm_inputs = self._norm.forward_pass(inputs)
        proj_inputs = self._proj.forward_pass(norm_inputs)
        return proj_inputs

    def backward_pass(self, V):
        JV_proj = self._proj.backward_pass(V)
        JV_norm = self._norm.backward_pass(JV_proj)
        return JV_norm

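# A minimal numpy sketch (not from the original source) of what a NormLin-style
# transformation does: normalize the inputs, project them with a linear map, and
# chain the two backward passes. The shapes, the row-wise normalization, and all
# names below are illustrative assumptions, not NormLin's actual defaults.
import numpy as np

x = np.random.randn(5, 4)              # 5 samples, 4 input dims
W = np.random.randn(4, 2)              # project down to 2 factors

mu = x.mean(axis=1, keepdims=True)
sigma = x.std(axis=1, keepdims=True) + 1e-8
x_norm = (x - mu) / sigma              # "Normalization" forward pass
out = x_norm @ W                       # "Linear" forward pass

V = np.ones_like(out)                  # some upstream vector
JV_proj = V @ W.T                      # backward through the projection only;
                                       # NormLin.backward_pass would then push
                                       # JV_proj through the normalization
                                       # Jacobian as well
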
class Network(Module):
    def __init__(self, nb_in, nb_out, nb_hidden):
        super(Network, self).__init__()
        self.fc1 = Linear(nb_in, nb_hidden)
        self.fc2 = Linear(nb_hidden, nb_out)

    def forward(self, x):
        x = relu(self.fc1.forward(x))
        x = relu(self.fc2.forward(x))
        return x

def build_model():
    model = Sequential(MSE(), input_size=2)
    model.add_layer(Linear(2, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(Tanh(25))
    model.add_layer(Linear(25, 2))
    return model

def __init__(self) -> None:
    super().__init__()
    self.activation = Tanh()
    # self.layer1 = self.Conv2D((1, 28, 28), (8, 3, 3), 1)
    # self.layer2 = self.MaxPool()
    # self.layer3 = self.Conv2D((2, 3, 3), 2)
    self.layer4 = Linear(784, 16)
    self.layer5 = Linear(16, 16)
    self.layer6 = Linear(16, 10)

def run_bigger_example():
    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)
    linear1 = Linear(x.shape[0], x.shape[0], weight_init='ones')
    linear2 = Linear(x.shape[0], y.shape[0], weight_init='ones')
    net_2layer = Network([linear1, linear2])
    pred_2layer = net_2layer.forward(x)
    # loss.backward()
    print("pred_2layer is ")
    print(pred_2layer)
    mse = MSE()
    loss = mse.forward(pred_2layer, y)
    print("loss for 2 layer net is ")
    print(loss)
    # Should be 2*(18-7) = 22
    loss_grad = mse.backward()
    print("loss_grad for 2layer net is ")
    print(loss_grad)
    print("Printing params Grad before ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("now setting param grad to zero")
    net_2layer.zero_grad()
    print("Printing params Grad after ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Printing params before backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Doing backward pass")
    net_2layer.backward(loss_grad)
    print("Printing params after backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Printing params Grad")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Doing param update")
    net_2layer.grad_step(lr=1e-3)
    print("Printing params after update")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)

def test_linear_weights():
    w = Tensor([[2, 4, 8], [16, 32, 69]])
    b = Tensor([0, 0, 0])
    x = Tensor([3, 9, 27])
    print(w.shape, b.shape)
    l1 = Linear(2, 3)
    l1.init_weights(1)
    w, b = l1.param()
    print(w.shape, b.shape)

def test_neural_net_back_forward(self):
    n_in, n_out = 3, 2
    weights = np.array([[0, -1, 2], [-3, 4, -5]])
    bias = np.arange(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-3,
                   layers=[Linear(n_in, 2, weights, bias), ReLU()])
    x = np.array([[[0], [1], [2]]])
    y = np.array([[[2], [3]]])
    assert y.shape[1] == n_out
    # | 0 -1  2| |0|   |0|   | 3|   |0|   | 3|    |3|
    # |-3  4 -5| |1| + |1| = |-6| + |1| = |-5| -> |0|
    #            |2|
    pred = nn.forward(x)
    assert np.array_equal(pred, [[[3], [0]]])
    nn.compute_loss(pred, y)
    dL_dx = nn.backward()
    # | 0 -1  2| |0 + dx1|   | 3 + 0   -  dx2 + 2dx3|   | 3 + ...|    |3 - dx2 + 2dx3|
    # |-3  4 -5| |1 + dx2| = |-6 - 3dx1 + 4dx2 - 5dx3| = |-5 + ...| -> |0|
    #            |2 + dx3|   The second component is ReLU'ed away.
    # MSE loss results in 2( ... ), so dL = -2dx2 + 4dx3 and dL/dx = |0, -2, 4|
    assert np.array_equal(dL_dx, [[[0], [-2], [4]]])

def test_forward(self):
    n_in, n_out = 3, 2
    bias = np.arange(n_out)[:, np.newaxis]
    weights = np.arange(n_in * n_out).reshape((n_out, n_in))
    layer = Linear(n_in, n_out, weights, bias)
    x = np.array([[[0], [1], [2]]])
    # |0 1 2| |0|   |0|   | 5|   |0|   | 5|
    # |3 4 5| |1| + |1| = |14| + |1| = |15|
    #         |2|
    # breakpoint()
    assert np.array_equal(layer.forward(x), [[[5], [15]]])
    assert np.array_equal(layer.d_out_d_in, weights)

def __init__(self, in_features, n_classes, cutoffs, div_value=4., head_bias=False):
    super(AdaptiveLogSoftmaxWithLoss, self).__init__()

    cutoffs = list(cutoffs)

    if (cutoffs != sorted(cutoffs)) \
            or (min(cutoffs) <= 0) \
            or (max(cutoffs) >= (n_classes - 1)) \
            or (len(set(cutoffs)) != len(cutoffs)) \
            or any([int(c) != c for c in cutoffs]):
        raise ValueError(
            "cutoffs should be a sequence of unique, positive "
            "integers sorted in an increasing order, where "
            "each value is between 1 and n_classes-1")

    self.in_features = in_features
    self.n_classes = n_classes
    self.cutoffs = cutoffs + [n_classes]
    self.div_value = div_value
    self.head_bias = head_bias

    self.shortlist_size = self.cutoffs[0]
    self.n_clusters = len(self.cutoffs) - 1
    self.head_size = self.shortlist_size + self.n_clusters

    self.head = Linear(self.in_features, self.head_size, bias=self.head_bias)
    self.tail = ModuleList()

    for i in range(self.n_clusters):
        hsz = int(self.in_features // (self.div_value ** (i + 1)))
        osz = self.cutoffs[i + 1] - self.cutoffs[i]
        projection = Sequential(
            Linear(self.in_features, hsz, bias=False),
            Linear(hsz, osz, bias=False),
        )
        self.tail.append(projection)

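# Sketch (an added illustration, not part of the original snippet): the cutoff
# arithmetic above spelled out for hypothetical values in_features=64,
# n_classes=100, cutoffs=[10, 40], div_value=4. The head covers the 10 shortlist
# classes plus one token per cluster; each tail cluster gets a progressively
# narrower bottleneck projection.
in_features, n_classes, cutoffs, div_value = 64, 100, [10, 40], 4.0
cutoffs = cutoffs + [n_classes]              # [10, 40, 100]
shortlist_size = cutoffs[0]                  # 10
n_clusters = len(cutoffs) - 1                # 2
head_size = shortlist_size + n_clusters      # 12
for i in range(n_clusters):
    hsz = int(in_features // (div_value ** (i + 1)))   # 16, then 4
    osz = cutoffs[i + 1] - cutoffs[i]                  # 30, then 60
    print(f"cluster {i}: Linear({in_features}, {hsz}) -> Linear({hsz}, {osz})")
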
class IdentityRegressor():
    def __init__(self, n, classes):
        self.fc1 = Linear("fc1", n, 256)
        self.fc2 = Linear("fc2", 256, classes)

    def forward(self, x):
        # bs, m, n = int(x.shape[0]), int(x.shape[1]), int(x.shape[2])
        # print("x", x)
        m, n = int(x.shape[1]), int(x.shape[2])
        # print(x)  # 64*14*4608
        x = tf.reshape(x, (-1, n * m))
        # print(x)  # 64*64512
        x = tf.nn.relu(self.fc1.forward(x))
        # print(x)  # 64*256
        x = self.fc2.forward(x)
        # return x
        x = tf.nn.softmax(x)
        # print(x)  # 64*10575
        return x

def __init__(
    self,
    in_features: int,
    out_features: int,
    first: bool = False,
    couple: bool = False,
    dropout_p: float = 0.0,
    init_weight: Union[int, float, str] = 'kaiming',
    init_bias: Union[int, float, str] = -1
):
    super().__init__()
    self.first = first
    self.couple = couple
    if first:
        self.W_H = Linear(in_features, out_features, bias=False, activation=None)
        self.W_T = Linear(in_features, out_features, bias=False, activation=None)
        if not couple:
            self.W_C = Linear(in_features, out_features, bias=False, activation=None)
    self.R_H = Linear(in_features, out_features, bias=True, activation=None)
    self.R_T = Linear(in_features, out_features, bias=True, activation=None)
    if not couple:
        self.R_C = Linear(in_features, out_features, bias=True, activation=None)
    for child in self.children():
        child.reset_parameters(init_weight, init_bias)
    self.dropout = RNNDropout(dropout_p)

def __init__(self, n_samples, batch_size, n_bits, fwd_scale_factor, bck_scale_factor,
             loss_scale_factor, in_features, out_features, lr):
    self.lin_layer = Linear(n_samples=n_samples, batch_size=batch_size, n_bits=n_bits,
                            fwd_scale_factor=fwd_scale_factor,
                            bck_scale_factor=bck_scale_factor,
                            in_features=in_features, out_features=out_features)
    self.loss_layer = CrossEntropy(n_samples, out_features, batch_size, n_bits,
                                   loss_scale_factor)
    self.lr = lr
    self.fwd_scale_factor = fwd_scale_factor
    self.bck_scale_factor = bck_scale_factor
    self.loss_scale_factor = loss_scale_factor

def build_model(self):
    self.conv_layers = []
    self.linear_layers = []
    self.layers = []
    # 1x28x28 -> 6x24x24
    self.conv_layers += [Conv(1, 6, 5, self.activation)]
    # 6x24x24 -> 6x12x12
    self.conv_layers += [MaxPool_2()]
    # 6x12x12 -> 16x8x8
    self.conv_layers += [Conv(6, 16, 5, self.activation)]
    # 16x8x8 -> 16x4x4
    self.conv_layers += [MaxPool_2()]
    # 256 -> 120
    self.linear_layers += [Linear(16 * 4 * 4, 120, self.activation)]
    # 120 -> 84
    self.linear_layers += [Linear(120, 84, self.activation)]
    # 84 -> 10
    self.linear_layers += [Softmax(84, self.no_of_classes)]
    self.layers = self.conv_layers + self.linear_layers

class GenModel():
    def __init__(self, feature_size):
        self.f_size = feature_size
        self.g1 = Linear("g1", self.f_size * 3 * 3, self.f_size * 3 * 3)
        self.g2 = Linear("g2", self.f_size * 2 * 2, self.f_size * 3 * 3)
        self.g3 = Linear("g3", self.f_size * 1 * 1, self.f_size * 3 * 3)

    def forward(self, x, scope):
        S0 = x
        conv1 = conv_op(input_op=S0, name="S0" + scope, kh=3, kw=3,
                        n_out=self.f_size, dh=1, dw=1)
        S1 = tf.nn.relu(conv1)
        conv2 = conv_op(input_op=S1, name="S1" + scope, kh=3, kw=3,
                        n_out=self.f_size, dh=1, dw=1)
        S2 = tf.nn.relu(conv2)
        p1 = extract_patches(S0, 3)
        # print("p1", p1)  # bs*9*4608
        p2 = extract_patches(S1, 2)
        # print("p2", p2)  # bs*4*2048
        p3 = extract_patches(S2, 1)
        # print("p3", p3)  # bs*1*512
        kk1 = tf.nn.relu(self.g1.forward(p1))
        # print("kk1", kk1)  # bs*9*4608
        kk2 = tf.nn.relu(self.g2.forward(p2))
        # print("kk2", kk2)  # bs*4*4608
        kk3 = tf.nn.relu(self.g3.forward(p3))
        # print("kk3", kk3)  # bs*1*4608
        kernels = tf.concat((kk1, kk2, kk3), 1)
        return kernels

def run_mini_example():
    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)
    linear = Linear(x.shape[0], y.shape[0], weight_init='ones')
    net = Network([linear])
    pred = net.forward(x)
    # loss.backward()
    print("Pred is ")
    print(pred)

class Regressor():
    def __init__(self, n):
        self.n = n
        self.linear = Linear("linear", n, 1)

    def forward(self, x):
        bs, c = x.shape[0], x.shape[1]
        # print(bs, c)
        # print(x)  # 64*350 (pytorch 64*686)
        # return x
        x = self.linear.forward(x)
        # return x
        # print(x)  # 64*1
        x = tf.nn.sigmoid(x)
        # print(x)  # 64*1
        return x

def linearProcessor():
    """ Processor node for linear operation """
    inputs, weights, bias = Input(), Input(), Input()
    f = Linear(inputs, weights, bias)
    feed_dict = {
        inputs: [6, 14, 3],
        weights: [0.5, 0.25, 1.4],
        bias: 2
    }
    graph = topological_sort(feed_dict)
    output = forward_pass(f, graph)
    # With a dot-product Linear node this evaluates to 6*0.5 + 14*0.25 + 3*1.4 + 2 = 12.7
    print(output, "(according to miniflow - linear)")

def make_exprs(inpt, in_to_out, bias, out_transfer, loss, c_rim):
    exprs = Linear.make_exprs(inpt, in_to_out, bias, out_transfer, loss)
    output = exprs['output']

    marginal = output.mean(axis=0)
    cond_entropy = misc.discrete_entropy(output, axis=1).mean()
    entropy = misc.discrete_entropy(marginal)

    # negative mutual information -> we are minimizing
    neg_mi = cond_entropy - entropy
    l2 = (in_to_out ** 2).sum()

    exprs['neg_mi'] = neg_mi
    exprs['l2'] = l2
    exprs['loss'] = neg_mi + c_rim * l2

    return exprs

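# Sketch (an added illustration, under the assumption that misc.discrete_entropy
# computes the usual -sum(p*log(p))): what the RIM-style objective above amounts
# to in plain numpy -- the mean per-sample entropy of the output distribution
# minus the entropy of its marginal (i.e. negative mutual information), plus a
# weighted L2 penalty on the projection weights.
import numpy as np

def discrete_entropy(p, axis=None):
    return -(p * np.log(p + 1e-12)).sum(axis=axis)

output = np.random.dirichlet(np.ones(4), size=8)   # 8 samples, 4 "cluster" probabilities
marginal = output.mean(axis=0)
cond_entropy = discrete_entropy(output, axis=1).mean()
entropy = discrete_entropy(marginal)
neg_mi = cond_entropy - entropy                    # the quantity being minimized
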
def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn,
             bias_init_fn, criterion, lr, momentum=0.0, num_bn_layers=0):

    # Don't change this -->
    self.train_mode = True
    self.num_bn_layers = num_bn_layers
    self.bn = num_bn_layers > 0
    self.nlayers = len(hiddens) + 1
    self.input_size = input_size
    self.output_size = output_size
    self.activations = activations
    self.criterion = criterion
    self.lr = lr
    self.momentum = momentum
    # <---------------------

    # Don't change the name of the following class attributes,
    # the autograder will check against these attributes. But you will need to change
    # the values in order to initialize them correctly

    # Initialize and add all your linear layers into the list 'self.linear_layers'
    # (HINT: self.foo = [ bar(???) for ?? in ? ])
    # (HINT: Can you use zip here?)
    # self.linear_layers = []
    # self.linear_layers.append(Linear(input_size, output_size, weight_init_fn, bias_init_fn))
    input_size_list = [self.input_size] + hiddens
    output_size_list = hiddens + [self.output_size]
    weight_init_list = np.repeat(weight_init_fn, self.nlayers)
    bias_init_list = np.repeat(bias_init_fn, self.nlayers)
    para_list = list(zip(input_size_list, output_size_list, weight_init_list, bias_init_list))
    self.linear_layers = [Linear(para_list[i][0], para_list[i][1], para_list[i][2], para_list[i][3])
                          for i in range(self.nlayers)]

    # If batch norm, add batch norm layers into the list 'self.bn_layers'
    self.bn_layers = []
    if self.bn:
        # self.bn_layers.append(BatchNorm(input_size, alpha=0.9))
        self.bn_layers = [BatchNorm(hiddens[i], alpha=0.9) for i in range(num_bn_layers)]

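# Sketch (an added illustration with made-up sizes): the zip-based bookkeeping
# above simply pairs consecutive layer widths, one Linear layer per pair.
input_size, hiddens, output_size = 784, [64, 32], 10
sizes = list(zip([input_size] + hiddens, hiddens + [output_size]))
print(sizes)   # [(784, 64), (64, 32), (32, 10)]
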
def test_neural_net_tends_to_correct(self):
    n_in, n_out = 4, 2
    np.random.seed(12)
    weights = np.random.normal(size=(n_out, n_in))
    bias = np.zeros(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-2, layers=[Linear(n_in, 2, weights, bias)])
    x = np.array([[[-1], [0.5], [-0.33], [0.75]]])
    y = np.array([[[-0.5], [0.2]]])
    for _ in range(1000):
        pred = nn.forward(x)
        loss = nn.compute_loss(pred, y)
        nn.backward()
    assert np.isclose(loss, 0)

def test_neural_net_works_with_batches(self):
    n_in, n_out = 2, 2
    np.random.seed(12)
    weights = np.random.normal(size=(n_out, n_in))
    bias = np.zeros(n_out)[:, np.newaxis]
    nn = NeuralNet(MeanSquaredError(), 1e-2, layers=[Linear(n_in, 2, weights, bias)])
    # batch of 3
    x = np.array([[[-1], [0.5]], [[1], [-0.2]], [[-0.33], [0.75]]])
    y = x
    # Why does this take so much longer to converge than the previous one?
    for _ in range(10000):
        pred = nn.forward(x)
        loss = nn.compute_loss(pred, y)
        nn.backward()
    assert np.isclose(loss, 0)
    assert np.all(np.isclose(nn.layers[0].weights, [[1, 0], [0, 1]], atol=1e-3))

### Generation of the DATA
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

# Standardize data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# Convert to one-hot labels so that we can train
train_target_hot = conv_to_one_hot(train_target)
test_target_hot = conv_to_one_hot(test_target)

### Build the Network
hidden_layers = 3
layers = []
linear = Linear(2, 25, bias_init=True)
layers.append(linear)
layers.append(Relu())
for i in range(hidden_layers - 1):
    layers.append(Linear(25, 25, bias_init=True))
    layers.append(Relu())
layers.append(Tanh())
layers.append(Linear(25, 2, bias_init=True))
model = Sequential(layers)

# Print model summary
print("Model Summary:")
print(model)

### Select Parameters to train the model
criterion = MSE()

X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

# Train dataset
X_ = np.reshape(np.array([[-1., -2., -3.], [1., 2., 3.]]), (2, 3))
W1_ = np.random.randn(3, 2)
b1_ = np.random.randn(2)
W2_ = np.random.randn(2, 1)
b2_ = np.random.randn(1)
y_ = np.reshape(np.array([[1.], [0.]]), (-1, 1))

# Test dataset
X_t_ = np.reshape(np.array([-1., -2.01, -2.8]), (1, 3))
y_t_ = np.array([1.])

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = L2(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}
hyper_parameters = [W1, b1, W2, b2]
graph = Network.topological_sort(feed_dict)

epoch = 1000000
for i in xrange(epoch):
    Network.forward_propagation(graph)
    Network.backward_propagation(graph)
    Update.stochastic_gradient_descent(hyper_parameters, learning_rate=1e-4)

def test_Linear():
    T = 5
    batch_size = 2
    doutput = 3
    dinput = 4
    unit = Linear(dinput, doutput)
    W = unit.get_weights()

    X = np.random.randn(T, dinput, batch_size)

    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    loss = np.sum(acc_Y * wrand)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()

    def fwd():
        unit.set_weights(W)
        h = unit.forward(X)
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    all_values = [X, W]
    backpropagated_gradients = [dX, dW]
    names = ['X', 'W']

    error_count = 0
    for v in range(len(names)):
        values = all_values[v]
        dvalues = backpropagated_gradients[v]
        name = names[v]
        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_minus = fwd()
            values.flat[i] = actual - delta
            loss_plus = fwd()
            values.flat[i] = actual
            backpropagated_gradient = dvalues.flat[i]
            # Note: loss_minus is evaluated at +delta and loss_plus at -delta,
            # so this central difference has the correct sign despite the names.
            numerical_gradient = (loss_minus - loss_plus) / (2 * delta)

            if numerical_gradient == 0 and backpropagated_gradient == 0:
                error = 0
            elif abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
                error = 0
            else:
                error = abs(backpropagated_gradient - numerical_gradient) / \
                    abs(numerical_gradient + backpropagated_gradient)

            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter: ', name, '\tindex: ', np.unravel_index(i, values.shape))
                print('\tvalues: ', actual)
                print('\tbackpropagated_gradient: ', backpropagated_gradient)
                print('\tnumerical_gradient', numerical_gradient)
                print('\terror: ', error)
                print('\n\n')
                error_count += 1

    if error_count == 0:
        print('Linear Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))

def main():
    # Validate the CLI argument before using it.
    if len(sys.argv) != 2 or sys.argv[1] not in {"LINEAR", "CNN", "RNN"}:
        print("USAGE: python main.py <Model Type>")
        print("<Model Type>: [LINEAR/CNN/RNN]")
        exit()

    if sys.argv[1] == "LINEAR":
        model = Linear()
        num_epochs = 100
    elif sys.argv[1] == "CNN":
        model = CNN()
        num_epochs = 100
    elif sys.argv[1] == "RNN":
        model = RNN()
        num_epochs = 100

    print("Running preprocessing...")
    if sys.argv[1] == "RNN":
        train_inputs, train_labels, test_inputs, test_labels = get_rnn_data("data/genres.tar")
    else:
        train_inputs, train_labels, test_inputs, test_labels = get_data("data/genres.tar")
    print("Preprocessing completed.")

    if sys.argv[1] == "RNN":
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(units=256))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU(0.2))
        model.add(tf.keras.layers.Reshape((16, 16)))
        model.add(tf.keras.layers.LSTM(units=128, dropout=0.05,
                                       recurrent_dropout=0.35, return_sequences=True))
        model.add(tf.keras.layers.LSTM(units=32, dropout=0.05,
                                       recurrent_dropout=0.35, return_sequences=False))
        model.add(tf.keras.layers.Dense(units=10, activation="softmax"))
        num_epochs = 100
        opt = tf.keras.optimizers.Adam(lr=.01)
        model.compile(optimizer=opt, loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
        history = model.fit(train_inputs, train_labels, epochs=num_epochs, batch_size=100)
        model.summary()
        test_loss, test_acc = model.evaluate(test_inputs, test_labels)
        print('test acc: ', test_acc)
    else:
        for _ in range(num_epochs):
            train(model, train_inputs, train_labels)
        print(test(model, test_inputs, test_labels))

def __main__():
    alltriple = []
    for t in c.triple:
        if len(t[2]) > 10 or len(t[0]) > 1 or len(t[1]) > 1:
            continue
        for i in t[0]:
            if i.encode('utf-8') not in model:
                continue
            for j in t[1]:
                if j.encode('utf-8') not in model:
                    continue
                for k in t[2]:
                    if k.encode('utf-8') not in model:
                        continue
                    if i != j and j != k and i != k:
                        alltriple.append((i, j, k))
                        # writeln(i.encode('utf-8'), j.encode('utf-8'), k.encode('utf-8'))
    writeln('All triple num: %d' % (len(alltriple)))
    if len(alltriple) > 3000:
        alltriple = random.sample(alltriple, 3000)
    # sample = random.sample(alltriple, 1391)
    # train = sample[:len(sample)/4]
    # test = sample[len(sample)/4:]
    sample = alltriple
    test = sample[:len(sample)/3]
    train = sample[len(sample)/3:]
    writeln('Train data num: %d\n Test data num: %d' % (len(train), len(test)))

    l = Linear()
    l.kmeans_clusters(train)
    if affirm('Show Train Data'):
        makeGraph(l, [model[x[1].encode('utf-8')] - model[x[2].encode('utf-8')] for x in train],
                  label=l.labels_)
        plt.show()
    related, unrelated = deal_test_data(test)
    if affirm('Show Train and Test Data'):
        makeGraph(l, [model[x[1].encode('utf-8')] - model[x[2].encode('utf-8')] for x in train],
                  label=l.labels_)
        makeGraph(l, [model[x[1].encode('utf-8')] - model[x[0].encode('utf-8')] for x in related],
                  eachlabel='related', col='gray')
        makeGraph(l, [model[x[1].encode('utf-8')] - model[x[0].encode('utf-8')] for x in unrelated],
                  eachlabel='unrelated', col='black')
        plt.show()

    for i in xrange(l.kcluster):
        l.wrongdict[i] = 0
        l.rightdict[i] = 0
        l.rpos[i] = 0
        l.uneg[i] = 0
        l.upos[i] = 0
        l.rneg[i] = 0
        l.sumnum[i] = 0

    group = []
    for i in xrange(l.kcluster):
        group.append(([], []))
    for i in xrange(len(train)):
        group[l.labels_[i]][0].append(model[train[i][2].encode('utf-8')])
        group[l.labels_[i]][1].append(model[train[i][1].encode('utf-8')])
    for i in xrange(l.kcluster):
        writeln('Cluster %d' % (i))
        phi, cost = l.calcPhiSGD(group[i][0], group[i][1])
        l.phis.append(phi)
        l.finalcosts.append(cost)

    writeln('Detail test start')
    tmpthresholdrate = 1.2
    l.ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
    writeln('\n\nDict Detail:')
    l.printclusterdetail()

    writeln('\n\nGeneral test start')
    tmpthresholdrate = 0.1
    while tmpthresholdrate < 3:
        l.ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        l.printclusterdetail()
        tmpthresholdrate += 0.1
    while tmpthresholdrate < 20:
        l.ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        l.printclusterdetail()
        tmpthresholdrate += 1

)  # add epsilon to avoid a division-by-zero error
validX = (validX - mu) / (std + np.finfo(np.float32).eps)
testX = (testX - mu) / (std + np.finfo(np.float32).eps)

#%% Visualize MNIST
# https://colah.github.io/posts/2014-10-Visualizing-MNIST/

#%% Build the model
model = NeuralNetwork()
# Think of the neural network as a stack of layers, each performing a specific
# computation; data flows forward through these layers.
# Guided by the chain rule, backpropagating the error yields the partial
# derivatives (gradients) of the cost function with respect to the model parameters.
# Task 1: implement the forward and backward methods of the Relu class
# Task 2: implement the forward method of the Softmax class
model.layers.append(Linear(n_feature, 60, lr))
model.layers.append(Relu())
model.layers.append(Linear(60, 10, lr))
model.layers.append(Softmax())

#%% Training
# stochastic gradient descent
batchsize = 100
trainloss = []
validloss = []
snapshot = []
for i in range(n_iter):
    # Before each iteration, generate a new index permutation (to shuffle the data)
    idxs = np.random.permutation(trainX.shape[0])

# -*- coding:utf-8 -*-
# @Time   : 2019/10/30
# @Author : Botao Fan

from config import DATA_PATH
from linear import Linear
from data_prepare import data_prep

if __name__ == '__main__':
    train_idx, train_val, train_y, user_dict, item_dict = data_prep(DATA_PATH, 'ua.base')
    test_idx, test_val, test_y, _, _ = data_prep(DATA_PATH, 'ua.test', user_dict, item_dict)
    linear = Linear(param_size=len(user_dict) + len(item_dict), epoch=200)
    linear.fit(train_idx, train_val, train_y, test_idx, test_val, test_y)

class MatrixAttention(Layer):
    '''
    This ``Layer`` takes two matrices as input and returns a matrix of attentions.

    We compute the similarity between each row in each matrix and return unnormalized similarity
    scores. We don't worry about zeroing out any masked values, because we propagate a correct
    mask.

    By default similarity is computed with a dot product, but you can alternatively use a
    parameterized similarity function if you wish.

    This is largely similar to using ``TimeDistributed(Attention)``, except the result is
    unnormalized, and we return a mask, so you can do a masked normalization with the result.
    You should use this instead of ``TimeDistributed(Attention)`` if you want to compute multiple
    normalizations of the attention matrix.

    Input:
        - matrix_1: ``(batch_size, num_rows_1, embedding_dim)``, with mask ``(batch_size, num_rows_1)``
        - matrix_2: ``(batch_size, num_rows_2, embedding_dim)``, with mask ``(batch_size, num_rows_2)``

    Output:
        - ``(batch_size, num_rows_1, num_rows_2)``, with mask of same shape

    Parameters
    ----------
    similarity_function_params: Dict[str, Any], default={}
        These parameters get passed to a similarity function (see
        :mod:`deep_qa.tensors.similarity_functions` for more info on what's acceptable). The
        default similarity function with no parameters is a simple dot product.
    '''
    def __init__(self, similarity_function: Dict[str, Any] = None, **kwargs):
        super(MatrixAttention, self).__init__(**kwargs)
        self.similarity_function_params = deepcopy(similarity_function)
        if similarity_function is None:
            similarity_function = {}
        similarity_function['name'] = self.name + '_similarity_function'
        self.similarity_function = Linear(**similarity_function)
        # self.similarity_function = DotProduct(**similarity_function)

    def build(self, input_shape):
        tensor_1_dim = input_shape[0][-1]
        tensor_2_dim = input_shape[1][-1]
        self.trainable_weights = self.similarity_function.initialize_weights(tensor_1_dim,
                                                                             tensor_2_dim)
        super(MatrixAttention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
        mask_1, mask_2 = mask
        if mask_1 is None and mask_2 is None:
            return None
        if mask_1 is None:
            mask_1 = K.ones_like(K.sum(inputs[0], axis=-1))
        if mask_2 is None:
            mask_2 = K.ones_like(K.sum(inputs[1], axis=-1))
        # Theano can't do batch_dot on ints, so we need to cast to float and then back.
        mask_1 = K.cast(K.expand_dims(mask_1, axis=2), 'float32')
        mask_2 = K.cast(K.expand_dims(mask_2, axis=1), 'float32')
        return K.cast(K.batch_dot(mask_1, mask_2), 'uint8')

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], input_shape[1][1])

    def call(self, inputs, mask=None):
        matrix_1, matrix_2 = inputs
        num_rows_1 = K.shape(matrix_1)[1]
        num_rows_2 = K.shape(matrix_2)[1]
        tile_dims_1 = K.concatenate([[1, 1], [num_rows_2], [1]], 0)
        tile_dims_2 = K.concatenate([[1], [num_rows_1], [1, 1]], 0)
        tiled_matrix_1 = K.tile(K.expand_dims(matrix_1, axis=2), tile_dims_1)
        tiled_matrix_2 = K.tile(K.expand_dims(matrix_2, axis=1), tile_dims_2)
        return self.similarity_function.compute_similarity(tiled_matrix_1, tiled_matrix_2)

    def get_config(self):
        base_config = super(MatrixAttention, self).get_config()
        config = {'similarity_function': self.similarity_function_params}
        config.update(base_config)
        return config

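# Sketch (an added illustration, not from the original project): the tiling in
# ``call`` pairs every row of matrix_1 with every row of matrix_2 so a similarity
# function can be applied element-wise. For the default dot-product similarity,
# the same pairwise attention matrix can be written in plain numpy as follows;
# all shapes here are made up for demonstration.
import numpy as np

batch_size, num_rows_1, num_rows_2, embedding_dim = 2, 3, 4, 5
matrix_1 = np.random.randn(batch_size, num_rows_1, embedding_dim)
matrix_2 = np.random.randn(batch_size, num_rows_2, embedding_dim)

tiled_1 = np.tile(matrix_1[:, :, None, :], (1, 1, num_rows_2, 1))   # (b, r1, r2, d)
tiled_2 = np.tile(matrix_2[:, None, :, :], (1, num_rows_1, 1, 1))   # (b, r1, r2, d)
dot_product_attention = (tiled_1 * tiled_2).sum(axis=-1)            # (b, r1, r2)

# Equivalent, without tiling:
assert np.allclose(dot_product_attention, np.einsum('bik,bjk->bij', matrix_1, matrix_2))
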
def __main__():
    c = CilinE()
    alltriple = []
    for t in c.triple:
        if len(t[2]) > 10 or len(t[0]) > 1 or len(t[1]) > 1:
            continue
        for i in t[0]:
            if i.encode("utf-8") not in model:
                continue
            for j in t[1]:
                if j.encode("utf-8") not in model:
                    continue
                for k in t[2]:
                    if k.encode("utf-8") not in model:
                        continue
                    if i != j and j != k and i != k:
                        alltriple.append((i, j, k))
                        # writeln(i.encode('utf-8'), j.encode('utf-8'), k.encode('utf-8'))
    writeln("All triple num: %d" % (len(alltriple)))
    if len(alltriple) > 3000:
        alltriple = random.sample(alltriple, 3000)
    # sample = random.sample(alltriple, 1391)
    # train = sample[:len(sample)/4]
    # test = sample[len(sample)/4:]
    sample = alltriple
    test = sample[: len(sample) / 3]
    train = sample[len(sample) / 3:]
    writeln("Train data num: %d\n Test data num: %d" % (len(train), len(test)))

    l = Linear()
    l.kmeans_clusters(train)
    if affirm("Show Train Data"):
        makeGraph([model[x[1].encode("utf-8")] - model[x[2].encode("utf-8")] for x in train],
                  label=labels_)
        plt.show()
    related, unrelated = deal_test_data(test)
    if affirm("Show Train and Test Data"):
        makeGraph([model[x[1].encode("utf-8")] - model[x[2].encode("utf-8")] for x in train],
                  label=labels_)
        makeGraph(
            [model[x[1].encode("utf-8")] - model[x[0].encode("utf-8")] for x in related],
            eachlabel="related",
            col="gray",
        )
        makeGraph(
            [model[x[1].encode("utf-8")] - model[x[0].encode("utf-8")] for x in unrelated],
            eachlabel="unrelated",
            col="black",
        )
        plt.show()

    for i in xrange(kcluster):
        wrongdict[i] = 0
        rightdict[i] = 0
        rpos[i] = 0
        uneg[i] = 0
        upos[i] = 0
        rneg[i] = 0
        sumnum[i] = 0

    group = []
    for i in xrange(kcluster):
        group.append(([], []))
    for i in xrange(len(train)):
        group[labels_[i]][0].append(model[train[i][2].encode("utf-8")])
        group[labels_[i]][1].append(model[train[i][1].encode("utf-8")])
    for i in xrange(kcluster):
        writeln("Cluster %d" % (i))
        phi, cost = calcPhiSGD(group[i][0], group[i][1])
        phis.append(phi)
        finalcosts.append(cost)

    writeln("Detail test start")
    tmpthresholdrate = 1.2
    ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
    writeln("\n\nDict Detail:")
    printclusterdetail()

    writeln("\n\nGeneral test start")
    tmpthresholdrate = 0.1
    while tmpthresholdrate < 3:
        ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        printclusterdetail()
        tmpthresholdrate += 0.1
    while tmpthresholdrate < 20:
        ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        printclusterdetail()
        tmpthresholdrate += 1