Example #1
class NormLin(AbstractTransformation):
    """
    A normalization layer followed by a linear projection layer.

    Currently just accepts most defaults for the two layers, but this
    could be changed in the future if more customization is needed.
    """
    def __init__(self, num_dims, num_factors=2, name="Norm Lin"):
        self.name = name
        self.num_dims = num_dims
        self._norm = Normalization(num_dims)
        self._proj = Linear(num_dims, num_factors=num_factors)

    @property
    def hypers(self):
        return self._proj.hypers

    def output_num_dims(self):
        return self._proj.output_num_dims()

    def forward_pass(self, inputs):
        norm_inputs = self._norm.forward_pass(inputs)
        proj_inputs = self._proj.forward_pass(norm_inputs)

        return proj_inputs

    def backward_pass(self, V):
        JV_proj = self._proj.backward_pass(V)
        JV_norm = self._norm.backward_pass(JV_proj)

        return JV_norm
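
A minimal standalone sketch of the same pattern, using hypothetical TinyNorm/TinyLinear stand-ins rather than the Normalization and Linear classes above: the forward pass chains normalization then projection, and the backward pass pulls a cotangent back through the layers in reverse order.

import numpy as np

class TinyNorm:
    def forward_pass(self, inputs):
        # Scale each column to unit standard deviation (no centering, for brevity).
        self.scale = 1.0 / (inputs.std(axis=0) + 1e-8)
        return inputs * self.scale

    def backward_pass(self, V):
        # Treat the scale as a constant in the backward pass (a simplification).
        return V * self.scale

class TinyLinear:
    def __init__(self, num_dims, num_factors=2):
        self.W = np.random.randn(num_dims, num_factors) * 0.1

    def forward_pass(self, inputs):
        return inputs @ self.W

    def backward_pass(self, V):
        return V @ self.W.T

x = np.random.randn(5, 3)
norm, proj = TinyNorm(), TinyLinear(3, num_factors=2)
out = proj.forward_pass(norm.forward_pass(x))                      # forward: norm, then projection
V_in = norm.backward_pass(proj.backward_pass(np.ones_like(out)))   # backward: reverse order
print(out.shape, V_in.shape)  # (5, 2) (5, 3)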
Example #2
    def __init__(self, similarity_function: Dict[str, Any] = None, **kwargs):
        super(MatrixAttention, self).__init__(**kwargs)
        self.similarity_function_params = deepcopy(similarity_function)
        if similarity_function is None:
            similarity_function = {}

        similarity_function['name'] = self.name + '_similarity_function'
        self.similarity_function = Linear(**similarity_function)
Example #3
class Network(Module):
    def __init__(self, nb_in, nb_out, nb_hidden):
        super(Network, self).__init__()
        self.fc1 = Linear(nb_in, nb_hidden)
        self.fc2 = Linear(nb_hidden, nb_out)

    def forward(self, x):
        x = relu(self.fc1.forward(x))
        x = relu(self.fc2.forward(x))
        return x
Example #4
def build_model():
    model = Sequential(MSE(), input_size=2)
    model.add_layer(Linear(2, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(Tanh(25))
    model.add_layer(Linear(25, 2))
    return model
Example #5
    def __init__(self) -> None:
        super().__init__()
        self.activation = Tanh()

        #self.layer1 = self.Conv2D((1, 28, 28), (8, 3, 3), 1)
        #self.layer2 = self.MaxPool()
        #self.layer3 = self.Conv2D((2, 3, 3), 2)
        self.layer4 = Linear(784, 16)
        self.layer5 = Linear(16, 16)
        self.layer6 = Linear(16, 10)
Example #6
def run_bigger_example():
    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)

    linear1 = Linear(x.shape[0], x.shape[0], weight_init='ones')
    linear2 = Linear(x.shape[0], y.shape[0], weight_init='ones')

    net_2layer = Network([linear1, linear2])
    pred_2layer = net_2layer.forward(x)

    #loss.backward()
    print("pred_2layer is ")
    print(pred_2layer)
    mse = MSE()
    loss = mse.forward(pred_2layer, y)
    print("loss for 2 layer net is ")
    print(loss)
    # Should be 2*(18-7) = 22
    loss_grad = mse.backward()
    print("loss_grad for 2layer net is ")
    print(loss_grad)
    print("Printing params Grad before ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)

    print("now setting param grad to zero")
    net_2layer.zero_grad()
    print("Printing params Grad after ")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Printing params before backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Doing backward pass")
    net_2layer.backward(loss_grad)
    print("Printing params after backward")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
    print("Printing params Grad")
    for layer in net_2layer.layers:
        for par_grad in layer.param_grad():
            print(par_grad)
    print("Doing param update")
    net_2layer.grad_step(lr=1e-3)
    print("Printing params after update")
    for layer in net_2layer.layers:
        for par in layer.param():
            print(par)
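
A quick hand check of the values printed above, assuming weight_init='ones' and zero-initialized biases (the bias initialization is not shown in the snippet) and the unaveraged squared error implied by the "2*(18-7) = 22" comment:

# x = [1, 2, 3], so each of the three units of linear1 outputs 1 + 2 + 3 = 6 -> [6, 6, 6]
# each of the two units of linear2 then outputs 6 + 6 + 6 = 18 -> pred_2layer = [18, 18]
# with y = [7, 10], loss_grad = 2 * (pred - y) = [22, 16],
# matching 2*(18-7) = 22 for the first component.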
Example #7
def test_linear_weights():

    w = Tensor([[2, 4, 8], [16, 32, 69]])
    b = Tensor([0, 0, 0])
    x = Tensor([3, 9, 27])
    print(w.shape, b.shape)

    l1 = Linear(2, 3)
    l1.init_weights(1)
    w, b = l1.param()

    print(w.shape, b.shape)
Example #8
    def test_neural_net_back_forward(self):
        n_in, n_out = 3, 2

        weights = np.array([[0, -1, 2], [-3, 4, -5]])
        bias = np.arange(n_out)[:, np.newaxis]

        nn = NeuralNet(MeanSquaredError(),
                       1e-3,
                       layers=[Linear(n_in, 2, weights, bias),
                               ReLU()])
        x = np.array([[[0], [1], [2]]])
        y = np.array([[[2], [3]]])
        assert y.shape[1] == n_out
        # |0 -1  2| |0|   |0|   | 3|   |0|   | 3|    |3|
        # |-3 4 -5| |1| + |1| = |-6| + |1| = |-5| -> |0|
        #           |2|

        pred = nn.forward(x)
        assert np.array_equal(pred, [[[3], [0]]])

        nn.compute_loss(pred, y)
        dL_dx = nn.backward()

        # |0 -1  2| |0 + dx1|   | 3 + 0    -  dx2 + 2dx3|   | 3 + ...|    |3 - dx2 + 2dx3|
        # |-3 4 -5| |1 + dx2| = |-6 - 3dx1 + 4dx2 - 5dx3| = |-5 + ...| -> |0|
        #           |2 + dx3| The second component is ReLU'ed away
        # MSE loss results in 2( ... ) so dL = -2dx2 + 4dx3, dL/dx = |0, -2, 4|

        assert np.array_equal(dL_dx, [[[0], [-2], [4]]])
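
The hand derivation in the comments can be checked numerically with central differences; a standalone sketch using the same weights, bias, input, and target (and the unaveraged squared error the comments imply), independent of the NeuralNet class above:

import numpy as np

W = np.array([[0., -1., 2.], [-3., 4., -5.]])
b = np.array([[0.], [1.]])
x = np.array([[0.], [1.], [2.]])
y = np.array([[2.], [3.]])

def loss_fn(x):
    pred = np.maximum(W @ x + b, 0.0)   # Linear layer followed by ReLU
    return np.sum((pred - y) ** 2)      # unaveraged squared error, matching the comment

eps = 1e-6
grad = np.zeros_like(x)
for i in range(x.size):
    x_plus, x_minus = x.copy(), x.copy()
    x_plus.flat[i] += eps
    x_minus.flat[i] -= eps
    grad.flat[i] = (loss_fn(x_plus) - loss_fn(x_minus)) / (2 * eps)

print(np.round(grad, 3))  # [[ 0.], [-2.], [ 4.]], matching the asserted dL_dx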
Example #9
    def test_forward(self):

        n_in, n_out = 3, 2

        bias = np.arange(n_out)[:, np.newaxis]
        weights = np.arange(n_in * n_out).reshape((n_out, n_in))

        layer = Linear(n_in, n_out, weights, bias)
        x = np.array([[[0], [1], [2]]])
        # |0 1 2| |0|   |0|   | 5|   |0|   | 5|
        # |3 4 5| |1| + |1| = |14| + |1| = |15|
        #         |2|

        # breakpoint()
        assert np.array_equal(layer.forward(x), [[[5], [15]]])
        assert np.array_equal(layer.d_out_d_in, weights)
Example #10
    def __init__(self,
                 in_features,
                 n_classes,
                 cutoffs,
                 div_value=4.,
                 head_bias=False):
        super(AdaptiveLogSoftmaxWithLoss, self).__init__()

        cutoffs = list(cutoffs)

        if (cutoffs != sorted(cutoffs)) \
                or (min(cutoffs) <= 0) \
                or (max(cutoffs) >= (n_classes - 1)) \
                or (len(set(cutoffs)) != len(cutoffs)) \
                or any([int(c) != c for c in cutoffs]):

            raise ValueError(
                "cutoffs should be a sequence of unique, positive "
                "integers sorted in an increasing order, where "
                "each value is between 1 and n_classes-1")

        self.in_features = in_features
        self.n_classes = n_classes
        self.cutoffs = cutoffs + [n_classes]
        self.div_value = div_value
        self.head_bias = head_bias

        self.shortlist_size = self.cutoffs[0]
        self.n_clusters = len(self.cutoffs) - 1
        self.head_size = self.shortlist_size + self.n_clusters

        self.head = Linear(self.in_features,
                           self.head_size,
                           bias=self.head_bias)
        self.tail = ModuleList()

        for i in range(self.n_clusters):

            hsz = int(self.in_features // (self.div_value**(i + 1)))
            osz = self.cutoffs[i + 1] - self.cutoffs[i]

            projection = Sequential(Linear(self.in_features, hsz, bias=False),
                                    Linear(hsz, osz, bias=False))

            self.tail.append(projection)
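
A worked example of the bookkeeping above, using illustrative values (the numbers follow directly from the code shown, not from any external documentation):

# in_features=16, n_classes=10, cutoffs=[3, 7], div_value=4.
#   self.cutoffs = [3, 7, 10]; shortlist_size = 3; n_clusters = 2; head_size = 3 + 2 = 5
#   head: Linear(16, 5)
#   cluster 0: hsz = int(16 // 4**1) = 4, osz = 7 - 3  = 4 -> Sequential(Linear(16, 4), Linear(4, 4))
#   cluster 1: hsz = int(16 // 4**2) = 1, osz = 10 - 7 = 3 -> Sequential(Linear(16, 1), Linear(1, 3))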
Example #11
class IdentityRegressor():
    def __init__(self, n, classes):
        self.fc1 = Linear("fc1", n, 256)
        self.fc2 = Linear("fc2", 256, classes)

    def forward(self, x):
        # bs, m, n = int(x.shape[0]), int(x.shape[1]), int(x.shape[2])
        # print("x", x)
        m, n = int(x.shape[1]), int(x.shape[2])

        # print(x) # 64*14*4608
        x = tf.reshape(x, (-1, n*m))
        # print(x) # 64*64512
        x = tf.nn.relu(self.fc1.forward(x))
        # print(x) # 64*256
        x = self.fc2.forward(x)
        # return x
        x = tf.nn.softmax(x)
        # print(x) # 64*10575
        return x
Example #12
 def __init__(
     self,
     in_features: int,
     out_features: int,
     first: bool = False,
     couple: bool = False,
     dropout_p: float = 0.0,
     init_weight: Union[int, float, str] = 'kaiming',
     init_bias: Union[int, float, str] = -1
 ):
     super().__init__()
     self.first = first
     self.couple = couple
     if first:
         self.W_H = Linear(in_features, out_features, bias=False, activation=None)
         self.W_T = Linear(in_features, out_features, bias=False, activation=None)
         if not couple:
             self.W_C = Linear(in_features, out_features, bias=False, activation=None)
     self.R_H = Linear(in_features, out_features, bias=True, activation=None)
     self.R_T = Linear(in_features, out_features, bias=True, activation=None)
     if not couple:
         self.R_C = Linear(in_features, out_features, bias=True, activation=None)
     for child in self.children():
         child.reset_parameters(init_weight, init_bias)
     self.dropout = RNNDropout(dropout_p)
Example #13
 def __init__(self, 
              n_samples, 
              batch_size, 
              n_bits, 
              fwd_scale_factor,
              bck_scale_factor,
              loss_scale_factor,
              in_features, 
              out_features, 
              lr):
     self.lin_layer = Linear(n_samples=n_samples, 
                        batch_size=batch_size, 
                        n_bits=n_bits,
                        fwd_scale_factor=fwd_scale_factor,
                        bck_scale_factor=bck_scale_factor, 
                        in_features=in_features, 
                        out_features=out_features)
     self.loss_layer = CrossEntropy(n_samples, out_features, batch_size, n_bits, loss_scale_factor)
     self.lr = lr
     self.fwd_scale_factor = fwd_scale_factor
     self.bck_scale_factor = bck_scale_factor
     self.loss_scale_factor = loss_scale_factor
Example #14
    def build_model(self):
        self.conv_layers = []
        self.linear_layers = []
        self.layers = []

        # 1x28x28 -> 6x24x24
        self.conv_layers += [Conv(1, 6, 5, self.activation)]
        # 6x24x24 -> 6x12x12
        self.conv_layers += [MaxPool_2()]
        # 6x12x12 -> 16x8x8
        self.conv_layers += [Conv(6, 16, 5, self.activation)]
        # 16x8x8  -> 16x4x4
        self.conv_layers += [MaxPool_2()]

        # 256 -> 120
        self.linear_layers += [Linear(16 * 4 * 4, 120, self.activation)]
        # 120 -> 84
        self.linear_layers += [Linear(120, 84, self.activation)]
        # 84  -> 10
        self.linear_layers += [Softmax(84, self.no_of_classes)]

        self.layers = self.conv_layers + self.linear_layers
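
A shape check for the comments above, assuming valid (unpadded) 5x5 convolutions with stride 1 and 2x2 max pooling, which is what the annotated sizes imply:

# conv1: 28 - 5 + 1 = 24  -> 6x24x24     pool: 24 / 2 = 12 -> 6x12x12
# conv2: 12 - 5 + 1 = 8   -> 16x8x8      pool:  8 / 2 = 4  -> 16x4x4
# flatten: 16 * 4 * 4 = 256, which matches the input size of the first Linear layer.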
Example #15
class GenModel():
    def __init__(self, feature_size):
        self.f_size = feature_size

        self.g1 = Linear("g1", self.f_size*3*3, self.f_size*3*3)
        self.g2 = Linear("g2", self.f_size*2*2, self.f_size*3*3)
        self.g3 = Linear("g3", self.f_size*1*1, self.f_size*3*3)

    def forward(self, x, scope):
        S0 = x

        conv1 = conv_op(input_op=S0, name="S0"+scope, kh=3, kw=3, n_out=self.f_size, dh=1, dw=1)
        S1 = tf.nn.relu(conv1)

        conv2 = conv_op(input_op=S1, name="S1"+scope, kh=3, kw=3, n_out=self.f_size, dh=1, dw=1)
        S2 = tf.nn.relu(conv2)

        p1 = extract_patches(S0, 3)
        # print("p1", p1) # bs*9*4608

        p2 = extract_patches(S1, 2)
        # print("p2", p2) # bs*4*2048

        p3 = extract_patches(S2, 1)
        # print("p3", p3) # bs*1*512

        kk1 = tf.nn.relu(self.g1.forward(p1))
        # print("kk1", kk1) # bs*9*4608

        kk2 = tf.nn.relu(self.g2.forward(p2))
        # print("kk2", kk2) # bs*4*4608

        kk3 = tf.nn.relu(self.g3.forward(p3))
        # print("kk3", kk3) # bs*1*4608

        kernels = tf.concat((kk1, kk2, kk3), 1)

        return kernels
Example #16
def run_mini_example():

    x = Tensor([1, 2, 3])
    y = Tensor([7, 10])
    print(x.shape, y.shape)

    linear = Linear(x.shape[0], y.shape[0], weight_init='ones')
    net = Network([linear])

    pred = net.forward(x)

    #loss.backward()
    print("Pred is ")
    print(pred)
Example #17
class Regressor():
    def __init__(self, n):
        self.n = n
        self.linear = Linear("linear", n, 1)

    def forward(self, x):
        bs, c = x.shape[0], x.shape[1]
        # print(bs, c)
        # print(x) # 64*350 pytorch 64*686
        # return x
        x = self.linear.forward(x)
        # return x
        # print(x) # 64*1
        x = tf.nn.sigmoid(x)
        # print(x) # 64*1
        return x
Example #18
def linearProcessor():
    """
    Processor node for linear operation
    """
    inputs, weights, bias = Input(), Input(), Input()

    f = Linear(inputs, weights, bias)

    feed_dict = {
        inputs: [6, 14, 3],
        weights: [0.5, 0.25, 1.4],
        bias: 2
    }

    graph = topological_sort(feed_dict)
    output = forward_pass(f, graph)

    print(output, "(according to miniflow - linear)")
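
Assuming the miniflow Linear node computes the weighted sum of its inputs plus the bias, the expected value printed above works out to:

# 6*0.5 + 14*0.25 + 3*1.4 + 2 = 3.0 + 3.5 + 4.2 + 2 = 12.7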
Example #19
    def make_exprs(inpt, in_to_out, bias, out_transfer, loss, c_rim):
        exprs = Linear.make_exprs(inpt, in_to_out, bias, out_transfer, loss)
        output = exprs['output']

        marginal = output.mean(axis=0)
        cond_entropy = misc.discrete_entropy(output, axis=1).mean()
        entropy = misc.discrete_entropy(marginal)

        # negative mutual information -> we are minimizing
        neg_mi = cond_entropy - entropy
        l2 = (in_to_out**2).sum()

        exprs['neg_mi'] = neg_mi
        exprs['l2'] = l2

        exprs['loss'] = neg_mi + c_rim * l2

        return exprs
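
A standalone numeric illustration of the neg_mi term above, with a simple stand-in for misc.discrete_entropy (not the library's actual implementation):

import numpy as np

def discrete_entropy(p, axis=None):
    # Shannon entropy of a (batch of) discrete distribution(s).
    return -(p * np.log(p + 1e-12)).sum(axis=axis)

output = np.array([[0.9, 0.1],
                   [0.1, 0.9]])                          # confident per-sample predictions, balanced overall
marginal = output.mean(axis=0)                           # [0.5, 0.5]
cond_entropy = discrete_entropy(output, axis=1).mean()   # low: each row is confident
entropy = discrete_entropy(marginal)                     # high: both classes are used equally
print(cond_entropy - entropy)                            # approx -0.37: a low (good) neg_mi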
Example #21
    def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn,
                 bias_init_fn, criterion, lr, momentum=0.0, num_bn_layers=0):

        # Don't change this -->
        self.train_mode = True
        self.num_bn_layers = num_bn_layers
        self.bn = num_bn_layers > 0
        self.nlayers = len(hiddens) + 1
        self.input_size = input_size
        self.output_size = output_size
        self.activations = activations
        self.criterion = criterion
        self.lr = lr
        self.momentum = momentum
        # <---------------------

        # Don't change the name of the following class attributes,
        # the autograder will check against these attributes. But you will need to change
        # the values in order to initialize them correctly

        # Initialize and add all your linear layers into the list 'self.linear_layers'
        # (HINT: self.foo = [ bar(???) for ?? in ? ])
        # (HINT: Can you use zip here?)
        #self.linear_layers = []
        #self.linear_layers.append(Linear(input_size, output_size, weight_init_fn, bias_init_fn))
        
        input_size_list = [self.input_size] + hiddens
        output_size_list = hiddens + [self.output_size]
        self.linear_layers = [Linear(n_in, n_out, weight_init_fn, bias_init_fn)
                              for n_in, n_out in zip(input_size_list, output_size_list)]

        # If batch norm, add batch norm layers into the list 'self.bn_layers'
        self.bn_layers = []
        if self.bn:
            #self.bn_layers.append(BatchNorm(input_size, alpha=0.9))
            self.bn_layers = [BatchNorm(hiddens[i], alpha=0.9) for i in range(num_bn_layers)]
Example #22
    def test_neural_net_tends_to_correct(self):
        n_in, n_out = 4, 2

        np.random.seed(12)
        weights = np.random.normal(size=(n_out, n_in))
        bias = np.zeros(n_out)[:, np.newaxis]

        nn = NeuralNet(MeanSquaredError(),
                       1e-2,
                       layers=[Linear(n_in, 2, weights, bias)])

        x = np.array([[[-1], [0.5], [-0.33], [0.75]]])
        y = np.array([[[-0.5], [0.2]]])

        for _ in range(1000):
            pred = nn.forward(x)
            loss = nn.compute_loss(pred, y)
            nn.backward()

        assert np.isclose(loss, 0)
Example #23
    def test_neural_net_works_with_batches(self):
        n_in, n_out = 2, 2

        np.random.seed(12)
        weights = np.random.normal(size=(n_out, n_in))
        bias = np.zeros(n_out)[:, np.newaxis]

        nn = NeuralNet(MeanSquaredError(),
                       1e-2,
                       layers=[Linear(n_in, 2, weights, bias)])

        # batch of 3
        x = np.array([[[-1], [0.5]], [[1], [-0.2]], [[-0.33], [0.75]]])
        y = x

        # Why does this take so much longer to converge than the previous one?
        for _ in range(10000):
            pred = nn.forward(x)
            loss = nn.compute_loss(pred, y)
            nn.backward()

        assert np.isclose(loss, 0)
        assert np.all(
            np.isclose(nn.layers[0].weights, [[1, 0], [0, 1]], atol=1e-3))
Example #24
 def __init__(self, num_dims, num_factors=2, name="Norm Lin"):
     self.name = name
     self.num_dims = num_dims
     self._norm = Normalization(num_dims)
     self._proj = Linear(num_dims, num_factors=num_factors)
Example #25
### Generation of the DATA
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)
# Standardize the data
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)
# Convert the targets to one-hot labels so that we can train
train_target_hot = conv_to_one_hot(train_target)
test_target_hot = conv_to_one_hot(test_target)

### Build the Network
hidden_layers = 3

layers = []
linear = Linear(2, 25, bias_init=True)
layers.append(linear)
layers.append(Relu())
for i in range(hidden_layers - 1):
    layers.append(Linear(25, 25, bias_init=True))
    layers.append(Relu())
layers.append(Tanh())
layers.append(Linear(25, 2, bias_init=True))
model = Sequential(layers)

#print model summary
print("Model Summary:")
print(model)

### Select Parameters to train the model
criterion = MSE()
Example #26
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

# Train dataset
X_ = np.reshape(np.array([[-1., -2., -3.], [1., 2., 3.]]), (2, 3))
W1_ = np.random.randn(3, 2)
b1_ = np.random.randn(2)
W2_ = np.random.randn(2, 1)
b2_ = np.random.randn(1)
y_ = np.reshape(np.array([[1.], [0.]]), (-1, 1))

# Test dataset
X_t_ = np.reshape(np.array([-1., -2.01, -2.8]), (1, 3))
y_t_ = np.array([1.])

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = L2(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}
hyper_parameters = [W1, b1, W2, b2]

graph = Network.topological_sort(feed_dict)

epoch = 1000000
for i in xrange(epoch):
    Network.forward_propagation(graph)
    Network.backward_propagation(graph)
    Update.stochastic_gradient_descent(hyper_parameters, learning_rate=1e-4)
Example #27
def test_Linear():
	T = 5
	batch_size = 2
	doutput = 3
	dinput = 4

	unit = Linear(dinput, doutput)

	W = unit.get_weights()

	X = np.random.randn(T, dinput, batch_size)

	acc_Y = unit.forward(X)
	wrand = np.random.randn(*acc_Y.shape)
	loss = np.sum(acc_Y * wrand)
	dY = wrand
	dX = unit.backward(dY)
	dW = unit.get_grads()
	
	def fwd():
		unit.set_weights(W)
		h = unit.forward(X)
		return np.sum(h * wrand)

	delta = 1e-4
	error_threshold = 1e-3
	all_values = [X, W]
	backpropagated_gradients = [dX, dW]
	names = ['X', 'W']

	error_count = 0
	for v in range(len(names)):
		values = all_values[v]
		dvalues = backpropagated_gradients[v]
		name = names[v]
		
		for i in range(values.size):
			actual = values.flat[i]
			values.flat[i] = actual + delta
			loss_plus = fwd()
			values.flat[i] = actual - delta
			loss_minus = fwd()
			values.flat[i] = actual
			backpropagated_gradient = dvalues.flat[i]
			numerical_gradient = (loss_plus - loss_minus) / (2 * delta)
			

			if numerical_gradient == 0 and backpropagated_gradient == 0:
				error = 0 
			elif abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
				error = 0 
			else:
				error = abs(backpropagated_gradient - numerical_gradient) / abs(numerical_gradient + backpropagated_gradient)
			
			if error > error_threshold:
				print 'FAILURE!!!\n'
				print '\tparameter: ', name, '\tindex: ', np.unravel_index(i, values.shape)
				print '\tvalues: ', actual
				print '\tbackpropagated_gradient: ', backpropagated_gradient 
				print '\tnumerical_gradient', numerical_gradient 
				print '\terror: ', error
				print '\n\n'

				error_count += 1

	if error_count == 0:
		print 'Linear Gradient Check Passed'
	else:
		print 'Failed for {} parameters'.format(error_count)
Example #28
def main():
    # Validate the command-line argument before using it to select a model.
    if len(sys.argv) != 2 or sys.argv[1] not in {"LINEAR", "CNN", "RNN"}:
        print("USAGE: python main.py <Model Type>")
        print("<Model Type>: [LINEAR/CNN/RNN]")
        exit()

    if sys.argv[1] == "LINEAR":
        model = Linear()
        num_epochs = 100
    elif sys.argv[1] == "CNN":
        model = CNN()
        num_epochs = 100
    elif sys.argv[1] == "RNN":
        model = RNN()
        num_epochs = 100

    print("Running preprocessing...")
    if sys.argv[1] == "RNN":
        train_inputs, train_labels, test_inputs, test_labels = get_rnn_data(
            "data/genres.tar")
    else:
        train_inputs, train_labels, test_inputs, test_labels = get_data(
            "data/genres.tar")
    print("Preprocessing completed.")

    if sys.argv[1] == "RNN":
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(units=256))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.LeakyReLU(0.2))
        model.add(tf.keras.layers.Reshape((16, 16)))
        model.add(
            tf.keras.layers.LSTM(units=128,
                                 dropout=0.05,
                                 recurrent_dropout=0.35,
                                 return_sequences=True))
        model.add(
            tf.keras.layers.LSTM(units=32,
                                 dropout=0.05,
                                 recurrent_dropout=0.35,
                                 return_sequences=False))
        model.add(tf.keras.layers.Dense(units=10, activation="softmax"))

        num_epochs = 100
        opt = tf.keras.optimizers.Adam(lr=.01)
        model.compile(optimizer=opt,
                      loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
        history = model.fit(train_inputs,
                            train_labels,
                            epochs=num_epochs,
                            batch_size=100)
        model.summary()

        test_loss, test_acc = model.evaluate(test_inputs, test_labels)
        print('test acc: ', test_acc)
    else:
        for _ in range(num_epochs):
            train(model, train_inputs, train_labels)
        print(test(model, test_inputs, test_labels))
Example #29
def __main__():
	alltriple = []

	for t in c.triple:
		if len(t[2]) > 10 or len(t[0])>1 or len(t[1])>1:
			continue
			pass
		for i in t[0]:
			if i.encode('utf-8') not in model:
				continue
			for j in t[1]:
				if j.encode('utf-8') not in model:
					continue
				for k in t[2]:
					if k.encode('utf-8') not in model:
						continue
					if i!=j and j!=k and i!=k:
						alltriple.append((i,j,k))
						# writeln(i.encode('utf-8'),j.encode('utf-8'),k.encode('utf-8')

	writeln('All triple num: %d'%(len(alltriple)))
	if len(alltriple)>3000:
		alltriple = random.sample(alltriple, 3000)

	# sample = random.sample(alltriple, 1391)
	# train = sample[:len(sample)/4]
	# test = sample[len(sample)/4:]


	sample = alltriple
	test = sample[:len(sample)/3]
	train = sample[len(sample)/3:]
	writeln('Train data num: %d\n Test data num: %d'%(len(train),len(test)))


	l = Linear()


	l.kmeans_clusters(train)
	if affirm('Show Train Data'):
		makeGraph(l, [ model[x[1].encode('utf-8')]-model[x[2].encode('utf-8')] for x in train], label = l.labels_)
		plt.show()

	related,unrelated = deal_test_data(test)

	if affirm('Show Train and Test Data'):
		makeGraph(l, [ model[x[1].encode('utf-8')]-model[x[2].encode('utf-8')] for x in train], label = l.labels_)
		makeGraph(l, [ model[x[1].encode('utf-8')]-model[x[0].encode('utf-8')] for x in related], eachlabel = 'related', col = 'gray')
		makeGraph(l, [ model[x[1].encode('utf-8')]-model[x[0].encode('utf-8')] for x in unrelated], eachlabel = 'unrelated', col = 'black')
		plt.show()


	for i in xrange(l.kcluster):
		l.wrongdict[i] = 0
		l.rightdict[i] = 0
		l.rpos[i] = 0
		l.uneg[i] = 0
		l.upos[i] = 0
		l.rneg[i] = 0
		l.sumnum[i] = 0


	group = []
	for i in xrange(l.kcluster):
		group.append(([],[]))

	for i in xrange(len(train)):
		group[l.labels_[i]][0].append( model[train[i][2].encode('utf-8')] )
		group[l.labels_[i]][1].append( model[train[i][1].encode('utf-8')] )

	for i in xrange(l.kcluster):
		writeln('Cluster %d'%(i))
		phi,cost = l.calcPhiSGD(group[i][0], group[i][1])
		l.phis.append( phi )
		l.finalcosts.append( cost )




	writeln('Detail test start')
	tmpthresholdrate = 1.2
	l.ForTest(related, unrelated, tmpthresholdrate, [(x[2],x[1]) for x in train])
	writeln('\n\nDict Detail:')
	l.printclusterdetail()

	writeln('\n\nGeneral test start')
	tmpthresholdrate= 0.1
	while tmpthresholdrate< 3:
		l.ForTest(related, unrelated, tmpthresholdrate, [(x[2],x[1]) for x in train])
		l.printclusterdetail()
		tmpthresholdrate += 0.1

	while tmpthresholdrate< 20:
		l.ForTest(related, unrelated, tmpthresholdrate, [(x[2],x[1]) for x in train])
		l.printclusterdetail()
		tmpthresholdrate+= 1
	pass
Example #30
                          )  # add epsilon to avoid division-by-zero errors
validX = (validX - mu) / (std + np.finfo(np.float32).eps)
testX = (testX - mu) / (std + np.finfo(np.float32).eps)

#%% Visualize MNIST
# https://colah.github.io/posts/2014-10-Visualizing-MNIST/

#%% Build the model
model = NeuralNetwork()
# Think of the network as a stack of layers, each performing a specific computation;
# the data passes through these layers in the forward (feed-forward) pass.
# Following the chain rule, backpropagation of the error yields the partial derivatives
# (i.e. the gradients) of the cost function with respect to the model parameters.

# Task 1: implement the forward and backward methods of the Relu class
# Task 2: implement the forward method of the Softmax class

model.layers.append(Linear(n_feature, 60, lr))
model.layers.append(Relu())
model.layers.append(Linear(60, 10, lr))
model.layers.append(Softmax())

#%% Training
# stochastic gradient descent
batchsize = 100
trainloss = []
validloss = []
snapshot = []

for i in range(n_iter):
    # Before each iteration, generate a new permutation of indices (to shuffle the data)
    idxs = np.random.permutation(trainX.shape[0])
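
The task comments above ask for Relu.forward/backward and Softmax.forward; a minimal standalone sketch of what those methods might look like (interfaces assumed here, not the course's actual classes):

import numpy as np

class Relu:
    def forward(self, x):
        self.mask = x > 0               # remember which activations were positive
        return x * self.mask

    def backward(self, grad_output):
        return grad_output * self.mask  # gradient passes only where the input was positive

class Softmax:
    def forward(self, x):
        # Assumes a 2D (batch, classes) input; subtract the row max for numerical stability.
        shifted = x - x.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)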
Example #31
#-*- coding:utf-8 -*-
# @Time : 2019/10/30
# @Author : Botao Fan

from config import DATA_PATH
from linear import Linear
from data_prepare import data_prep

if __name__ == '__main__':
    train_idx, train_val, train_y, user_dict, item_dict = data_prep(
        DATA_PATH, 'ua.base')
    test_idx, test_val, test_y, _, _ = data_prep(DATA_PATH, 'ua.test',
                                                 user_dict, item_dict)
    linear = Linear(param_size=len(user_dict) + len(item_dict), epoch=200)
    linear.fit(train_idx, train_val, train_y, test_idx, test_val, test_y)
Example #32
class MatrixAttention(Layer):
    '''
    This ``Layer`` takes two matrices as input and returns a matrix of attentions.
    We compute the similarity between each row in each matrix and return unnormalized similarity
    scores.  We don't worry about zeroing out any masked values, because we propagate a correct
    mask.
    By default similarity is computed with a dot product, but you can alternatively use a
    parameterized similarity function if you wish.
    This is largely similar to using ``TimeDistributed(Attention)``, except the result is
    unnormalized, and we return a mask, so you can do a masked normalization with the result.  You
    should use this instead of ``TimeDistributed(Attention)`` if you want to compute multiple
    normalizations of the attention matrix.
    Input:
        - matrix_1: ``(batch_size, num_rows_1, embedding_dim)``, with mask
          ``(batch_size, num_rows_1)``
        - matrix_2: ``(batch_size, num_rows_2, embedding_dim)``, with mask
          ``(batch_size, num_rows_2)``
    Output:
        - ``(batch_size, num_rows_1, num_rows_2)``, with mask of same shape
    Parameters
    ----------
    similarity_function_params: Dict[str, Any], default={}
        These parameters get passed to a similarity function (see
        :mod:`deep_qa.tensors.similarity_functions` for more info on what's acceptable).  The
        default similarity function with no parameters is a simple dot product.
    '''
    def __init__(self, similarity_function: Dict[str, Any] = None, **kwargs):
        super(MatrixAttention, self).__init__(**kwargs)
        self.similarity_function_params = deepcopy(similarity_function)
        if similarity_function is None:
            similarity_function = {}

        similarity_function['name'] = self.name + '_similarity_function'
        self.similarity_function = Linear(**similarity_function)
        # self.similarity_function = DotProduct(**similarity_function)

    def build(self, input_shape):
        tensor_1_dim = input_shape[0][-1]
        tensor_2_dim = input_shape[1][-1]
        self.trainable_weights = self.similarity_function.initialize_weights(
            tensor_1_dim, tensor_2_dim)
        super(MatrixAttention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        # pylint: disable=unused-argument
        mask_1, mask_2 = mask
        if mask_1 is None and mask_2 is None:
            return None
        if mask_1 is None:
            mask_1 = K.ones_like(K.sum(inputs[0], axis=-1))
        if mask_2 is None:
            mask_2 = K.ones_like(K.sum(inputs[1], axis=-1))
        # Theano can't do batch_dot on ints, so we need to cast to float and then back.
        mask_1 = K.cast(K.expand_dims(mask_1, axis=2), 'float32')
        mask_2 = K.cast(K.expand_dims(mask_2, axis=1), 'float32')
        return K.cast(K.batch_dot(mask_1, mask_2), 'uint8')

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], input_shape[1][1])

    def call(self, inputs, mask=None):
        matrix_1, matrix_2 = inputs
        num_rows_1 = K.shape(matrix_1)[1]
        num_rows_2 = K.shape(matrix_2)[1]
        tile_dims_1 = K.concatenate([[1, 1], [num_rows_2], [1]], 0)
        tile_dims_2 = K.concatenate([[1], [num_rows_1], [1, 1]], 0)
        tiled_matrix_1 = K.tile(K.expand_dims(matrix_1, axis=2), tile_dims_1)
        tiled_matrix_2 = K.tile(K.expand_dims(matrix_2, axis=1), tile_dims_2)
        return self.similarity_function.compute_similarity(
            tiled_matrix_1, tiled_matrix_2)

    def get_config(self):
        base_config = super(MatrixAttention, self).get_config()
        config = {'similarity_function': self.similarity_function_params}
        config.update(base_config)
        return config
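
A shape walkthrough of call() above with illustrative sizes:

# matrix_1: (batch, 4, d), matrix_2: (batch, 6, d)
# expand_dims + tile -> tiled_matrix_1 and tiled_matrix_2 both become (batch, 4, 6, d)
# compute_similarity reduces the last axis -> (batch, 4, 6), matching compute_output_shape.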
Example #33
def __main__():
    c = CilinE()
    alltriple = []

    for t in c.triple:
        if len(t[2]) > 10 or len(t[0]) > 1 or len(t[1]) > 1:
            continue
            pass
        for i in t[0]:
            if i.encode("utf-8") not in model:
                continue
            for j in t[1]:
                if j.encode("utf-8") not in model:
                    continue
                for k in t[2]:
                    if k.encode("utf-8") not in model:
                        continue
                    if i != j and j != k and i != k:
                        alltriple.append((i, j, k))
                        # writeln(i.encode('utf-8'),j.encode('utf-8'),k.encode('utf-8')

    writeln("All triple num: %d" % (len(alltriple)))
    if len(alltriple) > 3000:
        alltriple = random.sample(alltriple, 3000)

        # sample = random.sample(alltriple, 1391)
        # train = sample[:len(sample)/4]
        # test = sample[len(sample)/4:]

    sample = alltriple
    test = sample[: len(sample) / 3]
    train = sample[len(sample) / 3 :]
    writeln("Train data num: %d\n Test data num: %d" % (len(train), len(test)))

    l = Linear()

    l.kmeans_clusters(train)
    if affirm("Show Train Data"):
        makeGraph([model[x[1].encode("utf-8")] - model[x[2].encode("utf-8")] for x in train], label=labels_)
        plt.show()

    related, unrelated = deal_test_data(test)

    if affirm("Show Train and Test Data"):
        makeGraph([model[x[1].encode("utf-8")] - model[x[2].encode("utf-8")] for x in train], label=labels_)
        makeGraph(
            [model[x[1].encode("utf-8")] - model[x[0].encode("utf-8")] for x in related],
            eachlabel="related",
            col="gray",
        )
        makeGraph(
            [model[x[1].encode("utf-8")] - model[x[0].encode("utf-8")] for x in unrelated],
            eachlabel="unrelated",
            col="black",
        )
        plt.show()

    for i in xrange(kcluster):
        wrongdict[i] = 0
        rightdict[i] = 0
        rpos[i] = 0
        uneg[i] = 0
        upos[i] = 0
        rneg[i] = 0
        sumnum[i] = 0

    group = []
    for i in xrange(kcluster):
        group.append(([], []))

    for i in xrange(len(train)):
        group[labels_[i]][0].append(model[train[i][2].encode("utf-8")])
        group[labels_[i]][1].append(model[train[i][1].encode("utf-8")])

    for i in xrange(kcluster):
        writeln("Cluster %d" % (i))
        phi, cost = calcPhiSGD(group[i][0], group[i][1])
        phis.append(phi)
        finalcosts.append(cost)

    writeln("Detail test start")
    tmpthresholdrate = 1.2
    ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
    writeln("\n\nDict Detail:")
    printclusterdetail()

    writeln("\n\nGenetal test start")
    tmpthresholdrate = 0.1
    while tmpthresholdrate < 3:
        ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        printclusterdetail()
        tmpthresholdrate += 0.1

    while tmpthresholdrate < 20:
        ForTest(related, unrelated, tmpthresholdrate, [(x[2], x[1]) for x in train])
        printclusterdetail()
        tmpthresholdrate += 1
    pass