def get_softmax_model(parameters, X, y):
    softmax = Softmax()
    loss_history = softmax.train(X, y, parameters[1], parameters[0], 200, 1500, True)
    VisualizeLoss(loss_history)
    VisualizeW(softmax)
    return softmax
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    # Weights and biases
    self.params['W1'] = weight_init_std * \
        np.random.rand(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * \
        np.random.rand(hidden_size, hidden_size)
    self.params['b2'] = np.zeros(hidden_size)
    self.params['W3'] = weight_init_std * \
        np.random.rand(hidden_size, output_size)
    self.params['b3'] = np.zeros(output_size)

    # Layers
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.layers['Relu2'] = Relu()
    self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3'])
    self.lastLayer = Softmax()
def test_Softmax_CE(input, target):
    # Combined softmax + cross-entropy layer
    f = Softmax_CE()
    output = f.forward(input, target)
    grad_output = f.backward()

    # The same computation split into separate Softmax and CrossEntropy layers
    softmax = Softmax()
    CE = CrossEntropy()
    pred = softmax.forward(input)
    div_output = CE.forward(pred, target)
    ce_grad = CE.backward()
    div_grad = softmax.backward(ce_grad)
    return output, grad_output, div_output, div_grad
class TimeSoftmaxWithLoss:
    def __init__(self):
        self.params = []
        self.grads = []
        self.cache = None
        self.ignore_label = -1
        self.softmax = Softmax()

    def forward(self, xs, ts):
        N, T, V = xs.shape

        if ts.ndim == 3:  # convert one-hot targets to class indices
            ts = ts.argmax(axis=2)

        mask = (ts != self.ignore_label)

        # Flatten the batch and time axes
        xs = xs.reshape(N * T, V)
        ts = ts.reshape(N * T)
        mask = mask.reshape(N * T)

        ys = self.softmax.calc_softmax(xs)
        ls = np.log(ys[np.arange(N * T), ts])
        ls *= mask  # drop the positions labelled with ignore_label
        loss = -np.sum(ls)
        loss /= mask.sum()

        self.cache = (ts, ys, mask, (N, T, V))
        return loss

    def backward(self, dout=1):
        ts, ys, mask, (N, T, V) = self.cache

        dx = ys
        dx[np.arange(N * T), ts] -= 1
        dx *= dout
        dx /= mask.sum()
        dx *= mask[:, np.newaxis]  # zero the gradient at ignored positions

        dx = dx.reshape((N, T, V))
        return dx
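# A minimal usage sketch of TimeSoftmaxWithLoss with assumed shapes: a batch
# of N=2 sequences of length T=3 over a V=4 vocabulary, where positions
# labelled -1 (ignore_label) are excluded from the loss and receive zero
# gradient.
import numpy as np

layer = TimeSoftmaxWithLoss()
xs = np.random.randn(2, 3, 4)   # scores, shape (N, T, V)
ts = np.array([[0, 2, -1],      # -1 marks padded time steps
               [3, -1, -1]])
loss = layer.forward(xs, ts)    # averaged over the 3 unmasked steps only
dxs = layer.backward()          # shape (2, 3, 4); masked rows are all zero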
def experience_replay(self, batch_size, num_replay, verb=0):
    self.total_replay += num_replay
    if len(self.replay_buffer.memory) > batch_size * 4:
        # Sync the target network with the online network
        self.model2.set_weights(self.model.get_weights())
        for _ in range(num_replay):
            states, actions, rewards, next_states, terminals = \
                self.replay_buffer.sample(batch_size)
            q_next_mat = self.model2.predict(next_states)
            # Expected next-state value under a Boltzmann (softmax) policy
            probs_mat = Softmax(q_next_mat, self.tau)
            v_next_vec = np.sum(probs_mat * q_next_mat, 1) * (1 - terminals)
            target_vec = rewards + self.gamma * v_next_vec
            q_mat = self.model.predict(states)
            x_batch = states
            y_batch = q_mat
            for index, a in enumerate(actions):
                y_batch[index][a] = target_vec[index]
            self.model.fit(x_batch, y_batch, batch_size=batch_size,
                           verbose=verb, epochs=1)
        print('Experience replay done {} times'.format(num_replay))
    else:
        print('Not enough memory in replay buffer')
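# The snippet above calls Softmax(q_next_mat, self.tau) as a plain function
# over a batch of Q-values. A plausible, numerically stable definition is
# sketched below (an assumption -- the actual helper is not shown): a
# Boltzmann distribution over actions at temperature tau.
import numpy as np

def Softmax(q_mat, tau):
    prefs = q_mat / tau                        # (batch, num_actions)
    prefs -= prefs.max(axis=1, keepdims=True)  # subtract row max for stability
    exp_prefs = np.exp(prefs)
    return exp_prefs / exp_prefs.sum(axis=1, keepdims=True)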
def __init__(self, vocab_size=10000, wordvec_size=650,
             hidden_size=650, dropout_ratio=0.5):
    V, D, H = vocab_size, wordvec_size, hidden_size
    rn = np.random.randn

    # Initialise weights
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx1 = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh1 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b1 = np.zeros(4 * H).astype('f')
    # The second LSTM consumes the H-dimensional output of the first
    # (the original passed D here, which only works because D == H)
    lstm_Wx2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_Wh2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b2 = np.zeros(4 * H).astype('f')
    affine_b = np.zeros(V).astype('f')

    # Generate layers; the Affine layer ties (shares) the embedding weights
    self.layers = [
        TimeEmbedding(embed_W),
        TimeDropout(dropout_ratio),
        TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
        TimeDropout(dropout_ratio),
        TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
        TimeDropout(dropout_ratio),
        TimeAffine(embed_W.T, affine_b)
    ]
    self.loss_layer = TimeSoftmaxWithLoss()
    self.softmax = Softmax()
    self.lstm_layers = [self.layers[2], self.layers[4]]
    self.dropout_layers = [self.layers[1], self.layers[3], self.layers[5]]

    # Collect all weights and gradients into flat lists
    self.params = []
    self.grads = []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
def __init__(self, input_size: int = INPUT_SIZE, output_size: int = OUTPUT_SIZE, hidden_size: int = HIDDEN_SIZE, embed_size: int = EMBED_SIZE, lr: float = LEARNING_RATE, clip_grad: float = CLIP_GRAD, init_range: float = INIT_RANGE): input_layers = [ Embedding(input_size, embed_size, init_range), LSTM(embed_size, hidden_size, init_range) ] output_layers = [ Embedding(output_size, embed_size, init_range), LSTM(embed_size, hidden_size, init_range, previous=input_layers[1]), Softmax(hidden_size, output_size, init_range) ] self.input_layers, self.output_layers = input_layers, output_layers self.hidden_size = hidden_size self.embed_size = embed_size self.input_size = input_size self.output_size = output_size self.lr = lr self.clip_grad = clip_grad
def BIDILSTM(Ni, Ns, No):
    """Bidirectional LSTM: a forward and a reversed LSTM run in parallel,
    and their concatenated states feed a softmax classifier."""
    lstm1 = LSTM(Ni, Ns)
    lstm2 = Reversed(LSTM(Ni, Ns))
    bidi = Parallel(lstm1, lstm2)
    assert No > 1
    logreg = Softmax(2 * Ns, No)
    stacked = Stacked([bidi, logreg])
    return stacked
def calc(output_network, target_vector):
    shape = target_vector.shape[0]
    smax = Softmax.calc(output_network)
    # Mean negative log-likelihood of the true classes (one-hot targets)
    log_likelihood = -np.log(smax[range(shape), target_vector.argmax(axis=1)])
    loss = np.sum(log_likelihood) / shape
    return loss
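# A worked call of calc(), assuming it is callable as shown and that
# Softmax.calc returns row-wise softmax probabilities: two samples, three
# classes, one-hot targets.
import numpy as np

logits = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]])
targets = np.array([[1, 0, 0],   # true class 0
                    [0, 1, 0]])  # true class 1
# calc() averages -log p(true class) over the batch, yielding one scalar.
print(calc(logits, targets))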
def __init__(self, layers_dim):
    self.layers = []
    self.layerdim = layers_dim
    self.SM = Softmax()
    self.MB = MiniBatch()
    for i in range(len(layers_dim) - 1):
        # For TanH activation, ensure weights are positive:
        # wx = np.random.randint(0, 100, size=(layers_dim[i], layers_dim[i+1])) / 10000
        # bx = np.atleast_2d(np.array([np.random.randint(0, 100) / 1000 for i in range(layers_dim[i+1])]))
        # For leaky ReLU we want both positive and negative weights
        wx = np.random.randn(layers_dim[i], layers_dim[i + 1]) / np.sqrt(layers_dim[i])
        bx = np.atleast_2d(np.random.randn(layers_dim[i + 1]).reshape(1, layers_dim[i + 1]))
        self.layers.append(Layer(wx, bx))
class SoftmaxTest(unittest.TestCase):
    def setUp(self):
        self.layer = Softmax()

    def test_backprop(self):
        dummy_scores = np.random.rand(5, 5)
        dummy_labels = np.array([0, 1, 2, 3, 4])
        expected_grad = eval_numerical_gradient(
            lambda x: avg_loss(self.layer.forward(x), dummy_labels),
            dummy_scores)
        grad = self.layer.backprop(dummy_labels)
        np.testing.assert_almost_equal(expected_grad, grad, decimal=5)

    def tearDown(self):
        pass
def __init__(
    self,
    vocabulary_size,
    max_seq_length,
    output_dims=2,
    out_channels=100,
    embed_dim=300,
    padding_idx=0,
    kernel_heights=[3, 4, 5],
    hidden_dims=[],
    fc_dropout=0,
    embedding_matrix=None,
    freeze_embedding_layer=True,
):
    super().__init__()
    self.out_channels = out_channels
    self.in_channels = 1
    self.n_kernels = len(kernel_heights)
    self.pool_sizes = [(max_seq_length - K, 1) for K in kernel_heights]
    self.max_seq_length = max_seq_length
    self.hidden_dims = hidden_dims
    self.output_dims = output_dims

    # Assumes vocab size is the same as the embedding matrix size, and
    # therefore includes special tokens, e.g. <pad>
    self.embedding = nn.Embedding(
        vocabulary_size, embed_dim, padding_idx=padding_idx
    )
    if embedding_matrix is not None:
        # Load pre-trained weights (a torch FloatTensor). Note that
        # nn.Embedding.from_pretrained is a classmethod that returns a new
        # module, so copy the weights in place instead.
        self.embedding.weight.data.copy_(embedding_matrix)
    if freeze_embedding_layer:
        self.embedding.weight.requires_grad = False

    self.convs = nn.ModuleList(
        [
            nn.Conv2d(
                self.in_channels,
                self.out_channels,
                kernel_size=(K, embed_dim),
            )
            for K in kernel_heights
        ]
    )
    self.pools = nn.ModuleList(
        [nn.MaxPool2d(kernel_size=pool_size) for pool_size in self.pool_sizes]
    )
    self.fc = Softmax(
        input_dim=self.out_channels * self.n_kernels,
        hidden_dims=self.hidden_dims,
        output_dim=self.output_dims,
        dropout=fc_dropout,
    )
def POST(self, data):
    arr = re.split(r"\[|]|,", data)
    X = [[0 for x in range(28 * 28)] for y in range(1)]
    for i in range(28 * 28):
        # Invert and scale the pixel values to [0, 1]
        X[0][i] = (255 - float(str(arr[i + 1]))) / 255.
    softmax = Softmax()
    y = softmax.classify(X)
    index = -1
    mv = -100
    strY = ""
    for i in range(10):
        if i > 0:
            strY = strY + ","
        strY = strY + str(y[0][i])
        if (y[0][i] * 1000) > mv:
            mv = y[0][i] * 1000
            index = i
    return strY
class SoftmaxTest(unittest.TestCase):
    def setUp(self):
        """Configures and sets up variables for each test case.

        N (int): Number of inputs
        D (int): Input dimension
        """
        np.random.seed(314)
        self.N = 10
        self.D = 10
        self.layer = Softmax()

    def tearDown(self):
        """Tear down after each test case."""
        pass

    def test_forward_prop(self):
        x = np.linspace(-1, 1, num=self.N * self.D).reshape(self.N, self.D)
        output = self.layer.forward_prop(x)
        # Each row should be a valid probability distribution
        np.testing.assert_array_almost_equal(np.ones(self.N),
                                             np.sum(output, axis=1),
                                             decimal=7)
        # The largest input (last column) should get the largest probability
        np.testing.assert_array_almost_equal(np.ones(self.N) * (self.D - 1),
                                             np.argmax(output, axis=1),
                                             decimal=7)

    def test_backprop(self):
        x = np.random.randn(self.N, self.D)
        y = np.random.randn(*x.shape)
        # Numerical gradient w.r.t. the inputs
        num_grad_x = eval_numerical_gradient(
            f=lambda x: categorical_cross_entropy(self.layer.forward_prop(x), y),
            x=x,
            verbose=False)
        # Compute gradients using the backprop algorithm
        grad_x = self.layer.backprop(y)
        np.testing.assert_array_almost_equal(num_grad_x, grad_x, decimal=7)
def auto_get_parameters(X_train, y_train, X_val, y_val):
    learning_rates = [1e-7, 5e-5]
    regularization_strengths = [5e4, 1e5]
    best_val = -1
    best_parameters = None
    for i in learning_rates:
        for j in regularization_strengths:
            softmax = Softmax()
            softmax.train(X_train, y_train, j, i, 200, 1500, True)
            y_pred = softmax.predict(X_val)
            acc = np.mean(y_pred == y_val)
            if acc > best_val:
                best_val = acc
                best_parameters = (i, j)
    print('Parameters identified. Best validation accuracy achieved '
          'during cross-validation: %f' % best_val)
    return best_parameters
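# A hedged sketch tying this search to get_softmax_model() above: the
# returned (learning rate, regularization) pair is consumed in the same
# order both helpers pass to Softmax.train. X_train/y_train/X_val/y_val
# are assumed to be pre-split arrays.
best_parameters = auto_get_parameters(X_train, y_train, X_val, y_val)
model = get_softmax_model(best_parameters, X_train, y_train)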
def __init__(self, input_len, nodes):
    # We divide by input_len to reduce the variance of our initial values
    self.weights = np.random.randn(input_len, nodes) / input_len
    self.biases = np.zeros(nodes)

def forward(self, input):
    # Performs a forward pass of the softmax layer using the given input.
    # Returns a 1d numpy array containing the respective probability values.
    # input can be any array with any dimensions.
    input = input.flatten()
    input_len, nodes = self.weights.shape
    totals = np.dot(input, self.weights) + self.biases
    exp = np.exp(totals)
    return exp / np.sum(exp, axis=0)


import mnist
import numpy as np
from conv import Conv3x3
from maxpool import MaxPool2
from softmax import Softmax

# We only use the first 1k testing examples (out of 10k total)
# in the interest of time. Feel free to change this if you want.
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]

conv = Conv3x3(8)                   # 28x28x1 -> 26x26x8
pool = MaxPool2()                   # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10)  # 13x13x8 -> 10

def forward(image, label):
    # Completes a forward pass of the CNN and calculates the accuracy and
    # cross-entropy loss. image is a 2d numpy array; label is a digit.
    # We transform the image from [0, 255] to [-0.5, 0.5] to make it easier
    # to work with. This is standard practice.
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)

    # Calculate cross-entropy loss and accuracy. np.log() is the natural log.
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    return out, loss, acc
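# A minimal evaluation loop over the test set defined above, accumulating
# the per-example loss and accuracy that forward() returns.
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
    _, l, acc = forward(im, label)
    loss += l
    num_correct += acc

num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)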
def get_calc(context):
    if self.model_params.use_adaptive_softmax:
        softmax = self.adaptive_logits[context].log_prob
        calc = lambda hidden, _: softmax(hidden)
    else:
        calc_logits = Logits()
        softmax = Softmax()
        calc = lambda hidden, candidate_entity_ids: softmax(
            calc_logits(hidden, self.entity_embeds(candidate_entity_ids)))
    return calc
def outnode(self, ylist, midnum, w, b, batchnum, itbool, normclass):
    # random
    # seed(seednum)
    # wran = normal(loc=0, scale=1/math.sqrt(midnum), size=(10, midnum))
    # seed(seednum)
    # bran = normal(loc=0, scale=1/math.sqrt(midnum), size=(10, 1))
    summid = np.dot(w, ylist) + b
    (sum_norm, running_mean, running_var) = normclass.forward(summid, itbool)
    soft = Softmax.softmax(sum_norm, batchnum)
    return (soft, running_mean, running_var)
class AttentionWeight:
    def __init__(self):
        self.cache = None
        self.softmax = Softmax()

    def forward(self, hs, h):
        N, T, H = hs.shape

        # Dot-product score between each encoder state and the decoder state
        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)
        self.softmax.forward(s)
        a = self.softmax.out
        self.cache = (hs, hr)
        return a

    def backward(self, da):
        hs, hr = self.cache
        N, T, H = hs.shape

        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        # t = hs * hr, so the gradient flows to both factors
        dhs = dt * hr
        dh = np.sum(dt * hs, axis=1)  # the original mistakenly returned da here
        return dhs, dh
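# A small usage sketch of AttentionWeight with assumed shapes: attention
# weights over T=4 encoder states of size H=5 for a batch of N=2.
import numpy as np

np.random.seed(0)
hs = np.random.randn(2, 4, 5)  # encoder hidden states, (N, T, H)
h = np.random.randn(2, 5)      # current decoder state, (N, H)

attn = AttentionWeight()
a = attn.forward(hs, h)        # (N, T); each row sums to 1
print(a.sum(axis=1))           # -> [1. 1.]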
def build_model(self):
    self.conv_layers = []
    self.linear_layers = []
    self.layers = []

    # 1x28x28 -> 6x24x24
    self.conv_layers += [Conv(1, 6, 5, self.activation)]
    # 6x24x24 -> 6x12x12
    self.conv_layers += [MaxPool_2()]
    # 6x12x12 -> 16x8x8
    self.conv_layers += [Conv(6, 16, 5, self.activation)]
    # 16x8x8 -> 16x4x4
    self.conv_layers += [MaxPool_2()]

    # 256 -> 120
    self.linear_layers += [Linear(16 * 4 * 4, 120, self.activation)]
    # 120 -> 84
    self.linear_layers += [Linear(120, 84, self.activation)]
    # 84 -> 10
    self.linear_layers += [Softmax(84, self.no_of_classes)]

    self.layers = self.conv_layers + self.linear_layers
def softmax_algo():
    '''
    The Softmax algorithm copes with arms differing in estimated value by
    explicitly incorporating the reward rates of the available arms into
    its method for choosing which arm to select when it explores.

    :temperature: at low temperatures the Softmax algorithm behaves in an
        orderly, near-greedy fashion, while at high temperatures it behaves
        essentially randomly
    '''
    n_sim = 5000
    horizon = 250
    temperature = 0.5
    algo = Softmax(temperature, [], [])
    mean_probs = [0.3, 0.35, 0.4, 0.5, 0.55]
    filename = 'softmax_temp0.5'
    test_algo_monte_carlo(algo, mean_probs, n_sim=n_sim, horizon=horizon,
                          filename=filename, store_it=True)
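# The selection rule at the heart of the Softmax bandit, as a standalone
# sketch (the Softmax class's real interface is not shown here): turn the
# running reward estimates into a Boltzmann distribution and sample an arm.
import numpy as np

def select_arm(values, temperature, rng=None):
    if rng is None:
        rng = np.random.default_rng()
    z = np.asarray(values) / temperature
    z -= z.max()  # numerical stability
    probs = np.exp(z) / np.exp(z).sum()
    return rng.choice(len(values), p=probs)

# Low temperature -> near-greedy; high temperature -> near-uniform.
print(select_arm([0.3, 0.35, 0.4, 0.5, 0.55], temperature=0.5))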
class SoftMaxWithLoss:
    def __init__(self):
        self.params = []
        self.grads = []
        self.y = None  # softmax output
        self.t = None  # teacher labels
        self.softmax = Softmax()

    def forward(self, x, t):
        self.t = t
        self.y = self.softmax.calc_softmax(x)

        # Convert one-hot teacher labels to class indices
        if self.t.size == self.y.size:
            self.t = self.t.argmax(axis=1)

        loss = cross_entropy_error(self.y, self.t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        # Combined softmax + cross-entropy gradient: (y - onehot(t)) / batch
        dx = self.y.copy()
        dx[np.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size
        return dx
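# A quick numerical check of the combined softmax + cross-entropy gradient
# dx = (y - onehot(t)) / batch_size that backward() implements, assuming the
# SoftMaxWithLoss class above and its cross_entropy_error helper are defined.
import numpy as np

layer = SoftMaxWithLoss()
x = np.random.randn(3, 4)
t = np.array([1, 0, 3])
layer.forward(x, t)
dx = layer.backward()

eps, i, j = 1e-5, 0, 2
x_pos, x_neg = x.copy(), x.copy()
x_pos[i, j] += eps
x_neg[i, j] -= eps
num_grad = (layer.forward(x_pos, t) - layer.forward(x_neg, t)) / (2 * eps)
print(num_grad, dx[i, j])  # the two values should agree closely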
class TestSoftmax(unittest.TestCase):
    def setUp(self):
        self.softmax = Softmax()
        self.x = np.array([[-0.27291637, 3.0623984, 1.08772839, 1.21167545],
                           [0.77815361, 1.20011612, -0.37179735, 1.93945452],
                           [-1.02360881, -0.23723418, -1.42713268, -0.6484095],
                           [-0.6631865, 0.01433258, -2.450729, -2.02298841]])

    def test_calcsoftmax(self):
        x = self.softmax.calc_softmax(self.x)
        assert_almost_equal(
            np.array([[0.02673862, 0.75101348, 0.10424601, 0.11800189],
                      [0.16568116, 0.2526557, 0.05246332, 0.52919982],
                      [0.18801706, 0.41277693, 0.12558827, 0.27361774],
                      [0.29471841, 0.58029664, 0.04932731, 0.07565764]]), x)

    def test_forward(self):
        self.softmax.forward(self.x)
        assert_almost_equal(
            np.array([[0.02673862, 0.75101348, 0.10424601, 0.11800189],
                      [0.16568116, 0.2526557, 0.05246332, 0.52919982],
                      [0.18801706, 0.41277693, 0.12558827, 0.27361774],
                      [0.29471841, 0.58029664, 0.04932731, 0.07565764]]),
            self.softmax.out)

    def test_backward(self):
        self.softmax.forward(self.x)
        dout = np.array([[0.11843554, -1.15122357, 1.47778478, -1.61246747],
                         [1.42841483, 0.51888186, 0.18154817, 0.37469379],
                         [-0.37009244, 0.21842416, -0.72251804, -0.20918206],
                         [-1.47353003, -0.08212526, 0.90979081, 1.11006032]])
        dx = self.softmax.backward(dout)
        # Note: this only asserts that backward() leaves the cached forward
        # output untouched; dx itself is not checked here.
        assert_almost_equal(
            np.array([[0.0267386, 0.7510135, 0.104246, 0.1180019],
                      [0.1656812, 0.2526557, 0.0524633, 0.5291998],
                      [0.1880171, 0.4127769, 0.1255883, 0.2736177],
                      [0.2947184, 0.5802966, 0.0493273, 0.0756576]]),
            self.softmax.out)
class APAProject(object):
    def __init__(self):
        self.data_reader = DataReader('data/training_data/training.data',
                                      'data/stopwords/stopwords.txt', True, 1000)
        self.perceptron = Perceptron()
        self.softmax = Softmax()
        # Let's create 5 classifiers
        universe_size = len(self.data_reader.universe)
        self.perceptron_classifiers = [np.zeros((universe_size + 1))
                                       for i in range(5)]
        self.softmax_classifier = np.ones((5, universe_size + 1))

    def file_to_data_set(self, file):
        data_set = []
        with open(file) as data:
            for line in data:
                _, score, sentence = line.split('|')
                score = float(score)
                # Calculating the train target:
                # 0 if 0 < score <= 0.2, 1 if 0.2 < score <= 0.4, etc.
                class_number = math.floor(score * 5)
                sentence_vector = self.data_reader.get_sentence_coordinates(sentence)
                data_set.append((sentence_vector, class_number))
        return data_set

    def train_perceptron(self):
        start_time = time.time()
        print("Starting training session ...")
        # We need to read the data and train the perceptron
        training_data_set = self.file_to_data_set('data/training_data/training.data')
        PERIODS = 5
        for i in range(PERIODS):
            # For each period, reshuffle
            random.shuffle(training_data_set)
            # We train every classifier
            for (classifier_index, classifier) in enumerate(self.perceptron_classifiers):
                self.perceptron_classifiers[classifier_index], updates = \
                    self.perceptron.train_epoch(training_data_set,
                                                classifier_index, classifier)
            self.test_perceptron_multiclass()
        training_duration = time.time() - start_time
        print("Training session finished: duration %s seconds" % training_duration)

    def test_perceptron(self):
        print("Starting testing session...")
        test_data_set = self.file_to_data_set('data/test_data/test.data')
        for (classifier_index, classifier) in enumerate(self.perceptron_classifiers):
            error_count, success_count = self.perceptron.test_classifier(
                test_data_set, classifier, classifier_index)
            print("Classifier %s just finished. %s%% results are good"
                  % ((classifier_index + 1),
                     success_count * 100 / (success_count + error_count)))

    def test_perceptron_multiclass(self):
        print("Starting testing session...")
        test_data_set = self.file_to_data_set('data/test_data/test.data')
        success_count = 0
        error_count = 0
        for (sentence_vector, class_number) in test_data_set:
            results_classifiers = []
            for (classifier_index, classifier) in enumerate(self.perceptron_classifiers):
                results_classifiers.append(np.dot(classifier, sentence_vector))
            if results_classifiers.index(max(results_classifiers)) == class_number:
                success_count += 1
            else:
                error_count += 1
        print("Classifier just finished. %s/%s ~= %s%% results are good"
              % (success_count, (error_count + success_count),
                 success_count * 100 / (success_count + error_count)))

    def train_softmax(self):
        start_time = time.time()
        print("Starting softmax training session...")
        training_data_set = self.file_to_data_set('data/training_data/training.data')
        PERIODS = 10
        for i in range(PERIODS):
            random.shuffle(training_data_set)
            # We train PERIODS times and test the classifier after each pass
            # to track its evolution.
            # Reminder: self.softmax_classifier = np.ones((5, universe_size))
            self.softmax_classifier = self.softmax.train_epoch(
                self.softmax_classifier, training_data_set)
            self.test_softmax()
        training_duration = time.time() - start_time
        print("Training session finished: duration %s seconds" % training_duration)

    def test_softmax(self):
        print("Starting softmax testing session...")
        test_data_set = self.file_to_data_set('data/test_data/test.data')
        error_count, success_count = self.softmax.test_classifier(
            self.softmax_classifier, test_data_set)
        print("Classifier just finished. %s/%s ~= %s%% results are good"
              % (success_count, (error_count + success_count),
                 success_count * 100 / (success_count + error_count)))
class Net:
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5,
                             'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        # Convolution layer hyperparameters
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Weight parameters
        self.params = {}
        self.params['W1'] = weight_init_std * \
            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = Softmax()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        # Forward
        self.loss(x, t)

        # Backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        grads['W3'] = self.layers['Affine3'].dW
        grads['b3'] = self.layers['Affine3'].db
        return grads
import numpy as np
from softmax import Softmax
from sklearn.datasets import load_iris

data = load_iris()
X = data.data
y = data.target

reg_strength = 1e-4
batch_size = 50
epochs = 1000
learning_rate = 5e-1
weight_update = 'sgd_with_momentum'

sm = Softmax(batch_size=batch_size, epochs=epochs, learning_rate=learning_rate,
             reg_strength=reg_strength, weight_update=weight_update)
sm.train(X, y)
pred = sm.predict(X)
print(np.mean(np.equal(y, pred)))  # training accuracy
"""
train_images, test_images, train_labels, test_labels = train_test_split(
    X, y_hot, test_size=.2, random_state=42)
x_train, x_test, y_cat_train, y_cat_test = train_test_split(
    X, y_cat, test_size=.2, random_state=42)
"""

# We only use the first 1k examples of each set in the interest of time.
# Feel free to change this if you want.
(train_X, train_Y), (test_X, test_Y) = tf.keras.datasets.mnist.load_data()
train_images = train_X[:1000]
train_labels = train_Y[:1000]
test_images = test_X[:1000]
test_labels = test_Y[:1000]

conv = Conv3x3(8)                   # 28x28x1 -> 26x26x8
pool = MaxPool2()                   # 26x26x8 -> 13x13x8
softmax = Softmax(13 * 13 * 8, 10)  # 13x13x8 -> 10

def forward(image, label):
    """
    Completes a forward pass of the CNN and calculates the accuracy and
    cross-entropy loss.
    - image is a 2d numpy array
    - label is a digit
    """
    # We transform the image from [0, 255] to [-0.5, 0.5] to make it easier
    # to work with. This is standard practice.
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)

    # Calculate cross-entropy loss and accuracy (cf. the identical helper above)
    loss = -np.log(out[label])
    acc = 1 if np.argmax(out) == label else 0
    return out, loss, acc
# Training dataset
train_set = mnist_dataset.train
# train_set.labels = normalize(train_set.labels)

# Test dataset
test_set = mnist_dataset.test
# test_set.labels = normalize(test_set.labels)

print("Training dataset size: ", train_set.num_examples)
print("Test dataset size: ", test_set.num_examples)

batch_size = 200
max_epoch = 100
reg = 1e-5

loss_history = []
acc_history = []

classifier = Softmax(train_set.images.shape[1],
                     len(np.unique(train_set.labels)))

for epoch in range(0, max_epoch):
    iter_per_batch = train_set.num_examples // batch_size
    for batch_id in range(0, iter_per_batch):
        # Get the data of the next minibatch (already shuffled)
        batch = train_set.next_batch(batch_size)
        X, label = batch
        label = normalize(label)

        # Compute loss and gradient
        loss, grad = classifier.vectorized_loss(X, label, reg)
        loss_history.append(loss)
def load(state):
    lstm = LSTM.load(state['lstm'])
    output = Softmax.load(state['output'])
    obj = CharacterGenerator(lstm, output)
    return obj
import mnist
import numpy as np
from Conv import Conv3x3
from softmax import Softmax
from PoolingLayer import MaxPool2

train_images = mnist.train_images()[:1000]
train_labels = mnist.train_labels()[:1000]
test_images = mnist.test_images()[:1000]
test_labels = mnist.test_labels()[:1000]

conv = Conv3x3(8)
pool = MaxPool2()
softmax = Softmax(13 * 13 * 8, 10)

def forward(image, labels):
    output = conv.forward((image / 255) - 0.5)
    output = pool.forward(output)
    output = softmax.forward(output)

    loss = -np.log(output[labels])
    acc = 1 if np.argmax(output) == labels else 0
    return output, loss, acc

def train(im, labels, learning_rate=0.005):
    out, loss, acc = forward(im, labels)

    # Initial gradient of the cross-entropy loss w.r.t. the softmax output
    gradient = np.zeros(10)
    gradient[labels] = -1 / out[labels]

    # Backprop through the network, updating weights along the way
    gradient = softmax.backprop(gradient, learning_rate)
    gradient = pool.backprop(gradient)
    gradient = conv.backprop(gradient, learning_rate)