def __init__(self, dataset, ctx, labels=None, shape=None, label_shape=None,
             *args, **kwargs):
    super().__init__(*args, **kwargs)
    if labels is not None:
        llen = 0
        for cond in labels:
            llen += (dataset._label == cond).sum()
        self._length = llen
    else:
        self._length = len(dataset)
    if shape is None:
        shape = dataset._data.shape[1:]
    if label_shape is None:
        label_shape = dataset._label.shape[1:]
    self._data = nd.zeros([self._length] + list(shape), dtype='float32', ctx=ctx)
    self._label = nd.zeros([self._length] + list(label_shape), dtype='int32', ctx=ctx)
    uniques = set()
    i = 0
    for dat, dlab in dataset:
        lab = dlab.item()
        if labels is None or np.any([lab == cond for cond in labels]):
            self._data[i] = dat
            self._label[i] = lab
            i += 1
            uniques.add(lab)
    self.classes = list(uniques)
def sample(prefix, num_chars, temperature=1.0):
    #####################################
    # Initialize the string that we'll return to the supplied prefix
    #####################################
    string = prefix
    #####################################
    # Prepare the prefix as a sequence of one-hots for ingestion by RNN
    #####################################
    prefix_numerical = [character_dict[char] for char in prefix]
    input = one_hots(prefix_numerical)
    #####################################
    # Set the initial state of the hidden representation ($h_0$) to the zero vector
    #####################################
    h = nd.zeros(shape=(1, num_hidden), ctx=ctx)
    c = nd.zeros(shape=(1, num_hidden), ctx=ctx)
    #####################################
    # For num_chars iterations,
    #   1) feed in the current input
    #   2) sample the next character from the output distribution
    #   3) add the sampled character to the decoded string
    #   4) prepare the sampled character as a one-hot (to be the next input)
    #####################################
    for i in range(num_chars):
        outputs, h, c = lstm_rnn(input, h, c, temperature=temperature)
        choice = np.random.choice(vocab_size, p=outputs[-1][0].asnumpy())
        string += character_list[choice]
        input = one_hots([choice])
    return string
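# Hedged usage sketch for sample() above: `character_dict`, `character_list`,
# `lstm_rnn`, `one_hots`, `num_hidden`, and `ctx` are assumed to be defined by
# the surrounding notebook; the prefix and temperature here are illustrative.
print(sample("The Time Ma", 256, temperature=0.8))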
def get_parameters():
    # parameters for INPUT gate
    W_xi = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hi = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_i = nd.zeros(shape=config.hidden_dim)
    # parameters for FORGET gate
    W_xf = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hf = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_f = nd.zeros(shape=config.hidden_dim)
    # parameters for OUTPUT gate
    W_xo = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_ho = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_o = nd.zeros(shape=config.hidden_dim)
    # parameters for memory cell
    W_xc = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hc = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_c = nd.zeros(shape=config.hidden_dim)
    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o,
                  W_xc, W_hc, b_c, W_hy, b_y]
    for parameter in parameters:
        parameter.attach_grad()
    return parameters
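# A minimal sketch of the `config` namespace that get_parameters() above
# assumes; every value here is an illustrative placeholder, not taken from
# the source.
class Config(object):
    std = 0.01        # stddev for weight initialization
    input_dim = 77    # e.g. one-hot vocabulary size
    hidden_dim = 256  # number of hidden units
    output_dim = 77   # e.g. next-character logits
    batch_size = 32
    num_steps = 35

config = Config()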
def get_parameters():
    # parameters for UPDATE gate
    W_xz = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hz = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_z = nd.zeros(shape=config.hidden_dim)
    # parameters for RESET gate
    W_xr = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hr = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_r = nd.zeros(shape=config.hidden_dim)
    # parameters for candidate hidden state
    W_xh = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hh = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_h = nd.zeros(shape=config.hidden_dim)
    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hy, b_y]
    for parameter in parameters:
        parameter.attach_grad()
    return parameters
def forward(self, X, lrp_aware=False):
    '''
    Realizes the forward pass of an input through the convolution layer.

    Parameters
    ----------
    X : mxnet.ndarray.ndarray.NDArray
        a network input, shaped (N,H,W,D), with
        N = batch size
        H, W, D = input size in height, width, depth

    lrp_aware : bool
        controls whether the forward pass is to be computed with awareness
        for multiple following LRP calls. this will sacrifice speed in the
        forward pass but will save time if multiple LRP calls will follow
        for the current X, e.g. with different parameter settings or for
        multiple target classes.

    Returns
    -------
    Y : mxnet.ndarray.ndarray.NDArray
        the layer outputs.
    '''
    self.lrp_aware = lrp_aware

    self.X = X
    N, H, W, D = X.shape

    hf, wf, df, nf = self.W.shape
    hstride, wstride = self.stride
    numfilters = self.n

    # assume the given pooling and stride parameters are carefully chosen.
    Hout = (H - hf) // hstride + 1
    Wout = (W - wf) // wstride + 1

    # initialize pooled output
    self.Y = nd.zeros((N, Hout, Wout, numfilters), ctx=self.ctx, dtype=self.dtype)

    if self.lrp_aware:
        # initialize container for precomputed forward messages
        self.Z = nd.zeros((N, Hout, Wout, hf, wf, df, nf), ctx=self.ctx, dtype=self.dtype)
        for i in range(Hout):
            for j in range(Wout):
                # N, hf, wf, df, nf
                self.Z[:, i, j, ...] = nd.expand_dims(self.W, axis=0) * \
                    nd.expand_dims(self.X[:, i*hstride:i*hstride+hf, j*wstride:j*wstride+wf, :], axis=4)
                self.Y[:, i, j, :] = self.Z[:, i, j, ...].sum(axis=(1, 2, 3)) + self.B
    else:
        for i in range(Hout):
            for j in range(Wout):
                self.Y[:, i, j, :] = nd.sum(
                    nd.expand_dims(X[:, i*hstride:i*hstride+hf, j*wstride:j*wstride+wf, :].transpose((1, 2, 3, 0)), 4) *
                    nd.expand_dims(self.W, 3),
                    axis=(0, 1, 2)) + self.B

    return self.Y
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        # With consecutive sampling, the state is initialized once per epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the state is re-initialized per mini-batch;
            # otherwise it is detached from the previous batch's graph.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                y = Y.T.reshape((-1,))
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print("\nepoch %d, perplexity %f" %
                  (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
def test_gluon_embedding():
    m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X)
    m.initialize()
    a = nd.zeros((MEDIUM_X, SMALL_Y))
    b = m(a)
    assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X)
    assert b.asnumpy().size == LARGE_SIZE
def train_ch7(trainer_fn, states, hyperparams, features, labels,
              batch_size=10, num_epochs=2):
    """Train a linear regression model."""
    net, loss = linreg, squared_loss
    w, b = nd.random.normal(scale=0.01, shape=(features.shape[1], 1)), nd.zeros(1)
    w.attach_grad()
    b.attach_grad()

    def eval_loss():
        return loss(net(features, w, b), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(
        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X, w, b), y).mean()
            l.backward()
            trainer_fn([w, b], states, hyperparams)
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    set_figsize()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
def __setitem__(self, tokens, new_embedding):
    """Updates embedding vectors for tokens.

    If self.allow_extend is True, vectors for previously unknown tokens can be
    introduced.

    Parameters
    ----------
    tokens : hashable object or a list or tuple of hashable objects
        A token or a list of tokens whose embedding vectors are to be updated.
    new_embedding : mxnet.ndarray.NDArray
        An NDArray to be assigned to the embedding vectors of `tokens`.
        Its length must be equal to the number of `tokens` and its width must
        be equal to the dimension of embedding of the glossary. If `tokens` is
        a singleton, it must be 1-D or 2-D. If `tokens` is a list of multiple
        strings, it must be 2-D.
    """
    if self.allow_extend and self._idx_to_vec is None:
        # Initialize self._idx_to_vec
        assert C.UNK_IDX == 0
        self._idx_to_vec = self._init_unknown_vec(
            shape=(1, new_embedding.shape[-1]))

    tokens = self._check_vector_update(tokens, new_embedding)

    if self.allow_extend:
        # Add new / previously unknown tokens
        for token in filter(lambda t: t not in self._token_to_idx, tokens):
            idx = len(self._token_to_idx)
            self._token_to_idx[token] = idx
            self._idx_to_token.append(token)

        num_extended = len(self._token_to_idx) - self.idx_to_vec.shape[0]
        if num_extended == 1:
            warnings.warn(
                'When adding new tokens via TokenEmbedding.__setitem__ '
                'the internal embedding matrix needs to be reallocated. '
                'Users are therefore encouraged to batch their updates '
                '(i.e. add multiple new tokens at a time).')

        # Extend shape of idx_to_vec
        idx_to_vec = nd.zeros(shape=(len(self._token_to_idx),
                                     self.idx_to_vec.shape[1]))
        idx_to_vec[:self.idx_to_vec.shape[0]] = self._idx_to_vec
        self._idx_to_vec = idx_to_vec

    indices = []
    for token in tokens:
        if token in self._token_to_idx:
            indices.append(self._token_to_idx[token])
        else:
            if self.unknown_token:
                raise KeyError(
                    ('Token "{}" is unknown. To update the embedding vector for an'
                     ' unknown token, please explicitly include "{}" as the '
                     '`unknown_token` in `tokens`. This is to avoid unintended '
                     'updates.').format(token, self._idx_to_token[C.UNK_IDX]))
            else:
                raise KeyError(
                    ('Token "{}" is unknown. Updating the embedding vector for an '
                     'unknown token is not allowed because `unknown_token` is not '
                     'specified.').format(token))

    self._idx_to_vec[nd.array(indices)] = new_embedding
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    h, w = K.shape
    Y = nd.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y
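# Quick check of corr2d() on the classic 3x3 input / 2x2 kernel example; the
# expected output follows directly from the definition. Assumes
# `from mxnet import nd` as in the surrounding code.
X = nd.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = nd.array([[0, 1], [2, 3]])
print(corr2d(X, K))  # [[19. 25.] [37. 43.]]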
def test_ndarray2numpy(self):
    m = gluon.nn.Embedding(14000, 128)
    m.initialize()
    ind = nd.zeros((700000, 128))
    x = m(ind)
    x.shape
    test = x.asnumpy()
    assert (x.shape == test.shape)
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))           # fake bias
    wfake = wfake / nd.norm(wfake)                # rescale to unit length

    # make some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))

    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 2 * (margin > 0) - 1
            i += 1
    return X, Y
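# Hedged usage sketch: draw 100 linearly separable 2-D points with margin 0.1
# and plot them colored by label. Assumes matplotlib.pyplot is imported as plt.
X, Y = getfake(100, 2, 0.1)
plt.scatter(X[:, 0].asnumpy(), X[:, 1].asnumpy(), c=Y.asnumpy())
plt.show()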
def transform_mnist(data, label):
    # transform a batch of examples
    if resize:
        n = data.shape[0]
        new_data = nd.zeros((n, resize, resize, data.shape[3]))
        for i in range(n):
            new_data[i] = image.imresize(data[i], resize, resize)
        data = new_data
    # change data from batch x height x width x channel
    # to batch x channel x height x width
    return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
def transform_mnist(data, label):
    # transform a batch of examples
    if resize:  # reshape the images
        n = data.shape[0]  # number of samples: n * 784 * 1  ->  n * 28 * 28 * 1
        new_data = nd.zeros((n, resize, resize, data.shape[3]))  # data.shape[3] is the channel count
        for i in range(n):
            new_data[i] = image.imresize(data[i], resize, resize)
        data = new_data
    # change data from batch(0) x height(1) x width(2) x channel(3)
    # to batch(0) x channel(3) x height(1) x width(2)
    return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
def plotscore(w, d):
    xgrid = np.arange(-3, 3, 0.02)
    ygrid = np.arange(-3, 3, 0.02)
    xx, yy = np.meshgrid(xgrid, ygrid)
    zz = nd.zeros(shape=(xgrid.size, ygrid.size, 2))
    zz[:, :, 0] = nd.array(xx)
    zz[:, :, 1] = nd.array(yy)
    vv = nd.dot(zz, w) + d  # bias term; the original referenced an undefined `b`
    CS = plt.contour(xgrid, ygrid, vv.asnumpy())
    plt.clabel(CS, inline=1, fontsize=10)
def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char,
                char_to_idx, get_inputs, is_lstm=False):
    """Predict the next chars given the prefix."""
    prefix = prefix.lower()
    state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    if is_lstm:
        state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx)
    output = [char_to_idx[prefix[0]]]
    for i in range(num_chars + len(prefix)):
        X = nd.array([output[-1]], ctx=ctx)
        if is_lstm:
            Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params)
        else:
            Y, state_h = rnn(get_inputs(X), state_h, *params)
        if i < len(prefix) - 1:
            next_input = char_to_idx[prefix[i + 1]]
        else:
            next_input = int(Y[0].argmax(axis=1).asscalar())
        output.append(next_input)
    return ''.join([idx_to_char[i] for i in output])
def sample(prefix, num_chars, temperature=1.0):
    string = prefix
    prefix_numerical = [character_dict[char] for char in prefix]
    input = one_hots(prefix_numerical)
    sample_state = nd.zeros(shape=(1, num_hidden), ctx=ctx)  # fixed: original was missing the `=` in `shape=`
    for i in range(num_chars):
        outputs, sample_state = simple_rnn(input, sample_state,
                                           temperature=temperature)
        choice = np.random.choice(77, p=outputs[-1][0].asnumpy())  # 77: presumably the vocabulary size
        string += character_list[choice]
        input = one_hots([choice])
    return string  # fixed: the original never returned the sampled string
def smooth(label, classes, eta=0.1):
    if isinstance(label, nd.NDArray):
        label = [label]
    smoothed = []
    for l in label:
        ind = l.astype('int')
        res = nd.zeros((ind.shape[0], classes), ctx=l.context)
        res += eta / classes
        res[nd.arange(ind.shape[0], ctx=l.context), ind] = 1 - eta + eta / classes
        smoothed.append(res)
    return smoothed
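# Hedged usage sketch: smooth a batch of 4 hard labels over 10 classes. With
# eta=0.1, each row holds 0.91 at the true class and 0.01 elsewhere.
hard = nd.array([0, 3, 9, 3])
print(smooth(hard, 10)[0])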
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices."""
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    num_pos = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pos_samples = (ct > 0)
        num_pos.append(pos_samples.sum())
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1:
        # no positive samples found, return dummy losses
        return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

    # compute element-wise cross entropy loss and sort, then perform
    # negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                            nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only applies to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

    return sum_losses, cls_losses, box_losses
def _build_vocab(data_name, train_dataset, test_dataset):
    all_token = []
    max_len = 0
    for i, line in enumerate(train_dataset):
        train_dataset[i][0] = _clean_str(line[0], data_name)
        line = train_dataset[i][0].split()
        max_len = max_len if max_len > len(line) else len(line)
        all_token.extend(line)
    for i, line in enumerate(test_dataset):
        test_dataset[i][0] = _clean_str(line[0], data_name)
        line = test_dataset[i][0].split()
        max_len = max_len if max_len > len(line) else len(line)
        all_token.extend(line)
    vocab = nlp.Vocab(nlp.data.count_tokens(all_token))
    vocab.set_embedding(nlp.embedding.create(
        'Word2Vec', source='GoogleNews-vectors-negative300'))
    for word in vocab.embedding._idx_to_token:
        if (vocab.embedding[word] == nd.zeros(300)).sum() == 300:
            vocab.embedding[word] = nd.random.normal(-1.0, 1.0, 300)
    vocab.embedding['<unk>'] = nd.zeros(300)
    vocab.embedding['<pad>'] = nd.zeros(300)
    vocab.embedding['<bos>'] = nd.zeros(300)
    vocab.embedding['<eos>'] = nd.zeros(300)
    print('maximum length (in tokens): ', max_len)
    return vocab, max_len
def set_embedding(self, *embeddings):
    """Attaches one or more embeddings to the indexed text tokens.

    Parameters
    ----------
    embeddings : None or tuple of :class:`gluonnlp.embedding.TokenEmbedding` instances
        The embedding(s) to be attached to the indexed tokens. If a tuple of
        multiple embeddings is provided, their embedding vectors are
        concatenated for the same token.
    """
    if len(embeddings) == 1 and embeddings[0] is None:
        self._embedding = None
        return

    for embs in embeddings:
        assert isinstance(embs, emb.TokenEmbedding), \
            'The argument `embeddings` must be an instance or a list of instances of ' \
            '`gluonnlp.embedding.TokenEmbedding`.'

    assert all([embs.unknown_token for embs in embeddings]) or \
        all([not embs.unknown_token for embs in embeddings]), \
        'Either all or none of the TokenEmbeddings must have an ' \
        'unknown_token set.'

    new_embedding = emb.TokenEmbedding(self.unknown_token, allow_extend=False)
    new_embedding._token_to_idx = self.token_to_idx
    new_embedding._idx_to_token = self.idx_to_token

    new_vec_len = sum(embs.idx_to_vec.shape[1] for embs in embeddings
                      if embs and embs.idx_to_vec is not None)
    new_idx_to_vec = nd.zeros(shape=(len(self), new_vec_len))

    col_start = 0
    # Concatenate all the embedding vectors in embedding.
    for embs in embeddings:
        if embs and embs.idx_to_vec is not None:
            col_end = col_start + embs.idx_to_vec.shape[1]
            # Concatenate vectors of the unknown token.
            new_idx_to_vec[0, col_start:col_end] = embs.idx_to_vec[0]
            new_idx_to_vec[1:, col_start:col_end] = embs[self._idx_to_token[1:]]
            col_start = col_end

    new_embedding._idx_to_vec = new_idx_to_vec
    self._embedding = new_embedding
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1, ctx=None):
    """ An implementation of col2im based on fancy indexing and np.add.at """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = nd.zeros((N, C, H_padded, W_padded), dtype=cols.dtype, ctx=ctx)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride, ctx=ctx)
    cols_reshaped = cols.reshape((C * field_height * field_width, -1, N))
    cols_reshaped = cols_reshaped.transpose((2, 0, 1))
    # The for loop is probably a bottleneck, but cannot be avoided without an
    # nd.add.at function
    # for l in nd.arange(cols.shape[1]):
    #     x_padded[:, k, i[:, l], j[:, l]] += cols_reshaped[:, :, l]
    for col in nd.arange(cols.shape[0], ctx=ctx):
        x_padded[:, k[col], i[col, :], j[col, :]] += cols_reshaped[:, col, :]
    # np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]
def __init__(self, filtersize=(5, 5, 3, 32), stride=(2, 2), ctx=mx.cpu(),
             dtype='float32'):
    '''
    Constructor for a Convolution layer.

    Parameters
    ----------
    filtersize : 4-tuple with values (h,w,d,n), where
        h = filter height
        w = filter width
        d = filter depth
        n = number of filters = number of outputs

    stride : 2-tuple (h,w), where
        h = step size for filter application in vertical direction
        w = step size in horizontal direction

    ctx : mxnet.context.Context
        device used for all mxnet.ndarray operations

    dtype : string ('float32' | 'float64')
        dtype used for all mxnet.ndarray operations
        (mxnet default is 'float32', 'float64' supported for easier
        comparison with numpy)
    '''
    Module.__init__(self)

    self.fh, self.fw, self.fd, self.n = filtersize
    self.stride = stride

    # context sensitive variables
    self.ctx = ctx
    self.W = nd.random.normal(0, 1. / (self.fh * self.fw * self.fd) ** .5,
                              shape=filtersize, ctx=ctx, dtype=dtype)
    self.B = nd.zeros([self.n], ctx=ctx, dtype=dtype)
    self.Y = None
    self.Z = None

    # precision:
    self.dtype = dtype
# Deals with only one random variable
import mxnet as mx
from mxnet import nd
import matplotlib
from matplotlib import pyplot as plt

num = 3000
probabilities = nd.ones(6) / 6
rolls = nd.sample_multinomial(probabilities, shape=(num))

counts = nd.zeros((6, num))
totals = nd.zeros(6)

# Count the occurrences of each face after every roll
for i, roll in enumerate(rolls):
    totals[int(roll.asscalar())] += 1
    counts[:, i] = totals

# Estimate the probability at each step by dividing the counts by the
# number of rolls so far (1..num)
x = nd.arange(num).reshape((1, num)) + 1
estimates = counts / x
# print(estimates[:, 0])
# print(estimates[:, 1])
# print(estimates[:, num - 1])

# Plot the estimated probability of each face over time
plt.plot(estimates[0, :].asnumpy(), label="Estimated P(die=1)")
plt.plot(estimates[1, :].asnumpy(), label="Estimated P(die=2)")
from mxnet import nd

# basics of ndarray
# 1. create ndarrays
x = nd.arange(12)
# print("the shape of x: %d" % x.shape)
# print("The size of x: %d" % x.size)
X = x.reshape((3, 4))
Z = nd.zeros((2, 3, 4))
O = nd.ones((3, 4))
Y = nd.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
R = nd.random.normal(0, 1, shape=(3, 4))

# 2. operate on ndarrays
Add = X + Y
Mul = X * Y
Div = X / Y
Exp = Y.exp()  # same as np.exp(Y)
Dot = nd.dot(X, Y.T)
CCT_0, CCT_1 = nd.concat(X, Y, dim=0), nd.concat(X, Y, dim=1)
Eq = X == Y
Sum = X.sum()  # same as np.sum(X)
Tran_scal = X.norm().asscalar()

# 3. broadcasting
A = nd.arange(3).reshape((3, 1))
B = nd.arange(2).reshape((1, 2))
broad_add = A + B

# 4. indexing
sub_array = X[1:3]
colors = ['blue', 'green', 'red', 'black', 'magenta']
# plt.imshow(nd.ones((n, n, 3)).asnumpy())
anchors = boxes[20, 20, :, :]
for i in range(anchors.shape[0]):
    plt.gca().add_patch(box_to_rect(anchors[i, :] * n, colors[i]))
# plt.show()

from mxnet.gluon import nn

def class_predictor(num_anchors, num_classes):
    """return a layer to predict classes"""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

cls_pred = class_predictor(5, 10)
cls_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
y = cls_pred(x)
# print(y.shape)

def box_predictor(num_anchors):
    """return a layer to predict delta locations"""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

box_pred = box_predictor(10)
box_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
y = box_pred(x)
# print(y.shape)

def down_sample(num_filters):
    """stack two Conv-BatchNorm-ReLU blocks and a pooling layer to halve the
    feature size (body completed for runnability, following the standard
    gluon SSD tutorial pattern)"""
    out = nn.HybridSequential()
    for _ in range(2):
        out.add(nn.Conv2D(num_filters, 3, strides=1, padding=1))
        out.add(nn.BatchNorm(in_channels=num_filters))
        out.add(nn.Activation('relu'))
    out.add(nn.MaxPool2D(2))
    return out
'''-----------------------------------------------------'''
# Build the model from scratch
# Load the data
import sys
sys.path.append('..')
from utils import load_data_from_mnist

batch_size = 256
train_data, test_data = load_data_from_mnist(batch_size)

# Define the model
import mxnet as mx

try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except:
    ctx = mx.cpu()

# Define the parameters (LeNet)
weight_scale = .01

# output channels = 20, kernel = (5,5)
W1 = nd.random_normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)

# output channels = 50, kernel = (3,3)
W2 = nd.random_normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)  # fixed: original read `b1 = nd,zeros(...)`, a typo that also re-bound b1
n_inputs = 200
true_w = nd.ones(shape=(n_inputs, 1)) * 0.01
true_b = 0.05

features = nd.random.normal(shape=(n_test + n_train, n_inputs))
labels = nd.dot(features, true_w) + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

# Initialize the model parameters
w = nd.random.normal(scale=0.01, shape=(n_inputs, 1))
b = nd.zeros(shape=(1,))
w.attach_grad()
b.attach_grad()

# Define the model
def net(X):
    return nd.dot(X, w) + b

# Define the L2-norm penalty term
def l2_penalty(w):
    # body completed for runnability (the standard d2l definition)
    return (w ** 2).sum() / 2
def run_epoch(e, network, dataloader, trainer, log_dir, print_name,
              update_cnn, update_metric, save_cnn):
    '''
    Run one epoch to train or test the SSD network

    Parameters
    ----------
    e: int
        The epoch number
    network: nn.Gluon.HybridSequential
        The SSD network
    dataloader: gluon.data.DataLoader
        The train or testing dataloader that is wrapped around the iam_dataset
    trainer: gluon.Trainer
        The trainer used to update the network parameters
    log_dir: str
        The directory to store the log files for mxboard
    print_name: str
        Name to print for associating with the data. Usually this will be
        "train" or "test"
    update_cnn: bool
        Whether or not the CNN should be updated. Should only be True for
        the training data
    update_metric: bool
        Whether or not to update the classification and box metrics
    save_cnn: bool
        Whether or not to save the CNN

    Returns
    -------
    epoch_loss: float
        The average loss over this epoch
    '''
    total_losses = [nd.zeros(1, ctx_i) for ctx_i in ctx]
    for i, (X, Y) in enumerate(dataloader):
        X = gluon.utils.split_and_load(X, ctx)
        Y = gluon.utils.split_and_load(Y, ctx)

        with autograd.record():
            losses = []
            for x, y in zip(X, Y):
                default_anchors, class_predictions, box_predictions = network(x)
                box_target, box_mask, cls_target = network.training_targets(
                    default_anchors, class_predictions, y)
                # losses
                loss_class = cls_loss(class_predictions, cls_target)
                loss_box = box_loss(box_predictions, box_target, box_mask)
                # sum all losses
                loss = loss_class + loss_box
                losses.append(loss)

        if update_cnn:
            for loss in losses:
                loss.backward()
            step_size = 0
            for x in X:
                step_size += x.shape[0]
            trainer.step(step_size)

        for index, loss in enumerate(losses):
            total_losses[index] += loss.mean() / len(ctx)

        if update_metric:
            cls_metric.update([cls_target],
                              [nd.transpose(class_predictions, (0, 2, 1))])
            box_metric.update([box_target], [box_predictions * box_mask])

        if i == 0 and e % send_image_every_n == 0 and e > 0:
            cls_probs = nd.SoftmaxActivation(
                nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
            output_image, number_of_bbs = generate_output_image(
                box_predictions, default_anchors, cls_probs, box_target,
                box_mask, cls_target, x, y)
            print("Number of predicted {} BBs = {}".format(print_name,
                                                           number_of_bbs))
            with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
                sw.add_image('bb_{}_image'.format(print_name), output_image,
                             global_step=e)

    total_loss = 0
    for loss in total_losses:
        total_loss += loss.asscalar()  # fixed: original overwrote instead of accumulating
    epoch_loss = float(total_loss) / len(dataloader)

    with SummaryWriter(logdir=log_dir, verbose=False, flush_secs=5) as sw:
        if update_metric:
            name1, val1 = cls_metric.get()
            name2, val2 = box_metric.get()
            sw.add_scalar(name1, {"test": val1}, global_step=e)
            sw.add_scalar(name2, {"test": val2}, global_step=e)
        sw.add_scalar('loss', {print_name: epoch_loss}, global_step=e)

    if save_cnn and e % save_every_n == 0 and e > 0:
        network.save_parameters("{}/{}".format(checkpoint_dir, checkpoint_name))
    return epoch_loss
def set_ctx(self):
    try:
        self.__ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=self.__ctx)  # probe whether a GPU is usable
    except:
        self.__ctx = mx.cpu()
import mxnet as mx
import numpy as np
import random
import time
from mxnet import nd, init                 # added: `nd` and `init` were used but never imported
from mxnet.gluon import data as gdata, nn  # added: `gdata` and `nn` were used but never imported

dirTrain = 'D:\\image\\txt\\2l\\'
ctx = mx.gpu()
f = np.loadtxt(dirTrain + "image_train_features.txt", delimiter=' ')
l = np.loadtxt(dirTrain + "image_train_labels.txt", delimiter=' ')
features = nd.array(f).copyto(ctx)
labels = nd.array(l).copyto(ctx)
labels_test = nd.zeros(labels.shape, ctx)
data_num = len(f)
batch_size = 500
dataset = gdata.ArrayDataset(features, labels)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

net = nn.Sequential()
net.add(nn.Dense(100, activation='relu'),
        nn.Dense(100, activation='relu'),
        nn.Dense(3))
net.initialize(init.Uniform(scale=20), ctx=ctx)
# 28 * 28 = 784 (each image is 28 x 28, i.e. 784 pixels, so the input layer has 784 units)
num_inputs = 784
# the output layer has 10 units (one per class in this image-classification problem)
num_outputs = 10

# shape=(num_inputs, num_outputs) -> 784 x 10
# initialize W from a normal distribution with mean 0 and std 0.01
# (any initial values would do; gradient descent will adjust them)
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs), ctx=mx.gpu())

# y = w(1)1 * x1 + w(1)2 * x2 + ... + w(1)784 * x784 + b(1)
# initialize the biases to 0 (length 10: b(1), b(2), ..., b(10))
b = nd.zeros(num_outputs, ctx=mx.gpu())

# attach gradients to W and b
W.attach_grad()
b.attach_grad()

## ----- X.sum(axis=1, keepdims=True) --------------------------------
## ----- a small example of summing ----------------------------------
X = nd.array([[1, 2, 3], [4, 5, 6]], ctx=mx.gpu())
# sum along the columns:
# 1 2 3
# 4 5 6
def test(ctx, val_data):
    acc_top1.reset()
    acc_top5.reset()
    # L = gluon.loss.SoftmaxCrossEntropyLoss()
    # L2 = gluon.loss.L2Loss(weight=1.0)
    # L2.initialize()
    num_test_iter = len(val_data)
    val_d_loss = 0
    val_epoch_loss = 0
    val_mse_loss = 0
    val_cheat_loss = 0
    val_pre_loss = 0
    for i, batch in enumerate(val_data):
        val_output = []
        data, label = batch_fn(batch, ctx)
        for X, y in zip(data, label):
            X1 = takeT(X)
            X2 = takeT(X, T=opt.predict_T)
            X1 = X1.reshape((-1,) + X1.shape[2:])  # for reconstruction, fed to g
            X2 = X2.reshape((-1,) + X2.shape[2:])  # for prediction, fed to d

            pred, latel = net(X1.astype(opt.dtype, copy=False))
            val_output.append(pred)
            x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                         latel[1].astype(opt.dtype, copy=False),
                         latel[2].astype(opt.dtype, copy=False))

            # AutoGrad train d
            # with ag.record():
            x_hat_reshape = nd.transpose(data=x_hat, axes=(0, 2, 1, 3, 4))
            x_hat_reshape = x_hat_reshape.reshape((-1,) + x_hat_reshape.shape[2:])
            x2_reshape = nd.transpose(data=X2, axes=(0, 2, 1, 3, 4))
            x2_reshape = x2_reshape.reshape((-1,) + x2_reshape.shape[2:])
            d_pred_real = net2(x2_reshape.astype(opt.dtype, copy=False))
            d_pred_fake = net2(x_hat_reshape.astype(opt.dtype, copy=False))
            loss_d = loss_fn(d_pred_real, nd.ones(shape=(batch_size), ctx=ctx[0])) + \
                loss_fn(d_pred_fake, nd.zeros(shape=(batch_size), ctx=ctx[0]))

            # train g
            loss_g_l2 = loss_l2(x_hat, X1.astype(opt.dtype, copy=False)) + \
                loss_l2(x_hat, X2.astype(opt.dtype, copy=False))
            loss_g_cheat = loss_fn(d_pred_fake,
                                   nd.ones(shape=(batch_size), ctx=ctx[0]))  # net2(x_hat_reshape))
            loss_g_ft = loss_fn(pred, y.astype(opt.dtype, copy=False))
            loss_g = loss_g_l2 + loss_g_cheat + loss_g_ft

            val_epoch_loss += loss_g.mean().asscalar() / len(label)
            val_d_loss += loss_d.mean().asscalar() / len(label)
            val_mse_loss += loss_g_l2.mean().asscalar() / len(label)
            val_cheat_loss += loss_g_cheat.mean().asscalar() / len(label)
            val_pre_loss += loss_g_ft.mean().asscalar() / len(label)

        acc_top1.update(label, val_output)
        acc_top5.update(label, val_output)

    _, top1 = acc_top1.get()
    _, top5 = acc_top5.get()
    val_dloss = val_d_loss / num_test_iter
    val_loss = val_epoch_loss / num_test_iter
    loss_mse = val_mse_loss / num_test_iter
    loss_pre = val_pre_loss / num_test_iter
    loss_cheat = val_cheat_loss / num_test_iter
    return (top1, top5, val_loss, loss_mse, loss_pre, loss_cheat, val_dloss)
with ag.record():
    """ making x_hat: feeding X1 into net and net1 """
    _, latel = net(X1.astype(opt.dtype, copy=False))
    # output.append(pred)
    x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                 latel[1].astype(opt.dtype, copy=False),
                 latel[2].astype(opt.dtype, copy=False))

    # train d
    x_hat_reshape = nd.transpose(data=x_hat, axes=(0, 2, 1, 3, 4))
    x_hat_reshape = x_hat_reshape.reshape((-1,) + x_hat_reshape.shape[2:])
    x2_reshape = nd.transpose(data=X2, axes=(0, 2, 1, 3, 4))
    x2_reshape = x2_reshape.reshape((-1,) + x2_reshape.shape[2:])

    """ train discriminator """
    d_pred_real = net2(x2_reshape.astype(opt.dtype, copy=False))     # feeding real X2
    d_pred_fake = net2(x_hat_reshape.astype(opt.dtype, copy=False))  # feeding fake x_hat
    loss_d = loss_fn(d_pred_real, nd.ones(shape=(batch_size), ctx=ctx[0])) + \
        loss_fn(d_pred_fake, nd.zeros(shape=(batch_size), ctx=ctx[0]))

loss_d.backward()
trainer_d.step(batch_size, ignore_stale_grad=True)

# train g
with ag.record():
    """ generating x_hat: feeding X1 into net to predict X2 """
    pred, latel = net(X1.astype(opt.dtype, copy=False))
    output.append(pred)
    x_hat = net1(latel[0].astype(opt.dtype, copy=False),
                 latel[1].astype(opt.dtype, copy=False),
                 latel[2].astype(opt.dtype, copy=False))

    """ reconstructing X1 and predicting X2 """
    loss_g_l2 = loss_l2(x_hat, X1.astype(opt.dtype, copy=False)) + \
        loss_l2(x_hat, X2.astype(opt.dtype, copy=False))

    """ cheat the discriminator """
    loss_g_cheat = loss_fn(d_pred_fake,
                           nd.ones(shape=(batch_size), ctx=ctx[0]))  # net2(x_hat_reshape))

    """ finetuning btw """
def dropout(X, drop_prob):
    # header restored for runnability (the standard d2l definition)
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob

X = nd.arange(16).reshape((2, 8))
print(dropout(X, 0))
print(dropout(X, 0.5))
print(dropout(X, 1))

# Define the model parameters
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

# Define the model
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X):
    X = X.reshape((-1, num_inputs))
    # remainder completed for runnability (the standard d2l definition):
    # apply dropout only while training
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3
def train_network(net, lr, input_shape, batch_size, train_path, test_path,
                  epoch, ctx):
    train_data, val_data = prepare_data(train_path, test_path, input_shape,
                                        batch_size)
    for X, y in train_data:
        print("X shape {}, y shape {}".format(X.shape, y.shape))  # fixed broken format string
        break
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    net.summary(nd.zeros(shape=(1, 3) + input_shape, ctx=ctx))
    net.hybridize()

    lr_sched = mx.lr_scheduler.FactorScheduler(2000, factor=0.6, base_lr=1.0)
    optim = mx.optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0001,
                             lr_scheduler=lr_sched)
    trainer = gluon.Trainer(net.collect_params(), optim)
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    train_acc_meter = mx.metric.Accuracy()
    train_loss_meter = mx.metric.CrossEntropy()
    hybridized = False

    with mxboard.SummaryWriter(logdir="./vgg_logs", flush_secs=60) as sw:
        for ep in range(1, epoch + 1):
            epoch_start = timeit.default_timer()
            train_acc_meter.reset()
            train_loss_meter.reset()
            print("Current Learning Rate: {}".format(trainer.learning_rate))
            for it, (data, label) in enumerate(train_data):
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx)
                with autograd.record():
                    output = net(data)
                    loss_val = loss_fn(output, label)
                loss_val.backward()
                trainer.step(data.shape[0])
                train_acc_meter.update(preds=[output], labels=[label])
                train_loss_meter.update(labels=[label],
                                        preds=[nd.softmax(output, axis=1)])
                if it % 10 == 0:
                    print("Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}"
                          .format(ep, it, train_loss_meter.get()[1],
                                  train_acc_meter.get()[1]))
            nd.waitall()
            epoch_stop = timeit.default_timer()
            val_loss, val_acc = evaluate(val_data, net, ctx)
            nd.waitall()
            print("Epoch {}, Training time {}, learning rate {}, "
                  "validation loss {:.5f}, validation acc {:.5f}"
                  .format(ep, epoch_stop - epoch_start, trainer.learning_rate,
                          val_loss, val_acc))
            sw.add_scalar(tag="train_loss", value=train_loss_meter.get()[1],
                          global_step=ep)
            sw.add_scalar(tag="train_acc", value=train_acc_meter.get()[1],
                          global_step=ep)
            sw.add_scalar(tag="val_acc", value=val_acc, global_step=ep)
            sw.add_scalar(tag="val_loss", value=val_loss, global_step=ep)
            sw.add_scalar(tag="learning_rate", value=trainer.learning_rate,
                          global_step=ep)
            if not hybridized:
                sw.add_graph(net)
                hybridized = True
            if ep % 2 == 0:
                net.export("vgg_models/vgg", ep)
    return net
def get_linear_dense_input(self, input_sample):
    y = nd.zeros(shape=(input_sample.shape[0], 1), ctx=self.ctx)
    for single_feat in self.feature_dict['dense']:
        x = input_sample[:, single_feat.feat_name].reshape((-1, 1))
        y = nd.concat(y, x, dim=1)
    return y[:, 1:]
def render(self, bg, mode, pascal_rate=0.0, render_rate=1.0):
    '''
    Parameters
    ----------
    bg: mxnet.ndarray(4D)
        background array, dimension = bs * channel * h * w
    mode: str, {'train', 'valid'}
        use training dataset or not
    pascal_rate: float
        probability of using the pascal_3D dataset
    render_rate: float
        probability that an image contains a car

    Returns
    ----------
    img_batch: mxnet.ndarray(4D)
        same as bg input
    label_batch: mxnet.ndarray(3D)
        bs * object * [cls, y(0~1), x(0~1), h(0~1), w(0~1), r(+-pi),
        all labels prob]
    '''
    bs = len(bg)
    ctx = self.ctx
    mask = nd.zeros((bs, 3, self.h, self.w), ctx=ctx)
    img_batch = nd.zeros((bs, 3, self.h, self.w), ctx=ctx)
    label_batch = nd.ones((bs, 1, 6 + self.num_cls), ctx=ctx) * (-1)

    for i in range(bs):
        if np.random.rand() > render_rate:
            continue

        r1 = np.random.uniform(low=0.9, high=1.1)
        if np.random.rand() < pascal_rate:
            pil_img, r_box_l, r_box_t, r_box_r, r_box_b, r, \
                img_cls, label_distribution = self._render_pascal(mode, r1)
        else:
            pil_img, r_box_l, r_box_t, r_box_r, r_box_b, r, \
                img_cls, label_distribution = self._render_png(mode, r1)

        r_box_w = r_box_r - r_box_l  # r_box_xx means after rotation
        r_box_h = r_box_b - r_box_t  # r_box_xx means after rotation

        # -------------------- move -------------------- #
        paste_x = np.random.randint(low=int(-r_box_l - 0.3 * r_box_w),
                                    high=int(self.w - r_box_l - 0.7 * r_box_w))
        paste_y = np.random.randint(low=int(-r_box_t - 0.3 * r_box_h),
                                    high=int(self.h - r_box_t - 0.7 * r_box_h))

        box_y = (r_box_b + r_box_t) / 2. + paste_y
        box_x = (r_box_r + r_box_l) / 2. + paste_x
        box_h = float(r_box_b - r_box_t)
        box_w = float(r_box_r - r_box_l)
        # ----------------------------------------------- #
        tmp = PIL.Image.new('RGBA', (self.w, self.h))
        tmp.paste(pil_img, (paste_x, paste_y))

        fg = yolo_gluon.pil_rgb_2_rgb_ndarray(tmp, augs=self.augs)
        img_batch[i] = fg.as_in_context(ctx)

        m = yolo_gluon.pil_mask_2_rgb_ndarray(tmp.split()[-1])
        mask[i] = m.as_in_context(ctx)

        label = nd.array([[
            img_cls,
            box_y / self.h,
            box_x / self.w,
            box_h / self.h,
            box_w / self.w,
            r
        ]])
        label = nd.concat(label, label_distribution, dim=-1)
        label_batch[i] = label

    ####################################################################
    img_batch = ((bg / 255.) * (1 - mask) + img_batch * mask)
    img_batch = nd.clip(img_batch, 0, 1)  # 0~1 (batch_size, channels, h, w)
    return img_batch, label_batch
        rect.xy[1], labels[i],
        va='center', ha='center', fontsize=9, color=text_color,
        bbox=dict(facecolor=color, lw=0))

bbox_scale = nd.array((w, h, w, h))
fig = plt.imshow(img)
# show_boxes(fig.axes, boxes[250, 250, :, :] * bbox_scale,
#            ['s=0.75,r=1', 's=0.5,r=1', 's=0.25,r=1', 's=0.75,r=2', 's=0.75,r=0.5'])
# plt.show()

ground_truth = nd.array([[0, 0.25, 0.1, 0.45, 0.42],
                         [1, 0.55, 0.1, 0.75, 0.4]])
anchor = nd.array([[0, 0.1, 0.2, 0.3], [0.2, 0.1, 0.5, 0.9],
                   [0.6, 0.1, 0.8, 0.5], [0.55, 0.3, 0.7, 0.5],
                   [0.65, 0.15, 0.8, 0.9]])

show_boxes(fig.axes, ground_truth[:, 1:] * bbox_scale,
           labels=['dog', 'cat'], colors='k')
show_boxes(fig.axes, anchor * bbox_scale, ['0', '1', '2', '3', '4'])
# plt.show()

labels = contrib.nd.MultiBoxTarget(anchor.expand_dims(axis=0),
                                   ground_truth.expand_dims(axis=0),
                                   nd.zeros((1, 3, 5)))
import d2lzh as d2l
from mxnet import autograd, nd

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_input = 784
num_output = 10

W = nd.random.normal(scale=0.01, shape=(num_input, num_output))
b = nd.zeros(num_output)
W.attach_grad()
b.attach_grad()

# softmax: exponentiate each matrix entry, then normalize each row to sum to 1
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)

def net(X):
    return softmax(nd.dot(X.reshape((-1, num_input)), W) + b)
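# Hedged continuation sketch: the cross-entropy loss and accuracy helpers
# that usually pair with this net in d2l-style code. nd.pick selects the
# probability the model assigns to each true label.
def cross_entropy(y_hat, y):
    return -nd.pick(y_hat, y).log()

def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()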
def _three():
    return (_one((num_inputs, num_hiddens)),
            _one((num_hiddens, num_hiddens)),
            nd.zeros(num_hiddens, ctx=ctx))
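# Hedged sketch of the `_one` helper that _three() relies on in d2l-style
# parameter code: a normal-initialized NDArray on the target device
# (gradients are attached later by the caller).
def _one(shape):
    return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)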
# header and state setup restored for runnability; the parameter unpacking
# follows the gate ordering of get_parameters() above (an assumption, since
# the original header was cut off)
def lstm(_inputs, initial_state_h, initial_state_c, *parameters):
    H = initial_state_h  # hidden state
    C = initial_state_c  # memory cell
    (W_xi, W_hi, b_i,
     W_xf, W_hf, b_f,
     W_xo, W_ho, b_o,
     W_xc, W_hc, b_c,
     W_hy, b_y) = parameters

    _outputs = []
    for X in _inputs:
        # compute INPUT gate from input and last/initial hidden state
        input_gate = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        # compute FORGET gate from input and last/initial hidden state
        forget_gate = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        # compute OUTPUT gate from input and last/initial hidden state
        output_gate = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        # compute memory cell candidate from input and last/initial hidden state
        memory_cell_candidate = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        # compute memory cell from last memory cell and memory cell candidate
        C = forget_gate * C + input_gate * memory_cell_candidate
        # compute hidden state from output gate and memory cell
        H = output_gate * nd.tanh(C)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)
    return _outputs, H, C

if __name__ == '__main__':
    initial_state_h = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    initial_state_c = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    dump_data = [nd.random_normal(shape=(config.batch_size, config.input_dim))
                 for _ in range(config.num_steps)]
    parameters = get_parameters()
    _outputs, final_state, memory_cell = lstm(dump_data, initial_state_h,
                                              initial_state_c, *parameters)
    print(_outputs, final_state, memory_cell)
def transform_preds(coords, center, scale, output_size):
    target_coords = nd.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2].asnumpy(), trans)
    return target_coords
########################
# run the model and generate sample text
########################
epochs = 2000
moving_loss = 0.
learning_rate = 2.0

# state = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
for e in range(epochs):
    ############################
    # Attenuate the learning rate by a factor of 2 every 100 epochs.
    ############################
    if ((e + 1) % 100 == 0):
        learning_rate = learning_rate / 2.0
    h = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    c = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    for i in range(num_batches):
        data_one_hot = train_data[i]
        label_one_hot = train_label[i]
        with autograd.record():
            outputs, h, c = gru_rnn(data_one_hot, h, c)
            loss = average_ce_loss(outputs, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)

        ##########################
        # Keep a moving average of the losses
        ##########################
        if (i == 0) and (e == 0):
            moving_loss = nd.mean(loss).asscalar()
        else:
            # else-branch restored from the usual moving-average pattern in
            # these notebooks (an assumption; the original was cut off here)
            moving_loss = .99 * moving_loss + .01 * nd.mean(loss).asscalar()
def check_ndarray_zeros():
    a = nd.zeros(shape=LARGE_X)
    assert a[-1] == 0
    assert a.shape == (LARGE_X,)
    assert a.size == LARGE_X
def one_hots(numerical_list, vocab_size=vocab_size):
    result = nd.zeros((len(numerical_list), vocab_size), ctx=ctx)
    for i, idx in enumerate(numerical_list):
        result[i, idx] = 1.0
    return result
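# Quick usage check: encode the token ids [0, 2, 1]; each row of the result
# is a one-hot vector of length vocab_size.
print(one_hots([0, 2, 1]))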
def label_transform(label, classes):
    ind = label.astype('int')
    res = nd.zeros((ind.shape[0], classes), ctx=label.context)
    res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
    return res
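# Quick usage check: turn 3 integer labels into one-hot rows over 5 classes.
lbl = nd.array([0, 4, 2])
print(label_transform(lbl, 5))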
def init_rnn_state(batch_size, num_hiddens, ctx):
    return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx),  # initial hidden state
            nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx))  # initial memory cell
def check_ones_like():
    a = nd.zeros(LARGE_X)
    b = nd.ones_like(a)
    assert b[-1] == 1
    assert b.shape == a.shape
        exnd = nds_scale[__pw][:, :exin]
        nds_target[__pw] = nd.concat(*[exnd, nds_scale[__pw]], dim=1)
    else:
        exout = int(cout * expand)
        exin = int(cin * expand)
        exnd_0 = nds_scale[__pw][:exout]
        tmp = nd.concat(*[exnd_0, nds_scale[__pw]], dim=0)
        exnd_1 = tmp[:, :exin]
        nds_target[__pw] = nd.concat(*[exnd_1, tmp], dim=1)
else:
    nds_target[__pw] = nds_scale[__w]

# print('---------------------------')
# print('{} : {}'.format(__w, nds_scale[__w].shape))
# print('{} : {}'.format(__pw, nds_target[__pw].shape))

nds_target['arg:fc7_bias'] = nd.zeros([1000, 1])
nd.save('mobilenet_v2_{}-0000.params'.format(scale), nds_target)
def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim,
                          learning_rate, clipping_norm, batch_size,
                          pred_period, pred_len, seqs, get_params, get_inputs,
                          ctx, corpus_indices, idx_to_char, char_to_idx,
                          is_lstm=False):
    """Train an RNN model and predict the next item in the sequence."""
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(1, epochs + 1):
        # If consecutive sampling is used, in the same epoch, the hidden state
        # is initialized only at the beginning of the epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
        train_loss, num_examples = 0, 0
        for data, label in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # If random sampling is used, the hidden state has to be
            # initialized for each mini-batch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx)
            with autograd.record():
                # outputs shape: (batch_size, vocab_size)
                if is_lstm:
                    outputs, state_h, state_c = rnn(get_inputs(data), state_h,
                                                    state_c, *params)
                else:
                    outputs, state_h = rnn(get_inputs(data), state_h, *params)
                # Let t_ib_j be the j-th element of the mini-batch at time i.
                # label shape: (batch_size * num_steps)
                # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ].
                label = label.T.reshape((-1,))
                # Concatenate outputs:
                # shape: (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                # Now outputs and label are aligned.
                loss = softmax_cross_entropy(outputs, label)
            loss.backward()
            grad_clipping(params, clipping_norm, ctx)
            SGD(params, learning_rate)
            train_loss += nd.sum(loss).asscalar()
            num_examples += loss.size
        if e % pred_period == 0:
            print("Epoch %d. Training perplexity %f" %
                  (e, exp(train_loss / num_examples)))
            for seq in seqs:
                print(' - ', predict_rnn(rnn, seq, pred_len, params,
                                         hidden_dim, ctx, idx_to_char,
                                         char_to_idx, get_inputs, is_lstm))
            print()
def init_momentum_states():
    v_w = nd.zeros((features.shape[1], 1))
    v_b = nd.zeros(1)
    return (v_w, v_b)
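# Hedged sketch of the momentum update these states feed, following the
# d2l-style trainer_fn(params, states, hyperparams) signature that
# train_ch7 above expects.
def sgd_momentum(params, states, hyperparams):
    for p, v in zip(params, states):
        v[:] = hyperparams['momentum'] * v + hyperparams['lr'] * p.grad
        p[:] -= v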
        x = self.models(x)
        x = self.margin_inner_product(x, label)
        return self.softmax_loss(x, label)

    def _main_net(self, layer_type, feature_dim, label_num):
        model = nn.Sequential()
        if layer_type == "20layer":
            model.add(
                CNNResidualBlock(64, 64, 1),
                CNNResidualBlock(128, 128, 2),
                CNNResidualBlock(256, 256, 4),
                CNNResidualBlock(512, 512, 1)
            )
        else:
            raise Exception("Unsupported layer type.")
        model.add(nn.Dense(feature_dim))
        return model


if __name__ == "__main__":
    margin_params = {"feature_dim": 512, "label_num": 6,
                     "lamb_iter": 0, "lamb_base": 1000, "lamb_gamma": 0.12,
                     "lamb_power": 1, "lamb_min": 10}
    margin_params["layer_type"] = "20layer"
    test = SphereFaceNet(512, 6, margin_params)
    print(test)  # converted from Python 2 `print test`
    test.initialize(ctx=mx.gpu())
    x = nd.random.uniform(shape=(2, 3, 112, 112), ctx=mx.gpu())
    label = nd.zeros([2], ctx=mx.gpu())
    print(test(x, label))
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# initialization
g_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
d_net.collect_params().initialize(mx.init.Xavier(), ctx=CTX)
g_trainer = gluon.Trainer(
    g_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA,
     'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam',
    {'learning_rate': LEARNING_RATE, 'beta1': BETA,
     'clip_gradient': CLIP_GRADIENT})
g_net.collect_params().zero_grad()
d_net.collect_params().zero_grad()

# define evaluation metric
metric = mx.metric.CustomMetric(facc)

# initialize labels
real_label = nd.ones(BATCH_SIZE, CTX)
fake_label = nd.zeros(BATCH_SIZE, CTX)

for epoch in range(NUM_EPOCHS):
    for i, (d, _) in enumerate(train_data):
        # update D
        data = d.as_in_context(CTX)
        noise = nd.normal(loc=0, scale=1, shape=(BATCH_SIZE, Z_DIM, 1, 1),
                          ctx=CTX)
        with autograd.record():
            # train with real image
            output = d_net(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label, ], [output, ])
            # train with fake image
            fake_image = g_net(noise)
def forward(self, input_data):
    ep1 = input_data[:, 0].asnumpy().astype(np.int).tolist()
    ep2 = input_data[:, 1].asnumpy().astype(np.int).tolist()
    x_sen = input_data[:, 2:DIMENSION * FIXED_WORD_LENGTH + 2].reshape(
        (input_data.shape[0], FIXED_WORD_LENGTH, DIMENSION))

    e1_start = DIMENSION * FIXED_WORD_LENGTH + 2
    e1_infobox = input_data[:, e1_start:e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
        (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH, DIMENSION))
    # (batch_size, INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH, 100)
    e2_start = e1_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION
    e2_infobox = input_data[:, e2_start:e2_start + INFOBOX_LENGTH * INFOBOX_VALUE_LENGTH * DIMENSION].reshape(
        (input_data.shape[0], INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH, DIMENSION))
    # (batch_size, INFOBOX_LENGTH, INFOBOX_VALUE_LENGTH, 100)

    h_sen = self.lstm(x_sen).expand_dims(axis=1)  # (batch_size, 60, 128)

    be1_mask = nd.zeros(h_sen.shape, ctx=CTX)
    aes_mask = nd.zeros(h_sen.shape, ctx=CTX)
    be2_mask = nd.zeros(h_sen.shape, ctx=CTX)
    be1_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)
    aes_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)
    be2_pad = nd.ones(h_sen.shape, ctx=CTX) * (-100)

    for i in range(x_sen.shape[0]):
        if ep1[i] == 0:
            ep1[i] += 1
            ep2[i] += 1
        be1_mask[i, :, :ep1[i], :] = 1
        be1_pad[i, :, :ep1[i], :] = 0
        aes_mask[i, :, ep1[i]:ep2[i], :] = 1
        aes_pad[i, :, ep1[i]:ep2[i], :] = 0
        be2_mask[i, :, ep2[i]:, :] = 1
        be2_pad[i, :, ep2[i]:, :] = 0

    be1 = h_sen * be1_mask
    aes = h_sen * aes_mask
    be2 = h_sen * be2_mask
    be1 = be1 + be1_pad
    aes = aes + aes_pad
    be2 = be2 + be2_pad

    o1 = self.pool(be1)  # (128, 1, 3, 128)
    o2 = self.pool(aes)
    o3 = self.pool(be2)
    y_sen = nd.concat(o1, o2, o3, dim=2)  # (128, 384)
    y_out = self.sen_out(y_sen)

    e1_infobox_list_all = nd.ones((e1_infobox.shape[0], e1_infobox.shape[1], 51, 1),
                                  ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 51, 1)
    e2_infobox_list_all = nd.ones((e1_infobox.shape[0], e2_infobox.shape[1], 51, 1),
                                  ctx=CTX)  # (batch_size, INFOBOX_LENGTH, 51, 1)
    for i in range(e1_infobox.shape[0]):
        e1 = self.conv1(x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
                        e1_infobox[i].expand_dims(axis=1))
        # e1_p = self.pool(e1)
        e1_infobox_list_all[i] = e1.reshape((e1.shape[1], e1.shape[2], e1.shape[3]))
        e2 = self.conv2(x_sen[i].expand_dims(axis=0).expand_dims(axis=1),
                        e2_infobox[i].expand_dims(axis=1))
        # e2_p = self.pool(e2)
        e2_infobox_list_all[i] = e2.reshape((e2.shape[1], e2.shape[2], e2.shape[3]))

    e1_infobox_list_all = e1_infobox_list_all.reshape(
        (e1_infobox.shape[0], e1_infobox.shape[1], -1))  # (batch_size, INFOBOX_LENGTH, 51)
    e2_infobox_list_all = e2_infobox_list_all.reshape(
        (e2_infobox.shape[0], e2_infobox.shape[1], -1))  # (batch_size, INFOBOX_LENGTH, 51)
    e1_infobox_list_all_new = self.dense1(e1_infobox_list_all)
    e2_infobox_list_all_new = self.dense2(e2_infobox_list_all)

    # g1 = nd.softmax(self.att(e1_infobox_list_all), axis=2)  # (batch_size, INFOBOX_LENGTH, 1)
    # g2 = nd.softmax(self.att(e2_infobox_list_all), axis=2)  # (batch_size, INFOBOX_LENGTH, 1)
    # g1_att = nd.batch_dot(nd.transpose(g1, axes=(0, 2, 1)), e1_infobox_list_all)  # (batch_size, 1, 64)
    # g2_att = nd.batch_dot(nd.transpose(g2, axes=(0, 2, 1)), e2_infobox_list_all)  # (batch_size, 1, 64)
    # g1_att = g1_att.reshape((g1_att.shape[0], -1))  # (batch_size, 64)
    # g2_att = g2_att.reshape((g2_att.shape[0], -1))  # (batch_size, 64)

    # (batch_size, 128)
    e_infobox_list_all_att = nd.concat(e1_infobox_list_all_new,
                                       e2_infobox_list_all_new, dim=1)
    y_infobox = self.infobox_out(e_infobox_list_all_att)

    # h_sen_new = self.lstm_out(h_sen.expand_dims(1))
    # h_sen_new = h_sen_new.reshape((h_sen_new.shape[0], -1))  # (batch_size, 128)

    # (batch_size, 256)
    h_sen_infobox = nd.concat(y_out, y_infobox, dim=1)  # (128, 384) (128, 768)
    y = self.output(h_sen_infobox)
    return y
import mxnet as mx
import os
from mxnet import autograd, nd
from mxboard import SummaryWriter
import random

train_data = nd.random.uniform(-1, 1, shape=(1000, 2))
true_w = nd.array([[5.3, 6.5]])
true_b = nd.array([[8.6]])
train_label = nd.dot(train_data, nd.transpose(true_w)) + true_b
# print(train_label)

weight = nd.random.normal(scale=1.0, shape=(1, 2))
bias = nd.zeros(shape=(1, 1))
# print(weight)
# print(bias)

def data_iter(datas, labels, batchsize):
    data_len = len(datas)
    indices = list(range(data_len))
    random.shuffle(indices)
    for i in range(0, data_len, batchsize):
        j = nd.array(indices[i:min(i + batchsize, data_len)])
        yield (datas.take(j), labels.take(j))

def mlp(x, w, b):
    return nd.dot(x, nd.transpose(w)) + b
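# Hedged end-to-end sketch tying data_iter and mlp above together with a
# plain squared loss and manual SGD; the learning rate and epoch count are
# illustrative, not from the source.
weight.attach_grad()
bias.attach_grad()

def squared_loss(y_hat, y):
    return (y_hat - y) ** 2 / 2

lr, epochs, batchsize = 0.03, 5, 10
for epoch in range(epochs):
    for X, y in data_iter(train_data, train_label, batchsize):
        with autograd.record():
            l = squared_loss(mlp(X, weight, bias), y)
        l.backward()
        weight[:] = weight - lr * weight.grad / batchsize
        bias[:] = bias - lr * bias.grad / batchsize
    train_l = squared_loss(mlp(train_data, weight, bias), train_label)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().asscalar()))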
def init_rnn_state(batch_size, num_hiddens, ctx):
    # return a tuple (hence the parentheses) to signal that the state
    # container itself will not be modified
    return (nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx), )