def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.cutoff > 0:
                # erase a random square patch of side self.cutoff with gray (127.5)
                centerh = random.randint(0, _data.shape[0] - 1)
                centerw = random.randint(0, _data.shape[1] - 1)
                half = self.cutoff // 2
                starth = max(0, centerh - half)
                endh = min(_data.shape[0], centerh + half)
                startw = max(0, centerw - half)
                endw = min(_data.shape[1], centerw + half)
                _data = _data.astype('float32')
                _data[starth:endh, startw:endw, :] = 127.5
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    return io.DataBatch([batch_data], [batch_label], batch_size - i)
def copy_param(exe, new_param=None):
    """Create copy of parameters"""
    if new_param is None:
        new_param = {k: nd.empty(v.shape, ctx=mx.cpu())
                     for k, v in exe.arg_dict.items()}
    for k, v in new_param.items():
        exe.arg_dict[k].copyto(v)
    return new_param
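A minimal usage sketch (the two-layer symbol, shapes, and simple_bind call below are illustrative assumptions, not part of the snippet above): copy_param walks exe.arg_dict and copies every argument array to CPU, so the returned dict is a detached snapshot.

import mxnet as mx
from mxnet import nd

x = mx.sym.Variable('x')
y = mx.sym.FullyConnected(x, num_hidden=4, name='fc')
exe = y.simple_bind(ctx=mx.cpu(), x=(2, 8))   # allocates exe.arg_dict
snapshot = copy_param(exe)                    # CPU copies of 'x', 'fc_weight', 'fc_bias'
exe.arg_dict['fc_weight'][:] = 1.0            # mutating the live weights...
# ...leaves the snapshot untouched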
def __init__(self, data_shapes, sym_gen, params=None, aux_states=None,
             default_bucket_kwargs=None, learn_init_keys=None,
             initializer=mx.init.Xavier(factor_type="in",
                                        rnd_type="gaussian",
                                        magnitude=2),
             ctx=mx.gpu(), name='Net'):
    self.sym_gen = sym_gen
    bucket_kwargs = default_bucket_kwargs.copy() \
        if default_bucket_kwargs is not None else dict()
    self.curr_bucket_key = None
    self.ctx = ctx
    self.name = name
    self.initializer = initializer
    if params is None:
        self.params = None
        self.params_grad = None
    else:
        self.params = OrderedDict([(k, v.copyto(ctx)) for k, v in params.items()])
        self.params_grad = OrderedDict([(n, nd.empty(v.shape, ctx=ctx))
                                        for n, v in self.params.items()])
    if aux_states is not None:
        self.aux_states = OrderedDict([(k, v.copyto(ctx)) for k, v in aux_states.items()])
    else:
        self.aux_states = None
    self._buckets = dict()
    self.learn_init_keys = learn_init_keys if learn_init_keys is not None else []
    self.learn_init_key_shapes = {k: data_shapes[k] for k in self.learn_init_keys}
    self.switch_bucket(bucket_kwargs=bucket_kwargs, data_shapes=data_shapes)
    self.acc_grad = None
def get_executor(sym, ctx, data_inputs, initializer=None):
    data_shapes = {k: v.shape for k, v in data_inputs.items()}
    arg_names = sym.list_arguments()
    aux_names = sym.list_auxiliary_states()
    param_names = list(set(arg_names) - set(data_inputs.keys()))
    arg_shapes, output_shapes, aux_shapes = sym.infer_shape(**data_shapes)
    arg_name_shape = {k: s for k, s in zip(arg_names, arg_shapes)}
    params = {n: nd.empty(arg_name_shape[n], ctx=ctx) for n in param_names}
    params_grad = {n: nd.empty(arg_name_shape[n], ctx=ctx) for n in param_names}
    aux_states = {k: nd.empty(s, ctx=ctx) for k, s in zip(aux_names, aux_shapes)}
    exe = sym.bind(ctx=ctx, args=dict(params, **data_inputs),
                   args_grad=params_grad, aux_states=aux_states)
    if initializer is not None:
        for k, v in params.items():
            initializer(k, v)
    return exe, params, params_grad, aux_states
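A short usage sketch under stated assumptions (the toy softmax symbol and shapes are hypothetical): get_executor treats every symbol argument that is not a data input as a parameter, allocates parameter and gradient arrays on the target context, and binds them into an executor.

import mxnet as mx
from mxnet import nd

data_inputs = {'data': nd.zeros((10, 5)), 'softmax_label': nd.zeros((10,))}
net = mx.sym.FullyConnected(mx.sym.Variable('data'), num_hidden=3)
net = mx.sym.SoftmaxOutput(net, name='softmax')
exe, params, params_grad, aux = get_executor(net, mx.cpu(), data_inputs,
                                             mx.init.Xavier())
exe.forward(is_train=True)
print(exe.outputs[0].shape)   # (10, 3)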
def next(self): """Returns the next batch of data.""" #print('next') batch_size = self.batch_size batch_data = nd.empty((batch_size,)+self.data_shape) batch_label = nd.empty((batch_size,)+self.label_shape) i = 0 #self.cutoff = random.randint(800,1280) try: while i < batch_size: #print('N', i) data, label = self.next_sample() data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) #print(data.shape, label.shape) batch_data[i][:] = data batch_label[i][:] = label i += 1 except StopIteration: if not i: raise StopIteration return mx.io.DataBatch([batch_data], [batch_label], batch_size - i)
def next(self): """Returns the next batch of data.""" #print('next') batch_size = self.batch_size batch_data = nd.empty((batch_size, ) + self.data_shape) batch_label = nd.empty((batch_size, ) + self.label_shape) i = 0 #self.cutoff = random.randint(800,1280) try: while i < batch_size: #print('N', i) data, label = self.next_sample() data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) #print(data.shape, label.shape) batch_data[i][:] = data batch_label[i][:] = label i += 1 except StopIteration: if not i: raise StopIteration return mx.io.DataBatch([batch_data], [batch_label], batch_size - i)
def backward(self, grad_out):
    in_data_nd, out_data_nd, degs = self.saved_tensors
    grad_in = nd.empty(in_data_nd.shape, ctx=grad_out.context,
                       dtype=grad_out.dtype)
    if self.reducer == 'mean':
        grad_out = grad_out / degs
    grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
    K.backward_copy_reduce(
        self.reducer if self.reducer != 'mean' else 'sum',
        self.graph, self.target, in_data_nd, out_data_nd,
        grad_out_nd, zerocopy_to_dgl_ndarray_for_write(grad_in),
        self.in_map[1], self.out_map[1])
    # clear saved tensors explicitly
    self.saved_tensors = None
    return grad_in
def backward(self, grad_out):
    lhs_data_nd, rhs_data_nd, out_data_nd, feat_shape = self.saved_tensors
    grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
    grad_lhs = nd.empty((lhs_data_nd.shape[0],) + feat_shape,
                        ctx=grad_out.context, dtype=grad_out.dtype)
    K.backward_lhs_binary_op_reduce(
        self.reducer, self.binary_op, self.graph, self.lhs, self.rhs,
        lhs_data_nd, rhs_data_nd, out_data_nd, grad_out_nd,
        zerocopy_to_dgl_ndarray_for_write(grad_lhs),
        self.lhs_map[1], self.rhs_map[1], self.out_map[1])
    grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
    grad_rhs = nd.empty((rhs_data_nd.shape[0],) + feat_shape,
                        ctx=grad_out.context, dtype=grad_out.dtype)
    K.backward_rhs_binary_op_reduce(
        self.reducer, self.binary_op, self.graph, self.lhs, self.rhs,
        lhs_data_nd, rhs_data_nd, out_data_nd, grad_out_nd,
        zerocopy_to_dgl_ndarray_for_write(grad_rhs),
        self.lhs_map[1], self.rhs_map[1], self.out_map[1])
    grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
    # clear saved tensors explicitly
    self.saved_tensors = None
    return grad_lhs, grad_rhs
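A note on the two _reduce_grad calls above (this is the general broadcasting logic inferred from the code, not a documented DGL guarantee): when lhs and rhs were broadcast against each other in the forward binary op, the gradients produced by the backward kernels carry the broadcast feature shape, and _reduce_grad sums them back down to each operand's original shape.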
def load_bin(path, image_size):
    bins, issame_list = pickle.load(open(path, 'rb'))
    data_list = []
    for flip in [0, 1]:
        data = nd.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return (data_list, issame_list)
def hybrid_forward(self, F, x, step=0, alpha=-1, noise=None,
                   mean_style=None, style_weight=0, mixing_range=(-1, -1)):
    styles = []
    if type(x) not in (list, tuple):
        x = [x]
    for i in x:
        styles.append(self.style(i))
    batch = x[0].shape[0]
    if noise is None:
        noise = []
        for i in range(step + 1):
            size = 4 * 2 ** i
            noise.append(nd.random.randn(batch, 1, size, size, ctx=x[0].context))
    if mean_style is not None:
        # interpolate each style toward the mean style (style truncation)
        styles_norm = []
        for style in styles:
            styles_norm.append(mean_style + style_weight * (style - mean_style))
        styles = styles_norm
    nd_styles = nd.empty((len(styles), styles[0].shape[0], styles[0].shape[1]))
    for i, style in enumerate(styles):
        nd_styles[i] = style
    return self.generator(nd_styles, noise, step, alpha, mixing_range)
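A hedged call sketch (the block name, latent size, and call convention are assumptions from context): with step=2 and noise=None, the loop above allocates one noise tensor per resolution before stacking the style vectors for the generator.

#   latent = nd.random.randn(8, 512, ctx=mx.cpu())
#   out = block(latent, step=2, alpha=1)
#   # auto-generated noise shapes: (8, 1, 4, 4), (8, 1, 8, 8), (8, 1, 16, 16)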
def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
    """Get synthetic gradient value"""
    if grad is None:
        grad = nd.empty(theta.shape, theta.context)
    theta1 = theta.asnumpy()[0]
    theta2 = theta.asnumpy()[1]
    v1 = sigma1 ** 2
    v2 = sigma2 ** 2
    vx = sigmax ** 2
    denominator = numpy.exp(-(X - theta1) ** 2 / (2 * vx)) + \
        numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
    grad_npy = numpy.zeros(theta.shape)
    grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
                                    + numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
        + theta1 / v1
    grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx))
                                    * (X - theta1 - theta2) / vx) / denominator).sum() \
        + theta2 / v2
    grad[:] = grad_npy
    return grad
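For context (a reading of the code rather than of any accompanying docs): this implements the gradient of the negative log posterior for the Welling & Teh (2011) synthetic mixture, x_i ~ 1/2 N(theta1, sigmax^2) + 1/2 N(theta1 + theta2, sigmax^2) with priors theta1 ~ N(0, sigma1^2) and theta2 ~ N(0, sigma2^2); the 1/2 mixture weights cancel inside the likelihood ratio, and rescale_grad (dataset size over minibatch size) turns the minibatch sum into a full-data gradient estimate. A toy call with made-up numbers:

theta = mx.random.normal(0, 1, (2,), ctx=mx.cpu())
X = numpy.random.normal(0, 1, (10,))
g = synthetic_grad(X, theta, numpy.sqrt(10), 1, numpy.sqrt(2),
                   rescale_grad=100 / 10.0)   # N=100, minibatch of 10 (toy numbers)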
def test1():
    a = mx.symbol.Variable('a')
    b = mx.symbol.sum_mid_internal(a)
    a_npy = numpy.random.rand(120, 111, 12)
    a_grad = nd.empty((120, 111, 12))
    b_grad_npy = numpy.random.rand(120, 12)

    net = b.bind(mx.gpu(), args={'a': nd.array(a_npy)},
                 args_grad={'a': a_grad})
    net.forward(is_train=True)
    print(numpy.square(net.outputs[0].asnumpy() - a_npy.sum(axis=1)).sum())
    net.backward(out_grads=nd.array(b_grad_npy))
    print(numpy.square(a_grad.asnumpy() - b_grad_npy.reshape((120, 1, 12))).sum())
def run_synthetic_SGLD(): """Run synthetic SGLD""" theta1 = 0 theta2 = 1 sigma1 = numpy.sqrt(10) sigma2 = 1 sigmax = numpy.sqrt(2) X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100) minibatch_size = 1 total_iter_num = 1000000 lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001, total_iter_num=total_iter_num, factor=0.55) optimizer = mx.optimizer.create('sgld', learning_rate=None, rescale_grad=1.0, lr_scheduler=lr_scheduler, wd=0) updater = mx.optimizer.get_updater(optimizer) theta = mx.random.normal(0, 1, (2, ), mx.cpu()) grad = nd.empty((2, ), mx.cpu()) samples = numpy.zeros((2, total_iter_num)) start = time.time() for i in range(total_iter_num): if (i + 1) % 100000 == 0: end = time.time() print("Iter:%d, Time spent: %f" % (i + 1, end - start)) start = time.time() ind = numpy.random.randint(0, X.shape[0]) synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=X.shape[0] / float(minibatch_size), grad=grad) updater('theta', grad, theta) samples[:, i] = theta.asnumpy() plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet) plt.colorbar() plt.show()
def load_dataset_bin(self):
    name = 'lfw'
    path = os.path.join(self.lfw_dir, name + ".bin")
    bins, issame_list = pickle.load(open(path, 'rb'))
    data_list = []
    for flip in [0, 1]:
        data = nd.empty((len(issame_list) * 2, 3,
                         self.image_size[0], self.image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return (data_list, issame_list)
def load_bin(path, image_size):
    # path: e.g. ../datasets/faces_ms1m_112x112/lfw.bin
    # image_size: e.g. [112, 112]
    #bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')  # py3
    # bins holds the image-pair data, issame_list the labels
    bins, issame_list = pickle.load(open(path, 'rb'))
    print('len(bins):', len(bins))
    data_list = []
    for _ in [0, 1]:
        # one (12000, 3, 112, 112) NDArray per flip state
        data = nd.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):  # 12000
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        img = mx.image.imresize(img, 96, 112)
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)  # horizontal flip
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print('issame_list[0]:', issame_list[0])
    # data_list: two elements, each a (12000, 3, 112, 112) NDArray
    # issame_list: list of 6000 bools
    return (data_list, issame_list)
def load_bin(path, image_size):
    try:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f)  # py2
    except UnicodeDecodeError as e:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f, encoding='bytes')  # py3
    data_list = []
    for flip in [0, 1]:
        data = nd.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        if img.shape[1] != image_size[0]:
            img = mx.image.resize_short(img, image_size[0])
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
    return (data_list, issame_list)
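A usage sketch (the path is hypothetical): each returned NDArray holds 2 * len(issame_list) images, with index 0 the originals and index 1 the horizontally flipped copies.

data_list, issame = load_bin('datasets/lfw.bin', (112, 112))
print(data_list[0].shape)   # (12000, 3, 112, 112) for 6000 pairs
print(len(issame))          # number of pairs, e.g. 6000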
def load_dataset(lfw_dir, image_size):
    lfw_pairs = read_pairs(os.path.join(lfw_dir, 'pairs.txt'))
    lfw_paths, issame_list = get_paths(lfw_dir, lfw_pairs, 'jpg')
    lfw_data_list = []
    for flip in [0, 1]:
        lfw_data = nd.empty((len(lfw_paths), 3, image_size[0], image_size[1]))
        lfw_data_list.append(lfw_data)
    i = 0
    for path in lfw_paths:
        with open(path, 'rb') as fin:
            _bin = fin.read()
            img = mx.image.imdecode(_bin)
            img = nd.transpose(img, axes=(2, 0, 1))
            for flip in [0, 1]:
                if flip == 1:
                    img = mx.ndarray.flip(data=img, axis=2)
                lfw_data_list[flip][i][:] = img
            i += 1
            if i % 1000 == 0:
                print('loading lfw', i)
    print(lfw_data_list[0].shape)
    print(lfw_data_list[1].shape)
    return (lfw_data_list, issame_list)
def test2():
    a = mx.symbol.Variable('a')
    c = mx.symbol.Variable('c')
    b = mx.symbol.sum_mid_internal(a)
    d = mx.symbol.sum(mx.symbol.square(b - c))
    d = mx.symbol.MakeLoss(mx.symbol.Reshape(d, shape=(1, 1)))
    a_npy = numpy.random.rand(120, 111, 12)
    c_npy = numpy.random.rand(120, 12)
    a_grad = nd.empty((120, 111, 12))
    a_ndarray = nd.array(a_npy)
    net = d.bind(mx.gpu(),
                 args={'a': a_ndarray, 'c': nd.array(c_npy)},
                 args_grad={'a': a_grad})
    lr = 0.001
    for i in range(100):
        net.forward(is_train=True)
        loss = net.outputs[0].asnumpy()
        print(loss)
        net.backward()
        a_ndarray -= lr * a_grad
def load_bin(bin_data_file_path, bin_name_list, image_shape=(112, 112, 3)):
    """
    Load .bin data.
    :param bin_data_file_path: directory holding the .bin files
    :param bin_name_list: names of the .bin files to load
    :param image_shape: (height, width, channels) of each image
    :return: ver_list, one (data_list, is_same_list) tuple per bin; data_list
             holds the image pixels, is_same_list whether the two images of
             each pair show the same person
    """
    ver_list = []
    for bin_name in bin_name_list:
        bin_data_path = os.path.join(bin_data_file_path, bin_name)
        if os.path.exists(bin_data_path):
            print("loading {} data.....".format(bin_name))
            with open(bin_data_path, "rb") as file:
                bins, is_same_list = pickle.load(file, encoding="bytes")
            print("bins_len: {}, is_same_list_len: {}".format(len(bins), len(is_same_list)))
            is_same_list_len = len(is_same_list)
            # use data_len so an odd image count never runs past is_same_list
            data_len = is_same_list_len * 2
            data_list = []
            for _ in [0, 1]:
                data = nd.empty((data_len, image_shape[2], image_shape[0], image_shape[1]))
                data_list.append(data)
            for i in range(data_len):
                bin_data = bins[i]
                image = mx.image.imdecode(bin_data)
                if image.shape[1] != image_shape[0]:
                    image = mx.image.resize_short(image, image_shape[0])
                image = nd.transpose(image, axes=(2, 0, 1))
                for flip in [0, 1]:
                    if flip == 1:
                        image = mx.ndarray.flip(data=image, axis=2)
                    data_list[flip][i][:] = image
                if i % 1000 == 0:
                    print("loading {} bin....".format(i))
            print("{} {}".format(bin_name, data_list[0].shape))
            ver_list.append((data_list, is_same_list))
    return ver_list
def next(self): """Returns the next batch of data.""" # print('next') batch_size = self.batch_size batch_data = nd.empty((batch_size, ) + self.data_shape) batch_label = nd.empty((batch_size, ) + self.label_shape) if self.use_coherent: batch_label2 = nd.empty((batch_size, ) + self.label_shape) batch_coherent_label = nd.empty((batch_size, 6)) i = 0 # self.cutoff = random.randint(800,1280) try: while i < batch_size: # print('N', i) data, label, annot = self.next_sample() if not self.use_coherent: R = self.do_aug(data, label, annot) if R is None: continue data, label = R # data, label, data2, label2, M = R # ind = np.unravel_index(np.argmax(label[0], axis=None), label[0].shape) # print(label.shape, np.count_nonzero(label[0]), ind) # print(label[0,25:35,0:10]) data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) # print(data.shape, label.shape) try: self.check_valid_image(data) except RuntimeError as e: logging.debug('Invalid image, skipping: %s', str(e)) continue batch_data[i][:] = data batch_label[i][:] = label i += 1 else: R = self.do_aug(data, label, annot) if R is None: continue data, label, data2, label2, M = R data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) data2 = nd.array(data2) data2 = nd.transpose(data2, axes=(2, 0, 1)) label2 = nd.array(label2) M = nd.array(M) # print(data.shape, label.shape) try: self.check_valid_image(data) except RuntimeError as e: logging.debug('Invalid image, skipping: %s', str(e)) continue batch_data[i][:] = data batch_label[i][:] = label # batch_label2[i][:] = label2 batch_coherent_label[i][:] = M # i+=1 j = i + self.per_batch_size // 2 batch_data[j][:] = data2 batch_label[j][:] = label2 batch_coherent_label[j][:] = M i += 1 if j % self.per_batch_size == self.per_batch_size - 1: i = j + 1 except StopIteration: if not i: raise StopIteration # return {self.data_name : batch_data, # self.label_name : batch_label} # print(batch_data.shape, batch_label.shape) if not self.use_coherent: return mx.io.DataBatch([batch_data], [batch_label], batch_size - i) else: # return mx.io.DataBatch([batch_data], [batch_label, batch_label2, batch_coherent_label], batch_size - i) return mx.io.DataBatch([batch_data], [batch_label, batch_coherent_label], batch_size - i)
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.nd_mean is not None:
                _data = _data.astype('float32')
                _data -= self.nd_mean
                _data *= 0.0078125
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                if self.provide_label is not None:
                    if not self.coco_mode:
                        if len(batch_label.shape) == 1:
                            batch_label[i][:] = label
                        else:
                            for ll in range(batch_label.shape[1]):
                                v = label[ll]
                                if ll > 0:
                                    # map the c2c statistic linearly into a margin m,
                                    # clipped to [_param[1], _param[0]], then v = cos^2(m)
                                    c2c = v
                                    _param = [0.5, 0.25, 0.85, 0.65]
                                    _a = (_param[1] - _param[0]) / (_param[3] - _param[2])
                                    m = _param[1] + _a * (c2c - _param[3])
                                    m = min(_param[0], max(_param[1], m))
                                    v = math.cos(m)
                                    v = v * v
                                batch_label[i][ll] = v
                    else:
                        batch_label[i][:] = (i % self.per_batch_size) // self.images_per_identity
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    _label = None
    if self.provide_label is not None:
        _label = [batch_label]
    if self.data_extra is not None:
        return io.DataBatch([batch_data, self.data_extra], _label, batch_size - i)
    else:
        return io.DataBatch([batch_data], _label, batch_size - i)
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.nd_mean is not None:
                _data = _data.astype('float32')
                _data -= self.nd_mean
                _data *= 0.0078125
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                if self.provide_label is not None:
                    if not self.coco_mode:
                        if len(batch_label.shape) == 1:
                            batch_label[i][:] = label
                        else:
                            for ll in range(batch_label.shape[1]):
                                v = label[ll]
                                if ll > 0:
                                    # map the c2c statistic linearly into a margin m,
                                    # clipped to [_param[1], _param[0]], then v = cos^2(m)
                                    c2c = v
                                    _param = [0.5, 0.4, 0.85, 0.75]
                                    _a = (_param[1] - _param[0]) / (_param[3] - _param[2])
                                    m = _param[1] + _a * (c2c - _param[3])
                                    m = min(_param[0], max(_param[1], m))
                                    v = math.cos(m)
                                    v = v * v
                                batch_label[i][ll] = v
                    else:
                        batch_label[i][:] = (i % self.per_batch_size) // self.images_per_identity
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    _label = None
    if self.provide_label is not None:
        _label = [batch_label]
    if self.data_extra is not None:
        return io.DataBatch([batch_data, self.data_extra], _label, batch_size - i)
    else:
        return io.DataBatch([batch_data], _label, batch_size - i)
def main(args):
    # sys.path.append("/home/gaomingda/insightface/recognition")
    from image_iter import FaceImageIter
    global image_shape
    global net

    print(args)
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    image_shape = [int(x) for x in args.image_size.split(',')]
    vec = args.model.split(',')
    assert len(vec) > 1
    prefix = vec[0]
    epoch = int(vec[1])
    print('loading', prefix, epoch)
    net = edict()
    net.ctx = ctx
    net.sym, net.arg_params, net.aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = net.sym.get_internals()
    net.sym = all_layers['fc1_output']
    net.model = mx.mod.Module(symbol=net.sym, context=net.ctx, label_names=None)
    net.model.bind(data_shapes=[('data', (args.batch_size, 3, image_shape[1], image_shape[2]))])
    net.model.set_params(net.arg_params, net.aux_params)

    train_dataiter = FaceImageIter(
        batch_size=4,
        data_shape=(3, 112, 112),
        path_imgrec=args.input_data,
        shuffle=True,
        rand_mirror=False,
        mean=None,
        cutoff=False,
        color_jittering=0,
        images_filter=0,
    )
    data_size = train_dataiter.num_samples()
    fstart = 0
    features_all = np.zeros((data_size, 512), dtype=np.float32)
    features_all_flip = np.zeros((data_size, 512), dtype=np.float32)
    data_buff = nd.empty((args.batch_size, 3, 112, 112))
    count = 0
    for i in range(train_dataiter.num_samples()):
        if i % 1000 == 0:
            print("processing ", i)
        label, s, box, landmark = train_dataiter.next_sample()
        img = train_dataiter.imdecode(s)
        img = nd.transpose(img, axes=(2, 0, 1))
        data_buff[count] = img
        count += 1
        if count == args.batch_size:
            embedding = get_feature(data_buff, args.batch_size)
            count = 0
            fend = fstart + embedding.shape[0]
            features_all[fstart:fend, :] = embedding
            # flipped image
            data_buff_flip = mx.ndarray.flip(data=data_buff, axis=3)
            embedding_fliped = get_feature(data_buff_flip, args.batch_size)
            features_all_flip[fstart:fend, :] = embedding_fliped
            fstart = fend
    if count > 0:
        # flush the last partial batch
        embedding = get_feature(data_buff, args.batch_size)
        fend = fstart + count
        print('writing', fstart, fend)
        features_all[fstart:fend, :] = embedding[:count, :]
        # flipped image
        data_buff_flip = mx.ndarray.flip(data=data_buff, axis=3)
        embedding_fliped = get_feature(data_buff_flip, args.batch_size)
        features_all_flip[fstart:fend, :] = embedding_fliped[:count, :]
    print("save features ...")
    features_all.tofile('train_features_oct200')
    print("save train_features_flip ...")
    features_all_flip.tofile('train_features_flip_oct200')
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if _data.shape[0] != self.data_shape[1]:
                _data = mx.image.resize_short(_data, self.data_shape[1])
            if self.rand_mirror:
                _rd = random.randint(0, 5)  # change sai
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.color_jittering > 0:
                if self.color_jittering > 1:
                    _rd = random.randint(0, 1)
                    if _rd == 1:
                        _data = self.compress_aug(_data)
                _rd = random.randint(0, 5)  # change sai
                if _rd == 1:
                    _data = _data.astype('float32', copy=False)
                    _data = self.color_aug(_data, 0.125)
            if self.nd_mean is not None:
                _data = _data.astype('float32', copy=False)
                _data -= self.nd_mean
                _data *= 0.0078125
            if self.cutoff > 0:
                _rd = random.randint(0, 10)  # change sai
                if _rd == 1:
                    # random border crop, then scale back up
                    rate = random.randint(1, 5)
                    xmin = rate
                    ymin = rate
                    xmax = int(self.data_shape[1]) - rate
                    ymax = int(self.data_shape[2]) - rate
                    _data = _data[ymin:ymax, xmin:xmax, :]
                    _data = mx.image.resize_short(_data, self.data_shape[1])
            if self.shelter:
                _rd = random.randint(0, 10)  # change sai
                if _rd == 1:
                    # occlude a small random rectangle with a random color
                    xmin = random.randint(15, 100)
                    ymin = random.randint(15, 100)
                    xmax = xmin + random.randint(5, 10)
                    ymax = ymin + random.randint(5, 10)
                    _data = _data.astype('float32')
                    _data[ymin:ymax, xmin:xmax, :] = (random.randint(0, 255),
                                                      random.randint(0, 255),
                                                      random.randint(0, 255))
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    return io.DataBatch([batch_data], [batch_label], batch_size - i)
def next(self): """Returns the next batch of data.""" #print('next') batch_size = self.batch_size batch_data = nd.empty((batch_size,)+self.data_shape) batch_label = nd.empty((batch_size,)+self.label_shape) if self.use_coherent: batch_label2 = nd.empty((batch_size,)+self.label_shape) batch_coherent_label = nd.empty((batch_size,6)) i = 0 #self.cutoff = random.randint(800,1280) try: while i < batch_size: #print('N', i) data, label, annot = self.next_sample() if not self.use_coherent: R = self.do_aug(data, label, annot) if R is None: continue data, label = R #data, label, data2, label2, M = R #ind = np.unravel_index(np.argmax(label[0], axis=None), label[0].shape) #print(label.shape, np.count_nonzero(label[0]), ind) #print(label[0,25:35,0:10]) data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) #print(data.shape, label.shape) try: self.check_valid_image(data) except RuntimeError as e: logging.debug('Invalid image, skipping: %s', str(e)) continue batch_data[i][:] = data batch_label[i][:] = label i += 1 else: R = self.do_aug(data, label, annot) if R is None: continue data, label, data2, label2, M = R data = nd.array(data) data = nd.transpose(data, axes=(2, 0, 1)) label = nd.array(label) data2 = nd.array(data2) data2 = nd.transpose(data2, axes=(2, 0, 1)) label2 = nd.array(label2) M = nd.array(M) #print(data.shape, label.shape) try: self.check_valid_image(data) except RuntimeError as e: logging.debug('Invalid image, skipping: %s', str(e)) continue batch_data[i][:] = data batch_label[i][:] = label #batch_label2[i][:] = label2 batch_coherent_label[i][:] = M #i+=1 j = i+self.per_batch_size//2 batch_data[j][:] = data2 batch_label[j][:] = label2 batch_coherent_label[j][:] = M i += 1 if j%self.per_batch_size==self.per_batch_size-1: i = j+1 except StopIteration: if not i: raise StopIteration #return {self.data_name : batch_data, # self.label_name : batch_label} #print(batch_data.shape, batch_label.shape) if not self.use_coherent: return mx.io.DataBatch([batch_data], [batch_label], batch_size - i) else: #return mx.io.DataBatch([batch_data], [batch_label, batch_label2, batch_coherent_label], batch_size - i) return mx.io.DataBatch([batch_data], [batch_label, batch_coherent_label], batch_size - i)
def copy_param(exe):
    new_param = {k: nd.empty(v.shape, ctx=v.context)
                 for k, v in exe.arg_dict.items()}
    for k, v in new_param.items():
        exe.arg_dict[k].copyto(v)
    return new_param
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            gender = int(label[0])
            age = int(label[1])
            assert age >= 0
            # encode gender in slot 0 and age as a cumulative 0/1 vector in slots 1..100
            plabel = np.zeros(shape=(101,), dtype=np.float32)
            plabel[0] = gender
            if age == 0:
                age = 1
            if age > 100:
                age = 100
            plabel[1:age + 1] = 1
            label = plabel
            _data = self.imdecode(s)
            if _data.shape[0] != self.data_shape[1]:
                _data = mx.image.resize_short(_data, self.data_shape[1])
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.color_jittering > 0:
                if self.color_jittering > 1:
                    _rd = random.randint(0, 1)
                    if _rd == 1:
                        _data = self.compress_aug(_data)
                _data = _data.astype('float32', copy=False)
                _data = self.color_aug(_data, 0.125)
            if self.nd_mean is not None:
                _data = _data.astype('float32', copy=False)
                _data -= self.nd_mean
                _data *= 0.0078125
            if self.cutoff > 0:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    centerh = random.randint(0, _data.shape[0] - 1)
                    centerw = random.randint(0, _data.shape[1] - 1)
                    half = self.cutoff // 2
                    starth = max(0, centerh - half)
                    endh = min(_data.shape[0], centerh + half)
                    startw = max(0, centerw - half)
                    endw = min(_data.shape[1], centerw + half)
                    _data[starth:endh, startw:endw, :] = 128
            data = [_data]
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    return io.DataBatch([batch_data], [batch_label], batch_size - i)
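A worked example of the cumulative age encoding above (values are illustrative): gender=1, age=3 yields plabel = [1, 1, 1, 1, 0, ..., 0], i.e. slot 0 carries the gender and slots 1..100 carry one 1 per year of age, so a downstream head can recover the age as the sum of slots 1..100.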
import mxnet as mx
import mxnet.ndarray as nd
import numpy
import time

# force GPU context initialization before timing
ww = nd.ones((10,), ctx=mx.gpu()).asnumpy()
time_npy = 0
time_mxcpu = 0
time_npymxcpu = 0
temp = nd.empty((32, 4, 84, 84), ctx=mx.cpu())
for i in range(100):
    arr_npy = numpy.random.normal(0, 1, (32, 4, 84, 84))
    arr_mxcpu = mx.random.normal(0, 1, (32, 4, 84, 84), ctx=mx.cpu())
    arr_mxcpu.asnumpy()
    start = time.time()
    arr_gpu = nd.array(arr_npy, ctx=mx.gpu())
    arr_gpu.wait_to_read()
    end = time.time()
    print("Numpy CPU copying time:", end - start)
    time_npy += end - start
    start = time.time()
    arr_gpu1 = arr_mxcpu.copyto(mx.gpu())
    arr_gpu1.wait_to_read()
    end = time.time()
    print("MXNet CPU copying time:", end - start)
    time_mxcpu += end - start
    start = time.time()
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    end_epoch = args.end_epoch
    pretrained = '../model/resnet-152'
    load_epoch = args.load_epoch
    args.image_size = 160
    per_batch_size = 60
    args.ctx_num = len(ctx)
    args.batch_size = per_batch_size * args.ctx_num
    args.bag_size = 3600
    args.margin = 0.2
    args.num_classes = 10575  # webface
    data_shape = (3, args.image_size, args.image_size)
    begin_epoch = 0
    base_lr = 0.05
    base_wd = 0.0002
    base_mom = 0.0
    lr_decay = 0.98
    if not args.retrain:
        # load and initialize params
        print(pretrained)
        _, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch)
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
        data_shape_dict = {
            'data': (args.batch_size, 3, args.image_size, args.image_size),
            'softmax_label': (args.batch_size,)
        }
        resnet_dcn.init_weights(sym, data_shape_dict, arg_params, aux_params)
    else:
        pretrained = args.prefix
        sym, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch)
        begin_epoch = load_epoch
        end_epoch = begin_epoch + 10
        base_wd = 0.00005
        lr_decay = 0.5
        base_lr = 0.015

    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
    )
    train_dataiter = FaceIter(
        path_imglist="/raid5data/dplearn/faceinsight_align_webface.lst",
        data_shape=data_shape,
        mod=model,
        ctx_num=args.ctx_num,
        batch_size=args.batch_size,
        bag_size=args.bag_size,
        images_per_person=5,
    )
    _acc = AccMetric()
    eval_metrics = [mx.metric.create(_acc)]
    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom,
                        wd=base_wd, rescale_grad=1.0)
    _cb = mx.callback.Speedometer(args.batch_size, 10)

    lfw_dir = '/raid5data/dplearn/lfw_mtcnn'
    lfw_pairs = lfw.read_pairs(os.path.join(lfw_dir, 'pairs.txt'))
    lfw_paths, issame_list = lfw.get_paths(lfw_dir, lfw_pairs, 'png')
    imgs = []
    lfw_data_list = []
    for flip in [0, 1]:
        lfw_data = nd.empty((len(lfw_paths), 3, args.image_size, args.image_size))
        i = 0
        for path in lfw_paths:
            with open(path, 'rb') as fin:
                _bin = fin.read()
                img = mx.image.imdecode(_bin)
                img = nd.transpose(img, axes=(2, 0, 1))
                if flip == 1:
                    img = img.asnumpy()
                    for c in range(img.shape[0]):
                        img[c, :, :] = np.fliplr(img[c, :, :])
                    img = nd.array(img)
                lfw_data[i][:] = img
                i += 1
                if i % 1000 == 0:
                    print('loading lfw', i)
        print(lfw_data.shape)
        lfw_data_list.append(lfw_data)

    def lfw_test(nbatch):
        print('testing lfw..')
        embeddings_list = []
        for i in range(len(lfw_data_list)):
            lfw_data = lfw_data_list[i]
            embeddings = None
            ba = 0
            while ba < lfw_data.shape[0]:
                bb = min(ba + args.batch_size, lfw_data.shape[0])
                _data = nd.slice_axis(lfw_data, axis=0, begin=ba, end=bb)
                _label = nd.ones((bb - ba,))
                db = mx.io.DataBatch(data=(_data,), label=(_label,))
                model.forward(db, is_train=False)
                net_out = model.get_outputs()
                _embeddings = net_out[0].asnumpy()
                if embeddings is None:
                    embeddings = np.zeros((lfw_data.shape[0], _embeddings.shape[1]))
                embeddings[ba:bb, :] = _embeddings
                ba = bb
            embeddings_list.append(embeddings)

        acc_list = []
        embeddings = embeddings_list[0]
        _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
        acc_list.append(np.mean(accuracy))
        print('[%d]Accuracy: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
        print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
        embeddings = np.concatenate(embeddings_list, axis=1)
        embeddings = sklearn.preprocessing.normalize(embeddings)
        print(embeddings.shape)
        _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
        acc_list.append(np.mean(accuracy))
        print('[%d]Accuracy-Flip: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
        print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
        pca = PCA(n_components=128)
        embeddings = pca.fit_transform(embeddings)
        embeddings = sklearn.preprocessing.normalize(embeddings)
        print(embeddings.shape)
        _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
        acc_list.append(np.mean(accuracy))
        print('[%d]Accuracy-PCA: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
        print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
        return max(*acc_list)

    highest_acc = [0.0]
    last_save_acc = [0.0]

    def _batch_callback(param):
        mbatch = param.nbatch + 1
        if mbatch % 4000 == 0:
            opt.lr *= lr_decay
        _cb(param)
        if param.nbatch % 100 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
        if param.nbatch >= 0 and param.nbatch % 400 == 0:
            acc = lfw_test(param.nbatch)
            if acc > highest_acc[0]:
                highest_acc[0] = acc
            if acc > 0.9 and acc - last_save_acc[0] >= 0.01:
                print('saving', mbatch, acc, last_save_acc[0])
                _arg, _aux = model.get_params()
                mx.model.save_checkpoint(args.prefix, mbatch, model.symbol, _arg, _aux)
                last_save_acc[0] = acc
            print('[%d]highest Accu: %1.3f' % (param.nbatch, highest_acc[0]))
        sys.stdout.flush()
        sys.stderr.flush()

    epoch_cb = mx.callback.do_checkpoint(prefix, 1)

    def _epoch_callback(epoch, sym, arg_params, aux_params):
        print('epoch-end', epoch)

    model.fit(train_dataiter,
              begin_epoch=begin_epoch,
              num_epoch=end_epoch,
              eval_metric=eval_metrics,
              kvstore='device',
              optimizer=opt,
              initializer=initializer,
              arg_params=arg_params,
              aux_params=aux_params,
              allow_missing=True,
              batch_end_callback=_batch_callback,
              epoch_end_callback=epoch_cb)
def next(self):
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size1 = self.batch_size1
    interclass_size = self.batchsize_id
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size1 + interclass_size, c, h, w))
    batch_data_t = nd.empty((batch_size1 + interclass_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
        batch_label_t = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size1:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            _data = _data.astype('float32')
            _data = image.RandomGrayAug(.2)(_data)
            if random.random() < 0.2:
                _data = image.ColorJitterAug(0.2, 0.2, 0.2)(_data)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size1, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size1:
            raise StopIteration
    try:
        while i < interclass_size + batch_size1:
            label, s, bbox, landmark = self.next_sample2()
            _data = self.imdecode(s)
            _data = _data.astype('float32')
            _data = image.RandomGrayAug(.2)(_data)
            if random.random() < 0.2:
                _data = image.ColorJitterAug(0.2, 0.2, 0.2)(_data)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < interclass_size + batch_size1, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < interclass_size + batch_size1:
            raise StopIteration
    # regroup the two sample streams context by context: per ctx, the first
    # `margin` samples come from next_sample(), the next `margin` from next_sample2()
    margin = batch_size1 // self.ctx_num
    for i in range(self.ctx_num):
        batch_data_t[2 * i * margin:(2 * i + 1) * margin][:] = \
            batch_data[i * margin:(i + 1) * margin][:]
        batch_data_t[(2 * i + 1) * margin:2 * (i + 1) * margin][:] = \
            batch_data[batch_size1 + i * margin:batch_size1 + (i + 1) * margin][:]
    for i in range(self.ctx_num):
        batch_label_t[2 * i * margin:(2 * i + 1) * margin][:] = \
            batch_label[i * margin:(i + 1) * margin][:]
        batch_label_t[(2 * i + 1) * margin:2 * (i + 1) * margin][:] = \
            batch_label[batch_size1 + i * margin:batch_size1 + (i + 1) * margin][:]
    return io.DataBatch([batch_data_t], [batch_label_t])
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    c, h, w = self.data_shape
    batch_data_srctar = nd.empty((2 * self.batch_size_src, c, h, w))
    batch_data_t = nd.empty((2 * self.batch_size_src, c, h, w))
    batch_label_srctar = nd.empty(self.provide_label_srctar[0][1])
    batch_label_t = nd.empty(self.provide_label_srctar[0][1])
    batch_data_tar = nd.empty((self.batch_size_src, c, h, w))
    batch_data_adv = nd.empty((self.batch_size_src, c, h, w))
    batch_data = nd.empty((3 * self.batch_size_src, c, h, w))
    batch_label = nd.empty(self.provide_label[0][1])

    # keep the adversarial model in sync with the training model
    arg_t, aux_t = self.model.get_params()
    self.model_adv.set_params(arg_t, aux_t)

    i = 0
    try:
        while i < self.batch_size_src:
            label, s, bbox, landmark = self.next_sample1()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.nd_mean is not None:
                _data = _data.astype('float32')
                _data -= self.nd_mean
                _data *= 0.0078125
            if self.cutoff > 0:
                centerh = random.randint(0, _data.shape[0] - 1)
                centerw = random.randint(0, _data.shape[1] - 1)
                half = self.cutoff // 2
                starth = max(0, centerh - half)
                endh = min(_data.shape[0], centerh + half)
                startw = max(0, centerw - half)
                endw = min(_data.shape[1], centerw + half)
                _data = _data.astype('float32')
                _data[starth:endh, startw:endw, :] = 127.5
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < self.batch_size_src, 'Batch size must be multiples of augmenter output length'
                batch_data_srctar[i][:] = self.postprocess_data(datum)
                batch_label_srctar[i][:] = label
                i += 1
    except StopIteration:
        if i < self.batch_size_src:
            raise StopIteration
    try:
        while i < 2 * self.batch_size_src:
            label, s, bbox, landmark = self.next_sample2()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.nd_mean is not None:
                _data = _data.astype('float32')
                _data -= self.nd_mean
                _data *= 0.0078125
            if self.cutoff > 0:
                centerh = random.randint(0, _data.shape[0] - 1)
                centerw = random.randint(0, _data.shape[1] - 1)
                half = self.cutoff // 2
                starth = max(0, centerh - half)
                endh = min(_data.shape[0], centerh + half)
                startw = max(0, centerw - half)
                endw = min(_data.shape[1], centerw + half)
                _data = _data.astype('float32')
                _data[starth:endh, startw:endw, :] = 127.5
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < 2 * self.batch_size_src, 'Batch size must be multiples of augmenter output length'
                batch_data_srctar[i][:] = self.postprocess_data(datum)
                batch_label_srctar[i][:] = label
                i += 1
    except StopIteration:
        if i < 2 * self.batch_size_src:
            raise StopIteration

    # interleave source and target halves context by context
    margin = self.batch_size_src // self.ctx_num
    for i in range(self.ctx_num):
        batch_data_t[2 * i * margin:(2 * i + 1) * margin][:] = \
            batch_data_srctar[i * margin:(i + 1) * margin][:]
        batch_data_t[(2 * i + 1) * margin:2 * (i + 1) * margin][:] = \
            batch_data_srctar[self.batch_size_src + i * margin:
                              self.batch_size_src + (i + 1) * margin][:]
    for i in range(self.ctx_num):
        batch_label_t[2 * i * margin:(2 * i + 1) * margin][:] = \
            batch_label_srctar[i * margin:(i + 1) * margin][:]
        batch_label_t[(2 * i + 1) * margin:2 * (i + 1) * margin][:] = \
            batch_label_srctar[self.batch_size_src + i * margin:
                               self.batch_size_src + (i + 1) * margin][:]

    # perturb the source slices against the adversarial model,
    # clipping pixel values to [0, 255]
    db = mx.io.DataBatch([batch_data_t])
    self.model_adv.forward(db, is_train=True)
    ori_out = self.model_adv.get_outputs()[-1].asnumpy()
    self.model_adv.backward()
    grad = self.model_adv.get_input_grads()[0]
    grad = mx.nd.array(grad)
    for j in range(self.ctx_num):
        batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] -= \
            self.sigma * mx.nd.sign(grad[2 * j * margin:(2 * j + 1) * margin][:])
        batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] = mx.nd.maximum(
            batch_data_t[2 * j * margin:(2 * j + 1) * margin][:],
            mx.nd.zeros_like(batch_data_t[2 * j * margin:(2 * j + 1) * margin][:]))
        batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] = mx.nd.minimum(
            batch_data_t[2 * j * margin:(2 * j + 1) * margin][:],
            255 * mx.nd.ones_like(batch_data_t[2 * j * margin:(2 * j + 1) * margin][:]))
    for i in range(0, self.round - 1):
        db = mx.io.DataBatch([batch_data_t])
        self.model_adv.forward(db, is_train=True)
        adv_out = self.model_adv.get_outputs()[-1].asnumpy()
        if np.max(adv_out) > self.thd:
            self.model_adv.backward()
            grad = self.model_adv.get_input_grads()[0]
            grad = mx.nd.array(grad)
            for j in range(self.ctx_num):
                batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] -= \
                    self.sigma * mx.nd.sign(grad[2 * j * margin:(2 * j + 1) * margin][:])
                batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] = mx.nd.maximum(
                    batch_data_t[2 * j * margin:(2 * j + 1) * margin][:],
                    mx.nd.zeros_like(batch_data_t[2 * j * margin:(2 * j + 1) * margin][:]))
                batch_data_t[2 * j * margin:(2 * j + 1) * margin][:] = mx.nd.minimum(
                    batch_data_t[2 * j * margin:(2 * j + 1) * margin][:],
                    255 * mx.nd.ones_like(batch_data_t[2 * j * margin:(2 * j + 1) * margin][:]))
        else:
            break
    db = mx.io.DataBatch([batch_data_t])
    self.model_adv.forward(db, is_train=True)
    adv_out = self.model_adv.get_outputs()[-1].asnumpy()

    for i in range(self.ctx_num):
        batch_data_adv[i * margin:(i + 1) * margin][:] = \
            batch_data_t[2 * i * margin:(2 * i + 1) * margin][:]
    batch_data_src = batch_data_srctar[0:self.batch_size_src][:]
    batch_data_tar = batch_data_srctar[self.batch_size_src:2 * self.batch_size_src][:]
    batch_label_src = batch_label_srctar[0:self.batch_size_src][:]
    batch_label_tar = batch_label_srctar[self.batch_size_src:2 * self.batch_size_src][:]

    # final layout per main context: [source | target | adversarial]
    margin = self.batch_size_src // self.main_ctx_num
    for i in range(self.main_ctx_num):
        batch_data[margin * 3 * i:margin * 3 * i + margin][:] = \
            batch_data_src[margin * i:margin * i + margin][:]
        batch_data[margin * 3 * i + margin:margin * 3 * i + 2 * margin][:] = \
            batch_data_tar[margin * i:margin * i + margin][:]
        batch_data[margin * 3 * i + 2 * margin:margin * 3 * i + 3 * margin][:] = \
            batch_data_adv[margin * i:margin * i + margin][:]
    for i in range(self.main_ctx_num):
        batch_label[margin * 3 * i:margin * 3 * i + margin][:] = \
            batch_label_src[margin * i:margin * i + margin][:]
        batch_label[margin * 3 * i + margin:margin * 3 * i + 2 * margin][:] = \
            batch_label_tar[margin * i:margin * i + margin][:]
        batch_label[margin * 3 * i + 2 * margin:margin * 3 * i + 3 * margin][:] = \
            batch_label_src[margin * i:margin * i + margin][:]
    return io.DataBatch([batch_data], [batch_label])
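The perturbation loop above is an iterative fast-gradient-sign update on the source slices (reading model_adv's last output as a distance that the loop drives down; that interpretation is inferred from the code, not documented): x <- clip(x - sigma * sign(grad_x d(x)), 0, 255), repeated up to self.round times or until max(d) falls to self.thd or below.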
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if self.rand_mirror:
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.cutoff > 0:
                centerh = random.randint(0, _data.shape[0] - 1)
                centerw = random.randint(0, _data.shape[1] - 1)
                half = self.cutoff // 2
                starth = max(0, centerh - half)
                endh = min(_data.shape[0], centerh + half)
                startw = max(0, centerw - half)
                endw = min(_data.shape[1], centerw + half)
                _data = _data.astype('float32')
                _data[starth:endh, startw:endw, :] = 127.5
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                if self.provide_label is not None:
                    batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    _label = None
    if self.provide_label is not None:
        _label = [batch_label]
    return io.DataBatch([batch_data], _label, batch_size - i)
def next(self):
    """Returns the next batch of data."""
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if _data.shape[0] != self.data_shape[1]:
                _data = mx.image.resize_short(_data, self.data_shape[1])
            if self.rand_mirror:
                if random.randint(0, 1) == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.color_jittering > 0:
                if self.color_jittering > 1:
                    # Optionally apply compression augmentation first.
                    if random.randint(0, 1) == 1:
                        _data = self.compress_aug(_data)
                _data = _data.astype('float32', copy=False)
                _data = self.color_aug(_data, 0.125)
            if self.nd_mean is not None:
                _data = _data.astype('float32', copy=False)
                _data -= self.nd_mean
                _data *= 0.0078125
            if self.cutoff > 0:
                # Cutout with probability 0.5.
                if random.randint(0, 1) == 1:
                    centerh = random.randint(0, _data.shape[0] - 1)
                    centerw = random.randint(0, _data.shape[1] - 1)
                    half = self.cutoff // 2
                    starth = max(0, centerh - half)
                    endh = min(_data.shape[0], centerh + half)
                    startw = max(0, centerw - half)
                    endw = min(_data.shape[1], centerw + half)
                    _data[starth:endh, startw:endw, :] = 128
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    return io.DataBatch([batch_data], [batch_label], batch_size - i)
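# self.color_aug above is assumed to be an mx.image.ColorJitterAug-style
# augmenter; a sketch of wiring one up with the 0.125 strength used above
# (the dummy image is purely illustrative):
import mxnet as mx

color_aug = mx.image.ColorJitterAug(brightness=0.125, contrast=0.125, saturation=0.125)
img = mx.nd.random.uniform(0, 255, (112, 112, 3)).astype('float32')  # dummy HWC image
img = color_aug(img)  # randomly perturbs brightness, contrast, saturation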
def next(self):
    if not self.is_init:
        self.reset()
        self.is_init = True
    self.n_batch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, image_str = self.next_sample()
            # image_str decodes to a 112x112x3 NDArray; call
            # image_arr.asnumpy() to get a numpy array, e.g. for plt.imshow().
            image_arr = mx.image.imdecode(image_str)
            if image_arr.shape[0] != self.data_shape[1]:
                image_arr = mx.image.resize_short(image_arr, self.data_shape[1])
            # Mirror flip with probability 0.5.
            if self.rand_mirror:
                if np.random.randint(0, 2) == 1:
                    image_arr = mx.ndarray.flip(data=image_arr, axis=1)
            if self.color_jitter > 0:
                if self.color_jitter > 1:
                    # Compression augmentation with probability 0.5.
                    if np.random.randint(0, 2) == 1:
                        image_arr = self.compress_aug(image_arr)
                # Convert pixels to float32, then apply color augmentation.
                image_arr = image_arr.astype("float32", copy=False)
                image_arr = self.color_jitter_aug(image_arr)
            if self.nd_mean is not None:
                image_arr = image_arr.astype('float32', copy=False)
                image_arr -= self.nd_mean
                image_arr *= 0.0078125
            # Random cutout: erase a square patch with probability 0.5.
            if self.cutoff > 0:
                if np.random.randint(0, 2) == 1:
                    center_h = np.random.randint(0, image_arr.shape[0])
                    center_w = np.random.randint(0, image_arr.shape[1])
                    half = self.cutoff // 2
                    start_h = max(0, center_h - half)
                    end_h = min(image_arr.shape[0], center_h + half)
                    start_w = max(0, center_w - half)
                    end_w = min(image_arr.shape[1], center_w + half)
                    image_arr[start_h:end_h, start_w:end_w, :] = 128
            image_data = [image_arr]
            try:
                # Validate the decoded image.
                self.check_valid_image(image_data)
            except RuntimeError as e:
                print("Invalid image, skipping: {}".format(e))
                continue
            for image_info in image_data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                # [height, width, channel] ---> [channel, height, width]
                batch_data[i][:] = self.post_process_data(image_info)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        if i < batch_size:
            raise StopIteration
    return io.DataBatch([batch_data], [batch_label], batch_size - i)
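# A minimal consumption sketch for the iterator variants above, assuming a
# constructed iterator `train_iter` and a bound, initialized mx.mod.Module
# `model` (both hypothetical; the real training loop lives elsewhere):
epoch = 0
while epoch < 10:
    try:
        batch = train_iter.next()      # io.DataBatch([data], [label], pad)
    except StopIteration:
        train_iter.reset()             # rewind for the next epoch
        epoch += 1
        continue
    model.forward(batch, is_train=True)
    model.backward()
    model.update()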
def switch_bucket(self, bucket_kwargs=None, data_shapes=None):
    if bucket_kwargs is not None:
        self.curr_bucket_key = get_bucket_key(bucket_kwargs=bucket_kwargs)
    # 1. Check if the bucket key already exists.
    if self.curr_bucket_key in self._buckets:
        if data_shapes is not None:
            if tuple(data_shapes.items()) not in self._buckets[self.curr_bucket_key]['exe']:
                # TODO Optimize the reshaping functionality!
                self._buckets[self.curr_bucket_key]['exe'][tuple(data_shapes.items())] = \
                    self.exe.reshape(partial_shaping=True, allow_up_sizing=True, **data_shapes)
                self._buckets[self.curr_bucket_key]['data_shapes'] = data_shapes
            else:
                self._buckets[self.curr_bucket_key]['data_shapes'] = data_shapes
        return
    # 2. The bucket key does not exist: create a new symbol and executor.
    assert data_shapes is not None, "Must set data_shapes for new bucket!"
    if isinstance(self.sym_gen, mx.symbol.Symbol):
        sym = self.sym_gen
    else:
        sym = self.sym_gen(**dict(self.curr_bucket_key))
    arg_names = sym.list_arguments()
    aux_names = sym.list_auxiliary_states()
    param_names = [n for n in arg_names
                   if n in self.learn_init_keys or (n not in data_shapes.keys())]
    for k, v in data_shapes.items():
        assert isinstance(v, tuple), "data_shapes must map to tuples! Found k=%s, v=%s, " \
                                     "data_shapes=%s" % (k, str(v), str(data_shapes))
    arg_shapes, _, aux_shapes = sym.infer_shape(**data_shapes)
    arg_name_shape = OrderedDict([(k, s) for k, s in zip(arg_names, arg_shapes)])
    if self.params is None:
        self.params = OrderedDict([(n, nd.empty(arg_name_shape[n], ctx=self.ctx))
                                   for n in param_names])
        self.params_grad = OrderedDict([(n, nd.empty(arg_name_shape[n], ctx=self.ctx))
                                        for n in param_names])
        if len(self.params) > 0:
            assert self.initializer is not None, \
                'We must set the initializer if we do not manually initialize ' \
                'the free parameters of the network!'
            for k, v in self.params.items():
                self.initializer(k, v)
    else:
        assert set(arg_name_shape.items()) == \
               set(list(data_shapes.items()) +
                   [(k, v.shape) for k, v in self.params.items()])
    if self.aux_states is None:
        self.aux_states = OrderedDict([(k, nd.empty(s, ctx=self.ctx))
                                       for k, s in zip(aux_names, aux_shapes)])
    data_inputs = {k: mx.nd.empty(data_shapes[k], ctx=self.ctx)
                   for k in set(data_shapes.keys()) - set(self.learn_init_keys)}
    if len(self._buckets) > 0:
        shared_exe = list(list(self._buckets.values())[0]['exe'].values())[0]
    else:
        shared_exe = None
    self._buckets[self.curr_bucket_key] = {
        'exe': {tuple(data_shapes.items()):
                    sym.bind(ctx=self.ctx,
                             args=dict(self.params, **data_inputs),
                             args_grad=dict(self.params_grad.items()),
                             aux_states=self.aux_states,
                             shared_exec=shared_exe)},
        'data_shapes': data_shapes,
        'sym': sym
    }
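# get_bucket_key above must turn a kwargs dict into a hashable,
# order-independent key. Its real implementation is not shown here; one
# plausible sketch:
def get_bucket_key(bucket_kwargs):
    # Sort so that {'a': 1, 'b': 2} and {'b': 2, 'a': 1} map to the same key.
    return tuple(sorted(bucket_kwargs.items()))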
minibatch_size = 32
input_dim = 10
data_shapes = {'data': (minibatch_size, input_dim),
               'out_label': (minibatch_size, 10)}
ctx = mx.gpu()
arg_names = net.list_arguments()
aux_names = net.list_auxiliary_states()
param_names = list(set(arg_names) - set(data_shapes.keys()))
arg_shapes, output_shapes, aux_shapes = net.infer_shape(**data_shapes)
arg_name_shape = {k: s for k, s in zip(arg_names, arg_shapes)}
params = {n: nd.ones(arg_name_shape[n], ctx=ctx) * 0.0001 for n in param_names}
params_grad = {n: nd.empty(arg_name_shape[n], ctx=ctx) for n in param_names}
aux_states = {k: nd.empty(s, ctx=ctx) for k, s in zip(aux_names, aux_shapes)}
exe_pool = ExecutorDataShapePool(ctx=ctx, sym=net, data_shapes=data_shapes,
                                 params=params, params_grad=params_grad,
                                 aux_states=aux_states)
new_exe_pool = ExecutorDataShapePool(ctx=ctx, sym=net, data_shapes=data_shapes,
                                     params=params, params_grad=params_grad,
                                     aux_states=aux_states)
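# The snippet above assumes a symbol `net` whose inputs are 'data' and
# 'out_label'. A minimal hypothetical definition that satisfies those names
# (the real network is defined elsewhere):
import mxnet as mx

data = mx.symbol.Variable('data')
fc = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=10)
# A regression head named 'out' implicitly creates the label 'out_label'
# with the same (minibatch_size, 10) shape used in data_shapes above.
net = mx.symbol.LinearRegressionOutput(data=fc, name='out')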
teacher = mx.symbol.Variable('data')
teacher = mx.symbol.FullyConnected(data=teacher, name='teacher_fc1', num_hidden=100)
teacher = mx.symbol.Activation(data=teacher, name='teacher_relu1', act_type="relu")
teacher = mx.symbol.FullyConnected(data=teacher, name='teacher_pred', num_hidden=1)

student = mx.symbol.Variable('data')
student = mx.symbol.FullyConnected(data=student, name='student_fc1', num_hidden=100)
student = mx.symbol.Activation(data=student, name='student_relu1', act_type="relu")
student_mean = mx.symbol.FullyConnected(data=student, name='student_mean', num_hidden=1)
student_var = mx.symbol.FullyConnected(data=student, name='student_var', num_hidden=1)
student = mx.symbol.Group([student_mean, student_var])

batch_size = 1
data_shape = (batch_size, 1)
data_inputs = {'data': nd.empty(data_shape, ctx=dev)}
initializer = mx.initializer.Uniform(0.07)
teacher_exe, teacher_params, teacher_params_grad, _ = \
    get_executor(teacher, dev, data_inputs, initializer)
student_exe, student_params, student_params_grad, _ = \
    get_executor(student, dev, data_inputs, initializer)

# Alternative synthetic data:
# X = numpy.random.uniform(-4, 4, (20, 1))
# Y = X * X * X + numpy.random.normal(0, 3, (20, 1))
training_data = numpy.loadtxt('toy_data_train.txt')
testing_data = numpy.loadtxt('toy_data_test_whole.txt')
X = training_data[:, 0].reshape((20, 1))
Y = training_data[:, 1].reshape((20, 1))
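# The student's separate mean and variance heads suggest a heteroscedastic
# Gaussian likelihood. A sketch of the implied per-sample negative
# log-likelihood, assuming NDArrays `mean`, `var` (already made positive,
# e.g. exp of the raw student_var head) and targets `y` (names hypothetical):
import math
import mxnet as mx

def gaussian_nll(mean, var, y):
    """-log N(y | mean, var), elementwise."""
    return 0.5 * mx.nd.log(2 * math.pi * var) + (y - mean) ** 2 / (2 * var)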