def init(args):
    """Allocate zero-initialized MLP parameters on GPU 0.

    Layer widths are 784 -> hidden_size (x num_hidden) -> 10.
    Returns (weights, biases) as parallel lists, one entry per layer pair.
    """
    dims = [784]
    dims.extend([args.hidden_size] * args.num_hidden)
    dims.append(10)
    dev = mx.gpu(0)
    weights = []
    biases = []
    for n_in, n_out in zip(dims[:-1], dims[1:]):
        weights.append(mx.nd.zeros((n_in, n_out), ctx=dev))
        biases.append(mx.nd.zeros((1, n_out), ctx=dev))
    return weights, biases
def init_yolo():
    """Build and bind a YOLO training executor on GPU.

    Loads a pretrained feed-forward checkpoint, copies weights whose names
    match the YOLO symbol (except fullyconnected layers), and initializes
    everything else uniformly in [-5e-4, 5e-4].

    Returns
    -------
    mx.executor.Executor bound with freshly allocated arg/grad/aux arrays.
    """
    # initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    initializer = mx.init.Uniform(5e-4)
    pretrained_model = mx.model.FeedForward.load(PRETRAINED[0], PRETRAINED[1], ctx=mx.gpu())
    arg_params = pretrained_model.arg_params
    aux_params = pretrained_model.aux_params
    symbol = get_yolo_symbol()
    arg_shapes, output_shapes, aux_shapes = symbol.infer_shape(data=(BATCHSIZE, 3, 448, 448))
    arg_names = symbol.list_arguments()
    aux_names = symbol.list_auxiliary_states()
    arg_dict = dict(zip(arg_names, [mx.nd.zeros(shape, ctx=mx.gpu()) for shape in arg_shapes]))
    aux_dict = dict(zip(aux_names, [mx.nd.zeros(shape, ctx=mx.gpu()) for shape in aux_shapes]))
    grad_dict = dict(zip(arg_names, [mx.nd.zeros(shape, ctx=mx.gpu()) for shape in arg_shapes]))
    for name in arg_dict:
        # Data and label arrays are fed by the iterator, not learned.
        if name.endswith('label') or name.startswith('data'):
            continue
        if name in arg_params and not name.startswith('fullyconnected'):
            arg_params[name].copyto(arg_dict[name])
        else:
            # Fixed: use print as a function call so this line is valid under
            # both Python 2 (parenthesized expression) and Python 3, matching
            # the print() style used elsewhere in this file.
            print(name)
            initializer(name, arg_dict[name])
    for name in aux_dict:
        # NOTE(review): the `0 and` disables reuse of pretrained aux states,
        # so every aux state is re-initialized — confirm this is intentional.
        if 0 and name in aux_params and not name.startswith('fullyconnected'):
            aux_params[name].copyto(aux_dict[name])
        else:
            initializer(name, aux_dict[name])
    executor = symbol.bind(ctx=mx.gpu(), args=arg_dict, args_grad=grad_dict,
                           aux_states=aux_dict, grad_req='write')
    return executor
def main(args):
    """Benchmark a two-layer GPU MLP: bind an executor, then time
    `num_loops` forward (and optionally backward) passes."""
    dev = mx.gpu(0)
    # Symbolic network: FC -> ReLU -> FC -> softmax.
    net = mx.symbol.Variable('data')
    net = mx.symbol.FullyConnected(data=net, num_hidden=args.hidden_size)
    net = mx.symbol.Activation(data=net, act_type="relu")
    net = mx.symbol.FullyConnected(data=net, num_hidden=10)
    net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    arg_shapes, out_shapes, aux_shapes = net.infer_shape(data=(args.batch_size, 784))
    arg_types, out_types, aux_types = net.infer_type(data=mx.base.mx_real_t)
    arg_arrays = [mx.nd.zeros(s, dev, dtype=t) for s, t in zip(arg_shapes, arg_types)]
    # Gradient buffers for every argument except the input data.
    grad_dict = {}
    for name, s, t in zip(net.list_arguments(), arg_shapes, arg_types):
        if name != 'data':
            grad_dict[name] = mx.nd.zeros(s, dev, dtype=t)
    executor = net.bind(ctx=dev, args=arg_arrays, args_grad=grad_dict, grad_req='write')
    start = time.time()
    for _ in range(num_loops):  # num_loops is a module-level constant
        outputs = executor.forward()
        if args.only_forward:
            for o in outputs:
                o.wait_to_read()
            continue
        executor.backward([outputs[0]])
        # Block until gradients are computed so timing is honest.
        for grad in grad_dict.values():
            grad.wait_to_read()
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / num_loops))
def handleFolder(GUPid,tasks):
    """Extract pool1 features from full-resnet-152 for every .jpg/.JPEG in
    each task subfolder, shuffle, and save a train/test split as .npy files.

    Relies on module-level globals: `folder` (dataset root) and `test_ratio`
    (fraction of each subfolder held out for test). Python 2 code (print
    statements).

    GUPid -- GPU device id for this worker (sic: name kept for callers)
    tasks -- list of subfolder names to process
    """
    #synset = [l.strip() for l in open(args.synset).readlines()]
    prefix = "full-resnet-152"
    num_round = 0
    # Load the pretrained checkpoint on this worker's GPU, batch size 1.
    model = mx.model.FeedForward.load( prefix, num_round, ctx=mx.gpu(GUPid),numpy_batch_size=1)
    internals = model.symbol.get_internals()
    # Truncate the network at pool1 and reuse the loaded weights.
    fea_symbol = internals["pool1_output"]
    feature_extractor = mx.model.FeedForward( ctx=mx.gpu(GUPid), symbol=fea_symbol, numpy_batch_size=1, \
        arg_params=model.arg_params, aux_params=model.aux_params, allow_extra_params=True)
    #subfolders = [ fold for fold in os.listdir(folder)]
    k = 0  # progress counter across tasks
    for subfolder in tasks:
        workspace_folder = os.path.join(folder,subfolder)
        print "extract label####",subfolder,"---GPU: ",GUPid," process: ",k,"/",len(tasks)
        i = 0  # number of images processed in this subfolder
        k +=1
        feature_array = []
        for filename in os.listdir(workspace_folder):
            if '.jpg' in filename or '.JPEG' in filename:
                i +=1
                m = cv2.imread(os.path.join(workspace_folder,filename),1)
                img = cv2.cvtColor(m, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (224, 224))  # resize to 224*224 to fit model
                img = np.swapaxes(img, 0, 2)
                img = np.swapaxes(img, 1, 2)  # change to (c, h,w) order
                img = img[np.newaxis, :]  # extend to (n, c, h, w)
                f = feature_extractor.predict(img)
                f = np.ravel(f)
                #print f.shape
                # NOTE(review): only f[0] — the first component of the
                # flattened feature — is stored per image; confirm a scalar
                # rather than the full vector is intended here.
                feature_array.append((f[0],subfolder,filename))
        # Shuffle before splitting so the split is random per subfolder.
        random.shuffle(feature_array)
        #print len(feature_array)
        np.save((os.path.join(workspace_folder,"test.npy")),feature_array[:int(i*test_ratio)])
        np.save((os.path.join(workspace_folder,"train.npy")),feature_array[int(i*(test_ratio)):])
def make_image(img, m, color_ref=None):
    """Render style-transfer outputs for every picture under test_pics/.

    Loads pretrained generator args/auxs for style `img`, pickles the model
    bundle, then for each test image runs the generator four times with
    increasingly strong uniform noise and writes results under models/.
    Python 2 code (print statement).

    img -- style identifier used in args/auxs/model file names
    m -- number of multi-scale levels (maps of size 16*2**i per level i)
    color_ref -- if truthy, replaced by the HSV of the cropped input and
        passed to postprocess_img for color matching
    """
    generator = symbol.generator_symbol(m, 'style')
    args = mx.nd.load('args%s_style.nd'%img)
    # Seed each noise input with strong uniform noise before pickling.
    for i in range(m):
        args['znoise_%d'%i] = mx.nd.zeros([1,1,16*2**i,16*2**i], mx.gpu())
        args['znoise_%d'%i][:] = np.random.uniform(-250,250,[1,1,16*2**i,16*2**i])
    auxs = mx.nd.load('auxs%s_style.nd'%img)
    # NOTE(review): pickling in text mode 'w' — 'wb' is the safe mode for
    # pickle; confirm this only runs on Python 2 where 'w' can pass.
    with open('models/model%s.pkl'%img, 'w') as f:
        pickle.dump([args, auxs, generator], f)
    # Re-allocate noise and per-scale image inputs for the bound executor.
    for i in range(m):
        args['znoise_%d'%i] = mx.nd.zeros([1,1,16*2**i,16*2**i], mx.gpu())
        args['zim_%d'%i] = mx.nd.zeros([1,3,16*2**i, 16*2**i], mx.gpu())
    gene_executor = generator.bind(ctx=mx.gpu(), args=args, aux_states=mx.nd.load('auxs%s_style.nd'%img))
    for test_im in os.listdir('test_pics'):
        print test_im
        if color_ref:
            color_ref = cv2.cvtColor(crop_img('test_pics/%s'%test_im, 8*2**m), cv2.COLOR_RGB2HSV)
        # Feed the test image at every scale.
        for i in range(m):
            gene_executor.arg_dict['zim_%d'%i][:] = preprocess_img('test_pics/%s'%test_im, 16*2**i)
        # Four renders with noise amplitude growing as 0, 150, 300, 450.
        for ii in range(4):
            t = time.clock()
            for i in range(m):
                gene_executor.arg_dict['znoise_%d'%i][:] = np.random.uniform(-150*ii,150*ii,[1,1,16*2**i,16*2**i])
            gene_executor.forward(is_train=True)
            out = gene_executor.outputs[0].asnumpy()
            im = postprocess_img(out, color_ref)
            cv2.imwrite('models/%s_%s_%d.jpg'%(test_im.split('.')[0], img, ii), im)
def test_fullyconnected_with_type():
    """FullyConnected must agree across CPU/GPU at float32 and float64."""
    sym = mx.sym.FullyConnected(num_hidden=3, name='inner')
    combos = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
              (mx.cpu(0), np.float64), (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'inner_data': (2, 10),
                 'type_dict': {'inner_data': dt}} for dev, dt in combos]
    check_consistency(sym, ctx_list)
def test_convolution_with_type():
    """Convolution must agree across CPU/GPU at float32 and float64."""
    sym = mx.sym.Convolution(num_filter=3, kernel=(3, 3), name='conv')
    data_shape = (2, 2, 10, 10)
    ctx_list = []
    for dev in (mx.gpu(0), mx.cpu(0)):
        for dt in (np.float64, np.float32):
            ctx_list.append({'ctx': dev, 'conv_data': data_shape,
                             'type_dict': {'conv_data': dt}})
    check_consistency(sym, ctx_list)
def test_convolution_with_type():
    """Convolution consistency across devices/dtypes, NCHW vs NHWC layout."""
    np.random.seed(1234)
    nchw_sym = mx.sym.Convolution(num_filter=3, kernel=(3,3), name='conv')
    # NHWC variant: transpose data/weight in, convolve, transpose back out.
    data = mx.sym.Variable('conv_data')
    weight = mx.sym.transpose(mx.sym.Variable('conv_weight'), axes=(0,2,3,1))
    bias = mx.sym.Variable('conv_bias')
    nhwc = mx.sym.transpose(data, axes=(0,2,3,1))
    nhwc = mx.sym.Convolution(nhwc, weight, bias, layout='NHWC', num_filter=3, kernel=(3,3))
    nhwc_sym = mx.sym.transpose(nhwc, axes=(0,3,1,2), name='conv')
    # First five contexts use NCHW, last two the NHWC graph.
    sym = [nchw_sym] * 5 + [nhwc_sym] * 2
    data_shape = (2, 2, 10, 10)
    ctx_list = [
        {'ctx': mx.gpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': np.float64}},
        {'ctx': mx.gpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': np.float32}},
        {'ctx': mx.gpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': np.float16}},
        {'ctx': mx.cpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': np.float64}},
        {'ctx': mx.cpu(0), 'conv_data': data_shape, 'type_dict': {'conv_data': np.float32}},
        # NHWC entries also pin an explicit weight shape.
        {'ctx': mx.gpu(0), 'conv_data': data_shape, 'conv_weight': (3, 2, 3, 3),
         'type_dict': {'conv_data': np.float32, 'conv_weight': np.float32}},
        {'ctx': mx.gpu(0), 'conv_data': data_shape, 'conv_weight': (3, 2, 3, 3),
         'type_dict': {'conv_data': np.float16, 'conv_weight': np.float16}},
    ]
    # wider tolerance needed for true-fp16 NCHW test above
    tol = {np.dtype(np.float16): 0.5,
           np.dtype(np.float32): 1e-3,
           np.dtype(np.float64): 1e-5,
           np.dtype(np.uint8): 0,
           np.dtype(np.int32): 0}
    check_consistency(sym, ctx_list, tol=tol)
    # test ability to turn off training on bias
    check_consistency(sym, ctx_list,
                      grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'},
                      tol=tol)
def test_activation_with_type():
    """Sigmoid Activation consistency across devices and float widths."""
    sym = mx.sym.Activation(name='act', act_type='sigmoid')
    data_shape = (2, 2, 10, 10)
    ctx_list = [{'ctx': dev, 'act_data': data_shape, 'type_dict': {'act_data': dt}}
                for dev in (mx.gpu(0), mx.cpu(0))
                for dt in (np.float64, np.float32)]
    check_consistency(sym, ctx_list)
def test_elementwisesum_with_type():
    """ElementWiseSum consistency; the GPU rows also cover float16."""
    sym = mx.sym.ElementWiseSum(name="ews", num_args=2)

    def entry(dev, dt):
        # Both summands share the same shape and dtype.
        return {"ctx": dev, "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                "type_dict": {"ews_arg0": dt, "ews_arg1": dt}}

    ctx_list = [entry(mx.gpu(0), np.float64),
                entry(mx.gpu(0), np.float32),
                entry(mx.gpu(0), np.float16),
                entry(mx.cpu(0), np.float64),
                entry(mx.cpu(0), np.float32)]
    check_consistency(sym, ctx_list)
def ffbp(self, X, y): h = mx.nd.zeros(self.hshape, ctx=mx.gpu(0)) # init hidden state rnn_cache = [] for t in xrange(self.num_unroll_steps): h, rnn_cache_t = rnn_step_forward(X, h, self.params['Wx'], self.params['Wh'], self.params['b']) rnn_cache.append(rnn_cache_t) predict, affine_cache = affine_forward(h, self.params['Wa'], self.params['ba']) loss, grad = l2_loss(predict, y) daffine, dWa, dba = affine_backward(grad, affine_cache) dx = mx.nd.zeros((X.shape[0], X.shape[1]), ctx=mx.gpu(0)) dWx = mx.nd.zeros((X.shape[1], daffine.shape[1]), ctx=mx.gpu(0)) dWh = mx.nd.zeros((daffine.shape[1], daffine.shape[1]), ctx=mx.gpu(0)) db = mx.nd.zeros((daffine.shape[1],), ctx=mx.gpu(0)) dnext_h_t = daffine for t in xrange(self.num_unroll_steps): dx_t, dprev_h_t, dWx_t, dWh_t, db_t = rnn_step_backward(dnext_h_t, rnn_cache[t]) dnext_h_t = dprev_h_t dx += dx_t dWx += dWx_t dWh += dWh_t db += db_t dx.wait_to_read() dWx.wait_to_read() dWh.wait_to_read() db.wait_to_read()
def test_concat_with_type():
    """Concat consistency across devices; GPU rows also cover float16."""
    sym = mx.sym.Concat(name="concat", num_args=2)
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{"ctx": dev, "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": dt, "concat_arg1": dt}}
                for dev, dt in cases]
    check_consistency(sym, ctx_list)
def test_row_sparse_pull():
    """row_sparse_pull returns exactly the requested rows: pulled rows hold
    the initialized value (1), rows never requested stay 0."""
    kv = init_kv_with_str('row_sparse')
    kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))

    def check_row_sparse_pull(kv, count, ctx=default_context()):
        num_rows = shape[0]
        vals, row_ids = [], []
        all_row_ids = np.arange(num_rows)
        for _ in range(count):
            vals.append(mx.nd.zeros(shape, ctx=ctx).tostype('row_sparse'))
            picked = np.random.randint(num_rows, size=num_rows)
            row_ids.append(mx.nd.array(picked, dtype='int64'))
        # A single request is passed bare; multiple requests as lists.
        row_ids_to_pull = row_ids[0] if len(row_ids) == 1 else row_ids
        vals_to_pull = vals[0] if len(vals) == 1 else vals
        kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
        for val, row_id in zip(vals, row_ids):
            retained = val.asnumpy()
            excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
            for row in range(num_rows):
                expected_val = np.zeros_like(retained[row])
                if row not in excluded_row_ids:
                    expected_val += 1  # this row was pulled
                assert_almost_equal(retained[row], expected_val)

    check_row_sparse_pull(kv, 1, mx.gpu(0))
    check_row_sparse_pull(kv, 4, mx.gpu(0))
def main(args):
    """Benchmark an unrolled single-layer RNN: time forward (and optionally
    backward) passes, skipping `num_cold` warm-up iterations."""
    dev = mx.gpu(0)
    net = rnn_unroll(1, args.num_unroll_steps, args.input_size,
                     args.hidden_size, args.num_classes)
    data_shape = (args.batch_size, args.num_unroll_steps, args.input_size)
    arg_shapes, out_shapes, aux_shapes = net.infer_shape(data=data_shape)
    arg_types, out_types, aux_types = net.infer_type(data=mx.base.mx_real_t)
    arg_arrays = [mx.nd.zeros(s, dev, dtype=t) for s, t in zip(arg_shapes, arg_types)]
    # Gradient buffers for every argument except the input data.
    grad_dict = {n: mx.nd.zeros(s, dev, dtype=t)
                 for n, s, t in zip(net.list_arguments(), arg_shapes, arg_types)
                 if n != 'data'}
    executor = net.bind(ctx=dev, args=arg_arrays, args_grad=grad_dict, grad_req='write')
    for i in range(args.num_loops):
        if i == num_cold:  # start the clock once warm-up loops finish
            start = time.time()
        outputs = executor.forward()
        if args.only_forward:
            for o in outputs:
                o.wait_to_read()
            continue
        executor.backward([outputs[0]])
        # Block until gradients are computed so timing is honest.
        for grad in grad_dict.values():
            grad.wait_to_read()
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
def test_tensorrt_resnet18_feature_vect():
    """Compare plain MXNet resnet18 inference against the TensorRT backend
    in fp16 and fp32 modes; all outputs must agree within tolerance."""
    print("downloading sample input")
    input_data = get_image(url)
    # Export the Gluon model so it can be reloaded as a static symbol.
    gluon_resnet18 = vision.resnet18_v2(pretrained=True)
    gluon_resnet18.hybridize()
    gluon_resnet18.forward(input_data)
    gluon_resnet18.export(model_file_name)
    sym, arg_params, aux_params = mx.model.load_checkpoint(model_file_name, 0)
    # Baseline: run the unmodified symbol on GPU.
    executor = sym.simple_bind(ctx=mx.gpu(), data=batch_shape, grad_req='null', force_rebind=True)
    executor.copy_params_from(arg_params, aux_params)
    y = executor.forward(is_train=False, data=input_data)
    # Convert the graph to the TensorRT backend and seed its params.
    trt_sym = sym.get_backend_symbol('TensorRT')
    mx.contrib.tensorrt.init_tensorrt_params(trt_sym, arg_params, aux_params)
    original_precision_value = mx.contrib.tensorrt.get_use_fp16()
    try:
        # TensorRT fp16 path.
        mx.contrib.tensorrt.set_use_fp16(True)
        executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape, grad_req='null', force_rebind=True)
        executor.copy_params_from(arg_params, aux_params)
        y_trt = executor.forward(is_train=False, data=input_data)
        # TensorRT fp32 path.
        mx.contrib.tensorrt.set_use_fp16(False)
        executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape, grad_req='null', force_rebind=True)
        executor.copy_params_from(arg_params, aux_params)
        y_trt_fp32 = executor.forward(is_train=False, data=input_data)
        no_trt_output = y[0].asnumpy()[0]
        trt_output = y_trt[0].asnumpy()[0]
        trt_fp32_output = y_trt_fp32[0].asnumpy()[0]
        # fp16 gets a loose tolerance; fp32 must match closely.
        assert_almost_equal(no_trt_output, trt_output, 1e-1, 1e-2)
        assert_almost_equal(no_trt_output, trt_fp32_output, 1e-4, 1e-4)
    finally:
        # Restore the global fp16 flag regardless of assertion outcome.
        mx.contrib.tensorrt.set_use_fp16(original_precision_value)
def train(symbol_data, train_iterator, valid_iterator, data_column_names, target_names):
    """Train cnn model

    Parameters
    ----------
    symbol_data: symbol
    train_iterator: DataIter
        Train DataIter
    valid_iterator: DataIter
        Valid DataIter
    data_column_names: list of str
        Defaults to ('data') for a typical model used in image classification
    target_names: list of str
        Defaults to ('softmax_label') for a typical model used in image classification
    """
    devs = mx.cpu()  # default setting
    if args.gpus is not None:
        # Bug fix: the original looped over args.gpus but discarded each
        # mx.gpu(int(i)) context and then trained on mx.gpu() (device 0)
        # only. Build the full list of requested GPU contexts instead.
        devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    module = mx.mod.Module(symbol_data, data_names=data_column_names,
                           label_names=target_names, context=devs)
    module.fit(train_data=train_iterator,
               eval_data=valid_iterator,
               eval_metric='acc',
               kvstore=args.kv_store,
               optimizer=args.optimizer,
               optimizer_params={'learning_rate': args.lr},
               initializer=mx.initializer.Uniform(0.1),
               num_epoch=args.num_epochs,
               batch_end_callback=mx.callback.Speedometer(args.batch_size, args.disp_batches),
               epoch_end_callback=save_model())
def test_wrapper(*args, **kwargs):
    """Run `orig_test` on the requested GPU when usable, else on CPU."""
    try:
        # Probe GPU availability with a tiny allocation.
        mx.nd.zeros((1,), ctx=mx.gpu(gpu_id))
        ctx = mx.gpu(gpu_id)
    except Exception:
        ctx = mx.cpu()
    with ctx:
        orig_test(*args, **kwargs)
def test_upsampling_with_type():
    """Nearest-neighbour UpSampling consistency; GPU rows also cover fp16."""
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up',
                            sample_type='nearest', num_args=1)
    data_shape = (2, 2, 2, 10)
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'up_arg0': data_shape, 'type_dict': {'up_arg0': dt}}
                for dev, dt in cases]
    check_consistency(sym, ctx_list)
def test_reshape_with_type():
    """Reshape consistency for each device/dtype combination."""
    sym = mx.sym.Reshape(name='reshape', shape=(-1, 1, 1, 0))
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'reshape_data': (2, 2, 2, 10),
                 'type_dict': {'reshape_data': dt}} for dev, dt in cases]
    check_consistency(sym, ctx_list)
def test_blockgrad_with_type():
    """BlockGrad consistency for each device/dtype combination."""
    sym = mx.sym.BlockGrad(name='bg')
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'bg_data': (2, 2, 2, 10),
                 'type_dict': {'bg_data': dt}} for dev, dt in cases]
    check_consistency(sym, ctx_list)
def test_swapaxis_with_type():
    """SwapAxis consistency for each device/dtype combination."""
    sym = mx.sym.SwapAxis(name='swap', dim1=1)
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'swap_data': (2, 2, 2, 10),
                 'type_dict': {'swap_data': dt}} for dev, dt in cases]
    check_consistency(sym, ctx_list)
def get_context(args):
    """Resolve args.gpu into a list of MXNet contexts.

    Accepts None/'' (CPU), a single int, a comma-separated string such as
    "0,1", or any iterable of ids.

    Returns
    -------
    list of mx.Context (always a list, as callers expect).
    """
    if args.gpu is None or args.gpu == '':
        return [mx.cpu()]
    if isinstance(args.gpu, int):
        return [mx.gpu(args.gpu)]
    if isinstance(args.gpu, str):
        # Bug fix: iterating a string like "0,1" character-by-character made
        # int(',') raise; split on commas instead.
        return [mx.gpu(int(i)) for i in args.gpu.split(',') if i.strip()]
    # Any other iterable of ids (original behavior).
    return [mx.gpu(int(i)) for i in args.gpu]
def test_rsp_push_pull_large_rowid():
    """Pulling every row of a very large row_sparse value keeps all rows."""
    num_rows = 793470
    val = mx.nd.ones((num_rows, 1)).tostype('row_sparse').copyto(mx.gpu())
    kv = mx.kv.create('device')
    kv.init('a', val)
    out = mx.nd.zeros((num_rows, 1), stype='row_sparse').copyto(mx.gpu())
    kv.push('a', val)
    # Request the full row range in one pull.
    all_rows = mx.nd.arange(0, num_rows, dtype='int64')
    kv.row_sparse_pull('a', out=out, row_ids=all_rows)
    assert out.indices.shape[0] == num_rows
def push_zeros(kv):
    """Push all-zero values for every key from every worker device and
    verify the pulled results come back as zeros.

    Uses module-level globals: nrepeat, keys, shapes, nworker.
    """
    for _ in range(nrepeat):
        # Idiom fix: iterate keys/shapes in lockstep instead of indexing
        # with range(len(keys)).
        for key, shp in zip(keys, shapes):
            kv.push(key, [mx.nd.zeros(shp, mx.gpu(g)) for g in range(nworker)])
            # Pre-fill with ones so an unchanged buffer would fail the check.
            out = [mx.nd.ones(shp, mx.gpu(g)) for g in range(nworker)]
            kv.pull(key, out=out)
            exp = np.zeros_like(out[0].asnumpy())
            for o in out:
                assert_almost_equal(o.asnumpy(), exp)
def check_neg(kv, neg, rate, curval):
    """Push `neg` from every worker `nrepeat` times and verify the pulled
    value tracks curval + r*rate*nworker*neg after each round.

    Uses module-level globals: nrepeat, keys, shapes, nworker.
    """
    for _ in range(nrepeat):
        # Expected server value after this round's aggregated update.
        curval = curval + rate * nworker * neg
        # Idiom fix: zip keys with shapes instead of range(len(keys)) indexing.
        for key, shp in zip(keys, shapes):
            kv.push(key, [mx.nd.ones(shp, mx.gpu(g)) * neg for g in range(nworker)])
            out = [mx.nd.ones(shp, mx.gpu(g)) for g in range(nworker)]
            kv.pull(key, out=out)
            for o in out:
                check_diff_to_scalar(o, curval)
def test_embedding_with_type():
    """Embedding consistency over all device/dtype pairs (incl. fp16).

    The data gradient is disabled: embedding indices are not differentiable,
    only the weight receives gradients.
    """
    sym = mx.sym.Embedding(name='embedding', input_dim=10, output_dim=20)
    ctx_list = [{'ctx': dev, 'embedding_data': (2, 10),
                 'type_dict': {'embedding_data': dt}}
                for dev in (mx.gpu(0), mx.cpu(0))
                for dt in (np.float64, np.float32, np.float16)]
    check_consistency(sym, ctx_list,
                      grad_req={'embedding_data': 'null', 'embedding_weight': 'write'})
def test_svmoutput_with_type():
    """Linear SVMOutput consistency over all device/dtype pairs."""
    sym = mx.sym.SVMOutput(name='svmoutput', use_linear=True)
    ctx_list = [{'ctx': dev, 'svmoutput_data': (20, 10),
                 'type_dict': {'svmoutput_data': dt}}
                for dev in (mx.gpu(0), mx.cpu(0))
                for dt in (np.float64, np.float32, np.float16)]
    check_consistency(sym, ctx_list)
def test_deconvolution_with_type():
    """Deconvolution consistency; also exercised with grad_req='add'."""
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), name='deconv')
    cases = [(mx.gpu(0), np.float64), (mx.gpu(0), np.float32),
             (mx.gpu(0), np.float16), (mx.cpu(0), np.float64),
             (mx.cpu(0), np.float32)]
    ctx_list = [{'ctx': dev, 'deconv_data': (2, 2, 10, 10),
                 'type_dict': {'deconv_data': dt}} for dev, dt in cases]
    check_consistency(sym, ctx_list)
    # Gradient accumulation path must match too.
    check_consistency(sym, ctx_list, grad_req="add")
def test_upsampling_with_type():
    """Nearest-neighbour UpSampling: CPU/GPU agreement per dtype."""
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name="up",
                            sample_type="nearest", num_args=1)

    def case(dev, dt):
        return {"ctx": dev, "up_arg0": (2, 2, 2, 10), "type_dict": {"up_arg0": dt}}

    ctx_list = [case(mx.gpu(0), np.float64), case(mx.gpu(0), np.float32),
                case(mx.gpu(0), np.float16), case(mx.cpu(0), np.float64),
                case(mx.cpu(0), np.float32)]
    check_consistency(sym, ctx_list)
def test_deconvolution_with_type():
    """Deconvolution: CPU/GPU agreement per dtype (fp16 on GPU only)."""
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), name="deconv")
    gpu_dtypes = (np.float64, np.float32, np.float16)
    cpu_dtypes = (np.float64, np.float32)
    ctx_list = [{"ctx": mx.gpu(0), "deconv_data": (2, 2, 10, 10),
                 "type_dict": {"deconv_data": dt}} for dt in gpu_dtypes]
    ctx_list += [{"ctx": mx.cpu(0), "deconv_data": (2, 2, 10, 10),
                  "type_dict": {"deconv_data": dt}} for dt in cpu_dtypes]
    check_consistency(sym, ctx_list)
def train(channel_input_dirs, hyperparameters, hosts, num_gpus, **kwargs):
    """SageMaker training entry point: train the Gluon MNIST network.

    channel_input_dirs -- dict of SageMaker data channels; 'training' must
        contain train/ and test/ subdirectories
    hyperparameters -- dict of optional overrides (batch_size, epochs,
        learning_rate, momentum, log_interval)
    hosts -- list of training hosts; more than one selects a distributed
        kvstore
    num_gpus -- number of GPUs available in the container

    Returns the trained network.
    """
    # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to
    # the current container environment, but here we just use simple cpu context.
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 100)
    epochs = hyperparameters.get('epochs', 10)
    learning_rate = hyperparameters.get('learning_rate', 0.1)
    momentum = hyperparameters.get('momentum', 0.9)
    log_interval = hyperparameters.get('log_interval', 100)
    # load training and validation data
    # we use the gluon.data.vision.MNIST class because of its built in mnist pre-processing logic,
    # but point it at the location where SageMaker placed the data files, so it doesn't download them again.
    training_dir = channel_input_dirs['training']
    train_data = get_train_data(training_dir + '/train', batch_size)
    val_data = get_val_data(training_dir + '/test', batch_size)
    # define the network
    net = define_network()
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    # Multi-host jobs need a distributed kvstore; a single host stays local.
    if len(hosts) == 1:
        kvstore = 'device' if num_gpus > 0 else 'local'
    else:
        kvstore = 'dist_device_sync' if num_gpus > 0 else 'dist_sync'
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate, 'momentum': momentum},
                            kvstore=kvstore)
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    for epoch in range(epochs):
        # reset data iterator and metric at begining of epoch.
        metric.reset()
        btic = time.time()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                L = loss(output, label)
                L.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update metric at last.
            metric.update([label], [output])
            if i % log_interval == 0 and i > 0:
                name, acc = metric.get()
                print('[Epoch %d Batch %d] Training: %s=%f, %f samples/s' %
                      (epoch, i, name, acc, batch_size / (time.time() - btic)))
                btic = time.time()
        name, acc = metric.get()
        print('[Epoch %d] Training: %s=%f' % (epoch, name, acc))
        name, val_acc = test(ctx, net, val_data)
        print('[Epoch %d] Validation: %s=%f' % (epoch, name, val_acc))
    return net
def main():
    """Train ResNet-50 (v1) on RecordIO image data with a 'device' kvstore,
    resuming from a saved checkpoint or fine-tuning from a pretrained one.

    Reads its configuration from module-level `args` (argparse namespace)
    and constants MEAN_COLOR, ResNetV1, ModuleEXT, multi_factor_scheduler.
    """
    begin_epoch = args.model_load_epoch if args.model_load_epoch else 0
    kv = mx.kvstore.create("device")
    # Per-worker epoch size in batches; at least 1.
    epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1)
    resnetv1 = ResNetV1()
    sym = resnetv1.get_resnet50(num_classes=args.num_classes)
    devs = mx.cpu() if len(
        args.gpus) == 0 else [mx.gpu(int(i)) for i in args.gpus.split(",")]
    print("Using Device {}".format(devs))
    # Training iterator: shuffled and heavily augmented (random crop/scale/
    # aspect, HSL jitter, rotation, shear, mirroring), sharded per worker.
    train_iter = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_path, "train_shuffle.rec"),
        path_imglist=os.path.join(args.data_path, "train_shuffle.lst"),
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 224, 224),
        batch_size=args.batch_size,
        rand_crop=True,
        max_random_scale=288.0 / 256.0,
        min_random_scale=224.0 / 256.0,
        max_aspect_ratio=0.25,
        mean_r=MEAN_COLOR[0],
        mean_g=MEAN_COLOR[1],
        mean_b=MEAN_COLOR[2],
        random_h=20,
        random_s=40,
        random_l=50,
        max_rotate_angle=10,
        max_shear_ratio=0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    # Validation iterator: deterministic (no augmentation or shuffling).
    val_iter = mx.io.ImageRecordIter(
        path_imgrec=os.path.join(args.data_path, "val.rec"),
        path_imglist=os.path.join(args.data_path, "val.lst"),
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 224, 224),
        batch_size=args.batch_size,
        rand_crop=False,
        mean_r=MEAN_COLOR[0],
        mean_g=MEAN_COLOR[1],
        mean_b=MEAN_COLOR[2],
        rand_mirror=False,
        shuffle=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    model = ModuleEXT(
        context=devs,
        symbol=sym,
        data_names=("data", ),
        label_names=("softmax_label", ),
    )
    arg_params = None
    aux_params = None
    if begin_epoch == 0 and args.finetune:
        # Fresh fine-tune: start from the pretrained checkpoint.
        arg_params, aux_params = load_checkpoint(args.pretrain_prefix, args.pretrain_epoch)
    else:
        # Load Params
        _, arg_params, aux_params = mx.model.load_checkpoint(
            args.model_prefix, begin_epoch)
    initializer = {
        "default": mx.init.MSRAPrelu(factor_type='in', slope=0),
        "xavier": mx.init.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
    }
    # NAG optimizer with a multi-factor LR schedule stepping at 30/60/90.
    optimizer = mx.optimizer.NAG(learning_rate=args.lr, momentum=args.mom, wd=args.wd,
                                 lr_scheduler=multi_factor_scheduler(
                                     begin_epoch, epoch_size, step=[30, 60, 90], factor=0.1),
                                 rescale_grad=1.0 / args.batch_size,
                                 sym=sym)
    # Checkpoint every epoch, including optimizer state for exact resume.
    checkpoint = mx.callback.module_checkpoint(model, args.model_prefix,
                                               save_optimizer_states=True)
    print("Start to fit the model")
    model.fit(train_data=train_iter,
              eval_data=val_iter,
              eval_metric=[
                  mx.metric.CrossEntropy(),
                  mx.metric.Accuracy(),
                  mx.metric.TopKAccuracy(5)
              ],
              begin_epoch=begin_epoch,
              num_epoch=args.num_epoch,
              optimizer=optimizer,
              arg_params=arg_params,
              aux_params=aux_params,
              initializer=initializer,
              allow_missing=False,
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(
                  args.batch_size, args.frequent),
              epoch_end_callback=checkpoint)
# out = [] # for i in range(len(outputs_forward[-1])): # # out[i, :, :] = self.decoder(mx.nd.concat(outputs_forward[-1][0], outputs_backward[-1][0], dim=1)) # out.append(self.decoder(mx.nd.concat(outputs_forward[-1][0], outputs_backward[-1][0], dim=1))) # out2 = mx.nd.stack(*out) forward_out = self.decoder(outputs_forward[-1]) backward_out = self.decoder(outputs_backward[-1]) return (forward_out, backward_out), (states_forward, states_backward) ############################################################################### # Load data ############################################################################### context = [mx.cpu()] if args.gpus is None or args.gpus == "" else \ [mx.gpu(int(i)) for i in args.gpus.split(',')] args.batch_size *= len(context) dataset_name = 'wikitext-2' train_dataset, val_dataset, test_dataset = [nlp.data.WikiText2(segment=segment, bos=None, eos='<eos>', skip_empty=False) for segment in ['train', 'val', 'test']] vocab = nlp.Vocab(nlp.data.Counter(train_dataset[0]), padding_token=None, bos_token=None) train_data, val_data, test_data = [x.bptt_batchify(vocab, args.bptt, args.batch_size, last_batch='keep') for x in [train_dataset, val_dataset, test_dataset]]
dest='force_nms', type=bool, default=True, help='force non-maximum suppression on different class') parser.add_argument('--timer', dest='show_timer', type=bool, default=True, help='show detection time') args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() if args.cpu: ctx = mx.cpu() else: ctx = mx.gpu(args.gpu_id) # parse image list image_list = [i.strip() for i in args.images.split(',')] assert len(image_list) > 0, "No valid image specified to detect" detector = get_detector(args.network, args.prefix, args.epoch, args.data_shape, (args.mean_r, args.mean_g, args.mean_b), ctx, args.nms_thresh, args.force_nms) # run detection detector.detect_and_visualize(image_list, args.dir, args.extension, CLASSES, args.thresh, args.show_timer)
def main(docopts):
    """Train, continue training, or sample from the VAE, driven by docopt flags.

    Expects string-valued CLI options in ``docopts`` (coerced to numbers below)
    and the boolean command flags ``train``, ``continue`` and ``test``.
    """
    # Coerce docopt's string values in place.
    docopts["--batch_size"] = int(docopts["--batch_size"])
    docopts["--gpu"] = int(docopts["--gpu"])
    docopts["--lambda_l2_reg"] = float(docopts["--lambda_l2_reg"])
    docopts["--learning_rate"] = float(docopts["--learning_rate"])
    docopts["--max_epochs"] = int(docopts["--max_epochs"])

    # Logging
    logging.basicConfig(level=logging.INFO)

    #
    # Following http://nbviewer.jupyter.org/github/dmlc/mxnet/blob/master/example/notebooks/simple_bind.ipynb
    #
    X, Y = data.get_mnist()
    iter = mx.io.NDArrayIter(data=X,
                             label=Y,
                             batch_size=docopts["--batch_size"],
                             shuffle=True)

    if docopts["train"] or docopts["continue"]:
        m = vae.VAE(ARCHITECTURE)
        sym = m.training_model()
        # Bind using the shape of the first batch.
        dbatch = iter.next()
        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]),
                              data = dbatch.data[0].shape)
        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))
        if docopts["continue"]:
            # Restore previously saved parameters (everything except 'data').
            loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
            for name in args:
                if name != "data":
                    args[name][:] = loaded_args[name]

        # Initialize parameters
        # NOTE(review): this runs even on "continue", overwriting the
        # parameters restored just above — confirm whether intentional.
        xavier = mx.init.Xavier()
        for name, nd_array in args.items():
            if name != "data":
                xavier(name, nd_array)

        optimizer = mx.optimizer.create(name="adam",
                                        learning_rate=docopts["--learning_rate"],
                                        wd=docopts["--lambda_l2_reg"])
        updater = mx.optimizer.get_updater(optimizer)

        # Train
        keys = sym.list_arguments()
        # NOTE(review): this second optimizer is never used — `updater` above
        # already wraps the configured Adam; this rebinding is dead.
        optimizer = mx.optimizer.Adam()
        if docopts["--visualize"]:
            # Random image
            last_image_time = time.time()
            plt.ion()
            figure = plt.figure()
            imshow = plt.imshow(np.random.uniform(size=(28,28)), cmap="gray")
        for epoch in range(docopts["--max_epochs"]):
            iter.reset()
            epoch_start_time = time.time()
            batch = 0
            for dbatch in iter:
                args["data"][:] = dbatch.data[0]
                exe.forward(is_train=True)
                exe.backward()
                if docopts["--visualize"]:
                    # Throttle refresh ratio
                    if time.time() - last_image_time > 0.1:
                        last_image_time = time.time()
                        imshow.set_data(exe.outputs[2][
                            random.randint(0, docopts["--batch_size"])].reshape(
                                (28,28)).asnumpy())
                        figure.canvas.draw()
                        figure.canvas.flush_events()
                # SGD update for every argument array.
                for index, key in enumerate(keys):
                    updater(index=index, grad=grads[key], weight=args[key])
                kl_divergence = exe.outputs[3].asnumpy()
                cross_entropy = exe.outputs[4].asnumpy()
                logging.info("Batch %d: %f mean kl_divergence", batch,
                             kl_divergence.mean())
                logging.info("Batch %d: %f mean cross_entropy", batch,
                             cross_entropy.mean())
                batch += 1
            logging.info("Finish training epoch %d in %f seconds", epoch,
                         time.time() - epoch_start_time)

        # Save model parameters (including data, to simplify loading / binding)
        mx.nd.save(os.path.join(docopts["--log"], "parameters"),
                   {x[0]: x[1] for x in args.items() if x[0] != "data"})
    elif docopts["test"]:
        from matplotlib.widgets import Button
        m = vae.VAE(ARCHITECTURE)
        sym = m.testing_model()
        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]),
                              data=(docopts["--batch_size"], ARCHITECTURE[-1]))
        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))
        loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
        for name in args:
            if name != "data":
                args[name][:] = loaded_args[name]
        # Feed random latent codes through the decoder.
        args["data"][:] = np.random.randn(docopts["--batch_size"],
                                          ARCHITECTURE[-1])
        exe.forward(is_train=True)
        # testing_model has only 1 output
        batch = exe.outputs[0].asnumpy().reshape(-1, 28, 28)
        np.save(os.path.join(docopts["--log"], "output"), batch)
        # Simple next/previous browser over the generated images.
        imshow = plt.imshow(batch[0], cmap="gray")
        callback = Index(imshow, batch)
        axnext = plt.axes([0.8, 0.7, 0.1, 0.075])
        axprev = plt.axes([0.8, 0.6, 0.1, 0.075])
        next_button = Button(axnext, 'Next')
        next_button.on_clicked(callback.next)
        prev_button = Button(axprev, 'Previous')
        prev_button.on_clicked(callback.prev)
        plt.show()
        plt.waitforbuttonpress()
# NOTE(review): fragment — the argparse parser `ap` is built above this chunk.
args = vars(ap.parse_args())

# NOTE(review): pickle.loads on a config-specified file — only safe when the
# label-encoder file is trusted.
le = pickle.loads(open(config.LABEL_ENCODER_PATH, "rb").read())
testIter = mx.io.ImageRecordIter(path_imgrec=config.TEST_MX_REC,
                                 data_shape=(3, 224, 224),
                                 batch_size=config.BATCH_SIZE,
                                 mean_r=config.R_MEAN,
                                 mean_g=config.G_MEAN,
                                 mean_b=config.B_MEAN)

print("[INFO] Loading pre-trained model")
checkpointsPath = os.path.sep.join([args["checkpoint"], args["prefix"]])
(symbol, argParams, auxParams) = mx.model.load_checkpoint(checkpointsPath,
                                                          args["epoch"])
model = mx.mod.Module(symbol=symbol, context=[mx.gpu(0)])
model.bind(data_shapes=testIter.provide_data,
           label_shapes=testIter.provide_label)
model.set_params(argParams, auxParams)

print("[INFO] evaluating model...")
predictions = []
targets = []
# Collect per-sample class scores and integer labels batch by batch.
for (preds, _, batch) in model.iter_predict(testIter):
    preds = preds[0].asnumpy()
    labels = batch.label[0].asnumpy().astype("int")
    predictions.extend(preds)
    targets.extend(labels)
# The last batch may be padded; trim labels to the number of predictions.
targets = targets[:len(predictions)]
(rank1, rank5) = rank5_accuracy(predictions, targets)
# Train ShuffleNet on MNIST. The 1-channel MNIST images are replicated to
# 3 channels so they match the network's RGB input.
logging.getLogger().setLevel(logging.INFO)

mnist = mx.test_utils.get_mnist()
# FIX: was a Python 2 print statement (`print mnist[...]`), a syntax error
# under Python 3; print() works on both.
print(mnist['train_data'].shape)

batch_size = 500
# Stack the grayscale channel three times along the channel axis (axis=1,
# NCHW layout).
train_data = np.concatenate(
    (mnist['train_data'], mnist['train_data'], mnist['train_data']), axis=1)
val_data = np.concatenate(
    (mnist['test_data'], mnist['test_data'], mnist['test_data']), axis=1)
train_iter = mx.io.NDArrayIter(train_data, mnist['train_label'], batch_size,
                               shuffle=True)
val_iter = mx.io.NDArrayIter(val_data, mnist['test_label'], batch_size)

shufflenet = get_shufflenet()
# Data-parallel training across two GPUs.
shufflenet_mod = mx.mod.Module(symbol=shufflenet,
                               context=[mx.gpu(0), mx.gpu(1)])
shufflenet_mod.fit(train_iter,
                   eval_data=val_iter,
                   optimizer='sgd',
                   optimizer_params={'learning_rate': 0.01},
                   eval_metric='acc',
                   batch_end_callback=mx.callback.Speedometer(batch_size, 20),
                   num_epoch=10)
import mxnet as mx
import gzip, logging
import pdb, pickle
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd
import numpy as np

# Demo hyper-parameters / globals.
batchSize = 100
classNum = 10
ctx = mx.gpu()
verbose = False

# Raw MNIST iterators; expects the idx files under data/.
trainIter = mx.io.MNISTIter(batch_size=batchSize,
                            image='data/train-images-idx3-ubyte',
                            label='data/train-labels-idx1-ubyte')
validIter = mx.io.MNISTIter(batch_size=batchSize,
                            image='data/t10k-images-idx3-ubyte',
                            label='data/t10k-labels-idx1-ubyte')


class DEMONET(nn.HybridBlock):
    # Small MLP demo network (a conv layer is left commented out below).
    # NOTE(review): `outputNum` is not used in the visible part of __init__.
    def __init__(self, outputNum, verbose=False, **kwargs):
        super(DEMONET, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            self.flatten = nn.Flatten()
            self.fc1 = nn.Dense(512)
            self.fc2 = nn.Dense(256)
            #self.conv1 = nn.Conv2D(channels=10,kernel_size=3,strides=2,padding=1)
def train_net(args):
    """Build and fit the face-recognition model described by ``args``.

    Python 2 code (uses ``xrange``). Side effects: creates the checkpoint
    directory, mutates ``args`` in place, and communicates the margin-loss
    beta value to the symbol code through the BETA environment variable.
    """
    # One context per GPU listed in CUDA_VISIBLE_DEVICES; CPU fallback.
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in xrange(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    # e.g. network 'r100' -> 100 layers.
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size == 0:
        args.per_batch_size = 128
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = 3

    os.environ['BETA'] = str(args.beta)
    data_dir_list = args.data_dir.split(',')
    assert len(data_dir_list) == 1
    data_dir = data_dir_list[0]
    path_imgrec = None
    path_imglist = None
    # Dataset property file supplies class count and image size.
    prop = face_image.load_property(data_dir)
    args.num_classes = prop.num_classes
    image_size = prop.image_size
    args.image_h = image_size[0]
    args.image_w = image_size[1]
    print('image_size', image_size)
    assert (args.num_classes > 0)
    print('num_classes', args.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    if args.loss_type == 1 and args.num_classes > 20000:
        args.beta_freeze = 5000
        args.gamma = 0.06

    print('Called with argument:', args)
    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    else:
        # Resume from "<prefix>,<epoch>".
        vec = args.pretrained.split(',')
        print('loading', vec)
        _, arg_params, aux_params = mx.model.load_checkpoint(
            vec[0], int(vec[1]))
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
    if args.network[0] == 's':
        data_shape_dict = {'data': (args.per_batch_size, ) + data_shape}
        spherenet.init_weights(sym, data_shape_dict, args.num_layers)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
    )
    val_dataiter = None

    train_dataiter = FaceImageIter(
        batch_size=args.batch_size,
        data_shape=data_shape,
        path_imgrec=path_imgrec,
        shuffle=True,
        rand_mirror=args.rand_mirror,
        mean=mean,
        cutoff=args.cutoff,
    )

    if args.loss_type < 10:
        _metric = AccMetric()
    else:
        _metric = LossValueMetric()
    eval_metrics = [mx.metric.create(_metric)]

    # Initializer chosen by backbone family (first letter of the network name).
    if args.network[0] == 'r' or args.network[0] == 'y':
        initializer = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type="out",
                                     magnitude=2)  #resnet style
    elif args.network[0] == 'i' or args.network[0] == 'x':
        initializer = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type="in",
                                     magnitude=2)  #inception
    else:
        initializer = mx.init.Xavier(rnd_type='uniform',
                                     factor_type="in",
                                     magnitude=2)
    # Average gradients over devices.
    _rescale = 1.0 / args.ctx_num
    opt = optimizer.SGD(learning_rate=base_lr,
                        momentum=base_mom,
                        wd=base_wd,
                        rescale_grad=_rescale)
    som = 20
    _cb = mx.callback.Speedometer(args.batch_size, som)

    # Load verification sets (e.g. lfw.bin) that exist under the data dir.
    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        # Run every loaded verification set; returns the flip-accuracy list.
        results = []
        for i in xrange(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
                ver_list[i], model, args.batch_size, 10, None, None)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' %
                  (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in xrange(len(ver_list)):
    #  highest_acc.append(0.0)
    # Single-element lists so the nested callback can mutate these counters.
    global_step = [0]
    save_step = [0]
    if len(args.lr_steps) == 0:
        lr_steps = [40000, 60000, 80000]
        if args.loss_type >= 1 and args.loss_type <= 7:
            lr_steps = [100000, 140000, 160000]
        # Scale the default steps by the ratio of the reference batch (512)
        # to the actual batch size.
        p = 512.0 / args.batch_size
        for l in xrange(len(lr_steps)):
            lr_steps[l] = int(lr_steps[l] * p)
    else:
        lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        # Step-decay the learning rate at the configured batch counts.
        for _lr in lr_steps:
            if mbatch == args.beta_freeze + _lr:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break

        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

        # Periodic verification + conditional checkpointing.
        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            if len(acc_list) > 0:
                lfw_score = acc_list[0]
                if lfw_score > highest_acc[0]:
                    highest_acc[0] = lfw_score
                    if lfw_score >= 0.998:
                        do_save = True
                if acc_list[-1] >= highest_acc[-1]:
                    highest_acc[-1] = acc_list[-1]
                    if lfw_score >= 0.99:
                        do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt > 1:
                do_save = True
            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                mx.model.save_checkpoint(prefix, msave, model.symbol, arg,
                                         aux)
            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        # Anneal beta after the freeze period and publish it via the env var.
        if mbatch <= args.beta_freeze:
            _beta = args.beta
        else:
            move = max(0, mbatch - args.beta_freeze)
            _beta = max(
                args.beta_min,
                args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power))
        #print('beta', _beta)
        os.environ['BETA'] = str(_beta)
        if args.max_steps > 0 and mbatch > args.max_steps:
            sys.exit(0)

    epoch_cb = None

    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=end_epoch,
        eval_data=val_dataiter,
        eval_metric=eval_metrics,
        kvstore='device',
        optimizer=opt,
        #optimizer_params = optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
# NOTE(review): fragment — `args` parsing and the surrounding scope start
# above this chunk; the sync-bn branch continues below it.
if args.amp:
    amp.init()

if args.horovod:
    if hvd is None:
        raise SystemExit(
            "Horovod not found, please check if you installed it correctly."
        )
    hvd.init()

# fix seed for mxnet, numpy and python builtin random generator.
gutils.random.seed(args.seed)

# training contexts
if args.horovod:
    # One context per process under Horovod.
    ctx = [mx.gpu(hvd.local_rank())]
else:
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]

# network
net_name = '_'.join(('yolo3', args.network, args.dataset))
args.save_prefix += net_name
# use sync bn if specified
if args.syncbn and len(ctx) > 1:
    net = get_model(net_name,
                    pretrained_base=True,
                    norm_layer=gluon.contrib.nn.SyncBatchNorm,
                    norm_kwargs={'num_devices': len(ctx)})
    async_net = get_model(net_name, pretrained_base=False)  # used by cpu worker
from training.training_src.networks.utils import sort_states_for_snake_id
import numpy as np
import mxnet as mx
from collections import namedtuple

# Prefer a GPU when MXNet can see one.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()


def remove_borders_from_state(state, map_size):
    '''
    Helper function to remove the -1 borders from the state representation
    '''
    # assumes state is (H, W, C) with a -1 border of width map_size/2 on
    # each side — TODO confirm against the caller
    if -1 in state:
        y, x = map_size
        return state[int(y / 2):-int(y / 2), int(x / 2):-int(x / 2), :]
    else:
        return state


def convert_food_maxtrix_to_list(in_array):
    '''
    Helper function that converts a food matrix into a list of coordinates
    containing food

    Parameters:
    ----------
    in_array: np.array of size [map_size[0], map_size[1], :]

    Return:
    -------
    food: [{"x": int, "y": int}]
# --- detection / model configuration constants ---
spatial_scale = 0.5
num_class = 11
num_cls = 10
channel_len = 64
# Length of the assembled feature vector: 3136 base features + 8 extra
# + per-class terms.
feature_len = 3136 + 8 + num_cls * (num_cls + 1) + num_cls * channel_len
label_len = 6

# Checkpoint prefixes and epochs for the RPN and the fine-tuned model.
rpn_prefix = "model_vgg16/VGG16"
finetune_prefix = "model_finetune/finetune"
rpn_epoch = 813001
finetune_epoch = 201

logger = logging.getLogger()
logger.setLevel(logging.INFO)

ctx = mx.gpu(2)
# Per-channel RGB mean used for input normalization (values match the
# commonly used ImageNet means).
rgb_mean = np.array([123.68, 116.779, 103.939])

file_class = "file/class.txt"
file_mface = "file/model3d.txt"
path_dataset = "/home/yunzhu/face/AFW/testimages/"

MAX_FACE_NUM = 50
# Proposal filtering / NMS thresholds.
num_proposal = 300
nms_iou = 0.5
nms_ios = 0.6
iou_self_threshold = 0.8
keep_ratio = 0.6666667
    def __init__(self,
                 prefix,
                 epoch,
                 ctx_id=0,
                 network='net3',
                 nms=0.4,
                 nocrop=False,
                 decay4=0.5,
                 vote=False):
        """Load a face-detection checkpoint and build the FPN anchor config.

        Parameters
        ----------
        prefix, epoch : checkpoint identifier passed to mx.model.load_checkpoint.
        ctx_id : GPU id; a negative value selects the CPU (and CPU NMS).
        network : selects strides/ratios preset ('net3', 'net5' = retinaface, ...).
        nms : NMS overlap threshold.
        """
        self.ctx_id = ctx_id
        self.network = network
        self.decay4 = decay4
        self.nms_threshold = nms
        self.vote = vote
        self.nocrop = nocrop
        self.debug = False
        self.fpn_keys = []
        self.anchor_cfg = None
        pixel_means = [0.0, 0.0, 0.0]
        pixel_stds = [1.0, 1.0, 1.0]
        pixel_scale = 1.0
        self.preprocess = False
        _ratio = (1., )
        # fmc = number of FPN levels; chosen from the network preset below.
        fmc = 3
        if network == 'ssh' or network == 'vgg':
            pixel_means = [103.939, 116.779, 123.68]
            self.preprocess = True
        elif network == 'net3':
            _ratio = (1., )
        elif network == 'net3a':
            _ratio = (1., 1.5)
        elif network == 'net6':  #like pyramidbox or s3fd
            fmc = 6
        elif network == 'net5':  #retinaface
            fmc = 5
        elif network == 'net5a':
            fmc = 5
            _ratio = (1., 1.5)
        elif network == 'net4':
            fmc = 4
        elif network == 'net4a':
            fmc = 4
            _ratio = (1., 1.5)
        else:
            assert False, 'network setting error %s' % network

        # Per-stride anchor configuration for the chosen pyramid depth.
        if fmc == 3:
            self._feat_stride_fpn = [32, 16, 8]
            self.anchor_cfg = {
                '32': {
                    'SCALES': (32, 16),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (8, 4),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 4:
            self._feat_stride_fpn = [32, 16, 8, 4]
            self.anchor_cfg = {
                '32': {
                    'SCALES': (32, 16),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (8, 4),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '4': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 6:
            self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
            self.anchor_cfg = {
                '128': {
                    'SCALES': (32, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '64': {
                    'SCALES': (16, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '32': {
                    'SCALES': (8, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (4, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '4': {
                    'SCALES': (1, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 5:
            # 3 scales per stride on a 2^(1/3) geometric ladder, increasing
            # with the stride.
            self._feat_stride_fpn = [64, 32, 16, 8, 4]
            self.anchor_cfg = {}
            _ass = 2.0**(1.0 / 3)
            _basescale = 1.0
            for _stride in [4, 8, 16, 32, 64]:
                key = str(_stride)
                value = {
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                }
                scales = []
                for _ in range(3):
                    scales.append(_basescale)
                    _basescale *= _ass
                value['SCALES'] = tuple(scales)
                self.anchor_cfg[key] = value

        print(self._feat_stride_fpn, self.anchor_cfg)
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
        dense_anchor = False
        #self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        self._anchors_fpn = dict(
            zip(
                self.fpn_keys,
                generate_anchors_fpn(dense_anchor=dense_anchor,
                                     cfg=self.anchor_cfg)))
        for k in self._anchors_fpn:
            v = self._anchors_fpn[k].astype(np.float32)
            self._anchors_fpn[k] = v
        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        #self._bbox_pred = nonlinear_pred
        #self._landmark_pred = landmark_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        if self.ctx_id >= 0:
            self.ctx = mx.gpu(self.ctx_id)
            self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        else:
            self.ctx = mx.cpu()
            self.nms = cpu_nms_wrapper(self.nms_threshold)
        self.pixel_means = np.array(pixel_means, dtype=np.float32)
        self.pixel_stds = np.array(pixel_stds, dtype=np.float32)
        self.pixel_scale = float(pixel_scale)
        print('means', self.pixel_means)
        # 3 outputs per stride means the checkpoint also predicts landmarks.
        self.use_landmarks = False
        if len(sym) // len(self._feat_stride_fpn) == 3:
            self.use_landmarks = True
        print('use_landmarks', self.use_landmarks)

        if self.debug:
            c = len(sym) // len(self._feat_stride_fpn)
            sym = sym[(c * 0):]
            self._feat_stride_fpn = [32, 16, 8]
            print('sym size:', len(sym))

        # Bind the module for inference at a fixed 640x640 input.
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym,
                                   context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
# NOTE(review): fragment — `args` is parsed above this chunk; this section
# unpacks the CLI options into module-level names.
model_name = args.bert_model
dataset_name = args.bert_dataset
only_predict = args.only_predict
model_parameters = args.model_parameters
pretrained_bert_parameters = args.pretrained_bert_parameters
if pretrained_bert_parameters and model_parameters:
    raise ValueError('Cannot provide both pre-trained BERT parameters and '
                     'BertForQA model parameters.')
lower = args.uncased

epochs = args.epochs
batch_size = args.batch_size
test_batch_size = args.test_batch_size
lr = args.lr
ctx = mx.cpu() if args.gpu is None else mx.gpu(args.gpu)

accumulate = args.accumulate
# With gradient accumulation, log every `log_interval` effective batches.
log_interval = args.log_interval * accumulate if accumulate else args.log_interval
if accumulate:
    log.info('Using gradient accumulation. Effective batch size = {}'.format(
        accumulate * batch_size))

optimizer = args.optimizer
warmup_ratio = args.warmup_ratio
version_2 = args.version_2
null_score_diff_threshold = args.null_score_diff_threshold
max_seq_length = args.max_seq_length
doc_stride = args.doc_stride
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
import dataloader
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from datetime import datetime

#Set Contexts
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
data_ctx = ctx
model_ctx = ctx

#load the data
num_inputs = 784
batch_size = 64
num_instances = 60000
data = dataloader.DataLoader()
train_data, train_labels = data.load_data()
test_data, test_labels = data.load_data(mode = 'test')

# 70/30 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(train_data,
                                                  train_labels,
                                                  test_size=0.30,
                                                  random_state=42)

# Pair each training sample with its label.
# NOTE(review): the loop variable `data` shadows the DataLoader instance
# created above — confirm that nothing later relies on it.
train_data = []
for index, data in enumerate(X_train):
    temp = y_train[index]
    train_data.append((data, temp))
num_instances = len(train_data)

val_data = []
#!/usr/bin/env python import mxnet as mx print(mx.test_utils.list_gpus()) print(mx.gpu()) a = mx.nd.ones((2, 3), mx.gpu()) b = a * 2 + 1 print(b)
def prepare(self, ctx_id, nms=0.4, fix_image_size=None): pos = self.param_file.rfind('-') prefix = self.param_file[0:pos] pos2 = self.param_file.rfind('.') epoch = int(self.param_file[pos + 1:pos2]) sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) if ctx_id >= 0: ctx = mx.gpu(ctx_id) else: ctx = mx.cpu() model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) if fix_image_size is not None: data_shape = (1, 3) + fix_image_size else: data_shape = (1, 3) + self.default_image_size model.bind(data_shapes=[('data', data_shape)]) model.set_params(arg_params, aux_params) #warmup data = mx.nd.zeros(shape=data_shape) db = mx.io.DataBatch(data=(data, )) model.forward(db, is_train=False) out = model.get_outputs()[0].asnumpy() self.model = model self.nms_threshold = nms _ratio = (1., ) fmc = 3 if self.rac == 'net3': _ratio = (1., ) elif network == 'net5': #retinaface fmc = 5 else: assert False, 'rac setting error %s' % self.rac if fmc == 3: self._feat_stride_fpn = [32, 16, 8] self.anchor_cfg = { '32': { 'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999 }, '16': { 'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999 }, '8': { 'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999 }, } elif fmc == 5: self._feat_stride_fpn = [64, 32, 16, 8, 4] self.anchor_cfg = {} _ass = 2.0**(1.0 / 3) _basescale = 1.0 for _stride in [4, 8, 16, 32, 64]: key = str(_stride) value = { 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999 } scales = [] for _ in range(3): scales.append(_basescale) _basescale *= _ass value['SCALES'] = tuple(scales) self.anchor_cfg[key] = value print(self._feat_stride_fpn, self.anchor_cfg) self.use_landmarks = False if len(sym) // len(self._feat_stride_fpn) == 3: self.use_landmarks = True print('use_landmarks', self.use_landmarks) self.fpn_keys = [] for s in self._feat_stride_fpn: self.fpn_keys.append('stride%s' % s) self._anchors_fpn = dict( zip(self.fpn_keys, 
generate_anchors_fpn(cfg=self.anchor_cfg))) for k in self._anchors_fpn: v = self._anchors_fpn[k].astype(np.float32) self._anchors_fpn[k] = v self.anchor_plane_cache = {} if fix_image_size is None: self.anchor_plane_cache = None self._num_anchors = dict( zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
def main():
    """Evaluate a video-classification model on the configured validation set."""
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get model
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # FIX: was `opt.resume_params is not ''` — identity comparison with a
    # string literal (SyntaxWarning on CPython >= 3.8 and unreliable by
    # design); use equality instead.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # get data: pick the evaluation transform (ten-crop / three-crop / plain).
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

    if opt.dataset == 'ucf101':
        val_dataset = ucf101.classification.UCF101(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = kinetics400.classification.Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            test_mode=True,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        prefetch=int(opt.prefetch_ratio * num_workers),
        batchify_fn=tsn_mp_batchify_fn,
        last_batch='discard')
    print('Load %d test samples.' % len(val_dataset))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
# NOTE(review): fragment — the parser construction and enclosing function
# begin above this chunk.
    parser.add_argument('--frequent', dest="frequent",
                        help="frequency of logging", default=20, type=int)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    # choose ctx
    if args.cpu:
        ctx = mx.cpu()
    else:
        ctx = [mx.gpu(int(i)) for i in args.gpu_id.split(',')]
    # parse # classes and class_names if applicable
    num_class = args.num_class
    if len(args.class_names) > 0:
        if os.path.isfile(args.class_names):
            # try to open it to read class names
            with open(args.class_names, 'r') as f:
                class_names = [l.strip() for l in f.readlines()]
        else:
            # Otherwise treat the value as a comma-separated list of names.
            class_names = [c.strip() for c in args.class_names.split(',')]
        assert len(class_names) == num_class
        for name in class_names:
            assert len(name) > 0
    else:
        class_names = None
def train_object_detection(args, reporter):
    """Build a yolo3 or faster_rcnn detector from ``args``, train it, and
    (on the final fit) return the collected model parameters.
    """
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    # training contexts
    ctx = [mx.gpu(i)
           for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    if args.meta_arch == 'yolo3':
        net_name = '_'.join((args.meta_arch, args.net, 'custom'))
        kwargs = {}
    elif args.meta_arch == 'faster_rcnn':
        net_name = '_'.join(('custom', args.meta_arch, 'fpn'))
        # Fixed Faster R-CNN / FPN hyper-parameters used for the custom model.
        kwargs = {
            'base_network_name': args.net,
            'short': 600,
            'max_size': 1000,
            'nms_thresh': 0.5,
            'nms_topk': -1,
            'min_stage': 2,
            'max_stage': 6,
            'post_nms': -1,
            'roi_mode': 'align',
            'roi_size': (7, 7),
            'strides': (4, 8, 16, 32, 64),
            'clip': 4.14,
            'rpn_channel': 256,
            'base_size': 16,
            'scales': (2, 4, 8, 16, 32),
            'ratios': (0.5, 1, 2),
            'alloc_size': (384, 384),
            'rpn_nms_thresh': 0.7,
            'rpn_train_pre_nms': 12000,
            'rpn_train_post_nms': 2000,
            'rpn_test_pre_nms': 6000,
            'rpn_test_post_nms': 1000,
            'rpn_min_size': 1,
            'per_device_batch_size': args.batch_size // args.num_gpus,
            'num_sample': 512,
            'pos_iou_thresh': 0.5,
            'pos_ratio': 0.25,
            'max_num_gt': 100
        }
    else:
        raise NotImplementedError(args.meta_arch, 'is not implemented.')

    args.save_prefix += net_name
    # use sync bn if specified
    if args.syncbn and len(ctx) > 1:
        net = gcv.model_zoo.get_model(
            net_name,
            classes=args.dataset.get_classes(),
            pretrained_base=True,
            transfer=args.transfer,
            norm_layer=gluon.contrib.nn.SyncBatchNorm,
            norm_kwargs={'num_devices': len(ctx)},
            **kwargs)
        if not args.reuse_pred_weights:
            net.reset_class(args.dataset.get_classes(), reuse_weights=None)
        # Plain (non-sync-bn) copy used by the data-loading workers.
        async_net = gcv.model_zoo.get_model(net_name,
                                            classes=args.dataset.get_classes(),
                                            pretrained_base=True,
                                            transfer=args.transfer,
                                            **kwargs)
        if not args.reuse_pred_weights:
            async_net.reset_class(args.dataset.get_classes(),
                                  reuse_weights=None)
    else:
        net = gcv.model_zoo.get_model(net_name,
                                      classes=args.dataset.get_classes(),
                                      pretrained_base=True,
                                      transfer=args.transfer,
                                      **kwargs)
        if not args.reuse_pred_weights:
            net.reset_class(args.dataset.get_classes(), reuse_weights=None)
        async_net = net

    if args.resume.strip():
        net.load_parameters(args.resume.strip())
        async_net.load_parameters(args.resume.strip())
    else:
        # Suppress (but record) initialization warnings.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            net.initialize()
            async_net.initialize()

    # training data
    train_dataset, eval_metric = args.dataset.get_dataset_and_metric()
    if args.meta_arch == 'yolo3':
        train_data, val_data = get_dataloader(async_net, train_dataset, None,
                                              args.data_shape,
                                              args.batch_size,
                                              args.num_workers, args)
    elif args.meta_arch == 'faster_rcnn':
        train_data, val_data = get_faster_rcnn_dataloader(
            net, train_dataset, None, FasterRCNNDefaultTrainTransform,
            FasterRCNNDefaultValTransform, args.batch_size, args.num_gpus,
            args)

    # training
    train(net, train_data, val_data, eval_metric, ctx, args, reporter,
          args.final_fit)

    if args.final_fit:
        return {'model_params': collect_params(net)}
def check_quantize_model(qdtype):
    """Quantize two FP32 test symbols to ``qdtype`` and sanity-check the
    quantized parameters, calibration attributes and a forward pass.

    Skips early on context/dtype combinations the backend does not support.
    """
    if is_test_for_native_cpu():
        print(
            'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
        )
        return
    elif qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
        )
        return
    elif qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        # Without a qsym the quantized params must equal the originals;
        # with one, compare against the ground-truth quantization.
        if qsym is None:
            assert len(params) == len(qparams)
            for k, v in params.items():
                assert k in qparams
                assert same(v.asnumpy(), qparams[k].asnumpy())
        else:
            qparams_ground_truth = mx.contrib.quant._quantize_params(
                qsym, params, th_dict={})
            assert len(qparams) == len(qparams_ground_truth)
            for k, v in qparams_ground_truth.items():
                assert k in qparams
                assert same(v.asnumpy(), qparams[k].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every requantize node must carry calibration ranges.
        attrs = qsym.attr_dict()
        for k, v in attrs.items():
            if k.find('requantize_') != -1:
                assert 'min_calib_range' in v
                assert 'max_calib_range' in v

    def check_qsym_qdtype(qsym, qdtype):
        # Every quantize node must emit the requested dtype.
        attrs = qsym.attr_dict()
        for k, v in attrs.items():
            if k.find('_quantize') != -1:
                assert 'out_type' in v
                assert v['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                           label_shape):
        # Bind the quantized symbol and run one forward pass on random data.
        mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
        mod.bind(for_training=False,
                 data_shapes=[('data', data_shape)],
                 label_shapes=[('softmax_label', label_shape)])
        mod.set_params(qarg_params, qaux_params)
        data = [
            mx.random.uniform(-1.0, 1.0, shape=shape)
            for _, shape in mod.data_shapes
        ]
        batch = mx.io.DataBatch(data, [])
        mod.forward(batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()

    sym = get_fp32_residual()
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)

    length = batch_size  # specify num of outputs from split op
    msym = get_fp32_sym_with_multiple_outputs(length)
    msym_label_shape = (length, 10)
    msym_data_shape = (length, 4, 4, 10, 10)

    for s, dshape, lshape in zip((sym, msym), (data_shape, msym_data_shape),
                                 (label_shape, msym_label_shape)):
        mod = Module(symbol=s)
        mod.bind(data_shapes=[('data', dshape)],
                 label_shapes=[('softmax_label', lshape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()
        # Layers excluded from quantization depend on the context.
        excluded_names = []
        if mx.current_context() == mx.cpu():
            excluded_names += ['fc', 'conv1']
        if mx.current_context() == mx.gpu():
            excluded_names += ['relu0', 'relu1']
        excluded_names += ['concat']

        optional_names = ['pool0']
        for skip_optional_names in [False, True]:
            # FIX: removed the dead assignment `exclude_sym_names = []` —
            # a typo'd name that was never read (both branches below assign
            # `excluded_sym_names`).
            if skip_optional_names:
                excluded_sym_names = excluded_names
            else:
                excluded_sym_names = excluded_names + optional_names

            # Uncalibrated quantization ('none' mode).
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='none')
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)

            # Naive calibration on random data.
            calib_data = mx.nd.random.uniform(shape=dshape)
            calib_data = NDArrayIter(data=calib_data, batch_size=batch_size)
            calib_data = DummyIter(calib_data)
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='naive',
                calib_data=calib_data,
                num_calib_examples=20)
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_calibrated(qsym)
            check_qsym_qdtype(qsym, qdtype)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)
def load_resnet18_v1_yolo():
    """Return a YOLOv3 VOC detector (ResNet18-v1 base) allocated on the GPU.

    The detector weights are randomly initialized (``pretrained=False``)
    while the ResNet18 backbone uses pretrained weights
    (``pretrained_base=True``).
    """
    model_name = 'yolo3_resnet18_v1_voc'
    device = mx.gpu()
    return cv.model_zoo.get_model(
        model_name,
        pretrained=False,
        pretrained_base=True,
        ctx=device,
    )
# NOTE(review): this chunk is a fragment — the lines below up to `if __name__`
# are the tail of a per-epoch training loop whose beginning is outside this view.
name2, loss2 = smoothl1_metric.get()
name3, loss3 = acc_metric.get()
# Log epoch wall-clock time plus all three training metrics.
logger.info('[Epoch %d] Training cost: %f, %s=%f, %s=%f, %s=%f'%(
    epoch, (time.time()-tic), name1, loss1, name2, loss2, name3, loss3))
# Run validation and log one "metric=value" line per class/metric.
map_name, mean_ap = validate(net, val_data, ctx, classes)
val_msg = '\n'.join(['%s=%f'%(k, v) for k, v in zip(map_name, mean_ap)])
logger.info('[Epoch %d] Validation: \n%s'%(epoch, val_msg))
# mean_ap[-1] is passed as the current score; presumably the overall mAP —
# TODO confirm against validate()'s return ordering.
save_params(net, best_map, mean_ap[-1], epoch, args.save_interval, args.save_prefix)

if __name__ == '__main__':
    args = parse_args()
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)
    # training contexts: one per GPU id in --gpus, falling back to CPU.
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
    ctx = ctx if ctx else [mx.cpu()]
    # training data
    train_dataset, val_dataset = get_dataset(args.dataset)
    train_data, val_data = get_dataloader(
        train_dataset, val_dataset, args.data_shape, args.batch_size, args.num_workers)
    classes = train_dataset.classes  # class names
    # network: model name is e.g. "ssd_300_vgg16_atrous_voc".
    net_name = '_'.join(('ssd', str(args.data_shape), args.network, args.dataset))
    net = get_model(net_name, pretrained_base=True)
    if args.resume.strip():
        net.load_params(args.resume.strip())
    # training
import math
import mxnet as mx
import numpy as np

from almond import LatentModel, VAEEncoder, VAEDecoder, ConditionalNormal

# Number of GPUs for computation
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]
# Otherwise, use CPU
# ctx = [mx.cpu()]


# Generate data
def gen_mu_exp_prior(n, scale=1.0):
    """Draw n latent means from an Exponential(scale) prior and return
    (mu, x) where x = mu + standard normal noise."""
    mu = np.random.exponential(scale=scale, size=n)
    x = mu + np.random.randn(n)
    return mu, x


# Data: fix both numpy and mxnet seeds for reproducibility.
np.random.seed(123)
mx.random.seed(123)
n = 1000
exp_scale = 2.0
mu, x = gen_mu_exp_prior(n, exp_scale)
# Column vector of observations, shape (n, 1), as an MXNet NDArray.
xt = mx.nd.array(x).reshape(-1, 1)

# Model (construction continues past this chunk; the call below is truncated).
model = LatentModel(ConditionalNormal(dimu=1),
                    encoder=VAEEncoder([1, 10], latent_dim=1),
                    decoder=VAEDecoder([10, 1], latent_dim=1, npar=1),
# -*- coding: utf-8 -*- import sys sys.path.insert(0, '.') import mxnet as mx # from mxnet import nd ctx = mx.gpu(4) def test_1(): a = mx.nd.array([3, 2, 3, 4, 5]) a = a.expand_dims(axis=0) b = mx.nd.tile(a, reps=(11, 1)) b = b.expand_dims(axis=0).expand_dims(axis=0) b = mx.nd.tile(b, reps=(2, 2, 1, 1)) c = mx.nd.RightLeftPooling(b) print('in: ', b) print('out: ', c) print('shape of c: ', c) def test_2(): a = mx.nd.array([3, 2, 3, 4, 5]) a = a.expand_dims(axis=0) b = mx.nd.tile(a, reps=(5, 1)) b = b.expand_dims(axis=0).expand_dims(axis=0) # b = mx.nd.tile(b, reps=(1, 2, 1, 1)) data = mx.sym.Variable('data')
# NOTE(review): fragment — `parser` is created above this view; the trailing
# `else:` branch continues past it.
parser.add_argument('--optimizer', default='adagrad',
                    help='optimizer (default: adagrad)')
parser.add_argument('--seed', default=123, type=int,
                    help='random seed (default: 123)')
parser.add_argument('--use-gpu', action='store_true',
                    help='whether to use GPU.')
opt = parser.parse_args()

logging.info(opt)

# Single context: GPU 0 when requested, else CPU.
context = [mx.gpu(0) if opt.use_gpu else mx.cpu()]

rnn_hidden_size, sim_hidden_size, num_classes = 150, 50, 5
optimizer = opt.optimizer.lower()

# Seed every RNG in play (mxnet, numpy, stdlib random) for reproducibility.
mx.random.seed(opt.seed)
np.random.seed(opt.seed)
random.seed(opt.seed)

batch_size = opt.batch_size

# read dataset: load the cached pickle if present, otherwise rebuild below.
# NOTE(review): cPickle.load on a local cache file — fine for trusted local
# data, but unsafe if 'dataset.cPickle' could come from an untrusted source.
if os.path.exists('dataset.cPickle'):
    with open('dataset.cPickle', 'rb') as f:
        train_iter, dev_iter, test_iter, vocab = cPickle.load(f)
else:
def main(args):
    """Train a sampled GCN with DGL over MXNet and report test accuracy.

    Subgraphs (NodeFlows) arrive from a remote SamplerReceiver for training;
    inference runs locally with a NeighborSampler over the full graph.
    """
    # load and preprocess dataset
    data = load_data(args)

    # Device selection: non-negative --gpu picks that GPU, otherwise CPU.
    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()

    # Optionally add self-loop edges (skipped for reddit datasets, which
    # presumably already include them — TODO confirm).
    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    # Node-id index arrays for the train/test splits, moved to the device.
    train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(
        np.int64).as_in_context(ctx)
    test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(
        np.int64).as_in_context(ctx)

    features = mx.nd.array(data.features).as_in_context(ctx)
    labels = mx.nd.array(data.labels).as_in_context(ctx)
    train_mask = mx.nd.array(data.train_mask).as_in_context(ctx)
    val_mask = mx.nd.array(data.val_mask).as_in_context(ctx)
    test_mask = mx.nd.array(data.test_mask).as_in_context(ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create GCN model: wrap the graph read-only and attach features plus a
    # per-node normalization term 1/in_degree.
    g = DGLGraph(data.graph, readonly=True)
    g.ndata['features'] = features
    num_neighbors = args.num_neighbors
    degs = g.in_degrees().astype('float32').as_in_context(ctx)
    norm = mx.nd.expand_dims(1. / degs, 1)
    g.ndata['norm'] = norm

    # Create sampler receiver: training NodeFlows are pushed by remote senders.
    sampler = dgl.contrib.sampling.SamplerReceiver(graph=g, addr=args.ip,
                                                   num_sender=args.num_sender)

    # Training model (with dropout) and a separate inference model; both use
    # prefix 'GCN' so their parameter names line up for the copy below.
    model = GCNSampling(in_feats,
                        args.n_hidden,
                        n_classes,
                        args.n_layers,
                        mx.nd.relu,
                        args.dropout,
                        prefix='GCN')

    model.initialize(ctx=ctx)
    loss_fcn = gluon.loss.SoftmaxCELoss()

    infer_model = GCNInfer(in_feats,
                           args.n_hidden,
                           n_classes,
                           args.n_layers,
                           mx.nd.relu,
                           prefix='GCN')

    infer_model.initialize(ctx=ctx)

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.weight_decay
    }, kvstore=mx.kv.create('local'))

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        idx = 0
        for nf in sampler:
            print("epoch: %d, subgraph: %d" % (epoch, idx))
            idx += 1
            # Pull features/norm for the sampled nodes into the NodeFlow.
            nf.copy_from_parent()
            # forward
            with mx.autograd.record():
                pred = model(nf)
                # Labels of the NodeFlow's last (output) layer nodes.
                batch_nids = nf.layer_parent_nid(-1).astype(
                    'int64').as_in_context(ctx)
                batch_labels = labels[batch_nids]
                loss = loss_fcn(pred, batch_labels)
                loss = loss.sum() / len(batch_nids)

            loss.backward()
            trainer.step(batch_size=1)

        # Copy trained weights into the inference model via the kvstore.
        # NOTE(review): reaches into Trainer internals (_param2idx, _kvstore);
        # also reuses the name `idx` from the subgraph counter above —
        # harmless here since it is reset each epoch, but fragile.
        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.

        # Evaluate on the test nodes with a full-neighborhood sampler.
        for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_hops=args.n_layers + 1,
                                                       seed_nodes=test_nid):
            nf.copy_from_parent()
            pred = infer_model(nf)
            batch_nids = nf.layer_parent_nid(-1).astype('int64').as_in_context(
                ctx)
            batch_labels = labels[batch_nids]
            num_acc += (pred.argmax(axis=1) == batch_labels).sum().asscalar()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
# NOTE(review): fragment — the add_argument call below is cut mid-parentheses
# (its opening `arg_parser.add_argument(` is above this view), and the roidb
# construction continues past the end.
                           dest='set_cfg_list',
                           help='Set the configuration fields from command line',
                           default=None, nargs=argparse.REMAINDER)
    return arg_parser.parse_args()


if __name__ == '__main__':
    args = parser()
    # Load the base YAML config, then apply any command-line overrides.
    update_config(args.cfg)
    if args.set_cfg_list:
        update_config_from_list(args.set_cfg_list)

    # One context per GPU id listed in the config; batch is per-GPU images
    # times number of GPUs.
    context = [mx.gpu(int(gpu)) for gpu in config.gpus.split(',')]
    nGPUs = len(context)
    batch_size = nGPUs * config.TRAIN.BATCH_IMAGES

    if not os.path.isdir(config.output_path):
        os.mkdir(config.output_path)

    # The following is just to make sure the code reproduces
    # results in the paper after default scale settings are changed to resolution-based
    # However, new scale settings should lead to similar results
    if config.dataset.dataset == 'coco' and config.dataset.NUM_CLASSES == 81:
        # Change the scales to what we used in the paper for reproducibility
        config.TRAIN.SCALES = (3.0, 1.667, 512.0)

    # Create roidb: one image set per '+'-separated entry in the config.
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
# NOTE(review): fragment — `fin` is opened above this view, and the
# mx.mod.Module(...) call at the bottom is cut mid-parentheses.
fin.close()


if __name__ == '__main__':
    list_file = 'sample.lst'
    save_dir = 'extract_map_rgb_seg_0815-v2'
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)
    # Input preprocessing sizes: images are resized then center/crop to
    # (crop_size*2, crop_size), i.e. a 2:1 portrait aspect (person re-id style).
    resize = 144
    crop_size = 128
    data_type = 'rgb'
    prefix = "rgb_soft_mask_0815-v2"
    dataset = 'market'
    # tag = '_full' # to indicate the mat files
    epoch_idx = 300
    context = mx.gpu(0)
    # Load the trained checkpoint and extract only the soft-mask branch
    # ('delta_sigmoid_output') for feature-map extraction.
    symbol, arg_params, aux_params = mx.model.load_checkpoint(
        "../models/%s/%s" % (dataset, prefix), epoch_idx)
    delta_sigmoid = symbol.get_internals()['delta_sigmoid_output']
    # 'rgbm' adds a 4th (mask) channel to the input.
    if data_type == 'rgbm':
        data_shape = (4, crop_size * 2, crop_size)
    else:
        data_shape = (3, crop_size * 2, crop_size)
    data_shapes = [('data', (1, data_shape[0], crop_size * 2, crop_size))]
    data_names = ['data']
    model = mx.mod.Module(symbol=delta_sigmoid, context=context,
# NOTE(review): fragment — the first line closes a logging call whose opening
# is above this view.
                 epoch, speed, elapsed)


if __name__ == '__main__':
    args = parse_args()

    # Initialize Horovod
    hvd.init()

    # Disable CUDA if there are no GPUs.
    if not mx.test_utils.list_gpus():
        args.no_cuda = True

    # Horovod: pin context to local rank
    ctx = [mx.cpu(hvd.local_rank())
           ] if args.no_cuda else [mx.gpu(hvd.local_rank())]

    # Split the global batch evenly across Horovod workers.
    per_worker_batch_size = args.batch_size // hvd.size()

    net = get_model('alexnet')
    net.hybridize()

    # set up logger
    logging.basicConfig(level=logging.INFO)
    logging.info(args)

    # Training data: synthetic loader sized to the per-worker batch.
    train_data = get_synthetic_dataloader(per_worker_batch_size,
                                          args.num_workers)

    train(net, train_data, per_worker_batch_size, ctx, logging, args)