def main(): batchsize = args.batch_size if args.gpus is '' else \ args.batch_size / len(args.gpus.split(',')) print 'batchsize is ', batchsize # define network structure net = get_symbol(batchsize) # load data train_img_list = './data/train_correct.txt' val_img_list = './data/test_correct.txt' train = PrefetchDataIter(train_img_list, batch_size=args.batch_size, is_color=True, root_dir="/home/donny/112x96/") val = PrefetchDataIter(val_img_list, batch_size=args.batch_size, is_color=True, root_dir="/home/donny/112x96/") # train, val = mnist_iterator(batch_size=args.batch_size, input_shape=data_shape) # train # ctx = mx.gpu(1) # mod = mx.mod.Module(net, context = ctx, data_names = ('data',), label_names = ('softmax_label', 'center_label',)) # mod.bind(data_shapes=train.provide_data, # label_shapes=train.provide_label) # mod.fit(train, eval_data=val, # optimizer_params={'learning_rate':0.01, 'momentum': 0.9}, num_epoch=30) print 'training model ...' train_model.fit(args, net, (train, val), data_shape)
def main(): batchsize = args.batch_size if args.gpus is '' else \ args.batch_size / len(args.gpus.split(',')) print 'batchsize is ', batchsize # define network structure net = get_symbol(batchsize) # load data train, val = mnist_iterator(batch_size=args.batch_size, input_shape=data_shape) # train print 'training model ...' train_model.fit(args, net, (train, val), data_shape)
def run():
    """Construct the MLP network and hand it to the shared training driver."""
    network = get_mlp()
    # train
    train_model.fit(args, network, get_iterator(data_shape))
def __callback(param):
    # Fragment: this closure is the body of a ``report_gpu_memory``-style
    # factory whose ``def`` line is outside this view (the complete version
    # appears elsewhere in the file); ``every_n_batch`` comes from that
    # enclosing scope.
    if param.nbatch % every_n_batch == 0:
        # Query free/total device memory via PyCUDA and log the free ratio.
        (free, total) = cuda.mem_get_info()
        logging.info(' GPU Memory: %.2f%%' % (100.0 * free / total))
return __callback
################################################################################

# Baseline run: train without memory mirroring, logging GPU memory usage.
print("*" * 80)
print(" WITHOUT mirroring")
print("*" * 80)

# train
train_model.fit(args, net, get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################

# Same training run, with mirroring enabled via symbol attributes.
print("*" * 80)
print(" WITH mirroring via attributes")
print("*" * 80)

# train
train_model.fit(args, net_mirrored, get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################
import os
# Fragment: interior of a ``get_iterator``-style helper -- the enclosing
# ``def`` is outside this view, so ``data_shape`` and ``kv`` come from it.
train = mx.io.ImageRecordIter(
    # Training RecordIO with augmentation (random crop + mirror), sharded
    # across workers for distributed training.
    path_imgrec = args.data_dir + "Train-FromourList-multiLabel.bin",  # .bin file
    mean_img = args.data_dir + "mean-train-multiLabel.bin",
    data_shape = data_shape,
    batch_size = args.batch_size,
    rand_crop = True,
    rand_mirror = True,
    num_parts = kv.num_workers,
    part_index = kv.rank,
    path_imglist="/Path-to-training-list.txt",
    label_width=3090)
val = mx.io.ImageRecordIter(
    # Validation RecordIO: no augmentation, same worker sharding.
    path_imgrec = args.data_dir + "Validation.bin",
    mean_img = args.data_dir + "mean-val-multiLabel.bin",
    rand_crop = False,
    rand_mirror = False,
    data_shape = data_shape,
    batch_size = args.batch_size,
    num_parts = kv.num_workers,
    part_index = kv.rank,
    path_imglist="/Path-to-val-list.txt",
    label_width=3090)
return (train, val)

# train -- delegates to the shared driver in train_model.py
train_model.fit(args, net, get_iterator)
# Fragment: tail of an argparse ``parse_args``-style helper -- the function
# header and earlier arguments are outside this view.
                    help='the prefix of the model to save')
parser.add_argument('--num-epochs', type=int, default=10,
                    help='the number of training epochs')
parser.add_argument('--load-epoch', type=int,
                    help="load the model on an epoch using the model-prefix")
parser.add_argument('--kv-store', type=str, default='local',
                    help='the kvstore type')
parser.add_argument('--lr-factor', type=float, default=1,
                    help='times the lr with a factor for every lr-factor-epoch epoch')
parser.add_argument('--lr-factor-epoch', type=float, default=1,
                    help='the number of epoch to factor the lr, could be .5')
return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss

    # Pick the network: flat 784-vector input for the MLP, 1x28x28 images
    # for LeNet.
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()

    # train -- optionally scoring with the Caffe-backed metric.
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape))
# Fragment: tail of an argparse helper plus the script entry point -- the
# function header and earlier arguments are outside this view.
parser.add_argument(
    '--lr-factor-epoch',
    type=float,
    default=1,
    help='the number of epoch to factor the lr, could be .5')
return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data
    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        # NOTE(review): data_shape stays () when lenet uses Caffe-fed data;
        # presumably get_iterator ignores it in that case -- confirm.
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        # Any other value is treated as a path to a serialized network JSON.
        net = get_network_from_json_file(args.network)

    # train -- optionally scoring with the Caffe-backed metric.
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.gluon.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
def please_train(args, net):
    """Launch training of ``net`` with the already-parsed CLI ``args``."""
    train_model.fit(args, net, get_iterator)
# Fragment: tail of an argparse helper plus the script entry point -- the
# function header and earlier arguments are outside this view.
parser.add_argument('--batch-size', type=int, default=128,
                    help='the batch size')
parser.add_argument('--lr', type=float, default=.0001,
                    help='the initial learning rate')
parser.add_argument('--model-prefix', type=str,
                    help='the prefix of the model to load/save')
parser.add_argument('--save-model-prefix', type=str,
                    help='the prefix of the model to save')
parser.add_argument('--num-epochs', type=int, default=10,
                    help='the number of training epochs')
parser.add_argument('--load-epoch', type=int,
                    help="load the model on an epoch using the model-prefix")
parser.add_argument('--kv-store', type=str, default='local',
                    help='the kvstore type')
parser.add_argument('--lr-factor', type=float, default=1,
                    help='times the lr with a factor for every lr-factor-epoch epoch')
parser.add_argument('--lr-factor-epoch', type=float, default=1,
                    help='the number of epoch to factor the lr, could be .5')
return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # NOTE(review): ``net`` and ``data_shape`` are only bound when
    # --network is 'lenet'; any other value raises NameError at the
    # fit() call below.
    if args.network == 'lenet':
        data_shape = (1, 128, 128)
        net = get_lenet()
    # train
    train_model.fit(args, net, get_iterator(data_shape))
# Fragment: tail of an argparse helper plus the script entry point -- the
# function header and earlier arguments are outside this view.
parser.add_argument('--kv-store', type=str, default='local',
                    help='the kvstore type')
parser.add_argument('--lr-factor', type=float, default=1,
                    help='times the lr with a factor for every lr-factor-epoch epoch')
parser.add_argument('--lr-factor-epoch', type=float, default=1,
                    help='the number of epoch to factor the lr, could be .5')
return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data
    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        # NOTE(review): data_shape stays () when lenet uses Caffe-fed data;
        # presumably get_iterator ignores it in that case -- confirm.
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        # Any other value is treated as a path to a serialized network JSON.
        net = get_network_from_json_file(args.network)
    # train -- optionally scoring with the Caffe-backed metric.
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
# Fragment: tail of an encoder-construction call -- the call's opening
# (e.g. ``encoder = Encoder(...,``) is outside this view, as is the code
# that defines ``hp``, ``device``, ``epochs`` and the dataset generators.
        d_ff=hp["d_ff"], dropout=0.1).to(device)
pooling = SelfAttentionPooling(d_m, dropout=0.1).to(device)
model = Transformer(encoder, pooling, d_m, label_shape, dropout=0.2).to(device)

opt = torch.optim.Adam(model.parameters(), lr=hp["lr"],
                       weight_decay=hp["weight_decay"])
loss_func = torch.nn.CrossEntropyLoss()

# Track the best (lowest) equal-error-rate seen on the validation set;
# 99. is a sentinel that any real EER beats.
best_eer = 99.
if hp["comet"]:
    # Comet.ml experiment logging: per-epoch train loss and validation EER.
    with experiment.train():
        for epoch in tqdm(range(epochs)):
            cce_loss = fit(model, loss_func, opt, train_ds_gen, device)
            experiment.log_metric("cce", cce_loss, epoch=epoch)
            val_eer = test(model, val_ds_gen, val_utt, val_pwd, val_trial,
                           device, tta=val_tta)
            experiment.log_metric("val eer", val_eer, epoch=epoch)
            if float(val_eer) < best_eer:
                print("New best EER: %f" % float(val_eer))
                best_eer = float(val_eer)
                # Fragment ends mid-block: the body of this context manager
                # (presumably the held-out evaluation) is outside this view.
                with experiment.test():
# Fragment: interior of a ``get_iterator``-style helper -- the enclosing
# ``def`` is outside this view (``data_shape`` and ``kv`` come from it).
# Mean values 123.68/116.779/103.939 are the standard ImageNet RGB means.
train = mx.io.ImageRecordIter(path_imgrec=os.path.join(
    args.data_dir, args.train_dataset),
                              mean_r=123.68,
                              mean_g=116.779,
                              mean_b=103.939,
                              data_shape=data_shape,
                              batch_size=args.batch_size,
                              rand_crop=True,
                              rand_mirror=True,
                              num_parts=kv.num_workers,
                              part_index=kv.rank)
val = mx.io.ImageRecordIter(path_imgrec=os.path.join(
    args.data_dir, args.val_dataset),
                            mean_r=123.68,
                            mean_g=116.779,
                            mean_b=103.939,
                            rand_crop=False,
                            rand_mirror=False,
                            data_shape=data_shape,
                            batch_size=args.batch_size,
                            num_parts=kv.num_workers,
                            part_index=kv.rank)
return (train, val)


# Benchmark mode feeds synthetic data so I/O is excluded from timing.
# ("sythentic" [sic] matches the helper's name defined elsewhere.)
if args.benchmark:
    train_model.fit(args, net, get_sythentic_data_iter)
else:
    train_model.fit(args, net, get_iterator)
# Fragment: tail of a ``get_iterator``-style helper plus the training call.
# Python 2 backticks (`x`) mean repr(x); here they stringify the fold number.
test = mx.io.ImageRecordIter(
    path_imgrec = "data/" + args.region + "-Fold" + `args.fold` + "-test.rec",
    data_shape = data_shape,
    batch_size = args.batch_size,
    rand_crop = False,
    rand_mirror = False,
    shuffle = False,
    num_parts = kv.num_workers,
    part_index = kv.rank
)
return (train, test)


# train, then persist the fitted model under a region/fold-specific prefix.
trainedModel = train_model.fit(args, net, get_iterator)
trainedModel.save("cratersTrained-" + args.region + "-Fold" + `args.fold`)

# #test
# test = mx.io.ImageRecordIter(
#     path_imgrec = "data/" + args.region + "-Fold" + `args.fold` + "-test.rec",
#     data_shape = data_shape,
#     batch_size = 10,
#     rand_crop = False,
#     rand_mirror = False,
#     shuffle = True
# )
#
# model = mx.model.FeedForward.load(args.model_prefix + "-0", 5)
def train(args):
    """Fine-tune a face-recognition-style network from RecordIO data.

    Wires timestamped file logging, builds the network (pretrained backbone
    up to a pooling layer plus a new sigmoid head), creates prefetching
    train/val iterators from ``train.rec``/``test.rec`` and calls the
    module-level ``fit`` helper.
    """
    load_num = int(args.load_num)
    data_record_dir = args.data_record_dir
    data_record_dir = os.path.join(data_record_dir, cfgs.DATASET_NAME)
    log_dir = args.log_path
    gpu_list = args.gpu_list
    batch_size = args.batch_size

    # ----- set log: one timestamped file per run (':' is not filename-safe,
    # so it is replaced with '-').
    logger = logging.getLogger()
    fh = logging.FileHandler(
        os.path.join(
            log_dir,
            time.strftime('%F-%T', time.localtime()).replace(':', '-') +
            '.log'))
    fh.setLevel(logging.DEBUG)
    # ch = logging.StreamHandler()
    # ch.setLevel(logging.INFO)
    logger.addHandler(fh)
    # logger.addHandler(ch)

    # ----- get train model
    # NOTE(review): only gpu(0) is ever used even when several GPUs are
    # listed -- the multi-device branch is commented out.
    if gpu_list is None:
        devs = mx.cpu(0)
    else:
        #devs = [mx.gpu(int(i)) for i in range(len(gpu_list.split(',')))]
        devs = mx.gpu(0)
    #logging.info("use gpu list: ",devs)
    '''
    sym,arg_param,aux_param = load_model(load_num)
    net,new_arg,new_aux = get_layer_output(sym,arg_param,aux_param,'flatten')
    net_load = load_parms(net,new_arg,new_aux,devs)
    '''
    #model_prefix = cfgs.MODEL_PREFIX
    #assert model_prefix is not None
    #model_prefix = os.path.join(args.model_dir,cfgs.DATASET_NAME,model_prefix)
    #net_train = gluon.SymbolBlock.imports(model_prefix+'-symbol.json', ['data'], model_prefix+'-'+'%04d' % load_num +'.params')
    #mobilenetv20_features_pool0_fwd
    # Take the backbone up to its global-pooling layer and graft the new
    # sigmoid output layer on top; hybridize for the symbolic fast path.
    net_train = get_symbol(devs)
    net_train = get_pretrained_layer(net_train, 'resnetv10_pool1_fwd')
    sigmoid_layer = add_layer(devs)
    net_train = graph(net_train, sigmoid_layer)
    net_train.hybridize()
    #net_train.summary(nd.zeros((1, 3, 224, 224),ctx=mx.cpu(0)))

    # ----- load data: 3x112x112 image records, shuffled, wrapped in a
    # prefetching iterator and adapted to a loader interface.
    train_rec_path = os.path.join(data_record_dir, 'train.rec')
    val_rec_path = os.path.join(data_record_dir, 'test.rec')
    train_dataiter = FaceImageIter(
        batch_size=batch_size,
        data_shape=(3, 112, 112),
        path_imgrec=train_rec_path,
        shuffle=True,
        cutoff=0,
    )
    train_dataiter = mx.io.PrefetchingIter(train_dataiter)
    train_loader = DataIterLoader(train_dataiter)
    val_dataiter = FaceImageIter(
        batch_size=batch_size,
        data_shape=(3, 112, 112),
        path_imgrec=val_rec_path,
        shuffle=True,
        cutoff=0,
    )
    val_dataiter = mx.io.PrefetchingIter(val_dataiter)
    val_loader = DataIterLoader(val_dataiter)

    # ----- train
    fit(net_train, train_loader, val_loader,
        ctx=devs,
        epoch=args.epochs,
        save_epoch=args.save_weight_period,
        load_epoch=load_num,
        learning_rate=args.lr,
        batch_size=batch_size,
        model_dir=args.model_dir)
# Fragment: tail of an argparse helper plus the script entry point -- the
# function header and earlier arguments are outside this view.
                    help="load the model on an epoch using the model-prefix")
parser.add_argument('--kv-store', type=str, default='local',
                    help='the kvstore type')
parser.add_argument(
    '--lr-factor',
    type=float,
    default=1,
    help='times the lr with a factor for every lr-factor-epoch epoch')
parser.add_argument(
    '--lr-factor-epoch',
    type=float,
    default=1,
    help='the number of epoch to factor the lr, could be .5')
return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # MLP takes flat 784-vectors; anything else falls back to LeNet on
    # 1x28x28 images.
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()
    # train
    train_model.fit(args, net, get_iterator(data_shape))
# Fragment: tail of a ``get_iterator``-style helper plus the timed training
# call.  scale=0.0078125 is 1/128, normalizing pixels to roughly [-1, 1]
# after the mean-128 subtraction.
    mean_g=128,
    mean_b=128,
    scale=0.0078125,
    max_aspect_ratio=0.35,
    data_shape=data_shape,
    batch_size=args.batch_size,
    rand_crop=True,
    rand_mirror=True,
)
# validate data iterator: same normalization, no augmentation
val = mx.io.ImageRecordIter(
    path_imgrec=args.data_dir + "va.rec",
    mean_r=128,
    mean_b=128,
    mean_g=128,
    scale=0.0078125,
    rand_crop=False,
    rand_mirror=False,
    data_shape=data_shape,
    batch_size=args.batch_size,
)
return (train, val)


# train, reporting wall-clock training time (Python 2 print statement).
tic = time.time()
train_model.fit(args, net, get_iterator)
print "time elapsed to train model", time.time() - tic
import logging

import pycuda.driver as cuda


def report_gpu_memory(every_n_batch=50):
    """Return a batch-end callback that logs the free GPU memory fraction
    once every ``every_n_batch`` batches (via PyCUDA)."""

    def _callback(param):
        if not param.nbatch % every_n_batch:
            free, total = cuda.mem_get_info()
            logging.info(' GPU Memory: %.2f%%' % (100.0 * free / total))

    return _callback


################################################################################

# Baseline: train without memory mirroring, logging GPU memory as we go.
print("*" * 80)
print(" WITHOUT mirroring")
print("*" * 80)

# train
train_model.fit(args, net, get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################

# Same run with mirroring enabled through symbol attributes.
print("*" * 80)
print(" WITH mirroring via attributes")
print("*" * 80)

# train
train_model.fit(args, net_mirrored, get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################

import os

os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1'

print("*" * 80)
print(" WITH mirroring via environment variable")
print("*" * 80)
# Fragment: interior of a ``get_iterator``-style helper plus the training
# call -- the enclosing ``def`` and ``kv`` are outside this view.
# Mean values 123.68/116.779/103.939 are the standard ImageNet RGB means.
data_shape = (3, args.data_shape, args.data_shape)
train = mx.io.ImageRecordIter(path_imgrec=os.path.join(
    args.data_dir, args.train_dataset),
                              mean_r=123.68,
                              mean_g=116.779,
                              mean_b=103.939,
                              data_shape=data_shape,
                              batch_size=args.batch_size,
                              rand_crop=True,
                              rand_mirror=True,
                              num_parts=kv.num_workers,
                              part_index=kv.rank)
val = mx.io.ImageRecordIter(path_imgrec=os.path.join(
    args.data_dir, args.val_dataset),
                            mean_r=123.68,
                            mean_g=116.779,
                            mean_b=103.939,
                            rand_crop=False,
                            rand_mirror=False,
                            data_shape=data_shape,
                            batch_size=args.batch_size,
                            num_parts=kv.num_workers,
                            part_index=kv.rank)
return (train, val)


# train
train_model.fit(args, net, get_iterator)
args = parser.parse_args() #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> from symbol_factory import symbol_factory as _sf from optimizer_factory import optimizer_factory as _of sf = _sf() of = _of() #check dirs if "hdfs:" in args.model_dir: print "making dir on hdfs" child = subprocess.Popen("hdfs dfs -mkdir " + args.model_dir, shell=True) return_code = child.wait() if return_code != 0: raise Exception("hdfs model dir exist or make dir error, abort") else: try: print "making dir on local server" os.mkdirs(args.model_dir) except: raise Exception("local model dir exist or make dir error, abort") #main code net_cmds = open("./.net_config").read() opt_cmds = open("./.opt_config").read() net = sf(net_cmds) opt_name, opt_params = of(opt_cmds) import train_model as train_model train_model.fit(args, net, opt_name, opt_params)