def main(config):
    """Load the ECG5000 ARFF train/test splits, merge and shuffle them,
    separate normal beats from anomalies, and build the test datasets.

    NOTE(review): `config` is never used, and only the test/anomaly
    datasets are materialized here (train_df is split but never passed
    to create_dataset) — confirm whether this is intentional.
    """
    with open('ECG_TRAIN.arff') as f:
        train = a2p.load(f)
    with open('ECG_TEST.arff') as f:
        test = a2p.load(f)
    #Merge dataset
    df = train.append(test)
    #shuffling data frame by sampling with frac=1
    df = df.sample(frac=1.0)
    CLASS_NORMAL = 1
    class_names = ['Normal','R on T','PVC','SP','UB']
    normal_df = df[df.target == str(CLASS_NORMAL)].drop(labels='target', axis=1)
    #We'll merge all other classes and mark them as anomalies:
    anomaly_df = df[df.target != str(CLASS_NORMAL)].drop(labels='target', axis=1)
    #We'll split the normal examples into train, validation and test sets:
    train_df, val_df = train_test_split(
        normal_df,
        test_size=0.15,
        random_state=101
    )
    val_df, test_df = train_test_split(
        val_df,
        test_size=0.33,
        random_state=101
    )
    test_normal_dataset, seq_len, _ = create_dataset(test_df)
    test_anomaly_dataset, _, _ = create_dataset(anomaly_df)
def main():
    """Build train/test input pipelines (TF1 graph mode) and train a DCGAN.

    Relies on module-level `batch_size` and `num_epoch`.
    """
    train_images, test_images, train_labels, test_labels = dataset.train_test_data(
        './PetImages_resize', batch_size)
    train_size = len(train_labels)
    test_size = len(test_labels)
    train_dataset = dataset.create_dataset(train_images, train_labels)
    # Shuffle over the full training set and repeat for num_epoch epochs;
    # one-shot iterator yields the next-batch tensors for the graph.
    train_dataset = train_dataset.cache().shuffle(
        buffer_size=train_size).batch(batch_size).repeat(
        num_epoch).make_one_shot_iterator().get_next()
    test_dataset = dataset.create_dataset(test_images, test_labels)
    # The whole test set is emitted as a single batch (batch(test_size)).
    test_dataset = test_dataset.cache().shuffle(
        buffer_size=10).batch(test_size).make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        model = DCGAN(sess,
                      train_dataset=train_dataset,
                      test_dataset=test_dataset,
                      train_size=train_size,
                      test_size=test_size,
                      batch_size=batch_size,
                      num_epoch=num_epoch)
        model.build_model()
        # NOTE(review): `intialize_variables` (sic) is the DCGAN method's
        # actual spelling — do not "fix" here without renaming the method.
        model.intialize_variables()
        #model.create_image_from_generator()
        model.train()
def run(args):
    """Train and/or test a LeNet-5 on MNIST-style data (MindSpore graph mode).

    Args:
        args: parsed CLI namespace with device, data_path, batch_size,
            run_train/run_test flags and checkpoint naming fields.
    """
    ms.context.set_context(mode=ms.context.GRAPH_MODE,
                           device_target=args.device,
                           save_graphs=False)
    ds_train = create_dataset(
        data_path=os.path.join(args.data_path, "train"),
        batch_size=args.batch_size,
        repeat_num=1,
        #
    )
    ds_test = create_dataset(
        data_path=os.path.join(args.data_path, "test"),
        batch_size=args.batch_size,
        repeat_num=1,
    )
    step_size = ds_train.get_dataset_size()
    print('step_size: %d' % (step_size))
    net = LeNet5(num_class=10, num_channel=1)
    model = build_model(args, net)
    if args.run_train:
        train(args, model, ds_train)
    if args.run_test:
        # Checkpoints are assumed to be saved every 100 steps.
        checkpoint_steps = [(i + 1) * 100 for i in range(12)]
        checkpoints = [
            "%s-1_%d.ckpt" % (args.ckpt_prefix, n) for n in checkpoint_steps
        ]
        ckpt_dir = get_ckpt_dir(args)
        for ckpt_name in checkpoints:
            test(net, model, ds_test, os.path.join(ckpt_dir, ckpt_name))
            # NOTE(review): this `break` means only the FIRST checkpoint is
            # ever tested despite building a list of 12 — confirm intent.
            break
def main():
    """Fit polynomials of the degrees in module-level M to noisy sin(2*pi*x)
    samples, plot the fits, then compare train/test log likelihood for
    degrees 0..8 (PRML-style figures 3.5 / 3.6).
    """
    train_set = create_dataset(N)
    test_set = create_dataset(N)
    df_ws = DataFrame()
    # Compute and display the polynomial-approximation curves
    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws, sigma = resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M = %d" % m))
        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M = %d" % m)
        # Display the training set
        subplot.scatter(train_set.x, train_set.y,
                        marker='o', color='blue', label=None)
        # Display the true curve
        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')
        # Display the fitted polynomial curve with a +/- sigma band
        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "Sigma = %.2f" % sigma
        subplot.plot(linex, liney, color='red', label=label)
        subplot.plot(linex, liney + sigma, color='red', linestyle='--')
        subplot.plot(linex, liney - sigma, color='red', linestyle='--')
        subplot.legend(loc=1)
    # p97 fig 3.5
    # fig.savefig("out/031-p97_fig3.5.png")
    # Compute the maximum log likelihood of the polynomial fits
    df = DataFrame()
    train_mlh = []
    test_mlh = []
    for m in range(0, 9):  # degree of the polynomial
        f, ws, sigma = resolve(train_set, m)
        train_mlh.append(log_likelihood(train_set, f))
        test_mlh.append(log_likelihood(test_set, f))
    df = pd.concat([
        df,
        DataFrame(train_mlh, columns=['Training set']),
        DataFrame(test_mlh, columns=['Test set'])
    ], axis=1)
    df.plot(title='Log likelihood for N = %d' % N,
            grid=True, style=['-', '--'])
    # p98 fig 3.6
    # plt.savefig("out/031-p98_fig3.6.png")
    plt.show()
def run(args):
    """Init, train, or test a 3-channel LeNet-5 depending on args.mode.

    Modes:
        'init'  - save freshly initialized weights as a seed checkpoint.
        'train' - train, optionally starting from args.init_ckpt.
        'test'  - evaluate a list of checkpoints (KungFu: rank 0 only).
    """
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )
    net = LeNet5(
        num_class=10,
        num_channel=3,
        use_bn=args.use_bn,
        dbg_log_tensor=args.log_tensor,
    )
    loss = ms.nn.loss.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction='mean',
    )
    opt = build_optimizer(args, net)
    if args.mode == 'init':
        # Seed checkpoints are timestamp-named so repeated inits don't collide.
        save_checkpoint(
            net,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )
    if args.mode == 'train':
        ds_train = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(net, args.init_ckpt)
        train(args, net, loss, opt, ds_train)
    if args.mode == 'test':
        if args.use_kungfu:
            # Only rank 0 performs evaluation in a KungFu cluster.
            rank = kfops.kungfu_current_rank()
            if rank > 0:
                return
        ds_test = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            checkpoints = args.ckpt_files.split(',')
        else:
            checkpoint_dir = get_ckpt_dir(args)
            print('checkpoint_dir: %s' % (checkpoint_dir))
            checkpoints = list(sorted(glob.glob(checkpoint_dir + '/*.ckpt')))
        print('will test %d checkpoints' % (len(checkpoints)))
        # for i, n in enumerate(checkpoints):
        #     print('[%d]=%s' % (i, n))
        test(args, net, loss, opt, ds_test, checkpoints)
def resnet50_train(args_opt):
    """Train ResNet-50 on CIFAR-10 (Ascend) with O2 mixed precision, then
    evaluate on device 0.

    Args:
        args_opt: namespace providing epoch_size and data_url (OBS source
            copied locally via MoXing).
    """
    device_id = 0
    device_num = 1
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/home/share/dataset/cifar-10-batches-bin/'  # your cifar10 path

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        init()
        # Each device reads from its own sub-directory in parallel mode.
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num = class_num)
    # reduction='mean' means that apply reduction of mean to loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnoram will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]
    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # Only the first device reports evaluation results.
    if device_num == 1 or device_id == 0:
        print(f'=================================Start run evaluation.=================================')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def generate_training_dataset():
    """Generate the patch-based training dataset from the raw training images.

    Source images are resized to 320x240; 128-px patches are extracted with
    a 32-px random perturbation and written out in batches of 64.
    """
    dataset.create_dataset(
        'train_raw',   # source directory of raw images
        training_dir,  # destination directory for generated data
        240,           # resized image height
        320,           # resized image width
        128,           # patch width
        32,            # perturbation size
        64,            # batch size
    )
def generate_test_dataset():
    """Generate the patch-based test dataset from the raw test images.

    Source images are resized to 640x480; 256-px patches are extracted with
    a 64-px random perturbation and written out in batches of 64.
    """
    dataset.create_dataset(
        'test_raw',  # source directory of raw images
        test_dir,    # destination directory for generated data
        480,         # resized image height
        640,         # resized image width
        256,         # patch width
        64,          # perturbation size
        64,          # batch size
    )
def resnet50_train(args_opt):
    """Train ResNet-50 on CIFAR-10 (Ascend, task/loop sink enabled) and
    evaluate on device 0.

    NOTE(review): `device_id` and `device_num` are not defined in this
    function — presumably module-level globals; verify before reuse.
    """
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                        save_graphs=False)
    context.set_context(enable_task_sink=True, device_id=device_id)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True)
        # Each device reads from its own sub-directory in parallel mode.
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num = class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)
    model = Model(net, loss_fn=loss, optimizer=opt,
                  loss_scale_manager=loss_scale, metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]
    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # Only the first device reports evaluation results.
    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def run(args):
    """Init, train, or test a 3-channel LeNet-5 depending on args.mode.

    Modes:
        'init'  - save freshly initialized weights as a seed checkpoint.
        'train' - train, optionally starting from args.init_ckpt.
        'test'  - evaluate either args.ckpt_files or fixed-step checkpoints.
    """
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )
    net = LeNet5(
        num_class=10,
        num_channel=3,
        use_bn=args.use_bn,
    )
    loss = ms.nn.loss.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = build_optimizer(args, net)
    if args.mode == 'init':
        # Seed checkpoints are timestamp-named so repeated inits don't collide.
        save_checkpoint(
            net,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )
    if args.mode == 'train':
        ds_train = create_dataset(
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(net, args.init_ckpt)
        train(args, net, loss, opt, ds_train)
    if args.mode == 'test':
        ds_test = create_dataset(
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            checkpoints = args.ckpt_files.split(',')
        else:
            # Default: evaluate the checkpoints saved at these step counts.
            steps = [10, 20, 30, 40]
            checkpoints = [get_ckpt_file_name(args, i) for i in steps]
        print('will test %d checkpoints' % (len(checkpoints)))
        # for i, n in enumerate(checkpoints):
        #     print('[%d]=%s' % (i, n))
        test(args, net, loss, opt, ds_test, checkpoints)
def main():
    """Train a recurrent autoencoder on normal ECG5000 heartbeats.

    Loads the ARFF train/test splits, merges and shuffles them, separates
    normal beats (target == '1') from anomalies, splits the normals into
    train/val/test, builds sequence datasets and trains the model on GPU.

    Raises:
        ValueError: if no CUDA device is available (CPU training unsupported).
    """
    with open('ECG_TRAIN.arff') as f:
        train = a2p.load(f)
    with open('ECG_TEST.arff') as f:
        test = a2p.load(f)
    # Merge dataset
    df = train.append(test)
    # shuffling data frame by sampling with frac=1
    df = df.sample(frac=1.0)
    CLASS_NORMAL = 1
    class_names = ['Normal', 'R on T', 'PVC', 'SP', 'UB']
    normal_df = df[df.target == str(CLASS_NORMAL)].drop(labels='target', axis=1)
    # We'll merge all other classes and mark them as anomalies:
    anomaly_df = df[df.target != str(CLASS_NORMAL)].drop(labels='target', axis=1)
    # We'll split the normal examples into train, validation and test sets:
    train_df, val_df = train_test_split(normal_df, test_size=0.15, random_state=101)
    val_df, test_df = train_test_split(val_df, test_size=0.33, random_state=101)
    train_dataset, seq_len, n_features = create_dataset(train_df)
    val_dataset, _, _ = create_dataset(val_df)
    test_normal_dataset, _, _ = create_dataset(test_df)
    test_anomaly_dataset, _, _ = create_dataset(anomaly_df)
    # BUG FIX: the model must be constructed BEFORE it can be moved to the
    # GPU; the original called `model.cuda()` before `model` was ever
    # assigned, which raised UnboundLocalError on the GPU path.
    model = RecurrentAutoencoder(seq_len, n_features, 128)
    if torch.cuda.device_count() >= 1:
        print('Model pushed to {} GPU(s), type {}.'.format(
            torch.cuda.device_count(), torch.cuda.get_device_name(0)))
        model = model.cuda()
    else:
        raise ValueError('CPU training is not supported')
    model = model.to(device)
    model, history = train_model(model, train_dataset, val_dataset, n_epochs=150)
def country_based_on():
    """Predict `field` values for a target country from a base country's data
    and return the prediction as a downloadable CSV attachment."""
    base_country = request.args.get('base')
    target_country = request.args.get('target')
    field = request.args.get('field')
    # One single-column frame per country, keyed by "<field><country>".
    source_frame = datasetMaker.create_dataset([base_country])
    source_frame = source_frame[[field + base_country]]
    target_frame = datasetMaker.create_dataset([target_country])
    target_frame = target_frame[[field + target_country]]
    predictor = LstmModel(lag=5)
    predicted = predictor.predict_dataset(source_frame, target_frame, field, target_country)
    datasetMaker.write_to_csv(predicted, "tmpOn")
    return send_from_directory(".", "tmpOn.csv", as_attachment=True)
def train_net(network, model, args, ckpoint_cb, sink_mode):
    """Define the training method.

    Args:
        network: the network (unused here; kept for interface symmetry).
        model: MindSpore Model wrapping the network.
        args: namespace with data_dir, batch_size, repeat_size, epoch_size
            and KungFu flags.
        ckpoint_cb: checkpoint callback (currently disabled, see below).
        sink_mode: whether to enable dataset sinking.
    """
    print("============== Starting Training ==============")
    # load training dataset
    ds_train = create_dataset(os.path.join(args.data_dir, "train"),
                              args.batch_size, args.repeat_size)
    callbacks = [
        # ckpoint_cb,
        LossMonitor(per_print_times=20),
    ]
    if args.use_kungfu:
        if args.use_kungfu_elastic:
            from kungfu_mindspore_callbacks import KungFuElasticCallback
            # Elastic schedule: training step -> new cluster size.
            schedule = {
                10: 2,
                20: 3,
                30: 4,
                40: 1,
                50: 2,
                60: 3,
                70: 4,
                80: 1,
            }
            kungfu_elastic_callback = KungFuElasticCallback(schedule)
            callbacks.append(kungfu_elastic_callback)
    log_callbacks(callbacks)
    print('sink_mode: %s' % (sink_mode))
    model.train(args.epoch_size,
                ds_train,
                callbacks=callbacks,
                dataset_sink_mode=sink_mode)
def test_method(gpu):
    """Evaluate checkpoints 0..101 of the attribute model on the test set,
    printing per-checkpoint mean and class-balanced accuracy.

    Args:
        gpu: value for CUDA_VISIBLE_DEVICES (e.g. "0").
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    config = TestConfig()
    # config.checkpoints_dir = "/media/data2/xyz_data/CelebA_full/full_third_2019-6-19_0.9135_ckp"
    print("{} model was initialized".format(config.model_name))
    # dataset is test set or val
    config.isTest = True
    dataset = create_dataset(config=config)
    model = create_model(config)
    for j in range(0, 102, 1):
        # Load the j-th checkpoint and reset accumulated precision stats.
        config.load_iter = j
        model.setup()
        model.clear_precision()
        if config.eval:
            model.eval()
        dataset_size = len(dataset)
        print("test dataset len: %d " % dataset_size)
        total_iter = int(dataset_size / config.batch_size)
        model.set_validate_size(dataset_size)
        # fc_feature = np.zeros((dataset_size, 2048))
        # label = np.zeros((dataset_size, 40))
        for i, data in enumerate(dataset):
            model.set_input(data)
            print("[%s/%s]" % (i, total_iter))
            model.test()
        print(model.get_model_precision())
        print(model.get_model_class_balance_precision())
        print("mean accuracy: {}".format(torch.mean(model.get_model_precision())))
        print("class_balance accuracy: {}".format(torch.mean(model.get_model_class_balance_precision())))
def run_experiment(data_file_path, data_name, use_tvt=True):
    """Run a full estimation experiment on one feather data file.

    Loads the data, builds and pickles the dataset, trains every candidate
    model with early stopping, and writes predictions — everything under
    `output_dir/<data_name>/`.

    Args:
        data_file_path: path to the input feather file.
        data_name: experiment name; also the export sub-directory name.
        use_tvt: whether to use a train/validation/test split.
    """
    logging.info('Working on estimations with {} from {}.'.format(
        data_name, data_file_path))
    export_dir = os.path.join(output_dir, data_name)
    if not os.path.isdir(export_dir):
        logging.info('Creating directory {}.'.format(export_dir))
        os.makedirs(export_dir)
    logging.info("Processing the data.")
    input_df = pd.read_feather(data_file_path)
    my_dataset = create_dataset(input_df, use_tvt)
    data_export_path = os.path.join(export_dir, 'data.pkl')
    # BUG FIX: the original used pickle.dump(..., open(path, 'wb')), which
    # leaks the file handle; a context manager guarantees it is closed.
    with open(data_export_path, 'wb') as pkl_file:
        pickle.dump(my_dataset, pkl_file)
    logging.info('Wrote dataset to {}.'.format(data_export_path))
    logging.info("Setting up the models for experiment.")
    my_models = get_candidate_models()
    my_experiment = EvalExperiment(
        models=[my_models[x] for x in my_models.keys()],
        model_names=list(my_models.keys()),
        dataset=my_dataset,
        req_freq=10,
        # Smoke tests only run 2 iterations to keep CI fast.
        max_iter=2000 if not smoke_test else 2,
        record_param_freq=10)
    logging.info("Running experiment.")
    my_experiment.train_and_save(export_dir=export_dir,
                                 auto_stop=True,
                                 stop_threshold=0.0005)
    logging.info("Making predictions.")
    my_experiment.make_prediction(export_dir=export_dir)
def eval_quant():
    """Evaluate a quantization-aware LeNet-5 checkpoint on the test split;
    asserts accuracy is above 0.98."""
    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
    cfg = quant_cfg
    ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1)
    ckpt_path = './ckpt_lenet_quant-10_937.ckpt'
    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(quant_delay=0,
                                          bn_fold=False,
                                          freeze_bn=10000,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)
    # define loss
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # define network optimization
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
    # call back and monitor
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # load quantization aware network checkpoint
    param_dict = load_checkpoint(ckpt_path)
    # load_param_into_net returns the parameters it could NOT load;
    # a non-empty result means the checkpoint does not match the network.
    not_load_param = load_param_into_net(network, param_dict)
    if not_load_param:
        raise ValueError("Load param into net fail!")
    print("============== Starting Testing ==============")
    acc = model.eval(ds_eval, dataset_sink_mode=True)
    print("============== {} ==============".format(acc))
    assert acc['Accuracy'] > 0.98
def test_mobilenetv2_quant():
    """Smoke-train MobileNetV2 with quantization-aware training on Ascend;
    asserts per-step time and average loss stay under fixed thresholds."""
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_ascend_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size
    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)
    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))
    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()),
                      lr, config.momentum, config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)
    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback, dataset_sink_mode=False)
    print("============== End Training ==============")
    # Regression thresholds: per-step milliseconds and average step loss.
    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used
    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
def calibration():
    """ do the calibration to get the scale offset record file

    Runs one evaluation batch through the AMCT-quantized ResNet so the tool
    can collect activation statistics, then saves the calibrated model.
    """
    dataset = create_dataset(
        dataset_path=ARGS_OPT.eval_dataset,
        do_train=False,
        batch_size=config.batch_size,  # pylint: disable=no-member
        target=ARGS_OPT.device_target)
    # A single batch is enough for calibration statistics.
    dataset = dataset.take(1)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    network = resnet(10)
    network.set_train(False)
    param_dict = load_checkpoint(ARGS_OPT.pre_trained)
    load_param_into_net(network, param_dict)
    # Dummy NCHW input used to trace the graph for the quantization config.
    input_data = np.random.uniform(0.0, 1.0, size=[32, 3, 224, 224]).astype(np.float32)
    config_file = os.path.join(CUR_DIR, './config.json')
    amct.create_quant_config(config_file, network, input_data)
    calibration_network = amct.quantize_model(config_file, network, input_data)
    model = Model(calibration_network,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # The eval pass itself is what drives AMCT's statistic collection.
    _ = model.eval(dataset)
    amct.save_model('./resnet50_quant_calibration', calibration_network, input_data)
def main():
    """ main function

    Prepares a single-batch evaluation dataset and a pretrained ResNet-50,
    then hands both to quant_resnet50 for quantization.
    """
    os.environ["DEVICE_NUM"] = "1"
    os.environ["RANK_ID"] = "0"
    target = 'Ascend'
    context.set_context(mode=context.GRAPH_MODE, device_target=target)
    # step1: create_dataset for evaluation, prepare the input data
    # and initialize the network, load the pretrained checkpoint to the network.
    # Ensure that the network before quant_resnet50 is proper functioning.
    dataset = create_dataset(dataset_path=ARGS_OPT.dataset_path,
                             do_train=False,
                             batch_size=32,
                             target=target)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # One batch is enough for the quantization pass.
    dataset = dataset.take(1)
    input_shape = [32, 3, 224, 224]
    class_num = 10
    # Dummy NCHW input used to trace the graph.
    input_data = np.random.uniform(0.0, 1.0, size=input_shape).astype(np.float32)
    network = resnet50(class_num)
    param_dict = load_checkpoint(ARGS_OPT.checkpoint_path)
    load_param_into_net(network, param_dict)
    network.set_train(False)
    quant_resnet50(network, dataset, loss, input_data)
def main():
    """Build the U-Net, create augmented train/validation generators and test
    data, train for 100 epochs, save the model and plot training metrics."""
    model = create_U_net(INPUT_WIDTH, INPUT_HEIGHT, IMG_CHANNEL, LEARNING_RATE)
    data_info = create_dataset(
        train_image_path=TRAIN_IMAGES,
        train_mask_path=TRAIN_MASK,
        test_image_path=TEST_IMAGES,
        test_mask_path=TEST_MASK,
        img_height=INPUT_HEIGHT,
        img_width=INPUT_WIDTH,
        img_channels=IMG_CHANNEL,
        shear=SHEAR,
        rotation=ROTATION,
        zoom=ZOOM,
        width_shift=WIDTH_SHIFT,
        height_shift=HEIGHT_SHIFT,
        split_ratio=SPLIT_RATIO,
        batch_size=BATCH_SIZE,
        rand_seed=RAND_SEED)
    train_generator = data_info["train_generator"]
    validation_generator = data_info["validation_generator"]
    test_images = data_info["test_images"]
    # BUG FIX: was `data_indo[...]` — a typo that raised NameError.
    test_masks = data_info["test_labels"]
    results = model.fit_generator(
        train_generator,
        steps_per_epoch=7,
        # BUG FIX: was `val_generator`, an undefined name — the variable
        # assigned above is `validation_generator`.
        validation_data=validation_generator,
        validation_steps=2,
        epochs=100)
    save_model(model, SAVING_NAME, SAVING_PATH)
    plot_metrics(results)
def train_lenet_quant():
    """Fine-tune LeNet-5 with quantization-aware training, starting from a
    non-quantized checkpoint, and save QAT checkpoints each epoch."""
    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
    cfg = quant_cfg
    ckpt_path = './ckpt_lenet_noquant-10_1875.ckpt'
    ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1)
    step_size = ds_train.get_dataset_size()
    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # load quantization aware network checkpoint
    param_dict = load_checkpoint(ckpt_path)
    load_nonquant_param_into_quant_net(network, param_dict)
    # convert fusion network to quantization aware network
    network = quant.convert_quant_network(network,
                                          quant_delay=900,
                                          bn_fold=False,
                                          per_channel=[True, False],
                                          symmetric=[False, False])
    # define network loss
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # define network optimization
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
    # call back and monitor
    config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size,
                                   keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant", config=config_ckpt)
    # define model
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    print("============== Starting Training ==============")
    # CONSISTENCY FIX: use attribute access (cfg.epoch_size) as the rest of
    # this function does, instead of the item access cfg['epoch_size'].
    model.train(cfg.epoch_size, ds_train,
                callbacks=[ckpt_callback, LossMonitor()],
                dataset_sink_mode=True)
    print("============== End Training ==============")
def create_data_loader(name, mode, mean_var=None, batch_size=32, shuffle=True, num_workers=32):
    # Builds the underlying dataset for `name`.
    # NOTE(review): only `name` is used and nothing is returned — the
    # remaining parameters (mode, mean_var, batch_size, shuffle,
    # num_workers) are ignored; this function looks truncated/incomplete.
    dataset = create_dataset(name)
def create_training_dataset(batch_size, increment_dataset, repeat):
    """Create the dataset of training...

    It creates a variable named dataset_size, it is the number of registers
    to be taken each training. An operation is defined to increment 10 times
    the dataset_size, so in each new training, the dataset_size increments N
    times and the number of registers to be taken will be greater...

    Args:
        batch_size: Size of the batch of the training dataset.
        increment_dataset: The number of registers to increment each
            training. Ex: First train: 10 registers. Next train: 20
            registers. increment_dataset = 10
        repeat: Number of times to repeat the registers of the dataset.

    Returns:
        A dictionary with the variables:
        dataset: The dataset for training.
        dataset_resize_op: The resize operation to increment the number of
            registers to take each training.
    """
    # TF1 graph-mode variable: take() reads it lazily, so running
    # dataset_resize_op between trainings grows the effective dataset.
    dataset_size = tf.Variable(0, dtype=tf.int64)
    dataset_resize_op = dataset_size.assign(
        tf.add(dataset_size, increment_dataset))
    complete_dataset = create_dataset(
        "./datasets/normalizado/pnode06_03000_train.txt"
    )  # Loads all training dataset
    trainable_dataset = complete_dataset.take(dataset_size).repeat(repeat)
    shuffled_dataset = trainable_dataset.shuffle(dataset_size)
    batched_dataset = shuffled_dataset.batch(batch_size)
    return {"dataset": batched_dataset, "dataset_resize_op": dataset_resize_op}
def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, init_hdf5=None,
              ):
    """Evaluate a model on the validation set and log top-1/top-5/loss.

    Args:
        cfg: experiment configuration (dataset, network type, batch size, ...).
        net: optional model constructor; resolved from cfg when None.
        val_dataloader: optional dataloader; built from cfg when None.
        show_variables: print the engine's registered variables.
        convbuilder: optional ConvBuilder; a default is built from cfg.
        init_hdf5: optional HDF5 weight file loaded after any checkpoint.
    """
    with Engine() as engine:
        engine.setup_log(
            name='test', log_dir='./', file_name=DETAIL_LOG_FILE)
        if net is None:
            net = get_model_fn(cfg.dataset_name, cfg.network_type)
        if convbuilder is None:
            convbuilder = ConvBuilder(base_config=cfg)
        model = net(cfg, convbuilder).cuda()
        if val_dataloader is None:
            val_dataloader = create_dataset(cfg.dataset_name, cfg.dataset_subset,
                                            batch_size=cfg.global_batch_size)
        # Iterations to cover the full split (ImageNet val: 50000, else 10000).
        val_iters = 50000 // cfg.global_batch_size if cfg.dataset_name == 'imagenet' else 10000 // cfg.global_batch_size
        print('NOTE: Data prepared')
        print('NOTE: We have global_batch_size={} on {} GPUs, the allocated GPU memory is {}'.format(cfg.global_batch_size, torch.cuda.device_count(), torch.cuda.memory_allocated()))
        criterion = get_criterion(cfg).cuda()
        engine.register_state(
            scheduler=None, model=model, optimizer=None, cfg=cfg)
        if show_variables:
            engine.show_variables()
        if engine.distributed:
            print('Distributed training, engine.world_rank={}'.format(engine.world_rank))
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[engine.world_rank],
                broadcast_buffers=False,
            )
            # model = DistributedDataParallel(model, delay_allreduce=True)
        elif torch.cuda.device_count() > 1:
            print('Single machine multiple GPU training')
            model = torch.nn.parallel.DataParallel(model)
        # Weights: checkpoint first (if configured), then HDF5 (if given).
        if cfg.init_weights:
            engine.load_checkpoint(cfg.init_weights, just_weights=True)
        if init_hdf5:
            engine.load_hdf5(init_hdf5)
        model.eval()
        eval_dict, _ = run_eval(val_dataloader, val_iters, model, criterion, 'TEST',
                                dataset_name=cfg.dataset_name)
        val_top1_value = eval_dict['top1'].item()
        val_top5_value = eval_dict['top5'].item()
        val_loss_value = eval_dict['loss'].item()
        msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, val_top1_value, val_top5_value, val_loss_value)
        log_important(msg, OVERALL_LOG_FILE)
def initialize(args):
    """Set up cache/output directories, datasets, model, and optimizer.

    Args:
        args: training namespace (local_rank, input/output dirs, file names,
            bert_type, model_type, device, lr, fp16 flags, isaml, ...).

    Returns:
        tuple: (model, optimizer, train_dataset, val_dataset)
    """
    # Create folders — per-rank cache dir avoids clashes between processes.
    args.cache_dir = os.path.join(tempfile.gettempdir(), str(args.local_rank))
    os.makedirs(args.cache_dir, exist_ok=True)
    # Only a single process (or global rank 0) creates the shared output dir.
    if args.local_rank == -1 or (args.is_distributed and args.global_rank == 0):
        os.makedirs(args.output_dir, exist_ok=True)

    # Create datasets
    train_df = pd.read_pickle(os.path.join(args.input_dir, args.train_file))
    if args.sample_limit:
        train_df = train_df.iloc[:args.sample_limit]
    val_df = pd.read_pickle(os.path.join(args.input_dir, args.val_file))
    train_dataset = create_dataset(train_df, args.cache_dir, args.bert_type)
    val_dataset = create_dataset(val_df, args.cache_dir, args.bert_type)

    # Create model
    model = MODEL_CLASSES[args.model_type].from_pretrained(args.bert_type,
                                                           cache_dir=args.cache_dir)
    if type(model) is BertForPronounResolution_Segment:
        model.post_init()
    logger.info(f'Model used = {type(model)}')
    model = model.to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)  # change it to AdamW later

    # fp16 AMP changes
    if args.fp16:
        from apex import amp
        # This needs to be done before wrapping with DDP or horovod.
        torch.cuda.set_device(args.device)  # not sure if it's required
        # BUG FIX: amp.initialize returns the patched (model, optimizer)
        # pair; the original discarded the return value, so AMP was never
        # actually applied. It also passed the opt level positionally into
        # the `enabled` slot — it must be the `opt_level` keyword.
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.amp_opt_level)

    if args.isaml:
        from azureml.core import Run
        args.run = Run.get_context()

    return model, optimizer, train_dataset, val_dataset
def test_net(network, model, args):
    """Evaluate `network` on the test split using a fixed saved checkpoint."""
    print("============== Starting Testing ==============")
    # Restore the trained weights into the network before evaluation.
    checkpoint = load_checkpoint("checkpoint_lenet-1_1875.ckpt")
    load_param_into_net(network, checkpoint)
    # Evaluate on the held-out test split without dataset sinking.
    eval_data = create_dataset(os.path.join(args.data_dir, "test"))
    accuracy = model.eval(eval_data, dataset_sink_mode=False)
    print("============== Accuracy:{} ==============".format(accuracy))
def main():
    """Entry point: parse the config, then run either a plain training pass
    or a cross-validation evaluation."""
    config = setup_argument_parser()
    log.info("Starting...")
    log.info("Model will train with following parameters:")
    log.info(config)
    # Create the dataset:
    if config.crossvalidation:
        # Crossvalidation comes down to using a different dataset generation system
        cv.evaluate(config)
    else:
        train_ds, val_ds = create_dataset(config)
        model = inception.create_model(config, train_ds, val_ds)
def create_testing_dataset():
    """Create the dataset of testing...

    Loads the full test file and batches it one register at a time.

    Returns:
        The test dataset, batched with batch size 1.
    """
    complete_dataset = create_dataset(
        "./datasets/input/0_1_test.txt")
    batched_dataset = complete_dataset.batch(1)
    return batched_dataset
def ensure_datasets(batch_size, train_params, test_params):
    """Make sure the train and test instance folders exist, generating each
    one on demand from its parameter dict."""
    def _materialize(path, message, params):
        # Skip generation when the instance folder is already present.
        if os.path.isdir(path):
            return
        print(message.format(params['samples']), flush=True)
        create_dataset(
            path,
            params['n_min'],
            params['n_max'],
            conn_min=params['conn_min'],
            conn_max=params['conn_max'],
            samples=params['samples'],
            distances=params['distances'])

    _materialize('instances/train', 'Creating {} Train instances', train_params)
    _materialize('instances/test', 'Creating {} Test instances', test_params)
def train_keras():
    """ Distributed strategy with Keras API """
    strategy = tf.distribute.MirroredStrategy()
    # 32 samples per replica, scaled by the number of replicas in sync.
    global_batch_size = 32 * strategy.num_replicas_in_sync
    train_dataset = create_dataset(global_batch_size)
    with strategy.scope():
        # Model and optimizer variables must be created under the scope.
        model = ResNet50(input_shape=(224, 224, 3), num_classes=1000)
        model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                      optimizer=tf.keras.optimizers.Adam(),
                      metrics=['accuracy'])
        model.fit(train_dataset, epochs=2)
def predict_salespeed(itemid):
    """Predict the sale speed (items/day) for `itemid` with an RBF-kernel SVR
    trained on similar items (same category and condition).

    Args:
        itemid: identifier of the item to predict for.

    Returns:
        dict: {"predicted_sale_speed": <rounded items/day>}
    """
    # RBF-kernel SVR was chosen; the linear/poly SVR and plain linear
    # regression alternatives are kept below for reference.
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    # svr_lin = SVR(kernel='linear', C=1e3)
    # svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    # regr = linear_model.LinearRegression()
    regr = svr_rbf
    item = get_item(itemid)
    # MODERNIZED: Python 2 `print` statements converted to print() calls for
    # consistency with the rest of the codebase (all single-argument, so the
    # emitted output is unchanged).
    print("Creating dataset of similar items (same category and condition)")
    df = create_dataset(item, reduced=True)
    print("Extracting numeric features")
    X, y, x = extract_features(df, itemid)
    print("Splitting in train and test sets")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42)
    print("Training regression model")
    # Train the model using the training sets
    regr.fit(X_train, y_train)
    # The mean square error
    print("\nModel evaluation")
    print("Residual sum of squares: %.2f"
          % np.mean((regr.predict(X_test) - y_test) ** 2))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % regr.score(X_test, y_test))
    sale_speed = regr.predict(x)
    print("\nPredicted sale speed %.1f items per day" % sale_speed)
    return {"predicted_sale_speed": round(sale_speed[0])}
def upsert_dataset(remote, dataset):
    """Update the dataset on `remote` when it already exists there, otherwise
    create it; returns the resulting package."""
    exists = get_package(remote, dataset['name'])
    action = update_dataset if exists else create_dataset
    return action(remote, dataset)