def sub_2(cfg, model, weight_path):
    utils.load_model(weight_path, model)
    result_all = {}
    model = model.eval()
    dataset_sub = RsnaDatasetTest2()
    dataloader = DataLoader(dataset_sub,
                            batch_size=1,
                            shuffle=False,
                            num_workers=3,
                            collate_fn=lambda x: x)
    for item in tqdm(dataloader):
        imgs, study_id, sop_arr = item[0]
        _bs = 128
        preds = []
        for i in np.arange(0, len(sop_arr), step=_bs):
            _imgs = torch.from_numpy(imgs[i:i + _bs]).cuda()
            with torch.no_grad():
                outputs = model(_imgs)
            res = torch.sigmoid(outputs["pe_present_on_image"]).cpu().numpy()
            preds.extend(res)
        result_all[study_id] = {
            "outputs": np.array(preds),
            "ids": np.array(sop_arr),
        }
        if args.debug:
            break
    print("per study result's keys(): ", result_all[study_id].keys())
    # import pdb; pdb.set_trace()
    return result_all
def main():
    args = parser.parse_args()
    args, logging, writer = utils.parse_args(args)
    logging.info('# Start Re-training #')

    criterion = LOSS_FACTORY[args.task](args, args.loss_scaling)

    if args.model_type == "pointwise":
        model_temp = POINTWISE_FACTORY[args.model]
    else:
        raise NotImplementedError("Other models have not been implemented!")

    model = POINTWISE_FACTORY[args.model](args.input_size, args.output_size,
                                          args.layers, args.activation, args)

    logging.info('## Model created: ##')
    logging.info(model.__repr__())
    logging.info("### Param size = %f MB, Total number of params = %d ###" %
                 utils.count_parameters_in_MB(model, args))

    logging.info('### Loading model to parallel GPUs ###')
    utils.profile(model, args, logging)
    model = utils.model_to_gpus(model, args)

    logging.info('### Preparing schedulers and optimizers ###')
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.learning_rate,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    logging.info('## Downloading and preparing data ##')
    train_loader, valid_loader = get_train_loaders(args)

    logging.info('## Beginning Training ##')
    train = Trainer(model, criterion, optimizer, scheduler, args)
    best_error, train_time, val_time = train.train_loop(
        train_loader, valid_loader, logging, writer)

    logging.info('## Finished training, the best observed validation error: {}, '
                 'total training time: {}, total validation time: {} ##'.format(
                     best_error, timedelta(seconds=train_time),
                     timedelta(seconds=val_time)))

    logging.info('## Beginning Plotting ##')
    del model

    with torch.no_grad():
        model = model_temp(args.input_size, args.output_size, args.layers,
                           args.activation, args)
        utils.load_model(model, args.save + "/weights.pt")
        model = utils.model_to_gpus(model, args)
        model.eval()
        plot_regression_uncertainty(model, PLT, train_loader, args)

    logging.info('# Finished #')
def _load_ensemble(self, input_size, output_size, layers, activation):
    sample_names = []
    for root, dirs, files in os.walk(self.args.save):
        for filename in files:
            if ".pt" in filename:
                sample_name = re.findall('weights_[0-9]*.pt', filename)
                if len(sample_name) >= 1:
                    sample_name = sample_name[0]
                    sample_names.append(sample_name)
    sample_names.sort(key=natural_keys)
    sample_names = sample_names[:self.args.samples]
    for i in range(self.args.samples):
        model = _LinearNetwork(input_size, output_size, layers, activation,
                               self.args)
        load_model(model, self.args.model_path + "/" + sample_names[i])
        self.ensemble.append(model)
def get_model_paths(noise_type, noise_level, exp_num, init_index,
                    start_epoch=0, end_epoch=None, step=1, points=None,
                    root_dir="../results"):
    """Return model paths of the specified experiment."""
    model_dict = {noise_type: {noise_level: {exp_num: {init_index: {}}}}}
    result_path = root_dir
    models = load_model(model_dict, result_path)
    model_paths = models[noise_type][noise_level][exp_num][init_index]
    last_epoch = int(
        model_paths[-1].split("/")[-1].split(".")[0].split("_")[-1])
    if points is not None:
        step = int(last_epoch / points)
        if step == 0:
            step = 1
    if end_epoch is not None:
        model_paths = models[noise_type][noise_level][exp_num][init_index][
            start_epoch:end_epoch:step]
    else:
        model_paths = models[noise_type][noise_level][exp_num][init_index][
            start_epoch::step]
    return model_paths
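# Usage sketch (not from the original repo): the experiment keys and root_dir
# below are made-up placeholders. As implemented above, `points` overrides
# `step`, yielding roughly `points` evenly spaced checkpoints up to the last
# epoch found on disk.
paths = get_model_paths("label_noise", 0.2, exp_num=3, init_index=0,
                        points=10, root_dir="../results")
for p in paths:
    print(p)  # checkpoint paths whose filenames end in "_<epoch>.<ext>"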
def train_model(dataloaders, device, model, criterion, optimizer, state_path,
                model_name, scheduler=None, num_epochs=25, continue_train=True,
                arcface=False):

    def create_thread(q, dataloader):
        # start inserting
        tr = Thread(target=insertData, args=(q, dataloader))
        tr.daemon = True
        tr.start()

    if continue_train and os.path.exists(state_path):
        with open(state_path, 'rb') as f:
            state_dict = pickle.load(f)
        print(state_dict)
        train_loss = state_dict['loss']
        val_loss = state_dict['val_losses']
        accuracy = state_dict['accuracy']
        start = state_dict['epoch']
        model = load_model(model, model_name, start)
        start += 1
        scheduler.load_state_dict(
            torch.load(os.path.join(f'models/{model_name}.scheduler')))
        print(scheduler.state_dict())
    else:
        train_loss, val_loss, accuracy = [], [], []
        start = 0

    for epoch in tqdm(range(start, num_epochs)):
        train_loss.append(
            train_step(dataloaders, device, model, criterion, optimizer,
                       arcface).cpu())
        if scheduler is not None:
            scheduler.step()
        cur_val_loss, cur_acc = eval_step(dataloaders, device, model,
                                          criterion, arcface)
        val_loss.append(cur_val_loss.cpu())
        accuracy.append(cur_acc)
        print(f'Accuracy is {cur_acc}')
        with open(state_path, 'wb') as f:
            pickle.dump(
                {
                    'loss': train_loss,
                    'val_losses': val_loss,
                    'epoch': epoch,
                    'accuracy': accuracy
                }, f)
        torch.save(model.state_dict(),
                   os.path.join(f'models/{model_name}{epoch}.data'))
        if scheduler is not None:
            torch.save(scheduler.state_dict(),
                       os.path.join(f'models/{model_name}.scheduler'))
    return train_loss, val_loss
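# A minimal resume sketch, assuming the pickle/state files written by the loop
# above already exist; the names 'state/model.pkl' and 'resnet' are
# hypothetical. With continue_train=True the loop picks up at the saved epoch.
train_loss, val_loss = train_model(dataloaders, device, model, criterion,
                                   optimizer, state_path='state/model.pkl',
                                   model_name='resnet', scheduler=scheduler,
                                   num_epochs=25, continue_train=True)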
def test_load():
    train, dev, test, word_idx = read_sst(
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.train",
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.dev",
        u"C:\\Users\\roger\\NLP\\Corpus\\sst_bi\\sst.bi.test",
    )
    classifier = load_model("cnn_model")
    acc, pred = classifier.test(test[0], test[1])
    print(acc)
    return pred
def sub(cfg, model, weight_path):
    utils.load_model(weight_path, model)
    model = model.eval()
    dataset_sub = RsnaDatasetTest()
    df_test = pd.read_csv(DATADIR / "test.csv")
    result_all = {}
    for study in tqdm(df_test.StudyInstanceUID.unique()):
        dataset_sub.set_StudyInstanceUID(study)
        loader_sub = DataLoader(dataset_sub,
                                batch_size=32,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=6)
        with torch.no_grad():
            result = run_nn(cfg, 'test', model, loader_sub)
        result_all[study] = result
    print("per study result's keys(): ", result_all[study].keys())
    return result_all
def main(result, *masks):
    models = reduce(lambda x, y: x + y, map(glob, masks))
    weights = [get_weights(x) for x in tqdm(models, desc='loading')]
    model = load_model(models[0])
    for i, _ in enumerate(tqdm(model.layers, desc='averaging')):
        w = [x[i] for x in weights]
        w = average(w)
        model.layers[i].set_weights(w)
    model.save(result)
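# Hypothetical invocation of the checkpoint-averaging helper above: every
# model matched by the glob masks is averaged layer-by-layer into one file.
# The mask strings and output name are illustrative, not from the project.
main('averaged.h5', 'checkpoints/fold*.h5', 'checkpoints/extra/*.h5')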
def valid(_cfg, model, all_exam=False):
    cfg = copy.deepcopy(_cfg)
    if all_exam:
        cfg["dataset"]["param"]["posexam_only"] = False  # validation for all slices
    assert cfg["output"]
    assert not os.path.exists(cfg["output"])
    criterion = factory.get_criterion(cfg)
    path = os.path.join(output_dir, 'fold%d_ep0.pt' % (cfg['fold']))
    print(f'best path: {str(path)}')
    utils.load_model(str(path), model)
    loader_valid = factory.get_loader_valid(cfg)
    with torch.no_grad():
        results = run_nn(cfg, 'valid', model, loader_valid, criterion=criterion)
    utils.save_pickle(results, cfg["output"])
    log('saved to %s' % cfg["output"])
def sub_3(cfg, model, weight_path):
    """
    Returns:
        result_all["study_id"] -> {
            "outputs" -> {"col_name1" -> np.ndarray, "col_name2" -> np.ndarray},
            "ids" -> sop_id_arr,
        }
    """
    utils.load_model(weight_path, model)
    result_all = {}
    model = model.eval()
    dataset_sub = RsnaDatasetTest2()
    dataloader = DataLoader(dataset_sub,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            collate_fn=lambda x: x)
    for item in tqdm(dataloader):
        imgs, study_id, sop_arr = item[0]
        _bs = 64
        outputs_all = defaultdict(list)
        for i in np.arange(0, len(sop_arr), step=_bs):
            _imgs = torch.from_numpy(imgs[i:i + _bs]).cuda()
            with torch.no_grad():
                outputs = model(_imgs)
            for _k in outputs.keys():  # iterate over output keys
                # currently every output is a binary logit
                outputs_all[_k].extend(
                    torch.sigmoid(outputs[_k]).cpu().numpy())
        result_all[study_id] = {
            "outputs": {k: np.array(v) for k, v in outputs_all.items()},
            "ids": np.array(sop_arr),
        }
        if args.debug:
            break
    print("per study result's keys(): ", result_all[study_id].keys())
    return result_all
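# Sketch of consuming the structure documented in sub_3's docstring. The
# 'pe_present_on_image' key is borrowed from sub_2 above; the actual output
# keys depend on the model's heads, so treat the key name as an assumption.
result_all = sub_3(cfg, model, weight_path)
for study_id, res in result_all.items():
    probs = res["outputs"]["pe_present_on_image"]  # one sigmoid prob per slice
    sop_ids = res["ids"]
    assert len(probs) == len(sop_ids)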
def __init__(self, config):
    """
    Initialize this classifier with the given configuration.

    Input:
        config: a YAML node with the classifier configuration
    """
    self.__clf_name = config["classifier"]
    clf_config = config[self.__clf_name]
    self.__pickle = clf_config["pickle"]
    self.__clf = utils.load_model(self.__pickle)
    if self.__clf is not None:
        return
    if self.__clf_name == "adaboost":
        self.__clf = self.__init_adaboost(clf_config)
def test_save_and_load_checkpoint(self):
    model = torchvision.models.resnet18(pretrained=False)
    utils.save_checkpoint(model, epoch=100, filename='tmp.pth', save_arch=True)

    loaded_model = utils.load_model('tmp.pth')
    torch.testing.assert_allclose(model.conv1.weight, loaded_model.conv1.weight)

    model.conv1.weight = nn.Parameter(torch.zeros_like(model.conv1.weight))
    model = utils.load_checkpoint('tmp.pth', model=model)['model']
    assert (model.conv1.weight != 0).any()
def test_all_models(test_one_model, model_dict, model_filenames, data_loader,
                    administrator):
    for i, filename in enumerate(model_filenames):
        logging.info("\n")
        model, _ = load_model(model_dict, filename)
        logging.info("Loaded {}. Now testing".format(filename))
        administrator.set_model(model)
        t_valid = time.time()
        logdict = test_one_model(model, data_loader)
        logdict['model'] = filename
        logging.info("Testing took {:.1f} seconds".format(time.time() - t_valid))
        # t_log = time.time()
        administrator.log(**logdict)
def predict_export(params):
    predictions = {}
    error_class = []
    for class_name in ['arousal', 'valence', 'topic']:
        if class_name in ['arousal', 'valence']:
            class_no = 3
        else:
            class_no = 10
        trained_model_path = os.path.join(
            'experiments/pretrained_model/', class_name,
            args.experiment_name + '_' + data_file_name())
        try:
            model = load_model(path=trained_model_path,
                               name=args.experiment_name)
            loader, metadata = load_data_pipeline(params, class_name)
            predictions['prediction_' + class_name] = predict(
                params, model, loader, args.predict_partition, class_name)
            torch.cuda.empty_cache()
        except FileNotFoundError as fnfe:
            print("Model not found: " + str(fnfe))
            print("[WARN!] Set all prediction values for this model to 0")
            error_class.append(class_name)
            continue

    # note: relies on `metadata` from the last successful load above
    predictions['id'] = metadata['id']  # [args.predict_partition]
    predictions['segment_id'] = metadata['segment_id']
    df = pd.DataFrame.from_dict(predictions)  # , orient='index' .T
    if len(error_class) > 0:
        for ec in error_class:
            df['prediction_' + ec] = np.nan
        df = df.fillna(0)
    header_names = [
        'id', 'segment_id', 'prediction_arousal', 'prediction_valence',
        'prediction_topic'
    ]
    predict_partition = args.predict_partition.replace('valid', 'devel')
    df[header_names].to_csv(output_path + predict_partition + '.csv',
                            header=header_names,
                            index=False)
def main():
    argparser = argparse.ArgumentParser(description=__doc__)
    argparser.add_argument('--gpuid', default='0,')
    argparser.add_argument('--arch', default='fnn')
    argparser.add_argument('--loss', default='mse')
    argparser.add_argument('--dataset', default='fashionmnist')
    argparser.add_argument('--load_size', type=int, default=1000)
    argparser.add_argument('--num_wrong_samples', type=int, default=0)
    argparser.add_argument('--num_clean_samples', type=int, default=1000)
    argparser.add_argument('--batch_size', type=int, default=1000)
    argparser.add_argument('--nclasses', type=int, default=2)
    argparser.add_argument('--task', default='loss')
    argparser.add_argument('--dir', default='')
    argparser.add_argument('--save_dir', default='')
    args = argparser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid
    print(args)

    if args.loss == 'mse':
        ct = torch.nn.MSELoss().cuda()
        one_hot = True
    elif args.loss == 'hinge':
        ct = torch.nn.MultiMarginLoss(p=2)
        one_hot = False
    elif args.loss == 'cross_entropy':
        ct = torch.nn.CrossEntropyLoss()
        one_hot = False

    trDL, teDL = load_data(args, stop=True, one_hot=one_hot)
    net = load_model(args.dataset, args.arch)

    if args.task == 'loss':
        compute_loss(net, ct, trDL, teDL, args)
    elif args.task == 'nonuniformity':
        compute_diversity(net, ct, trDL, args)
    elif args.task == 'weight_norm':
        compute_norm(net, args)
    elif args.task == 'sharpness':
        compute_sharpness(net, ct, trDL, args)
    else:
        raise ValueError('Task %s has not been implemented' % (args.task))
def builtin_train(args):
    # 1. load dataset and model
    (train_images, train_labels), (test_images, test_labels) = load_dataset(args.data)
    input_shape = train_images[:args.batch_size, :, :, :].shape
    output_size = max(train_labels) + 1
    model = load_model(args.arch, input_shape=input_shape, output_size=output_size)
    model.summary()

    # 2. set tensorboard configs
    logdir = os.path.join(args.logdir, get_current_time())
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

    # 3. loss, optimizer, metrics setting
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    # 4. dataset config
    buffer_size = len(train_images)
    train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    train_ds = train_ds.shuffle(buffer_size)
    if args.augmentation:
        train_ds = train_ds.map(augment)
    train_ds = train_ds.batch(args.batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    test_ds = test_ds.batch(args.batch_size)

    # batching and shuffling are handled by the tf.data pipeline above;
    # passing batch_size/shuffle to fit() with a Dataset raises a ValueError
    fit_params = {}
    fit_params["epochs"] = args.max_epoch
    if args.steps_per_epoch:
        fit_params["steps_per_epoch"] = args.steps_per_epoch
    fit_params["verbose"] = 1
    fit_params["callbacks"] = [tensorboard_callback]
    fit_params["validation_data"] = test_ds

    # 5. start train and test
    model.fit(train_ds, **fit_params)
def main(_run):
    args = argparse.Namespace(**_run.config)
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load test data
    test_loader = load_data(args)
    ex.info["test_size"] = len(test_loader.dataset)

    # Load model
    model_bold = load_model_bold(args)
    model = load_model(args, model_bold)
    model = model.to(args.device)
    model.eval()

    # Define a loss function and optimizer
    criterion = nn.CrossEntropyLoss()

    correct = 0
    total = 0
    running_loss = 0.
    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(args.device), data[1].to(args.device)

            # predict labels
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    loss = running_loss / len(test_loader)
    ex.info["accuracy"] = accuracy
    ex.info["loss"] = loss
    print("Testing the network on the {} test images".format(ex.info["test_size"]))
    print("Test Accuracy: {}".format(accuracy))
    print("Test Loss: {}".format(loss))
def train_model(dataloaders, device, model, criterion, optimizer, state_path,
                model_name, scheduler=None, num_epochs=25, continue_train=False):
    if continue_train and os.path.exists(state_path):
        with open(state_path, 'rb') as f:
            state_dict = pickle.load(f)
        print(state_dict)
        train_loss = state_dict['loss']
        val_loss = state_dict['val_losses']
        accuracy = state_dict['accuracy']
        start = state_dict['epoch']
        model = load_model(model, model_name, start)
        start += 1
    else:
        train_loss, val_loss, accuracy = [], [], []
        start = 0

    for epoch in tqdm(range(start, num_epochs)):
        train_loss.append(
            train_step(dataloaders, device, model, criterion, optimizer).cpu())
        cur_val_loss, cur_acc = eval_step(dataloaders, device, model)
        val_loss.append(cur_val_loss.cpu())
        accuracy.append(cur_acc)
        print(f'Accuracy is {cur_acc}')
        with open(state_path, 'wb') as f:
            pickle.dump(
                {
                    'loss': train_loss,
                    'val_losses': val_loss,
                    'epoch': epoch,
                    'accuracy': accuracy
                }, f)
        torch.save(model.state_dict(),
                   os.path.join(f'models/{model_name}{epoch}.data'))
    return train_loss, val_loss
def text_classify_predict_main(model_name, predict_file, output_file=None,
                               seg=True, encoding='utf8', language='english'):
    sys.stdout.write("Load model ...\n")
    model = load_model(model_name)
    sys.stdout.write("Loaded model from %s\n" % model_name)
    assert type(model) == TextClassifier

    texts = list()
    if seg:
        word_segmentor = ChineseWordSegmentor('ictclas')
    else:
        word_segmentor = None

    sys.stdout.write("Loaded Data from %s ...\n" % predict_file)
    with open(predict_file) as fin:
        for line in fin:
            line = line.decode(encoding=encoding).strip()
            if seg:
                token = ' '.join(word_segmentor.segment(line))
            else:
                token = line
            token = generate_sentence_token(token,
                                            max_len=MAX_LEN,
                                            remove_stop=REMOVE_STOP,
                                            low_case=LOW_CASE,
                                            language=language)
            texts.append(token)

    sys.stdout.write("Predict Data ...\n")
    prob_result = [model.predict_text_prob(' '.join(token)) for token in texts]

    if output_file is None:
        out = sys.stdout
    else:
        sys.stdout.write("Save Result to %s \n" % output_file)
        out = open(output_file, 'w')
    for prob in prob_result:
        write_str = model.prob_to_str(prob[0]) + '\n'
        out.write(write_str.encode('utf8'))
    if output_file is not None:
        out.close()
def main():
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid
    trDL, teDL = load_data(args, stop=True, one_hot=True)
    net = load_model(args.dataset, args.arch, width=args.width, depth=args.depth)
    ct = torch.nn.MSELoss()
    print('# of parameters', num_parameters(net))
    res = scan(net, ct, trDL, teDL, args.model,
               verbose=True, niters=50, nonuniformity=args.nonuniformity)
    with open(args.save_res, 'wb') as f:
        pickle.dump(res, f)
def train():
    """Train function."""
    args.outputs_dir = params['save_model_path']

    if args.group_size > 1:
        init()
        context.set_auto_parallel_context(device_num=get_group_size(),
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        args.outputs_dir = os.path.join(args.outputs_dir,
                                        "ckpt_{}/".format(str(get_rank())))
        args.rank = get_rank()
    else:
        args.outputs_dir = os.path.join(args.outputs_dir, "ckpt_0/")
        args.rank = 0

    if args.group_size > 1:
        args.max_epoch = params["max_epoch_train_NP"]
        args.loss_scale = params['loss_scale'] / 2
        args.lr_steps = list(map(int, params["lr_steps_NP"].split(',')))
        params['train_type'] = params['train_type_NP']
        params['optimizer'] = params['optimizer_NP']
        params['group_params'] = params['group_params_NP']
    else:
        args.max_epoch = params["max_epoch_train"]
        args.loss_scale = params['loss_scale']
        args.lr_steps = list(map(int, params["lr_steps"].split(',')))

    # create network
    print('start create network')
    criterion = openpose_loss()
    criterion.add_flags_recursive(fp32=True)
    network = OpenPoseNet(vggpath=params['vgg_path'],
                          vgg_with_bn=params['vgg_with_bn'])
    if params["load_pretrain"]:
        print("load pretrain model:", params["pretrained_model_path"])
        load_model(network, params["pretrained_model_path"])
    train_net = BuildTrainNetwork(network, criterion)

    # create dataset
    if os.path.exists(args.jsonpath_train) and os.path.exists(args.imgpath_train) \
            and os.path.exists(args.maskpath_train):
        print('start create dataset')
    else:
        print('Error: wrong data path')
        return 0

    num_worker = 20 if args.group_size > 1 else 48
    de_dataset_train = create_dataset(args.jsonpath_train,
                                      args.imgpath_train,
                                      args.maskpath_train,
                                      batch_size=params['batch_size'],
                                      rank=args.rank,
                                      group_size=args.group_size,
                                      num_worker=num_worker,
                                      multiprocessing=True,
                                      shuffle=True,
                                      repeat_num=1)
    steps_per_epoch = de_dataset_train.get_dataset_size()
    print("steps_per_epoch: ", steps_per_epoch)

    # lr scheduler
    lr_stage, lr_base, lr_vgg = get_lr(params['lr'] * args.group_size,
                                       params['lr_gamma'],
                                       steps_per_epoch,
                                       args.max_epoch,
                                       args.lr_steps,
                                       args.group_size,
                                       lr_type=params['lr_type'],
                                       warmup_epoch=params['warmup_epoch'])

    # optimizer
    if params['group_params']:
        vgg19_base_params = list(
            filter(lambda x: 'base.vgg_base' in x.name,
                   train_net.trainable_params()))
        base_params = list(
            filter(lambda x: 'base.conv' in x.name,
                   train_net.trainable_params()))
        stages_params = list(
            filter(lambda x: 'base' not in x.name,
                   train_net.trainable_params()))
        group_params = [{'params': vgg19_base_params, 'lr': lr_vgg},
                        {'params': base_params, 'lr': lr_base},
                        {'params': stages_params, 'lr': lr_stage}]
        if params['optimizer'] == "Momentum":
            opt = Momentum(group_params, learning_rate=lr_stage, momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(group_params)
        else:
            raise ValueError("optimizer not supported.")
    else:
        if params['optimizer'] == "Momentum":
            opt = Momentum(train_net.trainable_params(),
                           learning_rate=lr_stage,
                           momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(train_net.trainable_params(), learning_rate=lr_stage)
        else:
            raise ValueError("optimizer not supported.")

    # callback
    config_ck = CheckpointConfig(
        save_checkpoint_steps=params['ckpt_interval'],
        keep_checkpoint_max=params["keep_checkpoint_max"])
    ckpoint_cb = ModelCheckpoint(prefix='{}'.format(args.rank),
                                 directory=args.outputs_dir,
                                 config=config_ck)
    time_cb = TimeMonitor(data_size=de_dataset_train.get_dataset_size())
    if args.rank == 0:
        callback_list = [MyLossMonitor(), time_cb, ckpoint_cb]
    else:
        callback_list = [MyLossMonitor(), time_cb]

    # train
    if params['train_type'] == 'clip_grad':
        train_net = TrainOneStepWithClipGradientCell(train_net, opt,
                                                     sens=args.loss_scale)
        train_net.set_train()
        model = Model(train_net)
    elif params['train_type'] == 'fix_loss_scale':
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)
        train_net.set_train()
        model = Model(train_net,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager)
    else:
        raise ValueError("Type {} is not supported.".format(params['train_type']))

    print("============== Starting Training ==============")
    model.train(args.max_epoch,
                de_dataset_train,
                callbacks=callback_list,
                dataset_sink_mode=False)
    return 0
#########################################################################
# Prepare model
#########################################################################
student_config = BertConfig(os.path.join(args.bert_model, 'bert_config.json'))
if args.kd_model.lower() in ['kd', 'kd.cls']:
    logger.info('using normal Knowledge Distillation')
    output_all_layers = args.kd_model.lower() == 'kd.cls'
    student_encoder, student_classifier = init_model(task_name,
                                                     output_all_layers,
                                                     args.student_hidden_layers,
                                                     student_config)
    n_student_layer = len(student_encoder.bert.encoder.layer)
    student_encoder = load_model(student_encoder, args.encoder_checkpoint,
                                 args, 'student', verbose=True)
    logger.info('*' * 77)
    student_classifier = load_model(student_classifier, args.cls_checkpoint,
                                    args, 'classifier', verbose=True)
elif args.kd_model.lower() == 'kd.full':
    logger.info('using FULL Knowledge Distillation')
    layer_idx = [int(i) for i in args.fc_layer_idx.split(',')]
    num_fc_layer = len(layer_idx)
    if args.weights is None or args.weights.lower() in ['none']:
        weights = np.array([1] * (num_fc_layer - 1) +
                           [num_fc_layer - 1]) / 2 / (num_fc_layer - 1)
def initialize(self):
    self.net = utils.load_model()
        punctuation = punctuation_reverse_map[punctuation_index]
        if punctuation == " ":
            output_file.write("%s%s" % (punctuation, word))
        else:
            if write_readable_text:
                output_file.write("%s %s" % (punctuation[:1], word))
            else:
                output_file.write(" %s %s" % (punctuation, word))


if __name__ == "__main__":
    if len(sys.argv) > 4:
        model_name = sys.argv[1]
        net = utils.load_model(model_name)
        net.batch_size = 1
        net.reset_state()
        punctuation_reverse_map = utils.get_reverse_map(net.out_vocabulary)
        write_readable_text = bool(int(sys.argv[2]))
        text_has_pause_duration_tags = bool(int(sys.argv[3]))
        output_file_path = sys.argv[4]

        if len(sys.argv) > 5:
            with open(sys.argv[5], 'r') as unpunctuated_file:
                unpunctuated_text = " ".join(unpunctuated_file.readlines())
        else:
            unpunctuated_text = " ".join(sys.stdin.readlines())
                       batch_size=args.batch_size,
                       shuffle=False,
                       num_workers=4)

for data, label in te_loader:
    data, label = tensor2cuda(data), tensor2cuda(label)
    break

adv_list = []
pred_list = []

with torch.no_grad():
    model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0)
    load_model(model, args.load_checkpoint)
    if torch.cuda.is_available():
        model.cuda()
    attack = FastGradientSignUntargeted(model,
                                        max_epsilon,
                                        args.alpha,
                                        min_val=0,
                                        max_val=1,
                                        max_iters=args.k,
                                        _type=perturbation_type)
    adv_data = attack.perturb(data, label, 'mean', False)
    output = model(adv_data, _eval=True)
elif args.loss == 'hinge':
    ct = torch.nn.MultiMarginLoss(p=2)
    one_hot = False
elif args.loss == 'cross_entropy':
    print('Loss')
    ct = torch.nn.CrossEntropyLoss()
    one_hot = False

# Load data and model
for lr, n_iters in zip(args.lr, args.n_iters):
    for i in range(args.n_tries):
        print('==== Start of %d-th Experiment ===' % (i + 1))
        trDL, teDL = load_data(args, one_hot=one_hot)
        net = load_model(args.dataset, args.arch)
        # net.apply(lambda t: weights_init(t, args.gain, args.init))
        optimizer = torch.optim.LBFGS(net.parameters(), lr=lr)
        trainer = Trainer(iter_display=args.iter_display)
        trainer.set_model(net, ct, optimizer, scheduler=None)
        res = trainer.train_sgd(trDL,
                                batch_size=args.batch_size,
                                iter_start=1,
                                iter_end=n_iters,
                                tol=args.tol)
        trDL.reset()
        trL, trA, trC = eval_accuracy(net, ct, trDL)
            with open(TEST_LIST_PATH, "a") as test_list_file:
                [noise_type, noise_level, hyperparam_index], init_index = keys, key
                test_list_file.write(
                    "{dict_index} {noise_type} {noise_level} {hyperparam_index} {init_index}\n"
                    .format(dict_index=dict_index,
                            noise_type=noise_type,
                            noise_level=noise_level,
                            hyperparam_index=hyperparam_index,
                            init_index=init_index))
        else:
            raise ValueError(
                "The dictionary provided to the write_final_epoch_path_to_file "
                "function was not in the correct format.")


if __name__ == "__main__":
    root_dir = '../results/mnist'
    experiment_dicts = get_experiment_dicts(root_dir)
    paths_per_experiment_dict = []
    for experiment_dict in experiment_dicts:
        model_paths = load_model(experiment_dict, path_to_results=root_dir)
        paths_per_experiment_dict.append(model_paths)

    os.makedirs(TEST_LIST_DIRECTORY, exist_ok=True)
    open(TEST_LIST_PATH, "w").close()
    np.savez_compressed(TEST_DICT_PATH, data=paths_per_experiment_dict)

    for index, dictionary in enumerate(paths_per_experiment_dict):
        write_details_to_file(dictionary, dict_index=index)
def train(cfg, model):
    criterion = factory.get_criterion(cfg)
    # optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    optim = factory.get_optimizer(cfg, model.parameters())

    best = {
        'loss': float('inf'),
        'score': 0.0,
        'epoch': -1,
    }
    if "resume_from" in cfg.keys() and cfg["resume_from"]:
        detail = utils.load_model(cfg["resume_from"], model, optim=optim)
        best.update({
            'loss': detail['loss'],
            'score': detail['score'],
            'epoch': detail['epoch'],
        })
        # to set lr manually after resuming
        for param_group in optim.param_groups:
            param_group['lr'] = cfg["optimizer"]["param"]["lr"]
    log(f"initial lr {utils.get_lr(optim)}")

    scheduler, is_reduce_lr = factory.get_scheduler(cfg, optim)
    log(f"is_reduce_lr: {is_reduce_lr}")

    loader_train = factory.get_loader_train(cfg)
    loader_valid = factory.get_loader_valid(cfg)
    log('train data: loaded %d records' % len(loader_train.dataset))
    log('valid data: loaded %d records' % len(loader_valid.dataset))

    log('apex %s' % cfg["apex"])
    if cfg["apex"]:
        amp.initialize(model, optim, opt_level='O1')

    for epoch in range(best['epoch'] + 1, cfg["epoch"]):
        log(f'\n----- epoch {epoch} -----')
        run_nn(cfg, 'train', model, loader_train,
               criterion=criterion, optim=optim, apex=cfg["apex"])
        with torch.no_grad():
            val = run_nn(cfg, 'valid', model, loader_valid, criterion=criterion)
        detail = {
            'score': val['score'],
            'loss': val['loss'],
            'epoch': epoch,
        }
        if val['loss'] <= best['loss']:
            best.update(detail)
            utils.save_model(model, optim, detail, cfg["fold"], output_dir,
                             best=True)
        utils.save_model(model, optim, detail, cfg["fold"], output_dir)
        log('[best] ep:%d loss:%.4f score:%.4f' %
            (best['epoch'], best['loss'], best['score']))
        if is_reduce_lr:
            scheduler.step(val['loss'])  # ReduceLROnPlateau
        else:
            scheduler.step()
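# Illustrative config covering the keys train() reads directly; the values
# are made up, and real configs also carry the dataset/optimizer/scheduler
# sections consumed by factory.* and run_nn.
cfg = {
    "resume_from": "output/fold0_ep5.pt",  # falsy value trains from scratch
    "optimizer": {"param": {"lr": 1e-4}},  # lr re-applied after resuming
    "apex": False,                         # mixed precision via NVIDIA apex
    "epoch": 10,
    "fold": 0,
}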
                    help='name of videoplayer predictions file.')
parser.add_argument('-csv', '--csvfile', default='',
                    help='name of csv predictions file.')
parser.add_argument('-trf', '--trfile', default='',
                    help='name of tracker predictions file.')
args = parser.parse_args()

assert (args.vpfile or args.csvfile or args.trfile)
assert (args.offset >= 1)

states_df = data.get_statewise_data()
model, cp = utils.load_model(args.experimentid, args.checkpoint, v=False)

prediction_date = (states_df.date.max().to_pydatetime() -
                   dt.timedelta(days=args.offset)).strftime("%Y-%m-%d")
print("Predicting for:", prediction_date)

api = predictions.generate(states_df, constants.STT_INFO, model, cp,
                           args.taskidx, args.days, args.offset, plot=False)

if args.vpfile:
    predictions.export_videoplayer(api, prediction_date, args.vpfile)
def train_mlt_single(args):
    global logger
    logger.info(args)
    task_lst, vocabs = utils.get_data(args.data_path)
    task_db = task_lst[args.task_id]
    train_data = task_db.train_set
    dev_data = task_db.dev_set
    test_data = task_db.test_set
    task_name = task_db.task_name

    if args.debug:
        train_data = train_data[:200]
        dev_data = dev_data[:200]
        test_data = test_data[:200]
        args.epochs = 3
        args.pruning_iter = 3

    summary_writer = SummaryWriter(
        log_dir=os.path.join(args.tb_path, "global/%s" % task_name)
    )

    logger.info("task name: {}, task id: {}".format(task_db.task_name, task_db.task_id))
    logger.info(
        "train len {}, dev len {}, test len {}".format(
            len(train_data), len(dev_data), len(test_data)
        )
    )

    # init model
    model = get_model(args, task_lst, vocabs)
    logger.info("model: \n{}".format(model))
    if args.init_weights is not None:
        utils.load_model(model, args.init_weights)

    if utils.need_acc(task_name):
        metrics = [AccuracyMetric(target="y"), MetricInForward(val_name="loss")]
        metric_key = "acc"
    else:
        metrics = [
            YangJieSpanMetric(
                tag_vocab=vocabs[task_name],
                pred="pred",
                target="y",
                seq_len="seq_len",
                encoding_type="bioes" if task_name == "ner" else "bio",
            ),
            MetricInForward(val_name="loss"),
        ]
        metric_key = "f"
    logger.info(metrics)

    need_cut_names = list(set([s.strip() for s in args.need_cut.split(",")]))
    prune_names = []
    for name, p in model.named_parameters():
        if not p.requires_grad or "bias" in name:
            continue
        for n in need_cut_names:
            if n in name:
                prune_names.append(name)
                break

    # get Pruning class
    pruner = Pruning(
        model, prune_names, final_rate=args.final_rate, pruning_iter=args.pruning_iter
    )
    if args.init_masks is not None:
        pruner.load(args.init_masks)
        pruner.apply_mask(pruner.remain_mask, pruner._model)

    # save checkpoint
    os.makedirs(args.save_path, exist_ok=True)
    logger.info('Saving init-weights to {}'.format(args.save_path))
    torch.save(
        model.cpu().state_dict(), os.path.join(args.save_path, "init_weights.th")
    )
    torch.save(args, os.path.join(args.save_path, "args.th"))

    # start training and pruning
    summary_writer.add_scalar("remain_rate", 100.0, 0)
    summary_writer.add_scalar("cutoff", 0.0, 0)

    if args.init_weights is not None:
        init_tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = init_tester.test()
        logger.info("No init testing, Result: {}".format(res))
        del res, init_tester

    for prune_step in range(pruner.pruning_iter + 1):
        # reset optimizer every time
        optim_params = [p for p in model.parameters() if p.requires_grad]
        # utils.get_logger(__name__).debug(optim_params)
        utils.get_logger(__name__).debug(len(optim_params))
        optimizer = get_optim(args.optim, optim_params)
        # optimizer = TriOptim(optimizer, args.n_filters, args.warmup, args.decay)
        factor = pruner.cur_rate / 100.0
        factor = 1.0
        # print(factor, pruner.cur_rate)
        for pg in optimizer.param_groups:
            pg["lr"] = factor * pg["lr"]
        utils.get_logger(__name__).info(optimizer)

        trainer = Trainer(
            train_data,
            model,
            loss=LossInForward(),
            optimizer=optimizer,
            metric_key=metric_key,
            metrics=metrics,
            print_every=200,
            batch_size=args.batch_size,
            num_workers=4,
            n_epochs=args.epochs,
            dev_data=dev_data,
            save_path=None,
            sampler=fastNLP.BucketSampler(batch_size=args.batch_size),
            callbacks=[
                pruner,
                # LRStep(lstm.WarmupLinearSchedule(optimizer, args.warmup, int(len(train_data)/args.batch_size*args.epochs)))
                GradientClipCallback(clip_type="norm", clip_value=5),
                LRScheduler(
                    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep))
                ),
                LogCallback(path=os.path.join(args.tb_path, "No", str(prune_step))),
            ],
            use_tqdm=False,
            device="cuda",
            check_code_level=-1,
        )
        res = trainer.train()
        logger.info("No #{} training, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("pruning_dev_acc", val, prune_step)

        tester = Tester(
            test_data,
            model,
            metrics=metrics,
            batch_size=args.batch_size,
            num_workers=4,
            device="cuda",
            use_tqdm=False,
        )
        res = tester.test()
        logger.info("No #{} testing, Result: {}".format(pruner.prune_times, res))
        name, val = get_metric(res)
        summary_writer.add_scalar("pruning_test_acc", val, prune_step)

        # prune and save
        torch.save(
            model.state_dict(),
            os.path.join(
                args.save_path,
                "best_{}_{}.th".format(pruner.prune_times, pruner.cur_rate),
            ),
        )
        pruner.pruning_model()
        summary_writer.add_scalar("remain_rate", pruner.cur_rate, prune_step + 1)
        summary_writer.add_scalar("cutoff", pruner.last_cutoff, prune_step + 1)

        pruner.save(
            os.path.join(
                args.save_path, "{}_{}.th".format(pruner.prune_times, pruner.cur_rate)
            )
        )
else:
    args.eval_batch_size = 32
args.raw_data_dir = os.path.join(HOME_DATA_FOLDER, 'data_raw', task)

run_folder = os.path.join(KD_DIR, task, sub_dir, run_folder)
encoder_file = glob.glob(run_folder + '/*e.%d.encoder.pkl' % epoch)
cls_file = glob.glob(run_folder + '/*e.%d.cls.pkl' % epoch)
assert len(encoder_file) == 1 and len(cls_file) == 1, \
    f'encoder/cls file error: {encoder_file}, {cls_file}'
encoder_file, cls_file = encoder_file[0], cls_file[0]

encoder_bert, classifier = init_model(task, output_all_layers, n_layer, config)
encoder_bert = load_model(encoder_bert, encoder_file, args, 'exact', verbose=True)
classifier = load_model(classifier, cls_file, args, 'exact', verbose=True)

all_res = {'train': None, 'dev': None, 'test': None}
if 'dev' in interested_set or 'valid' in interested_set:
    dev_examples, dev_dataloader, dev_label_ids = get_task_dataloader(
        task.lower(), 'dev', tokenizer, args, SequentialSampler,
        args.eval_batch_size)
    dev_res = eval_model_dataloader(encoder_bert,
                                    classifier,
                                    dev_dataloader,
                                    args.device,
                                    detailed=True,
                                    verbose=False)