def main(cmd=None, stdout=True):
    """Run an active-learning dialogue labeling experiment on a PartialEnv.

    Seeds (or loads) an initial model, then alternates between soliciting
    labels from a strategy and re-fitting the model until the environment
    reports completion, logging metrics to comet.ml throughout.

    NOTE(review): indentation was reconstructed from a whitespace-mangled
    source; the `else: break` is assumed to pair with `if env.can_label`.
    """
    args = get_args(cmd, stdout)
    # Encode every relevant hyperparameter into a single run identifier used
    # for the log filename and model checkpoint prefixes.
    model_id = "seed_{}_strat_{}_noise_fn_{}_noise_fp_{}_num_passes_{}_seed_size_{}_model_{}_batch_size_{}_gamma_{}_label_budget_{}_epochs_{}".format(
        args.seed, args.strategy, args.noise_fn, args.noise_fp,
        args.num_passes, args.seed_size, args.model, args.batch_size,
        args.gamma, args.label_budget, args.epochs)
    # Log to a per-run file and mirror everything to stdout.
    logging.basicConfig(
        filename="{}/{}.txt".format(args.dout, model_id),
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logger = Experiment(comet_ml_key, project_name="ActiveDialogue")
    logger.log_parameters(vars(args))
    # NOTE(review): if args.model is neither "glad" nor "gce", model_arch is
    # never bound and the PartialEnv call below raises NameError.
    if args.model == "glad":
        model_arch = GLAD
    elif args.model == "gce":
        model_arch = GCE
    env = PartialEnv(load_dataset, model_arch, args)
    if args.seed_size:
        # Train (or reuse) the seed model that bootstraps active learning.
        with logger.train():
            if not env.load('seed'):
                logging.info("No loaded seed. Training now.")
                env.seed_fit(args.seed_epochs, prefix="seed")
                logging.info("Seed completed.")
            else:
                logging.info("Loaded seed.")
                if args.force_seed:
                    logging.info("Training seed regardless.")
                    env.seed_fit(args.seed_epochs, prefix="seed")
        env.load('seed')
    # Uncertainty-based strategies need a threshold wrapper; the simple
    # baselines (aggressive/random/passive) are plain functions.
    use_strategy = False
    if args.strategy == "entropy":
        use_strategy = True
        strategy = partial_entropy
    elif args.strategy == "bald":
        use_strategy = True
        strategy = partial_bald
    if use_strategy:
        if args.threshold_strategy == "fixed":
            strategy = FixedThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "variable":
            strategy = VariableThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "randomvariable":
            strategy = StochasticVariableThresholdStrategy(
                strategy, args, True)
    ended = False
    i = 0
    initial_metrics = env.metrics(True)
    logger.log_current_epoch(i)
    logging.info("Initial metrics: {}".format(initial_metrics))
    for k, v in initial_metrics.items():
        logger.log_metric(k, v)
    with logger.train():
        while not ended:
            i += 1
            # Observe environment state
            logger.log_current_epoch(i)
            if env.can_label:
                # Obtain label request from strategy.  BALD needs multiple
                # stochastic forward passes (20); others use a single pass.
                obs, preds = env.observe(20 if args.strategy == "bald" else 1)
                if args.strategy != "bald":
                    preds = preds[0]
                if args.strategy == "aggressive":
                    label_request = aggressive(preds)
                elif args.strategy == "random":
                    label_request = random(preds)
                elif args.strategy == "passive":
                    label_request = passive(preds)
                elif use_strategy:
                    label_request = strategy.observe(preds)
                else:
                    raise ValueError()
                # Label solicitation
                labeled = env.label(label_request)
                if use_strategy:
                    # Report (requested, total) label counts so the threshold
                    # strategy can adapt toward its budget.
                    strategy.update(
                        sum([
                            np.sum(s.flatten())
                            for s in label_request.values()
                        ]),
                        sum([
                            np.sum(np.ones_like(s).flatten())
                            for s in label_request.values()
                        ]))
            else:
                # No labeling budget left: stop the active-learning loop.
                break
            # Environment stepping
            ended = env.step()
            # Fit every al_batch of items
            best = env.fit(prefix=model_id, reset_model=True)
            for k, v in best.items():
                logger.log_metric(k, v)
            env.load(prefix=model_id)
    # Final fit
    final_metrics = env.fit(epochs=args.final_epochs,
                            prefix="final_fit_" + model_id,
                            reset_model=True)
    for k, v in final_metrics.items():
        logger.log_metric("Final " + k, v)
        logging.info("Final " + k + ": " + str(v))
    logging.info("Run finished.")
print(out2.shape) exit()""" criterion = nn.BCELoss() # Establish convention for real and fake labels during training real_label = 1 fake_label = 0 # Setup Adam optimizers for both G and D optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999)) optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999)) steps = 0 for epoch in range(num_epochs): experiment.log_current_epoch(epoch) for i, data in enumerate(dataloader, 0): experiment.set_step(steps) ############################ # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) ########################### ## Train with all-real batch netD.zero_grad() # Format batch real_cpu = data[0].to(device) b_size = real_cpu.size(0) label = torch.full((b_size,), real_label, device=device) # Forward pass real batch through D output = netD(real_cpu).view(-1) # Calculate loss on all-real batch
def run(args, train, sparse_evidences, claims_dict):
    """Train and validate a CDSSM claim/evidence model.

    Splits `train` 80/20 into train/validation sets, trains for
    `args.epochs` epochs with NLL loss, logs metrics to comet.ml and
    TensorBoard, and checkpoints whenever validation loss improves.

    Args:
        args: parsed CLI namespace (batch_size, learning_rate, epochs, ...).
        train: full list of training examples to be split 80/20.
        sparse_evidences: evidence store passed through to WikiDataset.
        claims_dict: claim lookup passed through to WikiDataset.
    """
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    DATA_SAMPLING = args.data_sampling
    NUM_EPOCHS = args.epochs
    MODEL = args.model
    RANDOMIZE = args.no_randomize
    PRINT = args.print

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # FIX: time.localtime() returns a struct_time whose repr made an
    # unusable directory name; use a formatted timestamp instead.
    logger = Logger('./logs/{}'.format(time.strftime('%Y-%m-%d_%H-%M-%S')))

    if MODEL:
        print("Loading pretrained model...")
        # FIX: the checkpoint was previously deserialized twice and its own
        # state_dict re-loaded into itself; a single load is sufficient.
        model = torch.load(MODEL)
    else:
        model = cdssm.CDSSM()
        model = model.cuda()
        model = model.to(device)

    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created model with {:,} parameters.".format(
        putils.count_parameters(model)))

    print("Created dataset...")
    # use an 80/20 train/validate split!
    train_size = int(len(train) * 0.80)
    train_dataset = pytorch_data_loader.WikiDataset(
        train[:train_size],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    val_dataset = pytorch_data_loader.WikiDataset(
        train[train_size:],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=pytorch_data_loader.PadCollate())
    val_dataloader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                num_workers=0,
                                shuffle=True,
                                collate_fn=pytorch_data_loader.PadCollate())

    # Loss and optimizer.  NLLLoss expects log-probabilities from the model.
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-3)

    # Emit progress roughly every 2% of an epoch, but at least every 20 batches.
    OUTPUT_FREQ = max(int((len(train_dataset) / BATCH_SIZE) * 0.02), 20)

    parameters = {
        "batch size": BATCH_SIZE,
        "epochs": NUM_EPOCHS,
        "learning rate": LEARNING_RATE,
        "optimizer": optimizer.__class__.__name__,
        "loss": criterion.__class__.__name__,
        "training size": train_size,
        "data sampling rate": DATA_SAMPLING,
        "data": args.data,
        "sparse_evidences": args.sparse_evidences,
        "randomize": RANDOMIZE,
        "model": MODEL
    }
    # NOTE(review): API key should live in an env var / config, not in code.
    experiment = Experiment(api_key="YLsW4AvRTYGxzdDqlWRGCOhee",
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("train")
    experiment.log_asset("cdssm.py")
    experiment.log_dataset_info(name=args.data)
    experiment.log_parameters(parameters)

    # Build a checkpoint path encoding every hyperparameter except the
    # (possibly long) model path; '/' in values would break the filename.
    model_checkpoint_dir = "models/saved_model"
    for key, value in parameters.items():
        if type(value) == str:
            value = value.replace("/", "-")
        if key != "model":
            model_checkpoint_dir += "_{}-{}".format(key.replace(" ", "_"),
                                                    value)

    print("Training...")
    beginning_time = time.time()
    best_loss = torch.tensor(float("inf"), dtype=torch.float)  # begin loss at infinity

    for epoch in range(NUM_EPOCHS):
        beginning_time = time.time()
        mean_train_acc = 0.0
        train_running_loss = 0.0
        train_running_accuracy = 0.0
        model.train()
        experiment.log_current_epoch(epoch)

        with experiment.train():
            for train_batch_num, inputs in enumerate(train_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs
                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)
                y = (labels)
                y_pred = y_pred.squeeze()
                # Labels are one-hot; argmax over dim 1 gives class indices.
                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float()
                accuracy = accuracy.mean()
                train_running_accuracy += accuracy.item()
                mean_train_acc += accuracy.item()
                train_running_loss += loss.item()

                if PRINT:
                    # Per-example debug dump (claims_text[0]: one claim per batch).
                    for idx in range(len(y)):
                        print(
                            "Claim: {}, Evidence: {}, Prediction: {}, Label: {}"
                            .format(claims_text[0], evidences_text[idx],
                                    torch.exp(y_pred[idx]), y[idx]))

                if (train_batch_num % OUTPUT_FREQ) == 0 and train_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    binary_y = torch.max(y, 1)[1]
                    binary_pred = torch.max(y_pred, 1)[1]
                    print(
                        "[{}:{}:{:3f}s] training loss: {}, training accuracy: {}, training recall: {}"
                        .format(
                            epoch,
                            train_batch_num / (len(train_dataset) / BATCH_SIZE),
                            elapsed_time, train_running_loss / OUTPUT_FREQ,
                            train_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {
                        'train_loss': train_running_loss / OUTPUT_FREQ,
                        'train_accuracy': train_running_accuracy / OUTPUT_FREQ
                    }
                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=train_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, train_batch_num + 1)

                    # 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             train_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             train_batch_num + 1)

                    # Reset windowed accumulators for the next OUTPUT_FREQ span.
                    train_running_loss = 0.0
                    beginning_time = time.time()
                    train_running_accuracy = 0.0

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        print("Running validation...")
        model.eval()
        pred = []
        true = []
        avg_loss = 0.0
        val_running_accuracy = 0.0
        val_running_loss = 0.0
        beginning_time = time.time()
        with experiment.validate():
            for val_batch_num, val_inputs in enumerate(val_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = val_inputs
                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)
                y = (labels)
                y_pred = y_pred.squeeze()
                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                true.extend(binary_y.tolist())
                pred.extend(binary_pred.tolist())

                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float().mean()
                val_running_accuracy += accuracy.item()
                val_running_loss += loss.item()
                avg_loss += loss.item()

                if (val_batch_num % OUTPUT_FREQ) == 0 and val_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] validation loss: {}, accuracy: {}, recall: {}"
                        .format(
                            epoch,
                            val_batch_num / (len(val_dataset) / BATCH_SIZE),
                            elapsed_time, val_running_loss / OUTPUT_FREQ,
                            val_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {'val_accuracy': val_running_accuracy / OUTPUT_FREQ}
                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=val_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, val_batch_num + 1)

                    # 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             val_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             val_batch_num + 1)

                    val_running_accuracy = 0.0
                    val_running_loss = 0.0
                    beginning_time = time.time()

        accuracy = accuracy_score(true, pred)
        print("[{}] mean accuracy: {}, mean loss: {}".format(
            epoch, accuracy, avg_loss / len(val_dataloader)))
        true = np.array(true).astype("int")
        pred = np.array(pred).astype("int")
        print(classification_report(true, pred))

        # Checkpoint if this epoch's mean validation loss matches the best so far.
        best_loss = torch.tensor(
            min(avg_loss / len(val_dataloader),
                best_loss.cpu().numpy()))
        is_best = bool((avg_loss / len(val_dataloader)) <= best_loss)
        putils.save_checkpoint(
            {
                "epoch": epoch,
                "model": model,
                "best_loss": best_loss
            },
            is_best,
            filename="{}_loss_{}".format(model_checkpoint_dir,
                                         best_loss.cpu().numpy()))
def main(_):
    """Train an RPN3D detector (TensorFlow 1.x), with periodic validation,
    checkpointing, and KITTI-format prediction dumps every 10 epochs.

    Restores from `save_model_dir` when a checkpoint exists; otherwise
    initializes fresh parameters.  Metrics go to comet.ml and TF summaries.
    """
    experiment = Experiment(api_key="xXtJguCo8yFdU7dpjEpo6YbHw",
                            project_name=args.experiment_name)
    hyper_params = {
        "learning_rate": args.lr,
        "num_epochs": args.max_epoch,
        "batch_size": args.single_batch_size,
        "alpha": args.alpha,
        "beta": args.beta,
        "gamma": args.gamma,
        "loss": args.loss
    }
    experiment.log_multiple_params(hyper_params)

    # TODO: split file support
    with tf.Graph().as_default():
        global save_model_dir
        start_epoch = 0
        global_counter = 0

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION,
            visible_device_list=cfg.GPU_AVAILABLE,
            allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            device_count={
                "GPU": cfg.GPU_USE_COUNT,
            },
            allow_soft_placement=True,
            log_device_placement=False,
        )
        with tf.Session(config=config) as sess:
            model = RPN3D(cls=cfg.DETECT_OBJ,
                          single_batch_size=args.single_batch_size,
                          learning_rate=args.lr,
                          max_gradient_norm=5.0,
                          alpha=args.alpha,
                          beta=args.beta,
                          gamma=args.gamma,
                          loss_type=args.loss,
                          avail_gpus=cfg.GPU_AVAILABLE.split(','))
            # param init/restore: resume epoch and step counters from the
            # checkpoint when one exists.
            if tf.train.get_checkpoint_state(save_model_dir):
                print("Reading model parameters from %s" % save_model_dir)
                model.saver.restore(sess,
                                    tf.train.latest_checkpoint(save_model_dir))
                start_epoch = model.epoch.eval() + 1
                global_counter = model.global_step.eval() + 1
            else:
                print("Created model with fresh parameters.")
                tf.global_variables_initializer().run()

            # train and validate
            is_summary = False  # removed unused is_summary_image / is_validate
            summary_interval = 5
            summary_val_interval = 10
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            experiment.set_model_graph(sess.graph)
            epochs = args.max_epoch  # hoisted: loop-invariant

            # training
            with experiment.train():
                for epoch in range(start_epoch, args.max_epoch):
                    counter = 0
                    batch_time = time.time()
                    experiment.log_current_epoch(epoch)
                    for batch in iterate_data(
                            train_dir,
                            shuffle=True,
                            aug=True,
                            is_testset=False,
                            batch_size=args.single_batch_size *
                            cfg.GPU_USE_COUNT,
                            multi_gpu_sum=cfg.GPU_USE_COUNT):
                        counter += 1
                        global_counter += 1
                        experiment.set_step(global_counter)

                        # Only fetch summaries on the summary interval.
                        if counter % summary_interval == 0:
                            is_summary = True
                        else:
                            is_summary = False

                        start_time = time.time()
                        ret = model.train_step(sess,
                                               batch,
                                               train=True,
                                               summary=is_summary)
                        forward_time = time.time() - start_time
                        batch_time = time.time() - batch_time

                        # ret layout: [loss, cls_loss, cls_pos_loss,
                        # cls_neg_loss, ..., summary] (summary last).
                        param = ret
                        params = {
                            "loss": param[0],
                            "cls_loss": param[1],
                            "cls_pos_loss": param[2],
                            "cls_neg_loss": param[3]
                        }
                        experiment.log_multiple_metrics(params)

                        print(
                            'train: {} @ epoch:{}/{} loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'
                            .format(counter, epoch, epochs, ret[0], ret[1],
                                    ret[2], ret[3], forward_time, batch_time))

                        if counter % summary_interval == 0:
                            print("summary_interval now")
                            summary_writer.add_summary(ret[-1], global_counter)

                        if counter % summary_val_interval == 0:
                            print("summary_val_interval now")
                            batch = sample_test_data(
                                val_dir,
                                args.single_batch_size * cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT)
                            ret = model.validate_step(sess, batch, summary=True)
                            summary_writer.add_summary(ret[-1], global_counter)
                            try:
                                ret = model.predict_step(sess,
                                                         batch,
                                                         summary=True)
                                summary_writer.add_summary(
                                    ret[-1], global_counter)
                            # FIX: was a bare `except:` that also swallowed
                            # SystemExit/KeyboardInterrupt and hid the cause.
                            except Exception as e:
                                print("prediction skipped due to error:", e)

                        # Cooperative pause: checkpoint and exit on request.
                        if check_if_should_pause(args.tag):
                            model.saver.save(sess,
                                             os.path.join(
                                                 save_model_dir, 'checkpoint'),
                                             global_step=model.global_step)
                            print('pause and save model @ {} steps:{}'.format(
                                save_model_dir, model.global_step.eval()))
                            sys.exit(0)

                        batch_time = time.time()

                    experiment.log_epoch_end(epoch)
                    sess.run(model.epoch_add_op)
                    model.saver.save(sess,
                                     os.path.join(save_model_dir, 'checkpoint'),
                                     global_step=model.global_step)

                    # dump test data every 10 epochs
                    if (epoch + 1) % 10 == 0:
                        # create output folder
                        os.makedirs(os.path.join(args.output_path, str(epoch)),
                                    exist_ok=True)
                        os.makedirs(os.path.join(args.output_path, str(epoch),
                                                 'data'),
                                    exist_ok=True)
                        if args.vis:
                            os.makedirs(os.path.join(args.output_path,
                                                     str(epoch), 'vis'),
                                        exist_ok=True)

                        for batch in iterate_data(
                                val_dir,
                                shuffle=False,
                                aug=False,
                                is_testset=False,
                                batch_size=args.single_batch_size *
                                cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT):
                            if args.vis:
                                tags, results, front_images, bird_views, heatmaps = model.predict_step(
                                    sess, batch, summary=False, vis=True)
                            else:
                                tags, results = model.predict_step(sess,
                                                                   batch,
                                                                   summary=False,
                                                                   vis=False)

                            # Write one KITTI-format label file per sample.
                            for tag, result in zip(tags, results):
                                of_path = os.path.join(args.output_path,
                                                       str(epoch), 'data',
                                                       tag + '.txt')
                                with open(of_path, 'w+') as f:
                                    labels = box3d_to_label(
                                        [result[:, 1:8]], [result[:, 0]],
                                        [result[:, -1]],
                                        coordinate='lidar')[0]
                                    for line in labels:
                                        f.write(line)
                                    print('write out {} objects to {}'.format(
                                        len(labels), tag))

                            # dump visualizations
                            if args.vis:
                                for tag, front_image, bird_view, heatmap in zip(
                                        tags, front_images, bird_views,
                                        heatmaps):
                                    front_img_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_front.jpg')
                                    bird_view_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_bv.jpg')
                                    heatmap_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_heatmap.jpg')
                                    cv2.imwrite(front_img_path, front_image)
                                    cv2.imwrite(bird_view_path, bird_view)
                                    cv2.imwrite(heatmap_path, heatmap)

                        # execute evaluation code
                        # NOTE(review): shell command built from args.output_path;
                        # prefer subprocess.run([...]) if paths can be untrusted.
                        cmd_1 = "./kitti_eval/launch_test.sh"
                        cmd_2 = os.path.join(args.output_path, str(epoch))
                        cmd_3 = os.path.join(args.output_path, str(epoch), 'log')
                        os.system(" ".join([cmd_1, cmd_2, cmd_3]))

                print('train done. total epoch:{} iter:{}'.format(
                    epoch, model.global_step.eval()))

            # finally save model
            model.saver.save(sess,
                             os.path.join(save_model_dir, 'checkpoint'),
                             global_step=model.global_step)
def main():
    """Train ResNet18 on (optionally imbalanced) CIFAR-10 with a selectable
    class-imbalance-aware loss, evaluating each epoch on majority-only,
    minority-only, and full test splits.  Logs to comet.ml.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Cifar10 Example')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=25, metavar='N',
                        help='number of epochs to train (default: 25)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--model-path', type=str, default='', metavar='M',
                        help='model param path')
    parser.add_argument('--loss-type', type=str, default='CE', metavar='L',
                        help='B or CE or F or ICF_CE or ICF_F or CB_CE or CB_F')
    parser.add_argument('--beta', type=float, default=0.999, metavar='B',
                        help='Beta for ClassBalancedLoss')
    parser.add_argument('--gamma', type=float, default=2.0, metavar='G',
                        help='Gamma for FocalLoss')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--balanced-data', action='store_true', default=False,
                        help='For sampling rate. Default is Imbalanced-data.')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    # Add the following code anywhere in your machine learning file
    experiment = Experiment(api_key="5Yl3Rxz9S3E0PUKQTBpA0QJPi",
                            project_name="imbalanced-cifar-10",
                            workspace="tancoro")
    # Open the experiment page in a browser:
    # experiment.display(clear=True, wait=True, new=0, autoraise=True)
    # Fetch the experiment key (uniquely identifies this run; also used in
    # checkpoint filenames below).
    exp_key = experiment.get_key()
    print('KEY: ' + exp_key)
    # Record hyperparameters
    hyper_params = {
        'batch_size': args.batch_size,
        'epoch': args.epochs,
        'learning_rate': args.lr,
        'sgd_momentum': args.momentum,
        'model_path': args.model_path,
        'loss_type': args.loss_type,
        'beta': args.beta,
        'gamma': args.gamma,
        'torch_manual_seed': args.seed,
        'balanced_data': args.balanced_data
    }
    experiment.log_parameters(hyper_params)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    print('use_cuda {}'.format(use_cuda))
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # train dataset (standard CIFAR-10 augmentation + normalization)
    cifar10_train_dataset = datasets.CIFAR10(
        './data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ]))
    # train sampling rate: unless --balanced-data, downsample classes 1, 4
    # and 6 to 5% to create the imbalance.
    sampling_rate = {}
    if not args.balanced_data:
        sampling_rate = {1: 0.05, 4: 0.05, 6: 0.05}
    print(sampling_rate)
    # train Sampler
    train_sampler = ReductionSampler(cifar10_train_dataset,
                                     sampling_rate=sampling_rate)
    # train loader
    train_loader = torch.utils.data.DataLoader(cifar10_train_dataset,
                                               batch_size=args.batch_size,
                                               sampler=train_sampler,
                                               **kwargs)
    # test dataset (no augmentation)
    cifar10_test_dataset = datasets.CIFAR10(
        './data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ]))
    # test majority loader: rate 0 drops classes 1/4/6 entirely, leaving
    # only the majority classes.
    test_majority_sampler = ReductionSampler(cifar10_test_dataset,
                                             sampling_rate={1: 0, 4: 0, 6: 0})
    test_majority_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset,
        batch_size=args.test_batch_size,
        sampler=test_majority_sampler,
        **kwargs)
    # test minority loader: drops every class except 1/4/6.
    test_minority_sampler = ReductionSampler(
        cifar10_test_dataset,
        sampling_rate={0: 0, 2: 0, 3: 0, 5: 0, 7: 0, 8: 0, 9: 0})
    test_minority_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset,
        batch_size=args.test_batch_size,
        sampler=test_minority_sampler,
        **kwargs)
    # test alldata loader
    test_alldata_loader = torch.utils.data.DataLoader(
        cifar10_test_dataset,
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    model = ResNet18().to(device)

    # train loss: pick the imbalance-handling loss by --loss-type
    # (default fallback is plain cross entropy).
    train_loss = BasicCrossEntropyLoss()
    if args.loss_type == 'CE':
        train_loss = CrossEntropyLoss(train_sampler.get_data_count_map(), device)
    elif args.loss_type == 'F':
        train_loss = FocalLoss(train_sampler.get_data_count_map(), device,
                               gamma=args.gamma)
    elif args.loss_type == 'ICF_CE':
        train_loss = InverseClassFrequencyCrossEntropyLoss(
            train_sampler.get_data_count_map(), device)
    elif args.loss_type == 'ICF_F':
        train_loss = InverseClassFrequencyFocalLoss(
            train_sampler.get_data_count_map(), device, gamma=args.gamma)
    elif args.loss_type == 'CB_CE':
        train_loss = ClassBalancedCrossEntropyLoss(
            train_sampler.get_data_count_map(), device, beta=args.beta)
    elif args.loss_type == 'CB_F':
        train_loss = ClassBalancedFocalLoss(
            train_sampler.get_data_count_map(), device, beta=args.beta,
            gamma=args.gamma)
    print('Train Loss Type: {}'.format(type(train_loss)))

    # load param (optionally resume from a saved state dict)
    if len(args.model_path) > 0:
        model.load_state_dict(torch.load(args.model_path))

    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=5e-4)
    # Step schedule:
    # lr = 0.1 if epoch < 15
    # lr = 0.01 if 15 <= epoch < 20
    # lr = 0.001 if 20 <= epoch < 25
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[15, 20],
                                               gamma=0.1)

    for epoch in range(1, args.epochs + 1):
        with experiment.train():
            experiment.log_current_epoch(epoch)
            train(args, model, device, train_loader, len(train_sampler),
                  optimizer, epoch, experiment, lossfunc=train_loss)
        with experiment.test():
            # Evaluate on all three splits each epoch.
            test(args, model, device, test_minority_loader,
                 len(test_minority_sampler), epoch, experiment, pref='minority')
            test(args, model, device, test_majority_loader,
                 len(test_majority_sampler), epoch, experiment, pref='majority')
            test(args, model, device, test_alldata_loader,
                 len(test_alldata_loader.dataset), epoch, experiment, pref='all')
        # Checkpoint every 10 epochs when --save-model is set.
        if (args.save_model) and (epoch % 10 == 0):
            print('saving model to ./model/cifar10_{0}_{1:04d}.pt'.format(
                exp_key, epoch))
            torch.save(model.state_dict(),
                       "./model/cifar10_{0}_{1:04d}.pt".format(exp_key, epoch))
        scheduler.step()
def main():
    """Run an active-learning dialogue labeling experiment on a BagEnv.

    Seeds (or loads) an initial model, then repeatedly solicits labels from
    the configured strategy (up to args.label_timeout passes per step) and
    re-fits the model until the environment reports completion.
    """
    args = get_args()
    logger = Experiment(comet_ml_key, project_name="ActiveDialogue")
    logger.log_parameters(vars(args))

    if args.model == "glad":
        model_arch = GLAD
    elif args.model == "gce":
        model_arch = GCE
    else:
        # FIX: previously fell through and died later with NameError on
        # model_arch; fail fast with a clear message instead.
        raise ValueError("Unknown model: {}".format(args.model))
    env = BagEnv(load_dataset, model_arch, args, logger)

    if args.seed_size:
        # Train (or reuse) the seed model that bootstraps active learning.
        with logger.train():
            if not env.load_seed():
                logging.debug("No loaded seed. Training now.")
                env.seed_fit(args.seed_epochs, prefix="seed")
                logging.debug("Seed completed.")
            else:
                logging.debug("Loaded seed.")
                if args.force_seed:
                    logging.debug("Training seed regardless.")
                    env.seed_fit(args.seed_epochs, prefix="seed")
        env.load_seed()
    logging.debug("Current seed metrics: {}".format(env.metrics(True)))

    # Uncertainty-based strategies need a threshold wrapper; the baselines
    # (epsiloncheat/randomsinglets/passive) are plain functions.
    use_strategy = False
    if args.strategy == "lc":
        use_strategy = True
        strategy = lc_singlet
    elif args.strategy == "bald":
        use_strategy = True
        strategy = bald_singlet
    if use_strategy:
        if args.threshold_strategy == "fixed":
            strategy = FixedThresholdStrategy(strategy, args)
        elif args.threshold_strategy == "variable":
            strategy = VariableThresholdStrategy(strategy, args)
        elif args.threshold_strategy == "randomvariable":
            strategy = StochasticVariableThresholdStrategy(strategy, args)

    ended = False
    i = 0
    while not ended:
        i += 1
        # Observe environment state
        logger.log_current_epoch(i)
        for j in range(args.label_timeout):
            if env.can_label:
                # Obtain label request from strategy
                obs, preds = env.observe()
                if args.strategy == "epsiloncheat":
                    label_request = epsilon_cheat(preds, env.leak_labels())
                elif args.strategy == "randomsinglets":
                    label_request = random_singlets(preds)
                elif args.strategy == "passive":
                    label_request = passive(preds)
                elif use_strategy:
                    label_request = strategy.observe(preds)
                else:
                    raise ValueError()
                # Label solicitation
                labeled = env.label(label_request)
                if use_strategy:
                    # Report (requested, total) so the threshold strategy can
                    # adapt toward its label budget.
                    strategy.update(
                        np.sum(label_request.flatten()),
                        np.sum(np.ones_like(label_request.flatten())))
        # Environment stepping
        ended = env.step()
        # Fit every al_batch of items
        env.fit()

    # FIX: logging.debug("Final fit: ", value) passed the value as a
    # %-format argument with no placeholder, so it was never rendered;
    # use the lazy %s form instead.
    logging.debug("Final fit: %s", env.seed_fit(100, "final_fit", True))
def train(self):
    """PPO training loop: collect `batch_size` episodes per iteration,
    discount and normalize rewards, then update the policy.

    Relies on instance state set up elsewhere: self.env (gym-style),
    self.ppo (policy with get_action/update/save), self.device,
    self.diff_state / self.pre_state (frame-difference observations),
    and self._init_env().
    """
    ###################
    epochs = 100000     # outer training iterations
    batch_size = 10     # episodes collected per policy update
    gamma = 0.99        # reward discount factor
    ###################
    ###### load #######
    # NOTE(review): API key hardcoded; should come from config/env var.
    experiment = Experiment(api_key="DFqdpuCkMgoVhT4sJyOXBYRRN")  #DFqdpuCkMgoVhT4sJyOXBYRRN
    ###################
    step = 0    # global episode counter (x-axis for reward metric)
    r_vis = 0   # per-episode reward accumulator for visualization
    with experiment.train():
        for e in range(epochs):
            experiment.log_current_epoch(e)
            # Rollout buffers: actions, action probabilities, rewards.
            act_dic = []
            act_p_dic = []
            r_dic = []
            # init
            num_game = 1
            done = self._init_env()
            while True:
                if done and num_game == batch_size:
                    ### visualize ###
                    step = step + 1
                    experiment.log_metric("reward", r_vis, step=step)
                    r_vis = 0
                    #################
                    break  # finish: collected batch_size episodes
                elif done:
                    ### visualize ###
                    step = step + 1
                    experiment.log_metric("reward", r_vis, step=step)
                    r_vis = 0
                    #################
                    num_game = num_game + 1
                    done = self._init_env()  # new game
                else:
                    # in the game: act on the frame-difference observation.
                    self.diff_state = torch.from_numpy(
                        self.diff_state).float().to(self.device)
                    act, act_p = self.ppo.get_action(self.diff_state)
                    state, reward, done, _ = self.env.step(act)
                    state = prepro(state)
                    # Next observation is the difference to the previous frame.
                    self.diff_state = state - self.pre_state
                    self.pre_state = state
                    # remember action you take, and reward
                    act_dic.append(act)
                    act_p_dic.append(act_p)
                    r_dic.append(reward)
                    ### visualize ###
                    r_vis = r_vis + reward
                    #################
            # dic -> numpy -> tensor
            act_p_dic = torch.stack(act_p_dic)
            act_dic = np.array(act_dic)
            act_dic = torch.from_numpy(act_dic).float().to(self.device).view(-1, 1)
            r_dic = np.array(r_dic)
            r_dic = torch.from_numpy(r_dic).float().to(self.device)
            # early stop
            # NOTE(review): despite the name this only checkpoints when mean
            # reward >= 2 — it does not break out of training; confirm intent.
            if (r_dic.sum() / batch_size) >= 2:
                self.ppo.save(step)
            # suitable reward
            """ change reward like below
            [0, 0, 0, 0, 1] -> [0.99^4, 0.99^3, 0.99^2, 0.99, 1]
            """
            # Propagate each terminal reward backwards with discount gamma.
            r = 0
            for i in range(len(r_dic) - 1, -1, -1):
                if r_dic[i] != 0:
                    r = r_dic[i]
                else:
                    r = r * gamma
                r_dic[i] = r
            # Normalize returns (zero mean, unit variance; epsilon for safety).
            r_dic = (r_dic - r_dic.mean()) / (r_dic.std() + 1e-8)
            self.ppo.update(act_p_dic, act_dic, r_dic)
            # Periodic checkpoint every 100 iterations.
            if (e % 100 == 0 and e != 0):
                self.ppo.save(e)
                print("save:", e)