Example 1
def train_a_gym_model(env, config):
    """We train gym-type RL problem using ppo given environment and configuration"""
    torch.set_num_threads(1)

    seed = config.get('seed', None)
    log_dir = config.get('log_dir', '/tmp/gym')
    log_interval = config.get('log_interval', 10)
    save_interval = config.get('save_interval', 100)
    save_dir = config.get('save_dir', 'trained_models/ppo')
    add_timestep = config.get('add_timestep', False)
    num_processes = config.get('num_processes', 4)
    gamma = config.get('gamma', 0.99)
    num_stack = config.get('num_stack', 1)
    recurrent_policy = config.get('recurrent_policy', False)
    cuda = config.get('cuda', True)
    vis = config.get('vis', True)
    vis_interval = config.get('vis_interval', 100)
    env_name = config['env_name']
    save_step = config.get('save_step', None)
    warm_model = config.get('warm_model', None)
    if save_step is not None:
        next_save_step = save_step

    # clean the log folder, if necessary
    try:
        os.makedirs(log_dir)
    except OSError:
        files = glob.glob(os.path.join(log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)

    # torch.manual_seed requires an int, so only seed when one was provided
    if seed is not None:
        torch.manual_seed(seed)
        if cuda:
            torch.cuda.manual_seed(seed)

    if vis:
        from visdom import Visdom
        port = config.get('port', 8097)
        viz = Visdom(port=port)
        win = None

    envs = [
        make_env(env, seed, i, log_dir, add_timestep)
        for i in range(num_processes)
    ]

    if num_processes > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        envs = VecNormalize(envs, gamma=gamma)

    obs_shape = envs.observation_space.shape
    obs_shape = (obs_shape[0] * num_stack, *obs_shape[1:])

    if warm_model is None:
        actor_critic = Policy(obs_shape, envs.action_space, recurrent_policy)
    else:
        actor_critic, ob_rms, ret_rms = torch.load(warm_model)
        envs.ob_rms = ob_rms  # also use previous existing observation rms
        envs.ret_rms = ret_rms

    if envs.action_space.__class__.__name__ == "Discrete":
        action_shape = 1
    else:
        action_shape = envs.action_space.shape[0]

    if cuda:
        actor_critic.cuda()

    clip_param = config.get('clip_param', 0.2)
    ppo_epoch = config.get('ppo_epoch', 4)
    num_mini_batch = config.get('num_mini_batch', 32)
    value_loss_coef = config.get('value_loss_coef', 0.5)
    entropy_coef = config.get('entropy_coef', 0.01)
    lr = config.get('lr', 1e-3)
    eps = config.get('eps', 1e-5)
    max_grad_norm = config.get('max_grad_norm', 0.5)
    use_gae = config.get('use_gae', False)
    tau = config.get('tau', 0.95)
    num_steps = config.get('num_steps', 100)
    num_frames = config.get('num_frames', 1e6)

    num_updates = int(num_frames) // num_steps // num_processes

    agent = algo.PPO(actor_critic,
                     clip_param,
                     ppo_epoch,
                     num_mini_batch,
                     value_loss_coef,
                     entropy_coef,
                     lr=lr,
                     eps=eps,
                     max_grad_norm=max_grad_norm)

    rollouts = RolloutStorage(num_steps, num_processes, obs_shape,
                              envs.action_space, actor_critic.state_size)
    current_obs = torch.zeros(num_processes, *obs_shape)

    obs = envs.reset()
    update_current_obs(obs, current_obs, obs_shape, num_stack)

    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([num_processes, 1])
    final_rewards = torch.zeros([num_processes, 1])

    if cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    def save_the_model(num=None):
        """num is additional information"""
        # save it after training
        save_path = save_dir
        try:
            os.makedirs(save_path)
        except OSError:
            pass
        # A really ugly way to save a model to CPU
        save_model = actor_critic
        if cuda:
            save_model = copy.deepcopy(actor_critic).cpu()
        save_model = [
            save_model,
            getattr(envs, 'ob_rms', None),
            getattr(envs, 'ret_rms', None)
        ]
        if num is None:
            save_name = '%s.pt' % env_name
        else:
            save_name = '%s_at_%d.pt' % (env_name, int(num))
        torch.save(save_model, os.path.join(save_path, save_name))

    start = time.time()
    for j in range(1, 1 + num_updates):
        for step in range(num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Observe reward and next obs
            obs, reward, done, info = envs.step(cpu_actions)
            reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                     1)).float()
            episode_rewards += reward

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            if cuda:
                masks = masks.cuda()

            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            else:
                current_obs *= masks

            update_current_obs(obs, current_obs, obs_shape, num_stack)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, use_gae, gamma, tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        if j % save_interval == 0 and save_dir != "":
            save_the_model()
            if save_step is not None:
                total_num_steps = j * num_processes * num_steps
                if total_num_steps > next_save_step:
                    save_the_model(total_num_steps)
                    next_save_step += save_step

        if j % log_interval == 0:
            end = time.time()
            total_num_steps = j * num_processes * num_steps
            print(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        final_rewards.mean(), final_rewards.median(),
                        final_rewards.min(), final_rewards.max(), dist_entropy,
                        value_loss, action_loss))
        if vis and j % vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win = visdom_plot(viz, win, log_dir, env_name, 'ppo',
                                  num_frames)
            except IOError:
                pass
    # finally save model again
    save_the_model()
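
# A hypothetical driver for train_a_gym_model (a sketch only: the env name and
# config values below are illustrative, and the helpers the function uses
# internally, e.g. make_env and Policy, are assumed to be importable).
import gym

config = {
    'env_name': 'Walker2d-v2',  # the only required key
    'seed': 1,
    'num_processes': 4,
    'num_steps': 2048,
    'num_frames': 1e6,
    'lr': 3e-4,
    'use_gae': True,
    'vis': False,  # skip Visdom plotting when no server is running
}
env = gym.make(config['env_name'])
train_a_gym_model(env, config)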
Example 2
    def __init__(self, FG, SUPERVISED=True):
        # parameters
        self.num_epoch = FG.num_epoch
        self.batch_size = FG.batch_size
        self.save_dir = FG.save_dir
        self.result_dir = FG.result_dir
        self.dataset = 'MRI'
        self.log_dir = FG.log_dir
        self.model_name = 'infoGAN'
        self.input_size = FG.input_size
        self.z_dim = FG.z
        self.SUPERVISED = SUPERVISED        # if True, label info is used directly for the discrete code
        self.len_discrete_code = 10         # categorical distribution (i.e. label)
        self.len_continuous_code = 2        # gaussian distribution (e.g. rotation, thickness)
        self.sample_num = self.len_discrete_code ** 2
        
        # torch setting
        self.device = torch.device('cuda:{}'.format(FG.devices[0]))
        torch.cuda.set_device(FG.devices[0])
        timer = SimpleTimer()

        # load dataset
        x, y = Trainset(FG)      # x = image, y=target
        trainset = ADNIDataset(
            FG, x, y,
            cropping=NineCrop((40, 40, 40), (32, 32, 32)),
            transform=Compose([
                Lambda(lambda patches: torch.stack(
                    [ToTensor()(patch) for patch in patches]))
            ]))
        self.trainloader = DataLoader(trainset, batch_size=self.batch_size,
                                 shuffle=True, pin_memory=True,
                                 num_workers=4)
        #self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
        #data = self.trainloader
        # peek at one batch to infer the number of input channels
        data = next(iter(self.trainloader))['image']

        # networks init
        self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1],
                           input_size=self.input_size, len_discrete_code=self.len_discrete_code,
                           len_continuous_code=self.len_continuous_code).to('cuda:{}'.format(FG.devices[0]))
        self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size,
                               len_discrete_code=self.len_discrete_code, len_continuous_code=self.len_continuous_code).to('cuda:{}'.format(FG.devices[0]))
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=FG.lrG, betas=(FG.beta1, FG.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=FG.lrD, betas=(FG.beta1, FG.beta2))
        self.info_optimizer = optim.Adam(itertools.chain(self.G.parameters(), self.D.parameters()), lr=FG.lrD, betas=(FG.beta1, FG.beta2))

        if len(FG.devices) != 1:
            self.G = torch.nn.DataParallel(self.G, FG.devices)
            self.D = torch.nn.DataParallel(self.D, FG.devices)
        self.BCE_loss = nn.BCELoss().to('cuda:{}'.format(FG.devices[0]))
        self.CE_loss = nn.CrossEntropyLoss().to('cuda:{}'.format(FG.devices[0]))
        self.MSE_loss = nn.MSELoss().to('cuda:{}'.format(FG.devices[0]))

        print('---------- Networks architecture -------------')
        ori_utils.print_network(self.G)
        ori_utils.print_network(self.D)
        print('-----------------------------------------------')

        # fixed noise & condition
        self.sample_z = torch.zeros((self.sample_num, self.z_dim))
        for i in range(self.len_discrete_code):
            self.sample_z[i * self.len_discrete_code] = torch.rand(1, self.z_dim)
            for j in range(1, self.len_discrete_code):
                self.sample_z[i * self.len_discrete_code + j] = self.sample_z[i * self.len_discrete_code]

        temp = torch.zeros((self.len_discrete_code, 1))
        for i in range(self.len_discrete_code):
            temp[i, 0] = i

        temp_y = torch.zeros((self.sample_num, 1))
        for i in range(self.len_discrete_code):
            temp_y[i * self.len_discrete_code: (i + 1) * self.len_discrete_code] = temp

        self.sample_y = torch.zeros((self.sample_num, self.len_discrete_code)).scatter_(1, temp_y.type(torch.LongTensor), 1)
        self.sample_c = torch.zeros((self.sample_num, self.len_continuous_code))

        # manipulating two continuous code
        #self.sample_z2 = torch.rand((1, self.z_dim)).expand(self.sample_num, self.z_dim)
        self.sample_z2 = torch.zeros((self.sample_num, self.z_dim))
        z2 = torch.rand(1, self.z_dim)
        for i in range(self.sample_num):
            self.sample_z2[i] = z2
        
        self.sample_y2 = torch.zeros(self.sample_num, self.len_discrete_code)
        self.sample_y2[:, 0] = 1

        temp_c = torch.linspace(-1, 1, 10)
        self.sample_c2 = torch.zeros((self.sample_num, 2))
        for i in range(self.len_discrete_code):
            for j in range(self.len_discrete_code):
                self.sample_c2[i*self.len_discrete_code+j, 0] = temp_c[i]
                self.sample_c2[i*self.len_discrete_code+j, 1] = temp_c[j]

        self.sample_z = self.sample_z.cuda(self.device, non_blocking=True)
        self.sample_y = self.sample_y.cuda(self.device, non_blocking=True) 
        self.sample_c = self.sample_c.cuda(self.device, non_blocking=True)
        self.sample_z2 = self.sample_z2.cuda(self.device, non_blocking=True)
        self.sample_y2 = self.sample_y2.cuda(self.device, non_blocking=True)
        self.sample_c2 = self.sample_c2.cuda(self.device, non_blocking=True)


        vis = Visdom(port=10002, env=str(FG.vis_env))

        self.printers = dict(
            D_loss = Scalar(vis, 'D_loss', opts=dict(
                showlegend=True, title='D loss', ytickmin=0, ytickmax=2.0)),
            G_loss = Scalar(vis, 'G_loss', opts=dict(
                showlegend=True, title='G loss', ytickmin=0, ytickmax=10)),
            info_loss = Scalar(vis, 'info_loss', opts=dict(
                showlegend=True, title='info loss', ytickmin=0, ytickmax=10)),
            input = Image3D(vis, 'input'),
            input_fi = Image3D(vis, 'input_fi'),
            output = Image3D(vis, 'output'),
            output2 = Image3D(vis, 'output2'))

        self.timer = SimpleTimer()
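
# Hypothetical construction of the trainer above; the class name and the FG
# fields are inferred from the __init__ body, and every value is illustrative.
from argparse import Namespace

FG = Namespace(num_epoch=50, batch_size=8, save_dir='save', result_dir='result',
               log_dir='log', input_size=32, z=62, devices=[0],
               lrG=2e-4, lrD=2e-4, beta1=0.5, beta2=0.999, vis_env='infoGAN')
gan = infoGAN(FG, SUPERVISED=True)  # assumed class name (matches self.model_name)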
Example 3
                    help="How frequently (every mini-batch) to evaluate model",
                    default=20000,
                    type=int)
parser.add_argument("--save-dir",
                    dest="save_dir",
                    help="Directory to save trained models",
                    default='Saved-Models/',
                    type=str)
parser.add_argument("--load-model",
                    dest="load_model",
                    help="Directory from which to load trained models",
                    default=None,
                    type=str)

opt = parser.parse_args()
vis = Visdom()


### evaluation code
def evalModel(model):
    # set model to eval mode
    model.eval()
    print('\n\n')
    print('*' * 30, ' MODEL EVALUATION ', '*' * 30)

    _article, _revArticle, _extArticle, max_article_oov, article_oov, \
        article_string, abs_string = dl.getEvalBatch()
    _article = Variable(_article.cuda(), volatile=True)
    _extArticle = Variable(_extArticle.cuda(), volatile=True)
    _revArticle = Variable(_revArticle.cuda(), volatile=True)
    all_summaries = model((_article, _revArticle, _extArticle),
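
# Note: Variable(..., volatile=True) is the pre-0.4 PyTorch inference API.
# On current PyTorch the equivalent (a sketch over the same tensors; the
# model call above is truncated, so its remaining arguments are elided) is:
#
#     with torch.no_grad():
#         _article = _article.cuda()
#         _revArticle = _revArticle.cuda()
#         _extArticle = _extArticle.cuda()
#         all_summaries = model((_article, _revArticle, _extArticle), ...)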
Example 4
def do_train(cfg, model, data_loader, data_loader_val, optimizer, scheduler,
             checkpointer, device, checkpoint_period, test_period, arguments,
             distributed, vis_port):
    from visdom import Visdom
    vis = None
    if distributed:
        if dist.get_rank() == 0:
            vis = Visdom(server='http://127.0.0.1', port=vis_port)
    else:
        vis = Visdom(server='http://127.0.0.1', port=vis_port)
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

            # update the loss curves
            loss_dict_print = loss_dict_reduced
            loss_dict_print['loss'] = losses_reduced
            print_dict(vis, loss_dict_print, iteration, need_plot=True)

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            # with torch.no_grad():
            #     # Should be one image for each GPU:
            #     for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
            #         images_val = images_val.to(device)
            #         targets_val = [target.to(device) for target in targets_val]
            #         loss_dict = model(images_val, targets_val)
            #         losses = sum(loss for loss in loss_dict.values())
            #         loss_dict_reduced = reduce_loss_dict(loss_dict)
            #         losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            #         meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            # synchronize()
            # logger.info(
            #     meters_val.delimiter.join(
            #         [
            #             "[Validation]: ",
            #             "eta: {eta}",
            #             "iter: {iter}",
            #             "{meters}",
            #             "lr: {lr:.6f}",
            #             "max mem: {memory:.0f}",
            #         ]
            #     ).format(
            #         eta=eta_string,
            #         iter=iteration,
            #         meters=str(meters_val),
            #         lr=optimizer.param_groups[0]["lr"],
            #         memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
            #     )
            # )
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
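
# print_dict is referenced above but not shown; a minimal sketch that is
# consistent with the call site (vis may be None on non-zero ranks) might be:
def print_dict(vis, losses, iteration, need_plot=False):
    if vis is None or not need_plot:
        return
    for name, value in losses.items():
        # one Visdom line window per loss key, extended point by point
        vis.line(X=[iteration], Y=[float(value)], win=name,
                 update='append', opts=dict(title=name))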
Example 5
def run_inference_ss_vae(args):
    """
    run inference for SS-VAE
    :param args: arguments for SS-VAE
    :return: None
    """
    if args.use_cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    if args.seed is not None:
        set_seed(args.seed, args.use_cuda)

    viz = None
    if args.visualize:
        from visdom import Visdom
        viz = Visdom()
        mkdir_p("./vae_results")

    # batch_size: number of images (and labels) to be considered in a batch
    ss_vae = SSVAE(z_dim=args.z_dim,
                   hidden_layers=args.hidden_layers,
                   epsilon_scale=args.epsilon_scale,
                   use_cuda=args.use_cuda,
                   aux_loss_multiplier=args.aux_loss_multiplier)

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = Adam(adam_params)

    # set up the loss(es) for inference; setting the enum_discrete parameter builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    loss_basic = SVI(ss_vae.model,
                     ss_vae.guide,
                     optimizer,
                     loss="ELBO",
                     enum_discrete=args.enum_discrete)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from the NIPS 2014 paper (Kingma et al.)
    if args.aux_loss:
        loss_aux = SVI(ss_vae.model_classify,
                       ss_vae.guide_classify,
                       optimizer,
                       loss="ELBO")
        losses.append(loss_aux)

    try:
        # setup the logger if a filename is provided
        logger = None if args.logfile is None else open(args.logfile, "w")

        data_loaders = setup_data_loaders(MNISTCached,
                                          args.use_cuda,
                                          args.batch_size,
                                          sup_num=args.sup_num)

        # how often a supervised batch is encountered during inference,
        # e.g. if sup_num is 3000, roughly every 16th = int(50000/3000) batch is supervised,
        # until all supervised batches have been traversed
        periodic_interval_batches = int(MNISTCached.train_data_size /
                                        (1.0 * args.sup_num))

        # number of unsupervised examples
        unsup_num = MNISTCached.train_data_size - args.sup_num

        # initializing local variables to maintain the best validation accuracy
        # seen across epochs over the supervised training set
        # and the corresponding testing set and the state of the networks
        best_valid_acc, corresponding_test_acc = 0.0, 0.0

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):

            # get the losses for an epoch
            epoch_losses_sup, epoch_losses_unsup = \
                run_inference_for_epoch(data_loaders, losses, periodic_interval_batches)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses_sup = map(lambda v: v / args.sup_num,
                                       epoch_losses_sup)
            avg_epoch_losses_unsup = map(lambda v: v / unsup_num,
                                         epoch_losses_unsup)

            # store the loss and validation/testing accuracies in the logfile
            str_loss_sup = " ".join(map(str, avg_epoch_losses_sup))
            str_loss_unsup = " ".join(map(str, avg_epoch_losses_unsup))

            str_print = "{} epoch: avg losses {}".format(
                i, "{} {}".format(str_loss_sup, str_loss_unsup))

            validation_accuracy = get_accuracy(data_loaders["valid"],
                                               ss_vae.classifier,
                                               args.batch_size)
            str_print += " validation accuracy {}".format(validation_accuracy)

            # this test accuracy is only for logging, this is not used
            # to make any decisions during training
            test_accuracy = get_accuracy(data_loaders["test"],
                                         ss_vae.classifier, args.batch_size)
            str_print += " test accuracy {}".format(test_accuracy)

            # update the best validation accuracy and the corresponding
            # testing accuracy and the state of the parent module (including the networks)
            if best_valid_acc < validation_accuracy:
                best_valid_acc = validation_accuracy
                corresponding_test_acc = test_accuracy

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"],
                                           ss_vae.classifier, args.batch_size)
        print_and_log(
            logger,
            "best validation accuracy {} corresponding testing accuracy {} "
            "last testing accuracy {}".format(best_valid_acc,
                                              corresponding_test_acc,
                                              final_test_accuracy))

        # visualize the conditional samples
        visualize(ss_vae, viz, data_loaders["test"])
    finally:
        # close the logger file object if we opened it earlier
        if args.logfile is not None:
            logger.close()
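
# run_inference_for_epoch is referenced above but not shown; a plausible
# sketch of the supervised/unsupervised interleaving described in the
# comments (the 'sup'/'unsup' loader keys and the SVI.step signature are
# assumptions):
def run_inference_for_epoch(data_loaders, losses, periodic_interval_batches):
    num_losses = len(losses)
    sup_batches = len(data_loaders["sup"])
    unsup_batches = len(data_loaders["unsup"])
    batches_per_epoch = sup_batches + unsup_batches

    epoch_losses_sup = [0.0] * num_losses
    epoch_losses_unsup = [0.0] * num_losses
    sup_iter = iter(data_loaders["sup"])
    unsup_iter = iter(data_loaders["unsup"])
    ctr_sup = 0
    for i in range(batches_per_epoch):
        # every periodic_interval_batches-th batch is supervised,
        # until the supervised batches run out
        is_supervised = (i % periodic_interval_batches == 1) and ctr_sup < sup_batches
        if is_supervised:
            xs, ys = next(sup_iter)
            ctr_sup += 1
        else:
            xs, ys = next(unsup_iter)
        for loss_id in range(num_losses):
            if is_supervised:
                epoch_losses_sup[loss_id] += losses[loss_id].step(xs, ys)
            else:
                epoch_losses_unsup[loss_id] += losses[loss_id].step(xs)
    return epoch_losses_sup, epoch_losses_unsup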
Example 6
    def __init__(self, port=13579, env='main'):
        self.cur_win = {}
        self.env = env
        self.visdom = Visdom(port=port, env=env)
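
    # A typical companion method for this wrapper (a sketch, not part of the
    # original): cache one window handle per title and append points to it.
    def plot(self, title, x, y):
        win = self.cur_win.get(title)
        if win is None:
            self.cur_win[title] = self.visdom.line(
                X=[x], Y=[y], opts=dict(title=title))
        else:
            self.visdom.line(X=[x], Y=[y], win=win, update='append')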
Example 7
def train(model, train_loader, test_loader):
    model.to(DEVICE)
    num_layers = model.get_num_of_layer()
    viz = Visdom(port=17000)
    for i in range(1, num_layers + 1):
        print('*' * 5 + 'Layer_{}'.format(i) + '*' * 5)
        # visualize the learning curves
        viz.line(
            [[0., 0.]], [0.],
            win='encoder_layer_{}'.format(i),
            opts={
                'title': 'encoder_layer_{}'.format(i),
                'legend': ['train_loss', 'eval_loss']
            })
        model.set_trainable_layer(i)
        # note: cross-entropy alone cannot drive the reconstruction error
        # to zero; its error minimum sits at x = 0.5
        criterion = CrossEntropy()
        # keep only the trainable parameters
        trainable_parameters = list(
            filter(lambda p: p.requires_grad, model.parameters()))
        optimizer = optim.Adam(trainable_parameters, lr=LR)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=EPOCH_PER_LAYER // 2,
                                              gamma=0.1)
        criterion.to(DEVICE)
        for epoch in range(EPOCH_PER_LAYER):
            train_loss = []
            for step, ((x, neighbor_region), _) in enumerate(train_loader):
                if neighbor_region.ndim == 4:
                    neighbor_region = neighbor_region.reshape(
                        (neighbor_region.shape[0], -1))
                # concatenate the raw spectral features with the neighborhood features
                input = torch.cat([x, neighbor_region], -1)
                input = input.to(DEVICE)
                raw_input, out = model(input)  # [batch_size, n_feature]
                # compute the loss
                loss = criterion(out, raw_input)
                train_loss.append(loss.item())
                # backpropagation
                optimizer.zero_grad()
                loss.backward()
                # gradient clipping
                # l2_norm = nn.utils.clip_grad_norm_(trainable_parameters, 10)
                optimizer.step()

                if step % 50 == 0:
                    lr = optimizer.state_dict()['param_groups'][0]['lr']
                    # print('Layer-{} epoch:{} batch:{} loss:{:.6f} lr:{} l2-norm:{}'.format(i, epoch, step, loss.item(), lr, l2_norm))
                    print(
                        'Layer-{} epoch:{} batch:{} loss:{:.6f} lr:{}'.format(
                            i, epoch, step, loss.item(), lr))

            # evaluation
            eval_loss = []
            with torch.no_grad():
                for j, ((x, neighbor_region), _) in enumerate(test_loader):
                    if neighbor_region.ndim == 4:
                        neighbor_region = neighbor_region.reshape(
                            (neighbor_region.shape[0], -1))
                    # concatenate the raw spectral features with the neighborhood features
                    input = torch.cat([x, neighbor_region], -1)
                    input = input.to(DEVICE)
                    raw_input, out = model(input)
                    loss = criterion(out, raw_input)
                    # visualize a few reconstruction vectors
                    if j == 0:
                        num = 5
                        raw_vector, re_vector = raw_input[:num], out[:num]
                        length = raw_vector.shape[1]
                        for k in range(num):
                            viz.line(torch.stack([raw_vector[k], re_vector[k]],
                                                 dim=0).T,
                                     list(range(length)),
                                     win='layer{}_sample{}'.format(i, k),
                                     opts={
                                         'title':
                                         'layer{}_sample{}'.format(i, k),
                                         'legend': ['encoder', 'decoder']
                                     })

                    eval_loss.append(loss.item())
            train_mean_loss = float(np.mean(train_loss))
            eval_mean_loss = float(np.mean(eval_loss))
            print(
                'Layer-{} epoch:{} train_loss: {:.6f} eval_loss:{:.6f}'.format(
                    i, epoch, train_mean_loss, eval_mean_loss))
            viz.line([[train_mean_loss, eval_mean_loss]], [epoch],
                     win='encoder_layer_{}'.format(i),
                     update='append')
            scheduler.step()
            # while training the last layer, save a checkpoint every 50 epochs
            if i == num_layers and (epoch + 1) % 50 == 0:
                save_path = os.path.join(SAVE_ROOT, DATASET_NAME)
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                torch.save(
                    model.state_dict(),
                    os.path.join(save_path,
                                 'stacked_auto_encoder_{}'.format(epoch)))
        print('*' * 5 + 'Finish' + '*' * 5)
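
# CrossEntropy above is a project-specific reconstruction criterion; a
# minimal sketch (assuming inputs are scaled to [0, 1] and the decoder ends
# in a sigmoid) is plain element-wise binary cross-entropy:
import torch.nn as nn

class CrossEntropy(nn.Module):
    def __init__(self):
        super().__init__()
        self.bce = nn.BCELoss()

    def forward(self, out, target):
        # out: reconstruction in (0, 1); target: original input in [0, 1]
        return self.bce(out, target)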
Example 8
def main():
    setup_logger(args.verbose, args.model_name)

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    if args.vis:
        from visdom import Visdom
        viz = Visdom(port=args.visdom_port)
        win = None

    envs = make_vec_envs(args.env_name,
                         args.seed,
                         args.num_processes,
                         args.gamma,
                         config.log_directory,
                         args.add_timestep,
                         device,
                         allow_early_resets=True,
                         num_frame_stack=None,
                         ip=args.ip,
                         start_port=args.port,
                         wait_action=args.wait_action,
                         reset_step=args.reset_step)

    actor_critic = Policy(envs.observation_space.shape,
                          envs.action_space,
                          base_kwargs={
                              'recurrent': args.recurrent_policy,
                              'hidden_size': args.hidden_layer_size
                          })
    # load model
    if args.load_path is not None:
        logger.info("loading model: {}".format(args.load_path))
        actor_critic = torch.load(args.load_path)

    actor_critic.to(device)

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm,
                         use_clipped_value_loss=True)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               acktr=True)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=20)
    episode_distance = deque(maxlen=20)

    if args.use_wandb:
        wandb.init(project='LumbarSpine',
                   config=args,
                   group=args.model_name,
                   resume=args.resume_wandb)
        # wandb.watch(actor_critic)

        if wandb.run.resumed:
            logger.info('Wandb resumed!')

    # --------------------- train ----------------------------
    start = time.time()
    for iter in range(num_updates):
        logger.info('Training {}/{} updates'.format(iter, num_updates))
        if args.test:
            break

        # todo: maybe this is what is making things confusing?!
        envs.reset()

        # HACKY way of reconnecting back the main env to avoid packet drop
        # for i in range(args.num_processes):
        #     envs.venv.envs[i].env.net.connect(args.ip, args.port)

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            if args.algo == "acktr":
                # use optimizer's learning rate since it's hard-coded in kfac.py
                lr = update_linear_schedule(agent.optimizer, iter, num_updates,
                                            agent.optimizer.lr)
            else:
                lr = update_linear_schedule(agent.optimizer, iter, num_updates,
                                            args.lr)
        else:
            lr = args.lr

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 -
                                                  iter / float(num_updates))

        distances = []
        vels = []

        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])
                    episode_distance.append(info['episode_']['distance'])
                if 'distance' in info.keys():
                    distances.append(info['distance'])
                    vels.append(info['vel'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])

            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        # save for every interval-th episode or for the last epoch
        if iter % args.save_interval == 0 or iter == num_updates - 1:
            save_path = os.path.join(config.trained_directory,
                                     args.algo + "-" + args.env_name + ".pt")
            logger.info("Saving model: {}".format(save_path))
            torch.save(actor_critic, save_path)

        total_num_steps = (iter + 1) * args.num_processes * args.num_steps

        log_info = {
            'average_vel': np.mean(vels),
            'average_distance': np.mean(distances),
            'value_loss': value_loss,
            'action_loss': action_loss,
            'dist_entropy': dist_entropy,
            'lr': lr,
            'agent_clip_param': agent.clip_param,
        }

        if len(episode_rewards) > 1:
            log_info.update({
                'mean_episode_reward': np.mean(episode_rewards),
                'median_episode_reward': np.median(episode_rewards),
                'min_episode_reward': np.min(episode_rewards),
                'max_episode_reward': np.max(episode_rewards),
                'mean_episode_distance': np.mean(episode_distance)
            })

        # todo: switch to episodic and cover other locations. This log is only for episodic
        if iter % args.episode_log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            logger.info(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median "
                "reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n".format(
                    iter,
                    total_num_steps, int(total_num_steps / (end - start)),
                    len(episode_rewards), np.mean(episode_rewards),
                    np.median(episode_rewards), np.min(episode_rewards),
                    np.max(episode_rewards), dist_entropy, value_loss,
                    action_loss))

        # --------------------- evaluate ----------------------------
        # Evaluate on a single environment
        if args.eval_interval is not None and iter % args.eval_interval == 0:
            logger.info('Evaluate')

            # todo: what is allow_early_resets? (False for main, True for eval)
            eval_envs = make_vec_envs(
                args.env_name,
                args.seed,
                1,  # args.num_processes,
                args.gamma,
                config.log_directory,
                args.add_timestep,
                device,
                allow_early_resets=False,
                num_frame_stack=None,
                ip=args.ip,
                start_port=args.port,
                wait_action=args.wait_action,
                eval_mode=True)

            eval_episode_rewards = []
            rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(
                args.num_processes,
                actor_critic.recurrent_hidden_state_size,
                device=device)
            eval_masks = torch.zeros(args.num_processes, 1, device=device)

            eval_distances = []

            # while len(eval_episode_rewards) < 10:
            for eval_step in range(args.num_steps_eval):
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                        obs,
                        eval_recurrent_hidden_states,
                        eval_masks,
                        deterministic=True)

                # Observe reward and next obs
                obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.tensor([[0.0] if done_ else [1.0]
                                           for done_ in done],
                                          dtype=torch.float32,
                                          device=device)
                logger.log(msg='eval step reward: {}'.format(reward), level=18)
                logger.log(msg='eval step obs: {}'.format(obs), level=18)

                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])
                    if 'distance' in info.keys():
                        eval_distances.append(info['distance'])

                rewards.extend(reward)

            eval_envs.close()

            if args.episodic:
                logger.info(
                    "Evaluation using {} episodes: mean reward {:.5f}\n".
                    format(len(eval_episode_rewards),
                           np.mean(eval_episode_rewards)))
            else:
                logger.info(
                    "Evaluation using {} steps: mean reward {:.5f}\n".format(
                        args.num_steps_eval, np.mean(rewards)))

            # update info
            log_info.update({
                'mean_eval_reward': np.mean(rewards),
                'eval_average_distance': np.mean(eval_distances)
            })

        if args.vis and iter % args.vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                logger.info("Visdom log update")
                win = visdom_plot(viz, win, config.visdom_log_directory,
                                  args.env_name, args.algo, args.num_env_steps)
            except IOError:
                pass

        if iter % args.log_interval == 0:
            logger.info('{}:{}  {}'.format(iter, num_updates, log_info))
            if args.use_wandb:
                wandb.log(log_info)

    # -------------------------------------- testing -------------------------------------
    if args.test:
        logger.info('Evaluate')

        # todo: what is allow_early_resets? (False for main, True for eval)
        eval_envs = make_vec_envs(args.env_name,
                                  args.seed,
                                  args.num_processes,
                                  args.gamma,
                                  config.log_directory,
                                  args.add_timestep,
                                  device,
                                  allow_early_resets=False,
                                  num_frame_stack=None,
                                  ip=args.ip,
                                  start_port=args.port,
                                  wait_action=args.wait_action,
                                  eval_mode=True)

        eval_episode_rewards = []
        rewards = []

        obs = eval_envs.reset()
        eval_recurrent_hidden_states = torch.zeros(
            args.num_processes,
            actor_critic.recurrent_hidden_state_size,
            device=device)
        eval_masks = torch.zeros(args.num_processes, 1, device=device)

        # while len(eval_episode_rewards) < 10:
        for eval_step in range(args.num_steps_eval):
            with torch.no_grad():
                _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                    obs,
                    eval_recurrent_hidden_states,
                    eval_masks,
                    deterministic=True)

            # Observe reward and next obs
            obs, reward, done, infos = eval_envs.step(action)

            eval_masks = torch.tensor([[0.0] if done_ else [1.0]
                                       for done_ in done],
                                      dtype=torch.float32,
                                      device=device)
            logger.info('eval step reward: {}'.format(reward))
            logger.log(msg='eval step obs: {}'.format(obs), level=18)

            if args.episodic:
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])
            else:
                rewards.append(reward)

        eval_envs.close()

        if args.episodic:
            logger.info(
                "Evaluation using {} episodes: mean reward {:.5f}\n".format(
                    len(eval_episode_rewards), np.mean(eval_episode_rewards)))
        else:
            logger.info(
                "Evaluation using {} steps: mean reward {:.5f}\n".format(
                    args.num_steps_eval, np.mean(rewards)))
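
# update_linear_schedule is used above but defined elsewhere; this pattern is
# commonly implemented as follows (a sketch whose return value matches the
# call sites above):
def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr):
    """Decay the learning rate linearly from initial_lr down to 0."""
    lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs)))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr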
Example 9
import torch
import syft as sy  # <-- NEW: import the Pysyft library
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from visdom import Visdom
from datetime import datetime
import ComputePrivacy as Privacy  # Import a self-defined module to compute the privacy loss
import Datasets  # Import a self-defined module to load the federated datasets
hook = sy.TorchHook(
    torch
)  # <-- NEW: hook PyTorch, i.e. add extra functionality to support Federated Learning
date = datetime.now().strftime('%Y-%m-%d %H:%M')
vis = Visdom(env='CELEBA_AdaClip2_Asyn_04_flat')


# Define parameters
class Arguments():
    def __init__(self):
        self.batch_size = 5  # Number of samples used by each user/device at each iteration.
        # If this value is less than 1, it is a sampling ratio; otherwise it is the mini-batch size
        self.lr = 0.0001  # Learning rate
        self.ClipBound = torch.tensor([0.001])  # Gradient clipping bound (the sensitivity S)
        self.z = 0.4  # Noise parameter z in Gaussian noise N(0, (zS)^2), where S is the sensitivity
        self.users_total = 800  # Total number of users/devices
        self.user_sel_prob = 0.02  # Probability of sampling each user/device at each iteration
        self.itr_numbers = 6000  # Number of total iterations

        self.test_batch_size = 128  # Number of test mini-batch size
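
# The comments above describe per-update clipping plus Gaussian noise
# N(0, (zS)^2) with sensitivity S = ClipBound; a minimal sketch of that
# mechanism (the function name and exact placement are assumptions):
def clip_and_noise(update, clip_bound, z):
    norm = update.norm()
    # rescale so the update norm never exceeds the clipping bound S
    clipped = update * min(1.0, float(clip_bound) / (float(norm) + 1e-12))
    # add Gaussian noise with standard deviation z * S
    noise = torch.randn_like(clipped) * (z * float(clip_bound))
    return clipped + noise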
Example 10
def train():
    parser = ArgumentParser()
    parser.add_argument("--visdom", action="store_true")
    parser.add_argument("--batch_size", type=int, default=64)
    param = parser.parse_args()
    if param.visdom:
        from visdom import Visdom

        dom = Visdom()
    train_loader = DataLoader(
        VOCDataset(mode="train"),
        batch_size=param.batch_size,
        num_workers=8,
        drop_last=True,
        pin_memory=True,
        shuffle=True,
    )
    valid_loader = DataLoader(
        VOCDataset(mode="val"),
        batch_size=16,
        num_workers=4,
        drop_last=True,
        shuffle=True,
    )
    net = yolo().cuda()
    # net = torch.load("weights/75_net.pk")
    criterion = YoloLoss().cuda()
    optim = SGD(
        params=net.parameters(),
        lr=0.001,
        weight_decay=5e-4,
        momentum=0.9,
        nesterov=True,
    )
    # optim = Adam(params=net.parameters(), lr=1e-3, weight_decay=5e-4, eps=1e-4)
    t_obj_loss, t_nobj_loss, t_xy_loss, t_wh_loss, t_class_loss = [], [], [], [], []
    v_obj_loss, v_nobj_loss, v_xy_loss, v_wh_loss, v_class_loss = [], [], [], [], []
    valid_loss = []
    train_loss = []

    for epoch in range(0, 120):
        train_bar = tqdm(train_loader, dynamic_ncols=True)
        val_bar = tqdm(valid_loader, dynamic_ncols=True)
        train_bar.set_description_str(f"epoch/{epoch}")
        update_lr(optim, epoch)
        net.train()
        for i, ele in enumerate(train_bar):
            img, target = ele
            img, target = Variable(img).cuda(), Variable(target).cuda()
            output = net(img)
            optim.zero_grad()
            obj_loss, noobj_loss, xy_loss, wh_loss, class_loss = criterion(
                output, target.float()
            )
            loss = obj_loss + noobj_loss + xy_loss + wh_loss + 2 * class_loss
            loss.backward()
            train_loss.append(loss.item())
            t_obj_loss.append(obj_loss.item())
            t_nobj_loss.append(noobj_loss.item())
            t_xy_loss.append(xy_loss.item())
            t_wh_loss.append(wh_loss.item())
            t_class_loss.append(class_loss.item())
            optim.step()
            if i % 10 == 0:
                loss_list = [
                    np.mean(x)
                    for x in [
                        t_obj_loss,
                        t_nobj_loss,
                        t_xy_loss,
                        t_wh_loss,
                        t_class_loss,
                    ]
                ]
                train_bar.set_postfix_str(
                    "o:{:.2f} n:{:.2f} x:{:.2f} w:{:.2f} c:{:.2f}".format(*loss_list)
                )
                if param.visdom:
                    # train_bar.set_postfix_str(f"loss {np.mean(train_loss)}")
                    dom.line(train_loss, win="train", opts={"title": "Train loss"})
                    dom.line(t_obj_loss, win="obj", opts={"title": "obj"})
                    dom.line(t_nobj_loss, win="noobj", opts={"title": "noobj"})
                    dom.line(t_xy_loss, win="xy", opts={"title": "xy"})
                    dom.line(t_wh_loss, win="wh", opts={"title": "wh"})
                    dom.line(t_class_loss, win="class", opts={"title": "class"})
        if epoch % 5 == 0:
            torch.save(net, f"weights/{epoch}_net.pk")
        net.eval()
        with torch.no_grad():
            for i, ele in enumerate(val_bar):
                img, target = ele
                img, target = Variable(img).cuda(), Variable(target).cuda()
                output = net(img)
                obj_loss, noobj_loss, xy_loss, wh_loss, class_loss = criterion(
                    output, target.float()
                )
                v_obj_loss.append(obj_loss.item())
                v_nobj_loss.append(noobj_loss.item())
                v_xy_loss.append(xy_loss.item())
                v_wh_loss.append(wh_loss.item())
                v_class_loss.append(class_loss.item())
                loss = obj_loss + noobj_loss + xy_loss + wh_loss + class_loss
                valid_loss.append(loss.item())
                if i % 10 == 0:
                    loss_list = [
                        np.mean(x)
                        for x in [
                            v_obj_loss,
                            v_nobj_loss,
                            v_xy_loss,
                            v_wh_loss,
                            v_class_loss,
                        ]
                    ]
                    val_bar.set_postfix_str(
                        "o:{:.2f} n:{:.2f} x:{:.2f} w:{:.2f}c:{:.2f}".format(*loss_list)
                    )
                    if param.visdom:
                        dom.line(
                            valid_loss, win="valid_loss", opts=dict(title="Valid loss")
                        )

    torch.save(net, f"weights/{epoch}_net.pk")
Example 11
                    help='IP of the visdom server')
parser.add_argument('--visdom_port',
                    type=int,
                    default=DEFAULT_VISDOM_PORT,
                    help='IP port of the visdom server')
parser.add_argument('--value',
                    type=float,
                    default=DEFAULT_DATA_VALUE,
                    help='Y value for the line plot')
args = parser.parse_args()

print("Connecting to visdom server on ", args.visdom_host, ":",
      args.visdom_port)
value = args.value

viz = Visdom(server="http://" + args.visdom_host, port=args.visdom_port)
assert viz.check_connection()

if not viz.win_exists(WIN_LINE):
    viz.line(Y=np.array([1]),
             X=np.array([1]),
             opts=dict(
                 xlabel='Iteration',
                 ylabel='Rate',
                 title='Bitcoin to Chinese Yuan',
             ),
             win=WIN_LINE)

if os.path.exists(store_file):
    with open(store_file, 'rb') as f:
        iteration = int(pickle.load(f))
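# The example is cut off mid-branch; a sketch of the usual continuation of
# this pattern (same names as above) appends one point to the plot and
# persists the counter for the next run:
else:
    iteration = 1

viz.line(Y=np.array([value]), X=np.array([iteration]),
         win=WIN_LINE, update='append')
with open(store_file, 'wb') as fout:
    pickle.dump(iteration + 1, fout)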
Example 12
def learn(
        *,
        network,
        env,
        eval_env,
        make_eval_env,
        env_id,
        total_timesteps,
        timesteps_per_batch=1024,  # what to train on
        max_kl=0.001,
        cg_iters=10,
        gamma=0.99,
        lam=1.0,  # advantage estimation
        seed=None,
        ent_coef=0.0,
        cg_damping=1e-2,
        vf_stepsize=3e-4,
        vf_iters=3,
        max_episodes=0,
        max_iters=0,  # time constraint
        callback=None,
        load_path=None,

        # MBL
        # For train mbl
        mbl_train_freq=10,

        # For eval
        num_eval_episodes=5,
        eval_freq=5,
        vis_eval=False,
        eval_targs=('mbmf', 'mf'),
        #eval_targs=('mf',),
        quant=2,

        # For mbl.step
        #num_samples=(1500,),
        num_samples=(1, ),
        #horizon=(5,),
        horizon=(2, 1),
        #num_elites=(10,),
        num_elites=(1, ),
        mbl_lamb=(1.0, ),
        mbl_gamma=0.99,
        #mbl_sh=1, # Number of step for stochastic sampling
        mbl_sh=max((5, )),
        #vf_lookahead=-1,
        #use_max_vf=False,
        reset_per_step=(0, ),

        # For get_model
        num_fc=2,
        num_fwd_hidden=500,
        use_layer_norm=False,

        # For MBL
        num_warm_start=int(1e4),
        init_epochs=10,
        update_epochs=5,
        batch_size=512,
        update_with_validation=False,
        use_mean_elites=1,
        use_ent_adjust=0,
        adj_std_scale=0.5,

        # For data loading
        validation_set_path=None,

        # For data collect
        collect_val_data=False,

        # For traj collect
        traj_collect='mf',

        # For profile
        measure_time=True,
        eval_val_err=False,
        measure_rew=True,
        **network_kwargs):
    '''
    Learn a policy with the TRPO algorithm.

    Parameters:
    ----------

    network                 neural network to learn. Can be either string ('mlp', 'cnn', 'lstm', 'lnlstm' for basic types)
                            or function that takes input placeholder and returns tuple (output, None) for feedforward nets
                            or (output, (state_placeholder, state_output, mask_placeholder)) for recurrent nets

    env                     environment (one of the gym environments or wrapped via baselines.common.vec_env.VecEnv-type class)

    timesteps_per_batch     timesteps per gradient estimation batch

    max_kl                  max KL divergence between old policy and new policy ( KL(pi_old || pi) )

    ent_coef                coefficient of policy entropy term in the optimization objective

    cg_iters                number of iterations of conjugate gradient algorithm

    cg_damping              conjugate gradient damping

    vf_stepsize             learning rate for adam optimizer used to optimize value function loss

    vf_iters                number of iterations of value function optimization iterations per each policy optimization step

    total_timesteps           max number of timesteps

    max_episodes            max number of episodes

    max_iters               maximum number of policy optimization iterations

    callback                function to be called with (locals(), globals()) each policy optimization step

    load_path               str, path to load the model from (default: None, i.e. no model is loaded)

    **network_kwargs        keyword arguments to the policy / network builder. See baselines.common/policies.py/build_policy and arguments to a particular type of network

    Returns:
    -------

    learnt model

    '''
    if not isinstance(num_samples, tuple): num_samples = (num_samples, )
    if not isinstance(horizon, tuple): horizon = (horizon, )
    if not isinstance(num_elites, tuple): num_elites = (num_elites, )
    if not isinstance(mbl_lamb, tuple): mbl_lamb = (mbl_lamb, )
    if not isinstance(reset_per_step, tuple):
        reset_per_step = (reset_per_step, )
    if validation_set_path is None:
        if collect_val_data:
            validation_set_path = os.path.join(logger.get_dir(), 'val.pkl')
        else:
            validation_set_path = os.path.join('dataset',
                                               '{}-val.pkl'.format(env_id))
    if eval_val_err:
        eval_val_err_path = os.path.join('dataset',
                                         '{}-combine-val.pkl'.format(env_id))
    logger.log(locals())
    logger.log('MBL_SH', mbl_sh)
    logger.log('Traj_collect', traj_collect)

    if MPI is not None:
        nworkers = MPI.COMM_WORLD.Get_size()
        rank = MPI.COMM_WORLD.Get_rank()
    else:
        nworkers = 1
        rank = 0
    cpus_per_worker = 1
    U.get_session(
        config=tf.ConfigProto(allow_soft_placement=True,
                              inter_op_parallelism_threads=cpus_per_worker,
                              intra_op_parallelism_threads=cpus_per_worker))

    policy = build_policy(env, network, value_network='copy', **network_kwargs)
    set_global_seeds(seed)

    np.set_printoptions(precision=3)
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space

    ob = observation_placeholder(ob_space)
    with tf.variable_scope("pi"):
        pi = policy(observ_placeholder=ob)
    with tf.variable_scope("oldpi"):
        oldpi = policy(observ_placeholder=ob)

    # MBL
    # ---------------------------------------
    viz = Visdom(env=env_id)
    win = None
    eval_targs = list(eval_targs)
    logger.log(eval_targs)

    make_model = get_make_mlp_model(num_fc=num_fc,
                                    num_fwd_hidden=num_fwd_hidden,
                                    layer_norm=use_layer_norm)
    mbl = MBL(env=eval_env,
              env_id=env_id,
              make_model=make_model,
              num_warm_start=num_warm_start,
              init_epochs=init_epochs,
              update_epochs=update_epochs,
              batch_size=batch_size,
              **network_kwargs)

    val_dataset = {'ob': None, 'ac': None, 'ob_next': None}
    if update_with_validation:
        logger.log('Update with validation')
        val_dataset = load_val_data(validation_set_path)
    if eval_val_err:
        logger.log('Log val error')
        eval_val_dataset = load_val_data(eval_val_err_path)
    if collect_val_data:
        logger.log('Collect validation data')
        val_dataset_collect = []

    def _mf_pi(ob, t=None):
        stochastic = True
        ac, vpred, _, _ = pi.step(ob, stochastic=stochastic)
        return ac, vpred

    def _mf_det_pi(ob, t=None):
        #ac, vpred, _, _ = pi.step(ob, stochastic=False)
        ac, vpred = pi._evaluate([pi.pd.mode(), pi.vf], ob)
        return ac, vpred

    def _mf_ent_pi(ob, t=None):
        mean, std, vpred = pi._evaluate([pi.pd.mode(), pi.pd.std, pi.vf], ob)
        ac = np.random.normal(mean, std * adj_std_scale, size=mean.shape)
        return ac, vpred


    # TODO: clarify how use_ent_adjust and adj_std_scale interact when sampling actions from pi (see _mf_ent_pi above).

    def _mbmf_inner_pi(ob, t=0):
        if use_ent_adjust:
            return _mf_ent_pi(ob)
        else:
            if t < mbl_sh: return _mf_pi(ob)
            else: return _mf_det_pi(ob)

    # ---------------------------------------

    # Run multiple configurations at once
    all_eval_descs = []

    def make_mbmf_pi(n, h, e, l):
        def _mbmf_pi(ob):
            ac, rew = mbl.step(ob=ob,
                               pi=_mbmf_inner_pi,
                               horizon=h,
                               num_samples=n,
                               num_elites=e,
                               gamma=mbl_gamma,
                               lamb=l,
                               use_mean_elites=use_mean_elites)
            return ac[None], rew

        return Policy(step=_mbmf_pi, reset=None)

    for n in num_samples:
        for h in horizon:
            for l in mbl_lamb:
                for e in num_elites:
                    if 'mbmf' in eval_targs:
                        all_eval_descs.append(
                            ('MeanRewMBMF-n-{}-h-{}-e-{}-l-{}-sh-{}-me-{}'.
                             format(n, h, e, l, mbl_sh, use_mean_elites),
                             'MBMF-n-{}-h-{}-e-{}-l-{}-sh-{}-me-{}'.format(
                                 n, h, e, l, mbl_sh,
                                 use_mean_elites), make_mbmf_pi(n, h, e, l)))
    if 'mf' in eval_targs:
        all_eval_descs.append(
            ('MeanRewMF', 'MF', Policy(step=_mf_pi, reset=None)))

    logger.log('List of evaluation targets')
    for it in all_eval_descs:
        logger.log(it[0])

    pool = Pool(mp.cpu_count())
    warm_start_done = False
    # ----------------------------------------

    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    entbonus = ent_coef * meanent

    vferr = tf.reduce_mean(tf.square(pi.vf - ret))

    ratio = tf.exp(pi.pd.logp(ac) -
                   oldpi.pd.logp(ac))  # advantage * pnew / pold
    surrgain = tf.reduce_mean(ratio * atarg)

    optimgain = surrgain + entbonus
    losses = [optimgain, meankl, entbonus, surrgain, meanent]
    loss_names = ["optimgain", "meankl", "entloss", "surrgain", "entropy"]

    dist = meankl

    all_var_list = get_trainable_variables("pi")
    # var_list = [v for v in all_var_list if v.name.split("/")[1].startswith("pol")]
    # vf_var_list = [v for v in all_var_list if v.name.split("/")[1].startswith("vf")]
    var_list = get_pi_trainable_variables("pi")
    vf_var_list = get_vf_trainable_variables("pi")

    vfadam = MpiAdam(vf_var_list)

    get_flat = U.GetFlat(var_list)
    set_from_flat = U.SetFromFlat(var_list)
    klgrads = tf.gradients(dist, var_list)
    flat_tangent = tf.placeholder(dtype=tf.float32,
                                  shape=[None],
                                  name="flat_tan")
    shapes = [var.get_shape().as_list() for var in var_list]
    start = 0
    tangents = []
    for shape in shapes:
        sz = U.intprod(shape)
        tangents.append(tf.reshape(flat_tangent[start:start + sz], shape))
        start += sz
    gvp = tf.add_n([
        tf.reduce_sum(g * tangent)
        for (g, tangent) in zipsame(klgrads, tangents)
    ])  #pylint: disable=E1111
    fvp = U.flatgrad(gvp, var_list)

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(get_variables("oldpi"), get_variables("pi"))
        ])

    compute_losses = U.function([ob, ac, atarg], losses)
    compute_lossandgrad = U.function([ob, ac, atarg], losses +
                                     [U.flatgrad(optimgain, var_list)])
    compute_fvp = U.function([flat_tangent, ob, ac, atarg], fvp)
    compute_vflossandgrad = U.function([ob, ret],
                                       U.flatgrad(vferr, vf_var_list))

    @contextmanager
    def timed(msg):
        if rank == 0:
            print(colorize(msg, color='magenta'))
            tstart = time.time()
            yield
            print(
                colorize("done in %.3f seconds" % (time.time() - tstart),
                         color='magenta'))
        else:
            yield

    def allmean(x):
        assert isinstance(x, np.ndarray)
        out = np.empty_like(x)
        MPI.COMM_WORLD.Allreduce(x, out, op=MPI.SUM)
        out /= nworkers
        return out

    U.initialize()
    if load_path is not None:
        pi.load(load_path)

    th_init = get_flat()
    MPI.COMM_WORLD.Bcast(th_init, root=0)
    set_from_flat(th_init)
    vfadam.sync()
    print("Init param sum", th_init.sum(), flush=True)
    # Prepare for rollouts
    # ----------------------------------------
    if traj_collect == 'mf':
        seg_gen = traj_segment_generator(pi,
                                         env,
                                         timesteps_per_batch,
                                         stochastic=True)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=40)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=40)  # rolling buffer for episode rewards

    if sum([max_iters > 0, total_timesteps > 0, max_episodes > 0]) == 0:
        # nothing to be done
        return pi

    assert sum([max_iters>0, total_timesteps>0, max_episodes>0]) < 2, \
        'out of max_iters, total_timesteps, and max_episodes only one should be specified'

    while True:
        if callback: callback(locals(), globals())
        if total_timesteps and timesteps_so_far >= total_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        logger.log("********** Iteration %i ************" % iters_so_far)

        with timed("sampling"):
            seg = seg_gen.__next__()
            if traj_collect == 'mf-random' or traj_collect == 'mf-mb':
                seg_mbl = seg_gen_mbl.__next__()
            else:
                seg_mbl = seg
        add_vtarg_and_adv(seg, gamma, lam)

        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]

        # Val data collection
        if collect_val_data:
            for ob_, ac_, ob_next_ in zip(ob[:-1, 0, ...], ac[:-1, ...],
                                          ob[1:, 0, ...]):
                val_dataset_collect.append(
                    (copy.copy(ob_), copy.copy(ac_), copy.copy(ob_next_)))
        # -----------------------------
        # MBL update
        else:
            ob_mbl, ac_mbl = seg_mbl["ob"], seg_mbl["ac"]

            mbl.add_data_batch(ob_mbl[:-1, 0, ...], ac_mbl[:-1, ...],
                               ob_mbl[1:, 0, ...])
            mbl.update_forward_dynamic(require_update=iters_so_far %
                                       mbl_train_freq == 0,
                                       ob_val=val_dataset['ob'],
                                       ac_val=val_dataset['ac'],
                                       ob_next_val=val_dataset['ob_next'])
        # -----------------------------

        if traj_collect == 'mf':
            #if traj_collect == 'mf' or traj_collect == 'mf-random' or traj_collect == 'mf-mb':
            vpredbefore = seg[
                "vpred"]  # predicted value function before update
            atarg = (atarg - atarg.mean()) / atarg.std(
            )  # standardized advantage function estimate

            if hasattr(pi, "ret_rms"): pi.ret_rms.update(tdlamret)
            if hasattr(pi, "rms"):
                pi.rms.update(ob)  # update running mean/std for policy

            args = seg["ob"], seg["ac"], atarg
            fvpargs = [arr[::5] for arr in args]

            def fisher_vector_product(p):
                return allmean(compute_fvp(p, *fvpargs)) + cg_damping * p

            assign_old_eq_new(
            )  # set old parameter values to new parameter values
            with timed("computegrad"):
                *lossbefore, g = compute_lossandgrad(*args)
            lossbefore = allmean(np.array(lossbefore))
            g = allmean(g)
            if np.allclose(g, 0):
                logger.log("Got zero gradient. not updating")
            else:
                with timed("cg"):
                    stepdir = cg(fisher_vector_product,
                                 g,
                                 cg_iters=cg_iters,
                                 verbose=rank == 0)
                assert np.isfinite(stepdir).all()
                shs = .5 * stepdir.dot(fisher_vector_product(stepdir))
                lm = np.sqrt(shs / max_kl)
                # logger.log("lagrange multiplier:", lm, "gnorm:", np.linalg.norm(g))
                fullstep = stepdir / lm
                expectedimprove = g.dot(fullstep)
                surrbefore = lossbefore[0]
                stepsize = 1.0
                thbefore = get_flat()
                for _ in range(10):
                    thnew = thbefore + fullstep * stepsize
                    set_from_flat(thnew)
                    meanlosses = surr, kl, *_ = allmean(
                        np.array(compute_losses(*args)))
                    improve = surr - surrbefore
                    logger.log("Expected: %.3f Actual: %.3f" %
                               (expectedimprove, improve))
                    if not np.isfinite(meanlosses).all():
                        logger.log("Got non-finite value of losses -- bad!")
                    elif kl > max_kl * 1.5:
                        logger.log("violated KL constraint. shrinking step.")
                    elif improve < 0:
                        logger.log("surrogate didn't improve. shrinking step.")
                    else:
                        logger.log("Stepsize OK!")
                        break
                    stepsize *= .5
                else:
                    logger.log("couldn't compute a good step")
                    set_from_flat(thbefore)
                if nworkers > 1 and iters_so_far % 20 == 0:
                    paramsums = MPI.COMM_WORLD.allgather(
                        (thnew.sum(),
                         vfadam.getflat().sum()))  # list of tuples
                    assert all(
                        np.allclose(ps, paramsums[0]) for ps in paramsums[1:])

            for (lossname, lossval) in zip(loss_names, meanlosses):
                logger.record_tabular(lossname, lossval)

            with timed("vf"):

                for _ in range(vf_iters):
                    for (mbob, mbret) in dataset.iterbatches(
                        (seg["ob"], seg["tdlamret"]),
                            include_final_partial_batch=False,
                            batch_size=64):
                        g = allmean(compute_vflossandgrad(mbob, mbret))
                        vfadam.update(g, vf_stepsize)

            logger.record_tabular("ev_tdlam_before",
                                  explained_variance(vpredbefore, tdlamret))

        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        if MPI is not None:
            listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        else:
            listoflrpairs = [lrlocal]
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)

        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1

        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)

        if rank == 0:
            # MBL evaluation
            if not collect_val_data:
                set_global_seeds(seed)
                default_sess = tf.get_default_session()

                def multithread_eval_policy(env_, pi_, num_episodes_,
                                            vis_eval_, seed):
                    with default_sess.as_default():
                        if hasattr(env, 'ob_rms') and hasattr(env_, 'ob_rms'):
                            env_.ob_rms = env.ob_rms
                        res = eval_policy(env_, pi_, num_episodes_, vis_eval_,
                                          seed, measure_time, measure_rew)

                        try:
                            env_.close()
                        except Exception:
                            pass
                    return res

                if mbl.is_warm_start_done() and iters_so_far % eval_freq == 0:
                    warm_start_done = mbl.is_warm_start_done()
                    if num_eval_episodes > 0:
                        targs_names = {}
                        with timed('eval'):
                            num_descs = len(all_eval_descs)
                            list_field_names = [e[0] for e in all_eval_descs]
                            list_legend_names = [e[1] for e in all_eval_descs]
                            list_pis = [e[2] for e in all_eval_descs]
                            list_eval_envs = [
                                make_eval_env() for _ in range(num_descs)
                            ]
                            list_seed = [seed for _ in range(num_descs)]
                            list_num_eval_episodes = [
                                num_eval_episodes for _ in range(num_descs)
                            ]
                            print(list_field_names)
                            print(list_legend_names)

                            list_vis_eval = [
                                vis_eval for _ in range(num_descs)
                            ]

                            for i in range(num_descs):
                                field_name = list_field_names[i]
                                legend_name = list_legend_names[i]

                                res = multithread_eval_policy(
                                    list_eval_envs[i], list_pis[i],
                                    list_num_eval_episodes[i],
                                    list_vis_eval[i], seed)
                                #eval_results = pool.starmap(multithread_eval_policy, zip(list_eval_envs, list_pis, list_num_eval_episodes, list_vis_eval,list_seed))

                                #for field_name, legend_name, res in zip(list_field_names, list_legend_names, eval_results):
                                perf, elapsed_time, eval_rew = res
                                logger.record_tabular(field_name, perf)
                                if measure_time:
                                    logger.record_tabular(
                                        'Time-%s' % (field_name), elapsed_time)
                                if measure_rew:
                                    logger.record_tabular(
                                        'SimRew-%s' % (field_name), eval_rew)
                                targs_names[field_name] = legend_name

                    if eval_val_err:
                        fwd_dynamics_err = mbl.eval_forward_dynamic(
                            obs=eval_val_dataset['ob'],
                            acs=eval_val_dataset['ac'],
                            obs_next=eval_val_dataset['ob_next'])
                        logger.record_tabular('FwdValError', fwd_dynamics_err)

                    logger.dump_tabular()
                    #print(logger.get_dir())
                    #print(targs_names)
                    if num_eval_episodes > 0:
                        win = plot(viz,
                                   win,
                                   logger.get_dir(),
                                   targs_names=targs_names,
                                   quant=quant,
                                   opt='best')
            # -----------
        yield pi

    if collect_val_data:
        with open(validation_set_path, 'wb') as f:
            pickle.dump(val_dataset_collect, f)
        logger.log('Save {} validation data'.format(len(val_dataset_collect)))
Example no. 13
0
    def __init__(self, train_dataset, val_dataset, model, loss_fn, optimizer,
                 lr_scheduler, params):
        """
    General purpose training script
    :param train_dataset: PyTorch dataset that loads training images
    :param val_dataset: PyTorch dataset that loads testing / validation images
    :param model: Network model
    :param optimizer: PyTorch optimizer object
    :param lr_scheduler: PyTorch learning rate scheduler object
    :param loss_fn: loss function
    :param params: dictionary containing parameters for the training process
    It can contain the following fields (fields with no default value mentioned
    are mandatory):
      n_epochs: number of epochs of training
      batch_size: batch size for one iteration
      do_val: perform validation? (default: True)
      shuffle: shuffle training data? (default: True)
      num_workers: number of CPU threads for loading data (default: 10)
      val_freq: frequency of validation (in number of epochs) (default: 1)
      print_freq: progress printing frequency (in number of iterations)
        (default: 100)
      experiment: name of the experiment, used to create logs and checkpoints
      checkpoint_file: name of a file with saved weights, loaded before the
        start of training if provided (default: None)
      resume_optim: whether to resume optimization from loaded weights
        (default: True)
    """
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        self.best_prec1 = -float('inf')

        # parse params with default values
        self.config = {
            'n_epochs': params['n_epochs'],
            'batch_size': params['batch_size'],
            'do_val': params.get('do_val', True),
            'shuffle': params.get('shuffle', True),
            'num_workers': params.get('num_workers', 10),
            'val_freq': params.get('val_freq', 1),
            'print_freq': params.get('print_freq', 100),
            'experiment': params['experiment'],
            'checkpoint_file': params.get('checkpoint_file'),
            'resume_optim': params.get('resume_optim', True)
        }

        self.logdir = osp.join(os.getcwd(), 'logs', self.config['experiment'])
        if not osp.isdir(self.logdir):
            os.makedirs(self.logdir)

        # visdom plots
        self.vis_env = self.config['experiment']
        self.loss_win = 'loss_win'
        self.vis = Visdom()
        self.vis.line(X=np.zeros((1, 2)),
                      Y=np.zeros((1, 2)),
                      win=self.loss_win,
                      opts={
                          'legend': ['train_loss', 'val_loss'],
                          'xlabel': 'epochs',
                          'ylabel': 'loss'
                      },
                      env=self.vis_env)
        self.lr_win = 'lr_win'
        self.vis.line(X=np.zeros(1),
                      Y=np.zeros(1),
                      win=self.lr_win,
                      opts={
                          'legend': ['learning_rate'],
                          'xlabel': 'epochs',
                          'ylabel': 'log(lr)'
                      },
                      env=self.vis_env)
        self.top1_win = 'top1_win'
        self.vis.line(X=np.zeros((1, 2)),
                      Y=np.zeros((1, 2)),
                      win=self.top1_win,
                      opts={
                          'legend': ['train_top1_prec', 'val_top1_prec'],
                          'xlabel': 'epochs',
                          'ylabel': 'top1_prec (%)'
                      },
                      env=self.vis_env)
        self.top5_win = 'top5_win'
        self.vis.line(X=np.zeros((1, 2)),
                      Y=np.zeros((1, 2)),
                      win=self.top5_win,
                      opts={
                          'legend': ['train_top5_prec', 'val_top5_prec'],
                          'xlabel': 'epochs',
                          'ylabel': 'top5_prec (%)'
                      },
                      env=self.vis_env)

        # log all the command line options
        print('---------------------------------------')
        print('Experiment: {:s}'.format(self.config['experiment']))
        for k, v in self.config.items():
            print('{:s}: {:s}'.format(k, str(v)))
        print('---------------------------------------')

        self.start_epoch = 0
        checkpoint_file = self.config['checkpoint_file']
        if checkpoint_file:
            if osp.isfile(checkpoint_file):
                checkpoint = torch.load(checkpoint_file)
                self.model.load_state_dict(checkpoint['model_state_dict'])
                self.best_prec1 = checkpoint['best_prec1']
                if self.config['resume_optim']:
                    self.optimizer.load_state_dict(
                        checkpoint['optim_state_dict'])
                    self.start_epoch = checkpoint['epoch']
                #print('Loaded checkpoint {:s} epoch {:d}'.format(checkpoint_file,
                #  checkpoint['epoch']))

        self.train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=self.config['batch_size'],
            shuffle=self.config['shuffle'],
            num_workers=self.config['num_workers'])
        if self.config['do_val']:
            self.val_loader = torch.utils.data.DataLoader(
                val_dataset,
                batch_size=self.config['batch_size'],
                shuffle=False,
                num_workers=self.config['num_workers'])
        else:
            self.val_loader = None
Example no. 14
0
def main(args):
    """
    run inference for SS-VAE
    :param args: arguments for SS-VAE
    :return: None
    """
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    viz = None
    if args.visualize:
        viz = Visdom()
        mkdir_p("./vae_results")

    # batch_size: number of images (and labels) to be considered in a batch
    ss_vae = SSVAE(z_dim=args.z_dim,
                   hidden_layers=args.hidden_layers,
                   use_cuda=args.cuda,
                   config_enum=args.enum_discrete,
                   aux_loss_multiplier=args.aux_loss_multiplier)

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = Adam(adam_params)

    # set up the loss(es) for inference. wrapping the guide in config_enumerate builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    guide = config_enumerate(ss_vae.guide, args.enum_discrete, expand=True)
    elbo = (JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO)(
        max_plate_nesting=1)
    loss_basic = SVI(ss_vae.model, guide, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from NIPS 14 paper (Kingma et al)
    if args.aux_loss:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        loss_aux = SVI(ss_vae.model_classify,
                       ss_vae.guide_classify,
                       optimizer,
                       loss=elbo)
        losses.append(loss_aux)

    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(MNISTCached,
                                          args.cuda,
                                          args.batch_size,
                                          sup_num=args.sup_num)

        # how often would a supervised batch be encountered during inference
        # e.g. if sup_num is 3000, we would have every 16th = int(50000/3000) batch supervised
        # until we have traversed through the all supervised batches
        periodic_interval_batches = int(MNISTCached.train_data_size /
                                        (1.0 * args.sup_num))

        # number of unsupervised examples
        unsup_num = MNISTCached.train_data_size - args.sup_num

        # initializing local variables to maintain the best validation accuracy
        # seen across epochs over the supervised training set
        # and the corresponding testing set and the state of the networks
        best_valid_acc, corresponding_test_acc = 0.0, 0.0

        # WL: added. =====
        print_and_log(logger, args)
        print_and_log(
            logger,
            "\nepoch\t" + "elbo(sup)\t" + "elbo(unsup)\t" + "time(sec)")
        times = [time.time()]
        # ================

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):

            # get the losses for an epoch
            epoch_losses_sup, epoch_losses_unsup = \
                run_inference_for_epoch(data_loaders, losses, periodic_interval_batches)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses_sup = map(lambda v: v / args.sup_num,
                                       epoch_losses_sup)
            avg_epoch_losses_unsup = map(lambda v: v / unsup_num,
                                         epoch_losses_unsup)

            # store the loss and validation/testing accuracies in the logfile
            # WL: edited. =====
            # str_loss_sup = " ".join(map(str, avg_epoch_losses_sup))
            # str_loss_unsup = " ".join(map(str, avg_epoch_losses_unsup))
            # str_print = "{} epoch: avg losses {}".format(i, "{} {}".format(str_loss_sup, str_loss_unsup))
            times.append(time.time())
            str_elbo_sup = " ".join(
                map(lambda v: f"{-v:.4f}", avg_epoch_losses_sup))
            str_elbo_unsup = " ".join(
                map(lambda v: f"{-v:.4f}", avg_epoch_losses_unsup))
            str_print = f"{i:06d}\t"\
                        f"{str_elbo_sup}\t"\
                        f"{str_elbo_unsup}\t"\
                        f"{times[-1]-times[-2]:.3f}"
            # =================

            validation_accuracy = get_accuracy(data_loaders["valid"],
                                               ss_vae.classifier,
                                               args.batch_size)
            # WL: commented. =====
            # str_print += " validation accuracy {}".format(validation_accuracy)
            # ====================

            # this test accuracy is only for logging, this is not used
            # to make any decisions during training
            test_accuracy = get_accuracy(data_loaders["test"],
                                         ss_vae.classifier, args.batch_size)
            # WL: commented. =====
            # str_print += " test accuracy {}".format(test_accuracy)
            # ====================

            # update the best validation accuracy and the corresponding
            # testing accuracy and the state of the parent module (including the networks)
            if best_valid_acc < validation_accuracy:
                best_valid_acc = validation_accuracy
                corresponding_test_acc = test_accuracy

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"],
                                           ss_vae.classifier, args.batch_size)
        # WL: commented. =====
        # print_and_log(logger, "best validation accuracy {} corresponding testing accuracy {} "
        #               "last testing accuracy {}".format(best_valid_acc, corresponding_test_acc, final_test_accuracy))
        # ====================

        # visualize the conditional samples
        visualize(ss_vae, viz, data_loaders["test"])
    finally:
        # close the logger file object if we opened it earlier
        if args.logfile:
            logger.close()
Example no. 15
0
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from datetime import datetime
from torch.autograd import Variable
from visdom import Visdom
import syft as sy
hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch, i.e. add extra functionality to support Federated Learning
import ComputePrivacy as Privacy  # self-defined module to compute the privacy loss
import logging
import Datasets

logger = logging.getLogger(__name__)
date = datetime.now().strftime('%Y-%m-%d %H:%M')
vis = Visdom(env='SENT140_AdaClip2_ASyn')

# Define the parameters
class Arguments():
    def __init__(self):
        self.batch_size = 1  # Number of samples used by each user/device at each iteration.
        # If this value is less than 1 it is treated as a sampling ratio; otherwise it is the mini-batch size
        self.lr = 0.01  # Learning rate
        self.ClipBound = torch.tensor([0.1])  # clipbound
        self.z = 0.1  # Noise parameter z in Gaussian noise N(0, (zS)^2) where S is sensitivity
        self.users_total = 1000  # Total number of users/devices
        self.user_sel_prob = 0.01  # Probability for sampling users/devices at each iteration
        self.itr_numbers = 5000  # Number of total iterations

        self.test_batch_size = 1  # Number of test mini-batch size
        self.log_train = 100  # Logging interval for printing the training loss
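
# Minimal sketch (an addition, not part of the original snippet) of the noise
# scale implied by the comments above: with sensitivity S = ClipBound and
# noise parameter z, the Gaussian mechanism adds noise drawn from N(0, (z*S)^2).
args = Arguments()
noise_std = args.z * args.ClipBound  # z * S, a one-element tensor here
noise = torch.normal(mean=torch.zeros_like(noise_std), std=noise_std)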
Example no. 16
0
import MyDataloader
from CandyNet import *

import csv
import pandas as pd
import SimpleITK as sitk
from medpy import metric
import numpy as np
import time
import shutil
import sys
import os
import torch.nn as nn  # used by weights_init below
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from visdom import Visdom
viz = Visdom(env='PiaNet with GRU Server13.41')
viz.line([0], [0], win='loss-dice')
viz.line([0], [0], win='train')
viz.line([0], [0], win='valid')


#################initialization network##############
def weights_init(model):
    if isinstance(model, nn.Conv3d) or isinstance(model, nn.ConvTranspose3d):
        nn.init.kaiming_uniform_(model.weight.data, 0.25)
        nn.init.constant_(model.bias.data, 0)
    # elif isinstance(model, nn.InstanceNorm3d):
    # 	nn.init.constant_(model.weight.data,1.0)
    # 	nn.init.constant_(model.bias.data, 0)

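# Usage sketch (the model class name `PiaNet` is inferred from the Visdom env
# string above and is an assumption; any nn.Module works):
#
#   model = PiaNet()
#   model.apply(weights_init)  # Module.apply walks every submodule recursively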
Example no. 17
0
def main():
    args = parser.parse_args()
    cf = ConfigParser.ConfigParser()
    try:
        cf.read(args.conf)
    except Exception:
        print("conf file does not exist")

    logger = init_logger(os.path.join(args.log_dir, 'train_cnn_lstm_ctc.log'))
    dataset = cf.get('Data', 'dataset')
    data_dir = cf.get('Data', 'data_dir')
    feature_type = cf.get('Data', 'feature_type')
    out_type = cf.get('Data', 'out_type')
    n_feats = cf.getint('Data', 'n_feats')
    mel = cf.getboolean('Data', 'mel')
    batch_size = cf.getint("Training", 'batch_size')

    #Data Loader
    train_dataset = myDataset(data_dir,
                              data_set='train',
                              feature_type=feature_type,
                              out_type=out_type,
                              n_feats=n_feats,
                              mel=mel)
    train_loader = myCNNDataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=4,
                                   pin_memory=False)
    dev_dataset = myDataset(data_dir,
                            data_set="dev",
                            feature_type=feature_type,
                            out_type=out_type,
                            n_feats=n_feats,
                            mel=mel)
    dev_loader = myCNNDataLoader(dev_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=4,
                                 pin_memory=False)

    #decoder for dev set
    decoder = GreedyDecoder(dev_dataset.int2phone, space_idx=-1, blank_index=0)

    #Define Model
    rnn_input_size = cf.getint('Model', 'rnn_input_size')
    rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size')
    rnn_layers = cf.getint('Model', 'rnn_layers')
    rnn_type = RNN[cf.get('Model', 'rnn_type')]
    bidirectional = cf.getboolean('Model', 'bidirectional')
    batch_norm = cf.getboolean('Model', 'batch_norm')
    num_class = cf.getint('Model', 'num_class')
    drop_out = cf.getfloat('Model', 'drop_out')
    model = CNN_LSTM_CTC(rnn_input_size=rnn_input_size,
                         rnn_hidden_size=rnn_hidden_size,
                         rnn_layers=rnn_layers,
                         rnn_type=rnn_type,
                         bidirectional=bidirectional,
                         batch_norm=batch_norm,
                         num_class=num_class,
                         drop_out=drop_out)
    #model.apply(xavier_uniform_init)
    print(model.name)

    #Training
    init_lr = cf.getfloat('Training', 'init_lr')
    num_epoches = cf.getint('Training', 'num_epoches')
    end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc')
    decay = cf.getfloat("Training", 'lr_decay')
    weight_decay = cf.getfloat("Training", 'weight_decay')
    try:
        seed = cf.getint('Training', 'seed')
    except:
        seed = torch.cuda.initial_seed()
    params = {
        'num_epoches': num_epoches,
        'end_adjust_acc': end_adjust_acc,
        'mel': mel,
        'seed': seed,
        'decay': decay,
        'learning_rate': init_lr,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'feature_type': feature_type,
        'n_feats': n_feats,
        'out_type': out_type
    }
    print(params)

    if USE_CUDA:
        torch.cuda.manual_seed(seed)
        model = model.cuda()

    loss_fn = CTCLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=init_lr,
                                 weight_decay=weight_decay)

    #visualization for training
    from visdom import Visdom
    viz = Visdom()
    title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC'
    opts = [
        dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'),
        dict(title=title + " CER on Train", ylabel='CER', xlabel='Epoch'),
        dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch')
    ]
    viz_window = [None, None, None]

    count = 0
    learning_rate = init_lr
    acc_best = -100
    acc_best_true = -100
    adjust_rate_flag = False
    stop_train = False
    adjust_time = 0
    adjust_rate_count = 0  # initialized here so the comparison branches below never see an unbound name
    start_time = time.time()
    loss_results = []
    training_cer_results = []
    dev_cer_results = []

    while not stop_train:
        if count >= num_epoches:
            break
        count += 1

        if adjust_rate_flag:
            learning_rate *= decay
            adjust_rate_flag = False
            for param in optimizer.param_groups:
                param['lr'] *= decay

        print("Start training epoch: %d, learning_rate: %.5f" %
              (count, learning_rate))
        logger.info("Start training epoch: %d, learning_rate: %.5f" %
                    (count, learning_rate))

        loss = train(model,
                     train_loader,
                     loss_fn,
                     optimizer,
                     logger,
                     print_every=20)
        loss_results.append(loss)
        cer = dev(model, train_loader, decoder, logger)
        print("cer on training set is %.4f" % cer)
        logger.info("cer on training set is %.4f" % cer)
        training_cer_results.append(cer)
        acc = dev(model, dev_loader, decoder, logger)
        dev_cer_results.append(acc)

        #model_path_accept = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'.pkl'
        #model_path_reject = './log/epoch'+str(count)+'_lr'+str(learning_rate)+'_cv'+str(acc)+'_rejected.pkl'

        if acc > (acc_best + end_adjust_acc):
            acc_best = acc
            adjust_rate_count = 0
            model_state = copy.deepcopy(model.state_dict())
            op_state = copy.deepcopy(optimizer.state_dict())
        elif (acc > acc_best - end_adjust_acc):
            adjust_rate_count += 1
            if acc > acc_best and acc > acc_best_true:
                acc_best_true = acc
                model_state = copy.deepcopy(model.state_dict())
                op_state = copy.deepcopy(optimizer.state_dict())
        else:
            adjust_rate_count = 0

        #torch.save(model.state_dict(), model_path_reject)
        print("adjust_rate_count:" + str(adjust_rate_count))
        print('adjust_time:' + str(adjust_time))
        logger.info("adjust_rate_count:" + str(adjust_rate_count))
        logger.info('adjust_time:' + str(adjust_time))

        if adjust_rate_count == 10:
            adjust_rate_flag = True
            adjust_time += 1
            adjust_rate_count = 0
            acc_best = acc_best_true
            model.load_state_dict(model_state)
            optimizer.load_state_dict(op_state)

        if adjust_time == 8:
            stop_train = True

        time_used = (time.time() - start_time) / 60
        print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
              (count, acc, time_used))
        logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" %
                    (count, acc, time_used))

        x_axis = range(count)
        y_axis = [
            loss_results[0:count], training_cer_results[0:count],
            dev_cer_results[0:count]
        ]
        for x in range(len(viz_window)):
            if viz_window[x] is None:
                viz_window[x] = viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    opts=opts[x],
                )
            else:
                viz.line(
                    X=np.array(x_axis),
                    Y=np.array(y_axis[x]),
                    win=viz_window[x],
                    update='replace',
                )

    print("End training, best cv acc is: %.4f" % acc_best)
    logger.info("End training, best cv acc is: %.4f" % acc_best)
    best_path = os.path.join(args.log_dir,
                             'best_model' + '_cv' + str(acc_best) + '.pkl')
    cf.set('Model', 'model_file', best_path)
    with open(args.conf, 'w') as f:
        cf.write(f)
    params['epoch'] = count

    torch.save(
        CNN_LSTM_CTC.save_package(model,
                                  optimizer=optimizer,
                                  epoch=params,
                                  loss_results=loss_results,
                                  training_cer_results=training_cer_results,
                                  dev_cer_results=dev_cer_results), best_path)
Example no. 18
0
def train():
    viz = Visdom(port=8097)
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        #if args.dataset_root == COCO_ROOT:
        #    parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    elif args.dataset == 'WIDER_FACE':
        dataset = WiderFaceDetection(root=args.dataset_root,
                                     transform=SimpleAugmentation())

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    ssd_net = build_face_ssd('train')
    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    else:
        net = ssd_net

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)
    #optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=0.00005)
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(num_classes=2,
                             overlap_thresh=0.5,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=3,
                             neg_overlap=0.5,
                             encode_target=False,
                             use_gpu=args.cuda)

    net.train()

    loc_loss = 0
    conf_loss = 0
    epoch = 0
    epoch_size = len(dataset) // args.batch_size
    step_index = 0

    height = 512
    width = 512
    in_pipe = viz.images(np.random.randn(args.batch_size, 3, height, width))
    # create batch iterator
    lr = args.lr
    acum_loss = 0.0
    count = 0
    iteration = args.start_iter
    for epoch in range(100000):
        for i_batch, (images, targets) in enumerate(data_loader):
            if i_batch % 20 == 0:
                # keep a CPU copy of the batch, shifted back toward [0, 255]
                # for visualization; the boxes are drawn onto it below, after
                # the forward/backward pass
                np_im = images.numpy().copy() + 128

            if args.cuda:
                with torch.no_grad():
                    images = Variable(images.cuda())
                    targets = [Variable(ann.cuda()) for ann in targets]
            else:
                with torch.no_grad():
                    images = Variable(images)
                    targets = [Variable(ann) for ann in targets]
            out = net(images)

            optimizer.zero_grad()
            loss_l, loss_c, need_draw = criterion(out, targets, images.shape,
                                                  i_batch)
            loss = loss_l + loss_c
            acum_loss += loss.data.item()
            count += 1
            loss.backward()
            optimizer.step()

            if i_batch % 20 == 0:
                height, width = images.shape[2], images.shape[3]
                drawes = []
                for bb in range(args.batch_size):
                    im = np.ascontiguousarray(
                        np.transpose(np_im[bb], (1, 2, 0)))
                    boxes = targets[bb].cpu().numpy()
                    num_boxes = boxes.shape[0]
                    matches = need_draw[bb]
                    for kk in range(num_boxes):
                        x1 = int(min(width, max(0, width * boxes[kk, 0])))
                        y1 = int(min(height, max(0, height * boxes[kk, 1])))
                        x2 = int(min(width, max(0, width * boxes[kk, 2])))
                        y2 = int(min(height, max(0, height * boxes[kk, 3])))
                        cv2.rectangle(im, (x1, y1), (x2, y2), (255, 0, 0), 2)

                    for kk in range(len(matches)):
                        x1, y1, x2, y2 = matches[kk]
                        cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    drawes.append(im)
                drawes = np.transpose(np.stack(drawes, axis=0), (0, 3, 1, 2))
                viz.images(drawes, win=in_pipe)

            if i_batch % 5 == 0:
                print('batch: %d, loss_loc: %.4f, loss_cls: %.4f, acum_loss: %.4f' %
                      (i_batch, loss_l.data.item(), loss_c.data.item(),
                       acum_loss * 1.0 / count))
                for param_group in optimizer.param_groups:
                    print('lr: {}'.format(param_group['lr']))

        if epoch in [900, 1800]:
            lr = lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        print('saving SSD_wFace_matching_moreprior_{}.pth'.format(epoch))
        torch.save(
            ssd_net.state_dict(),
            './checkpoints/SSD_wFace_matching_moreprior_{}.pth'.format(epoch))
Example no. 19
0
def train(model, train_datasets, test_datasets, epochs_per_task=10,
          batch_size=64, test_size=1024, consolidate=True,
          fisher_estimation_sample_size=1024,
          lr=1e-3, weight_decay=1e-5,
          loss_log_interval=30,
          eval_log_interval=50,
          cuda=False):
    # prepare the loss criterion and the optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr,
                          weight_decay=weight_decay)

    # instantiate a visdom client
    vis = Visdom(env=model.name)

    # set the model's mode to training mode.
    model.train()

    for task, train_dataset in enumerate(train_datasets, 1):
        for epoch in range(1, epochs_per_task+1):
            # prepare the data loaders.
            data_loader = utils.get_data_loader(
                train_dataset, batch_size=batch_size,
                cuda=cuda
            )
            data_stream = tqdm(enumerate(data_loader, 1))

            for batch_index, (x, y) in data_stream:
                # where are we?
                data_size = len(x)
                dataset_size = len(data_loader.dataset)
                dataset_batches = len(data_loader)
                previous_task_iteration = sum([
                    epochs_per_task * len(d) // batch_size for d in
                    train_datasets[:task-1]
                ])
                current_task_iteration = (
                    (epoch-1)*dataset_batches + batch_index
                )
                iteration = (
                    previous_task_iteration +
                    current_task_iteration
                )

                # prepare the data.
                x = x.view(data_size, -1)
                x = Variable(x).cuda() if cuda else Variable(x)
                y = Variable(y).cuda() if cuda else Variable(y)

                # run the model and backpropagate the errors.
                optimizer.zero_grad()
                scores = model(x)
                ce_loss = criterion(scores, y)
                ewc_loss = model.ewc_loss(cuda=cuda)
                loss = ce_loss + ewc_loss
                loss.backward()
                optimizer.step()

                # calculate the training precision.
                _, predicted = scores.max(1)
                precision = (predicted == y).sum().float() / len(x)

                data_stream.set_description((
                    '=> '
                    'task: {task}/{tasks} | '
                    'epoch: {epoch}/{epochs} | '
                    'progress: [{trained}/{total}] ({progress:.0f}%) | '
                    'prec: {prec:.4} | '
                    'loss => '
                    'ce: {ce_loss:.4} / '
                    'ewc: {ewc_loss:.4} / '
                    'total: {loss:.4}'
                ).format(
                    task=task,
                    tasks=len(train_datasets),
                    epoch=epoch,
                    epochs=epochs_per_task,
                    trained=batch_index*batch_size,
                    total=dataset_size,
                    progress=(100.*batch_index/dataset_batches),
                    prec=float(precision),
                    ce_loss=float(ce_loss),
                    ewc_loss=float(ewc_loss),
                    loss=float(loss),
                ))

                # Send test precision to the visdom server.
                if iteration % eval_log_interval == 0:
                    names = [
                        'task {}'.format(i+1) for i in
                        range(len(train_datasets))
                    ]
                    precs = [
                        utils.validate(
                            model, test_datasets[i], test_size=test_size,
                            cuda=cuda, verbose=False,
                        ) if i+1 <= task else 0 for i in
                        range(len(train_datasets))
                    ]
                    title = (
                        'precision (consolidated)' if consolidate else
                        'precision'
                    )
                    visual.visualize_scalars(
                        vis, precs, names, title,
                        iteration
                    )

                # Send losses to the visdom server.
                if iteration % loss_log_interval == 0:
                    title = 'loss (consolidated)' if consolidate else 'loss'
                    visual.visualize_scalars(
                        vis,
                        [loss, ce_loss, ewc_loss],
                        ['total', 'cross entropy', 'ewc'],
                        title, iteration
                    )

        if consolidate and task < len(train_datasets):
            # estimate the fisher information of the parameters and consolidate
            # them in the network.
            print(
                '=> Estimating diagonals of the fisher information matrix...',
                flush=True, end='',
            )
            model.consolidate(model.estimate_fisher(
                train_dataset, fisher_estimation_sample_size
            ))
            print(' Done!')
Example no. 20
0

if __name__ == '__main__':
    FG = biGAN_parser()
    if FG.clean_ckpt:
        shutil.rmtree(FG.checkpoint_root)
    if not os.path.exists(FG.checkpoint_root):
        os.makedirs(FG.checkpoint_root, exist_ok=True)
    logger = logging.Logger(FG.checkpoint_root)
    FG.seed = 1
    torch.manual_seed(FG.seed)
    torch.cuda.manual_seed(FG.seed)
    cudnn.benchmark = True
    EPS = 1e-12

    vis = Visdom(port=FG.vis_port, env=str(FG.vis_env))
    vis.text(argument_report(FG, end='<br>'), win='config')

    save_dir = str(FG.vis_env)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # torch setting
    device = torch.device('cuda:{}'.format(FG.devices[0]))
    torch.cuda.set_device(FG.devices[0])
    timer = SimpleTimer()

    printers = dict(
        lr = Scalar(vis, 'lr', opts=dict(
            showlegend=True, title='lr', ytickmin=0, ytickmax=2.0)),
        D_loss = Scalar(vis, 'D_loss', opts=dict(
            showlegend=True, title='D_loss', ytickmin=0, ytickmax=2.0)),
    )
Example no. 21
0
#
# # data loader of test sets
# loadtesta = torch.utils.data.DataLoader(testa, batch_size=24, shuffle=True)
# loadtestb = torch.utils.data.DataLoader(testb, batch_size=24, shuffle=True)
# loadtestc = torch.utils.data.DataLoader(testc, batch_size=24, shuffle=True)


################################ Training Model  ################################
model = Mnet()
cec_loss = nn.CrossEntropyLoss()  # loss function
params = model.parameters()  # adjustable parameters and gradient
optimizer = optim.Adam(params=params, lr=0.001)

n_epochs = 3
n_iterations = 0
vis = Visdom()  # dynamic graphing window (start the server first: python -m visdom.server)
vis_window = vis.line(np.array([0]), np.array([0]))  # initialize the Visdom line plot

for e in range(n_epochs):  # loop for epoch
    for i, (images, labels) in enumerate(trainset):  # loop on  all images in one batch
        images = Variable(images)  # pass images through autograd variable for it to create gradient
        labels = Variable(labels)  # pass labels through autograd variable for it to create gradient
        output = model(images)  # passing images to model
        model.zero_grad()  # initializing the gradient
        loss = cec_loss(output, labels)  # calculating loss
        loss.backward()  # back propagation
        optimizer.step()  # update the weights
        n_iterations += 1  # counting iterations
        vis.line(np.array([loss.item()]), np.array([n_iterations]), win=vis_window, update='append')  # display on
        # Visdom
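Per-batch losses are noisy to read off a live plot. One common refinement, sketched here as an add-on rather than part of the original script, is to stream an exponential moving average instead of (or alongside) the raw value:

ema_loss = None

def smoothed(value, beta=0.98):
    """Exponential moving average of a scalar stream."""
    global ema_loss
    ema_loss = value if ema_loss is None else beta * ema_loss + (1 - beta) * value
    return ema_loss

# inside the loop, plot smoothed(loss.item()) rather than the raw loss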
Example no. 22
0
def main():
    print("#######")
    print(
        "WARNING: All rewards are clipped or normalized so you need to use a monitor (see envs.py) or visdom plot to get true rewards"
    )
    print("#######")

    os.environ['OMP_NUM_THREADS'] = '1'

    if args.vis:
        from visdom import Visdom
        viz = Visdom()
        win = None

    envs = [
        make_env(args.env_name, args.seed, i, args.log_dir)
        for i in range(args.num_processes)
    ]

    if args.num_processes > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        envs = VecNormalize(envs)

    obs_shape = envs.observation_space.shape
    obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])

    if len(envs.observation_space.shape) == 3:
        actor_critic = CNNPolicy(obs_shape[0], envs.action_space,
                                 args.recurrent_policy)
    else:
        assert not args.recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        actor_critic = MLPPolicy(obs_shape[0], envs.action_space)

    if envs.action_space.__class__.__name__ == "Discrete":
        action_shape = 1
    else:
        action_shape = envs.action_space.shape[0]

    if args.cuda:
        actor_critic.cuda()

    if args.algo == 'a2c':
        optimizer = optim.RMSprop(actor_critic.parameters(),
                                  args.lr,
                                  eps=args.eps,
                                  alpha=args.alpha)
    elif args.algo == 'ppo':
        optimizer = optim.Adam(actor_critic.parameters(),
                               args.lr,
                               eps=args.eps)
    elif args.algo == 'acktr':
        optimizer = KFACOptimizer(actor_critic)

    rollouts = RolloutStorage(args.num_steps, args.num_processes, obs_shape,
                              envs.action_space, actor_critic.state_size)
    current_obs = torch.zeros(args.num_processes, *obs_shape)

    def update_current_obs(obs):
        shape_dim0 = envs.observation_space.shape[0]
        obs = torch.from_numpy(obs).float()
        if args.num_stack > 1:
            current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
        current_obs[:, -shape_dim0:] = obs

    obs = envs.reset()
    update_current_obs(obs)

    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])

    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    # num_updates is defined at module level in the original script: the total
    # frame budget divided among rollout steps and parallel processes.
    num_updates = int(args.num_frames) // args.num_steps // args.num_processes

    start = time.time()
    for j in range(num_updates):
        for step in range(args.num_steps):
            # Sample actions
            value, action, action_log_prob, states = actor_critic.act(
                Variable(rollouts.observations[step], volatile=True),
                Variable(rollouts.states[step], volatile=True),
                Variable(rollouts.masks[step], volatile=True))
            cpu_actions = action.data.squeeze(1).cpu().numpy()

            # Observe reward and next obs
            obs, reward, done, info = envs.step(cpu_actions)
            reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                     1)).float()
            episode_rewards += reward

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            if args.cuda:
                masks = masks.cuda()

            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            else:
                current_obs *= masks

            update_current_obs(obs)
            rollouts.insert(step, current_obs, states.data, action.data,
                            action_log_prob.data, value.data, reward, masks)

        next_value = actor_critic(
            Variable(rollouts.observations[-1], volatile=True),
            Variable(rollouts.states[-1], volatile=True),
            Variable(rollouts.masks[-1], volatile=True))[0].data

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)

        if args.algo in ['a2c', 'acktr']:
            values, action_log_probs, dist_entropy, states = actor_critic.evaluate_actions(
                Variable(rollouts.observations[:-1].view(-1, *obs_shape)),
                Variable(rollouts.states[0].view(-1, actor_critic.state_size)),
                Variable(rollouts.masks[:-1].view(-1, 1)),
                Variable(rollouts.actions.view(-1, action_shape)))

            values = values.view(args.num_steps, args.num_processes, 1)
            action_log_probs = action_log_probs.view(args.num_steps,
                                                     args.num_processes, 1)

            advantages = Variable(rollouts.returns[:-1]) - values
            value_loss = advantages.pow(2).mean()

            action_loss = -(Variable(advantages.data) *
                            action_log_probs).mean()

            if args.algo == 'acktr' and optimizer.steps % optimizer.Ts == 0:
                # Sampled fisher, see Martens 2014
                actor_critic.zero_grad()
                pg_fisher_loss = -action_log_probs.mean()

                value_noise = Variable(torch.randn(values.size()))
                if args.cuda:
                    value_noise = value_noise.cuda()

                sample_values = values + value_noise
                vf_fisher_loss = -(values -
                                   Variable(sample_values.data)).pow(2).mean()

                fisher_loss = pg_fisher_loss + vf_fisher_loss
                optimizer.acc_stats = True
                fisher_loss.backward(retain_graph=True)
                optimizer.acc_stats = False

            optimizer.zero_grad()
            (value_loss * args.value_loss_coef + action_loss -
             dist_entropy * args.entropy_coef).backward()

            if args.algo == 'a2c':
                nn.utils.clip_grad_norm(actor_critic.parameters(),
                                        args.max_grad_norm)

            optimizer.step()
        elif args.algo == 'ppo':
            advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
            advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                             1e-5)

            for e in range(args.ppo_epoch):
                if args.recurrent_policy:
                    data_generator = rollouts.recurrent_generator(
                        advantages, args.num_mini_batch)
                else:
                    data_generator = rollouts.feed_forward_generator(
                        advantages, args.num_mini_batch)

                for sample in data_generator:
                    observations_batch, states_batch, actions_batch, \
                        return_batch, masks_batch, old_action_log_probs_batch, \
                        adv_targ = sample

                    # Reshape to do in a single forward pass for all steps
                    values, action_log_probs, dist_entropy, states = actor_critic.evaluate_actions(
                        Variable(observations_batch), Variable(states_batch),
                        Variable(masks_batch), Variable(actions_batch))

                    adv_targ = Variable(adv_targ)
                    ratio = torch.exp(action_log_probs -
                                      Variable(old_action_log_probs_batch))
                    surr1 = ratio * adv_targ
                    surr2 = torch.clamp(ratio, 1.0 - args.clip_param,
                                        1.0 + args.clip_param) * adv_targ
                    action_loss = -torch.min(
                        surr1,
                        surr2).mean()  # PPO's pessimistic surrogate (L^CLIP)

                    value_loss = (Variable(return_batch) -
                                  values).pow(2).mean()

                    optimizer.zero_grad()
                    (value_loss + action_loss -
                     dist_entropy * args.entropy_coef).backward()
                    nn.utils.clip_grad_norm(actor_critic.parameters(),
                                            args.max_grad_norm)
                    optimizer.step()

        rollouts.after_update()

        if j % args.save_interval == 0 and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            # A really ugly way to save a model to CPU
            save_model = actor_critic
            if args.cuda:
                save_model = copy.deepcopy(actor_critic).cpu()
            torch.save(save_model,
                       os.path.join(save_path, args.env_name + ".pt"))

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        final_rewards.mean(), final_rewards.median(),
                        final_rewards.min(), final_rewards.max(),
                        dist_entropy.data[0], value_loss.data[0],
                        action_loss.data[0]))
        if args.vis and j % args.vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win = visdom_plot(viz, win, args.log_dir, args.env_name,
                                  args.algo)
            except IOError:
                pass
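The clipped surrogate in the PPO branch above is the heart of the algorithm. As a compact, standalone restatement (a sketch under the same definitions as the loop above, not a drop-in replacement for this script):

import torch

def ppo_clip_loss(new_log_probs, old_log_probs, advantages, clip_param=0.2):
    """PPO's pessimistic surrogate L^CLIP: take the worse (smaller) of the
    unclipped and clipped objectives, then negate for gradient descent."""
    ratio = torch.exp(new_log_probs - old_log_probs)  # pi_new / pi_old
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantages
    return -torch.min(surr1, surr2).mean()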
Example no. 23
0
GPU_ID = None
epochs = 40
batch_size = 32
start_epoch = 1
save_snapshot_interval_epoch = 1
peek_interval_epoch = 1
save_train_hr_interval_epoch = 1
loss_average_win_size = 2
validate_interval_epoch = 1
validate_batch_size = 4
plot_loss_start_epoch = 1
only_validate = False

from visdom import Visdom
vis = Visdom(server='http://127.0.0.1', port=8097)

# =================== config for model and dataset =====================================================================
from squid.data import Photo2PhotoData
from squid.data import RandomCropPhoto2PhotoData
from squid.model import SuperviseModel
import torch
import torch.nn as nn
from squid.loss import VGGLoss
from squid.net import Unet_Residual_Net

target_net = Unet_Residual_Net()
target_net = nn.DataParallel(target_net).cuda()

model = SuperviseModel({
    'net':
Example no. 24
0
import argparse
import time
from sys import platform as _platform
from six.moves import urllib

from visdom import Visdom

DEFAULT_PORT = 8097
DEFAULT_HOSTNAME = "http://localhost"
parser = argparse.ArgumentParser(description='Demo arguments')
parser.add_argument('-port', metavar='port', type=int, default=DEFAULT_PORT,
                    help='port the visdom server is running on.')
parser.add_argument('-server', metavar='server', type=str,
                    default=DEFAULT_HOSTNAME,
                    help='Server address of the target to run the demo on.')
FLAGS = parser.parse_args()

try:
    viz = Visdom(port=FLAGS.port, server=FLAGS.server)

    #    assert viz.check_connection(timeout_seconds=3), \
    #        'No connection could be formed quickly'

    textwindow = viz.text('Hello World!')

    updatetextwindow = viz.text('Hello World! More text should be here')
    #    assert updatetextwindow is not None, 'Window was none'
    viz.text('And here it is', win=updatetextwindow, append=True)

    # text window with Callbacks
    txt = 'This is a write demo notepad. Type below. Delete clears text:<br>'
    callback_text_window = viz.text(txt)

    def type_callback(event):
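        # NOTE: the snippet is cut off at this point in the source. What
        # follows is a hedged reconstruction of the Visdom text-demo callback
        # (field names such as 'event_type', 'key', and 'pane_data' follow
        # Visdom's event protocol; treat the details as illustrative).
        if event['event_type'] == 'KeyPress':
            curr_txt = event['pane_data']['content']
            if event['key'] == 'Enter':
                curr_txt += '<br>'
            elif event['key'] == 'Backspace':
                curr_txt = curr_txt[:-1]
            elif event['key'] == 'Delete':
                curr_txt = txt  # reset to the initial prompt text
            elif len(event['key']) == 1:
                curr_txt += event['key']
            viz.text(curr_txt, win=callback_text_window)

    viz.register_event_handler(type_callback, callback_text_window)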
Example no. 25
0
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val iou: {:.4f}'.format(best_iou))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


if __name__ == "__main__":
    device = torch.device('cuda')
    ip = int(os.uname()[1][2:])
    current = os.getcwd()
    save_path = os.path.join(current, "result/ip_" + str(ip))
    os.makedirs(save_path, exist_ok=True)
    viz = Visdom(port=8097, server="http://localhost")
    #data_dir="../../../data/double_input/"
    input_dir = "../../../data/seg_train_images/"
    label_dir = "../../../data/seg_train_annotations/"
    per_train = 0.9
    batch_size = 2
    #num_sample=2242
    data_transforms = {
        'train':
        transforms.Compose([
            my_transform.Clahe(clipLimit=4, tileGridSize=(8, 8))
            #transforms.Resize((32,32),interpolation=Image.BILINEAR),
            #transforms.RandomVerticalFlip(),
            #transforms.RandomHorizontalFlip(),
            #transforms.ToTensor()
            #transforms.Normalize([0.5, 0.5], [0.5, 0.5])
Example no. 26
0
# https://github.com/GunhoChoi/PyTorch-FastCampus
#
# Fitting a cubic equation with a plain backpropagation network
# Runtime: about 5.34 s on a GTX 1060 OC 6GB
#

import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from visdom import Visdom

start_time = time.time()
Visdom().delete_env('main')
viz = Visdom()

print("\n1. 데이터 생성")
# y= x^3-3x^2-9x-1 꼴 3차 방정식의 그래프를 만드는데, 노이즈를 조금 준다.
num_data = 1000
num_epoch = 5000

noise = init.normal_(torch.FloatTensor(num_data, 1), std=0.5)
x = init.uniform_(torch.FloatTensor(num_data, 1), -10, 10)
y = (x**3) - 3*(x**2) - 9*x - 1
y_noise = y + noise

input_data = torch.cat([x, y_noise], dim=1)
print(input_data)
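The example is cut off before the network is built. A minimal sketch of how the regression might continue, assuming a small fully-connected net trained with MSE (the layer sizes and plotting cadence are illustrative, not the original repository's code):

model = nn.Sequential(
    nn.Linear(1, 20), nn.ReLU(),
    nn.Linear(20, 10), nn.ReLU(),
    nn.Linear(10, 1),
)
loss_func = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

loss_win = viz.line(X=np.array([0]), Y=np.array([0]),
                    opts=dict(title='training loss'))
for epoch in range(num_epoch):
    optimizer.zero_grad()
    loss = loss_func(model(x), y_noise)  # fit the noisy cubic from x
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        viz.line(X=np.array([epoch]), Y=np.array([loss.item()]),
                 win=loss_win, update='append')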
Example no. 27
0
def __init__(self):
    self.vis = Visdom()
    self.connected = self.vis.check_connection()
    self.plots = {}
Example no. 28
0
def __init__(self, env_name='main'):
    self.viz = Visdom()
    self.env = env_name
    self.plots = {}
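Both constructors keep a `self.plots` dict, the usual pattern for a small plotter wrapper: a `plot` method creates a window the first time a variable is logged and appends to the matching legend entry afterwards. A hedged sketch of such a method for the second class (method and argument names are illustrative):

import numpy as np

def plot(self, var_name, split_name, title, x, y):
    """Create the window on first use, then stream (x, y) into it."""
    if var_name not in self.plots:
        self.plots[var_name] = self.viz.line(
            X=np.array([x]), Y=np.array([y]), env=self.env,
            opts=dict(title=title, legend=[split_name],
                      xlabel='iteration', ylabel=var_name))
    else:
        self.viz.line(X=np.array([x]), Y=np.array([y]), env=self.env,
                      win=self.plots[var_name], name=split_name,
                      update='append')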
Example no. 29
0
# -*- coding: utf-8 -*-
__author__ = 'S.I. Mimilakis'
__copyright__ = 'MacSeNet'

# imports
from visdom import Visdom
import numpy as np
import matplotlib.pyplot as plt

viz = Visdom()


def init_visdom(title_a='Main Loss', title_b='Auxiliary Loss'):
    window = viz.line(X=np.arange(0, 1),
                      Y=np.reshape(0, 1),
                      opts=dict(title=title_a))
    windowb = viz.line(X=np.arange(0, 1),
                       Y=np.reshape(0, 1),
                       opts=dict(title=title_b))
    return window, windowb


def plot_grad_flow(named_parameters, id):
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        if p.requires_grad and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())
    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
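    # The source is cut off here; this helper usually finishes by labelling
    # the axes and pushing the matplotlib figure to Visdom (hedged sketch,
    # the window name is illustrative):
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation='vertical')
    plt.xlabel('Layers')
    plt.ylabel('average gradient')
    plt.title('Gradient flow')
    viz.matplot(plt, win='grad_flow')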
Example no. 30
0
global get_and_decompress_dataframe, compress_and_transfer_dataframe, dict_to_obj

# -------------------------------------------------------------
# Check if the Python task is enabled or not
check_task_is_enabled()

# -------------------------------------------------------------
# Get Visdom endpoint
visdom_endpoint = variables.get("ENDPOINT_VISDOM") if variables.get(
    "ENDPOINT_VISDOM") else str(results[0])
print("ENDPOINT_VISDOM: ", visdom_endpoint)
assert visdom_endpoint is not None
visdom_endpoint = visdom_endpoint.replace("http://", "")
(VISDOM_HOST, VISDOM_PORT) = visdom_endpoint.split(":")
print("Connecting to %s:%s" % (VISDOM_HOST, VISDOM_PORT))
vis = Visdom(server="http://" + VISDOM_HOST, port=int(VISDOM_PORT))
assert vis.check_connection()

input_variables = {
    'task.dataframe_id': None,
    'task.dataframe_id_test': None,
    'task.algorithm_json': None,
    'task.label_column': None,
}
get_input_variables(input_variables)

dataframe_id = None
if input_variables['task.dataframe_id'] is not None:
    dataframe_id = input_variables['task.dataframe_id']
if input_variables['task.dataframe_id_test'] is not None:
    dataframe_id = input_variables['task.dataframe_id_test']