Example #1
def train():
    policy = build_policy()
    writer = SummaryWriter(writer_path)
    mean, std = [], []
    for i_eps in range(episodes):
        rewards = sample(env, policy, max_step, rand_theta=True)

        reward_mean = np.mean(rewards)
        reward_std = np.std(rewards)
        mean.append(reward_mean)
        std.append(reward_std)

        #==============learn==============
        pg_loss, q_loss, a_loss = policy.learn()
        if PLOT:
            plot(mean, POLT_NAME, model_save_dir, 50)
        if WRITER:
            writer.add_scalar('reward', reward_mean, global_step=i_eps)
            writer.add_scalar('loss/pg_loss', pg_loss, global_step=i_eps)
            writer.add_scalar('loss/q_loss', q_loss, global_step=i_eps)
            writer.add_scalar('loss/alpha_loss', a_loss, global_step=i_eps)
        if i_eps % 1 == 0:
            print (f'EPS:{i_eps}, reward:{reward_mean:.3f}, pg_loss:{pg_loss:.3f}, q_loss:{q_loss:.3f}, alpha_loss:{a_loss:.3f}', end='\r')
        if i_eps % 50 == 0 and i_eps > 0:
            policy.save_model(model_save_dir, save_file, save_actor=True, save_critic=True)
            print(f'save model at {i_eps}')
    writer.close()
    return mean, std
Example #2
def main():
    if not TRAIN:
        policy.load_model(model_save_dir, save_file, load_actor=True)
    live_time = []

    # while policy.warm_up():
    #     sample(env, policy, max_step)
    #     print (f'Warm up for buffer {policy.buffer.size()}', end='\r')

    for i_eps in range(episodes):
        reward_avg = sample(env, policy, max_step)
        if not TRAIN:
            print (f'EPS:{i_eps + 1}, reward:{round(reward_avg, 3)}')
        else:
            #==============learn==============
            pg_loss, v_loss = policy.learn()
            if PLOT:
                live_time.append(reward_avg)
                plot(live_time, 'DDPG_'+env_name, model_save_dir, 100)
            if WRITER:
                writer.add_scalar('reward', reward_avg, global_step=i_eps)
                writer.add_scalar('loss/pg_loss', pg_loss, global_step=i_eps)
                writer.add_scalar('loss/v_loss', v_loss, global_step=i_eps)
            if i_eps % 5 == 0:
                print (f'EPS:{i_eps}, reward_avg:{round(reward_avg, 3)}, pg_loss:{round(pg_loss, 3)}, v_loss:{round(v_loss, 3)}')
            if i_eps % 200 == 0:
                policy.save_model(model_save_dir, save_file, save_actor=True, save_critic=True)
    writer.close()
    env.close()
Example #3
def train_ddpg(env, agent, config):
    episodes, tmax = config.episodes, config.tmax
    tic = time.time()
    means = []
    mins = []
    maxes = []
    stds = []
    mean_steps = []
    steps = []
    scores_window = deque(maxlen=100)
    for e in range(1, episodes):
        agent.reset_episode()
        episode_scores = []
        obs = env.reset()
        for t in range(tmax):
            actions = agent.act(obs.reshape(-1))
            next_obs, rewards, dones = env.step(actions.reshape(2, -1))
            # Step agent with reshaped observations
            agent.step(obs.reshape(-1), actions.reshape(-1), np.max(rewards),
                       next_obs.reshape(-1), np.max(dones))
            # Score tracking
            episode_scores.append(np.max(rewards))
            obs = next_obs
            if dones.any():
                steps.append(int(t))
                break

        scores_window.append(np.sum(episode_scores))
        means.append(np.mean(scores_window))
        mins.append(min(scores_window))
        maxes.append(max(scores_window))
        mean_steps.append(np.mean(steps))
        stds.append(np.std(scores_window))
        if e % 50 == 0:
            toc = time.time()
            r_mean = np.mean(scores_window)
            r_max = max(scores_window)
            r_min = min(scores_window)
            r_std = np.std(scores_window)
            plot(means,
                 maxes,
                 mins,
                 mean_steps,
                 num_agents=2,
                 name=config.name,
                 game='Tennis')
            print(
                "\rEpisode: {} out of {}, Steps {}, Mean steps {:.2f}, Noise {:.2f}, Rewards: mean {:.2f}, min {:.2f}, max {:.2f}, std {:.2f}, Elapsed {:.2f}"
                .format(e, episodes, np.sum(steps), np.mean(steps),
                        agent.noise_scale, r_mean, r_min, r_max, r_std,
                        (toc - tic) / 60))
        if np.mean(scores_window) > config.winning_condition:
            print('Env solved!')
            # save scores
            pickle.dump([means, maxes, mins, mean_steps],
                        open(str(config.name) + '_scores.p', 'wb'))
            # save policy
            agent.save_weights(config.checkpoint_path)
            break
    env.close()
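For context, a minimal sketch of how train_ddpg might be driven; the config fields here are assumptions inferred from the attributes the loop reads (episodes, tmax, name, winning_condition, checkpoint_path), and env/agent construction is left to the surrounding project.

from collections import namedtuple

# Hypothetical config holder; field names mirror the attributes train_ddpg reads above.
DDPGConfig = namedtuple('DDPGConfig',
                        ['episodes', 'tmax', 'name', 'winning_condition', 'checkpoint_path'])
config = DDPGConfig(episodes=2000, tmax=1000, name='ddpg_tennis',
                    winning_condition=0.5, checkpoint_path='checkpoint.pth')
# train_ddpg(env, agent, config)  # env and agent come from the surrounding project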
Example #4
def check_data(d: dict) -> dict:
    try:
        samples = d['RIDE']['SAMPLES']
        powerdata = [x['WATTS'] for x in samples]
        heartrate = [x['HR'] for x in samples]
        # Find spikes/drops
        print(len(heartrate))
        spikes = collect_abnormal(heartrate)

        for spike in spikes:
            print(spike)
            print('Start: {}'.format(format_time(spike[0])))
            print('End: {}'.format(format_time(spike[1])))

        xs = list(range(len(heartrate)))

        # Smooth data
        savitzky = list(
            smooth.savitzky_golay(np.array(heartrate), window_size=21,
                                  order=3))

        plot.plot(xs, heartrate, savitzky)

        # Note: the data is returned unchanged; the smoothed values are only plotted
        return d
    except KeyError:
        return d
Example #5
def train():
    model = namedtuple('model', ['policy_net', 'value_net', 'v_net'])
    actor = ActorModel(state_space, hidden_dim, action_space)
    critic = CriticModel(state_space, hidden_dim, action_space)
    v_net = ValueModel(state_space)
    rl_agent = model(actor, critic, v_net)
    policy = SAC(rl_agent,
                 buffer_size=buffer_size,
                 actor_learn_freq=actor_learn_freq,
                 update_iteration=update_iteration,
                 target_update_freq=target_update_freq,
                 target_update_tau=target_update_tau,
                 batch_size=batch_size,
                 learning_rate=lr)
    writer = SummaryWriter(writer_path)

    if not TRAIN:
        policy.load_model(model_save_dir, save_file, load_actor=True)
    mean, std = [], []
    live_time = []

    # while policy.warm_up():
    #     sample(env, policy, max_step, warm_up=True)
    #     print (f'Warm up for buffer {policy.buffer.size()}', end='\r')

    for i_eps in range(episodes):
        rewards = sample(env, policy, max_step)
        reward_mean = np.mean(rewards)
        reward_std = np.std(rewards)

        mean.append(reward_mean)
        std.append(reward_std)
        if not TRAIN:
            print(f'EPS:{i_eps + 1}, reward:{round(reward_mean, 3)}')
        else:
            #==============learn==============
            pg_loss, q_loss, v_loss = policy.learn()
            if PLOT:
                live_time.append(reward_mean)
                plot(live_time, POLT_NAME, model_save_dir, 100)
            if WRITER:
                writer.add_scalar('reward', reward_mean, global_step=i_eps)
                writer.add_scalar('loss/pg_loss', pg_loss, global_step=i_eps)
                writer.add_scalar('loss/q_loss', q_loss, global_step=i_eps)
                writer.add_scalar('loss/v_loss', v_loss, global_step=i_eps)

            if i_eps % 5 == 0:
                print(
                    f'EPS:{i_eps}, reward_mean:{round(reward_mean, 3)}, pg_loss:{round(pg_loss, 3)}, q_loss:{round(q_loss, 3)}, v_loss:{round(v_loss, 3)}'
                )
            if i_eps % 200 == 0:
                policy.save_model(model_save_dir,
                                  save_file,
                                  save_actor=True,
                                  save_critic=True)
    writer.close()
    env.close()
    return mean, std
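As a small usage sketch (assuming matplotlib is available and that the module-level names train() relies on are defined), the returned per-episode statistics can be plotted as a mean curve with a one-standard-deviation band:

import numpy as np
import matplotlib.pyplot as plt

mean, std = train()
episode_axis = np.arange(len(mean))
plt.plot(episode_axis, mean, label='mean episode reward')
plt.fill_between(episode_axis,
                 np.array(mean) - np.array(std),
                 np.array(mean) + np.array(std),
                 alpha=0.3, label='+/- 1 std')
plt.xlabel('episode')
plt.ylabel('reward')
plt.legend()
plt.show()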
Example #6
def train():
    mean, std = [], []
    if not TRAIN:
        policy.load_model(model_save_dir, save_file, load_actor=True)
    live_time = []

    # while policy.warm_up():
    #     sample(env, policy, max_step)
    #     print (f'Warm up for buffer {policy.buffer.size()}', end='\r')

    for i_eps in range(episodes):
        rewards = sample(env, policy, max_step)
        reward_mean = np.mean(rewards)
        reward_std = np.std(rewards)

        mean.append(reward_mean)
        std.append(reward_std)
        if not TRAIN:
            print(f'EPS:{i_eps + 1}, reward:{round(reward_mean, 3)}')
        else:
            #==============learn==============
            pg_loss, q_loss, a_loss = policy.learn()
            if PLOT:
                live_time.append(reward_mean)
                plot(live_time, POLT_NAME, model_save_dir, 100)
            if WRITER:
                writer.add_scalar('reward', reward_mean, global_step=i_eps)
                writer.add_scalar('loss/pg_loss', pg_loss, global_step=i_eps)
                writer.add_scalar('loss/q_loss', q_loss, global_step=i_eps)
                writer.add_scalar('loss/alpha_loss', a_loss, global_step=i_eps)

            if i_eps % 5 == 0:
                print(
                    f'EPS:{i_eps}, reward_mean:{round(reward_mean, 3)}, pg_loss:{round(pg_loss, 3)}, q_loss:{round(q_loss, 3)}, alpha_loss:{round(a_loss, 3)}'
                )
            if i_eps % 200 == 0:
                policy.save_model(model_save_dir,
                                  save_file,
                                  save_actor=True,
                                  save_critic=True)
    writer.close()
    env.close()
    return mean, std
Example #7
def create_pdm(shapes):
    '''
    Create a new point distribution model based on landmark data.

    Step 1: Generalised Procrustes Analysis on the landmark data
    Step 2: Principal Component Analysis on the GPA-aligned landmark data
            This step returns a set of eigenvectors from which we
            construct deviations from the mean shape.
    Step 3: Create a deformable model from the processed data

    In: array of landmark shapes
    Out: PointDistributionModel instance created from the preprocessed data.
    '''
    # perform gpa
    mean, aligned = gpa(np.asarray(shapes))
    plot('gpa', mean, aligned)

    # perform PCA
    eigenvalues, eigenvectors, m = pca(aligned, mean=mean, max_variance=0.99)
    plot('eigenvectors', mean, eigenvectors)

    # create PointDistributionModel instance
    model = PointDistributionModel(eigenvalues, eigenvectors, mean)
    plot('deformablemodel', model)

    return model
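A hedged usage sketch: the loader below is hypothetical, but it shows the kind of input create_pdm expects (one landmark array per training shape).

import numpy as np

# Hypothetical loader: each file is assumed to hold one flattened landmark vector.
def load_landmarks(paths):
    return [np.loadtxt(p) for p in paths]

# shapes = load_landmarks(['landmarks/shape1.txt', 'landmarks/shape2.txt'])
# model = create_pdm(shapes)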
Example #9
    def _rollout(self, seed, n_iter, L, mask, render=False, plot_ret=True):
        mask = np.array(mask)
        for i in range(0, n_iter):
            ep_r = 0.0
            observes, actions, rewards = [], [], []
            done = False
            step = 0

            choose_idx = np.random.choice(np.where(mask == 1)[0])
            env = self.envs[choose_idx]

            obs = env.reset(rseed=seed)  # obs here are unscaled
            while not done:
                if render:
                    env.render()
                obs = obs.astype(np.float32).reshape((1, -1))
                obs = np.append(obs, [[0.001 * step]], axis=1)
                observes.append(obs)

                action = self.policy.act(obs)
                obs, reward, done, _ = env.step(np.squeeze(action, axis=0))
                if not isinstance(reward, float):
                    reward = np.asscalar(reward)
                reward = reward / self.rew_scale[choose_idx]
                ep_r += reward

                actions.append(action)
                rewards.append(reward)

                if done:
                    break
                step += 1

            if plot_ret:
                plot.plot('ep_r_L{}'.format(L), ep_r)
                plot.tick('ep_r_L{}'.format(L))
                plot.flush(self.log_path, verbose=False)
                print('average episode return={}'.format(ep_r))
            yield np.array(observes), np.array(actions), np.array(rewards)
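Because _rollout is a generator, a caller would iterate over it; a hedged sketch (the consumer names are assumptions):

# for obs_batch, act_batch, rew_batch in agent._rollout(seed=0, n_iter=10, L=1, mask=[1, 0, 1]):
#     trajectories.append((obs_batch, act_batch, rew_batch))  # hypothetical consumer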
Example #10
def pdm(r):

    images, landmarks, landmarks_per_image = r
    # perform GPA
    mean, aligned = gpa(np.asarray(landmarks))
    plot('gpa', mean, aligned)

    # perform PCA
    eigenvalues, eigenvectors, m = pca(aligned)
    plot('eigenvectors', m, eigenvectors)

    # create PointDistributionModel instance
    model = PointDistributionModel(eigenvalues, eigenvectors, m)
    plot('deformablemodel', model)

    return model
Example #11
todosAtributos = False

if problem == 'Iris':
    dm = DataManipulation(iris_path, 0)
    if todosAtributos:
        data = dm.getData()  # Iris dataset with all attributes
    else:
        data = [[p[2:]]
                for p in dm.getData()]  # Iris dataset with only 2 attributes
else:
    dm = DataManipulation(art_path, 1)
    data = dm.getData()  # Artificial dataset

p = Perceptron(data, fn)
r = p.execution(realizacoes)
# Plot
bestAcc = r[0]
bestAccData = r[1]
bestW = r[2]
print()
print('### Plot information ###')
print('Best accuracy: ', bestAcc)
print('Best weight vector w: \n', bestW)
print('#############################')

if (problem == 'Iris' or problem == 'Artificial') and todosAtributos == False:
    plotData.plotPatterns(bestAccData, problem)
    plotData.plot(bestAccData, bestW[0])
    plotData.plot(bestAccData, bestW[1])
    plotData.plot(bestAccData, bestW[2]).show()
Example #12
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    device = torch.device('cuda' if cfg.use_gpu else 'cpu')

    if cfg.use_gpu and not torch.cuda.is_available():
        sys.exit('Error: CUDA requested but not available')

    os.makedirs(cfg.checkpoint_dir, exist_ok=True)

    assert cfg.model.num_classes == len(cfg.data.classes)
    num_classes = cfg.model.num_classes

    model_cfg = cfg.model.copy()
    model_name = model_cfg.pop('name')
    net = getattr(models, model_name)
    net = net(**model_cfg)
    net = DataParallel(net)
    net = net.to(device)

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    try:
        weight = torch.Tensor(cfg.data.weights)
    except KeyError:
        if cfg.loss in ('CrossEntropy', 'mIoU', 'Focal'):
            sys.exit(
                'Error: the selected loss function requires dataset weight values')

    optimizer_cfg = cfg.optimizer.copy()
    optimizer_name = optimizer_cfg.pop('name')
    if optimizer_name == 'Adam':
        optimizer_cfg.pop('momentum')
    optimizer = getattr(optim, optimizer_name)
    optimizer = optimizer(net.parameters(), **optimizer_cfg)

    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, **cfg.scheduler)

    resume = 0
    if cfg.checkpoint:

        def map_location(storage, _):
            return storage.cuda() if cfg.use_gpu else storage.cpu()

        # https://github.com/pytorch/pytorch/issues/7178
        chkpt = torch.load(cfg.checkpoint, map_location=map_location)
        net.load_state_dict(chkpt['state_dict'])

        if cfg.resume:
            optimizer.load_state_dict(chkpt['optimizer'])
            resume = chkpt['epoch']

    if cfg.loss == 'CrossEntropy':
        criterion = CrossEntropyLoss2d(weight=weight).to(device)
    elif cfg.loss == 'mIoU':
        criterion = mIoULoss2d(weight=weight).to(device)
    elif cfg.loss == 'Focal':
        criterion = FocalLoss2d(weight=weight).to(device)
    elif cfg.loss == 'Lovasz':
        criterion = LovaszLoss2d().to(device)
    elif cfg.loss == 'Dice':
        criterion = DiceLoss().to(device)
    elif cfg.loss == 'Mix':
        criterion = MixedLovaszCrossEntropyLoss(weight=weight).to(device)
    else:
        sys.exit('Error: unknown cfg.loss value!')

    train_loader, val_loader = get_dataset_loaders(cfg)

    num_epochs = cfg.num_epochs
    if resume >= num_epochs:
        sys.exit('Error: Epoch {} already reached by the checkpoint provided'.
                 format(num_epochs))

    history = collections.defaultdict(list)
    log = Log(os.path.join(cfg.checkpoint_dir, 'log'))

    log.log('--- Hyper Parameters on this training: ---')
    log.log('Model:\t\t {}'.format(model_name))
    log.log('Backbone:\t {}'.format(cfg.model.backbone_name))
    log.log('Pretrained:\t {}'.format(cfg.model.pretrained))
    log.log('Loss function:\t {}'.format(cfg.loss))
    log.log('Batch Size:\t {}'.format(cfg.batch_size))
    log.log('optimizer:\t {}'.format(optimizer_name))
    log.log('Learning Rate:\t {}'.format(cfg.optimizer.lr))
    log.log('Momentum:\t {}'.format(cfg.optimizer.momentum))
    log.log('Weight Decay:\t {}'.format(cfg.optimizer.weight_decay))
    log.log('Step size:\t {}'.format(cfg.scheduler.step_size))
    log.log('Gamma:\t\t {}'.format(cfg.scheduler.gamma))
    log.log('Image Size:\t {}'.format(cfg.data.train.crop_size))
    log.log('Resize Scale:\t {}'.format(cfg.data.train.resize_scale))
    log.log('Flip Probability:\t {}'.format(cfg.data.train.flip_prob))
    log.log('Rotation Probability:\t {}'.format(cfg.data.train.rotation_prob))
    log.log('Rotation Degree:\t {}'.format(cfg.data.train.rotation_degree))
    log.log('Rotate Degree:\t {}'.format(cfg.data.train.rotate_degree))

    if 'weight' in locals():
        log.log('Weights:\t {}'.format(cfg.data.weights))
    log.log('------------------------------------------')

    for epoch in range(resume, num_epochs):
        log.log('Epoch: {}/{}'.format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer,
                           criterion, exp_lr_scheduler)
        log.log(
            'Train    loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}'.
            format(
                train_hist['loss'],
                train_hist['miou'],
                cfg.data.classes[1],
                train_hist['fg_iou'],
                train_hist['mcc'],
            ))

        for k, v in train_hist.items():
            history['train ' + k].append(v)

        val_hist = validate(val_loader, num_classes, device, net, criterion)
        log.log(
            'Validate loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}'.
            format(val_hist['loss'], val_hist['miou'], cfg.data.classes[1],
                   val_hist['fg_iou'], val_hist['mcc']))

        for k, v in val_hist.items():
            history['val ' + k].append(v)

        visual = 'history-{:05d}-of-{:05d}.png'.format(epoch + 1, num_epochs)
        plot(os.path.join(cfg.checkpoint_dir, visual), history)

        checkpoint = 'checkpoint-{:05d}-of-{:05d}.pth'.format(
            epoch + 1, num_epochs)

        states = {
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(states, os.path.join(cfg.checkpoint_dir, checkpoint))
Example #13
            model.rollouts.insert(current_obs, actions.view(-1, 1), action_log_prob, values, rewards, masks)

        with torch.no_grad():
            next_value = model.get_values(model.rollouts.observations[-1])

        model.rollouts.compute_returns(next_value, config.GAMMA)

        value_loss, action_loss, dist_entropy = model.update(model.rollouts)

        model.rollouts.after_update()

        if frame_idx % 100 == 0:
            try:
                clear_output()
                end = timer()
                total_num_steps = (frame_idx + 1) * config.num_agents * config.rollout
                print(
                    "Updates {}, Num Timesteps {}, FPS {},\n"
                    "Mean/Median Reward {:.1f}/{:.1f}, Min/Max Reward {:.1f}/{:.1f},\n"
                    "Entropy {:.5f}, Value Loss {:.5f}, Policy Loss {:.5f}".format(
                        frame_idx, total_num_steps, int(total_num_steps / (end - start)),
                        np.mean(final_rewards), np.median(final_rewards), np.min(final_rewards), np.max(final_rewards),
                        dist_entropy, value_loss, action_loss))
                plot(log_dir, "PongNoFrameskip-v4", 'PPO', config.MAX_FRAMES * config.num_agents * config.rollout)

            except IOError:
                pass

    model.save_w()
    envs.close()
Example #14
def main():
    args = get_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    base_dir = osp.expanduser(args.log_dir)
    log_dir = osp.join(base_dir, 'train_log')
    eval_log_dir = osp.join(base_dir, "eval_log")
    tensorboard_dir = osp.join(base_dir, "tensorboard_log")

    utils.cleanup_log_dir(log_dir)
    utils.cleanup_log_dir(eval_log_dir)
    utils.cleanup_log_dir(tensorboard_dir)
    utils.dump_config(args, osp.join(base_dir, 'config.txt'))

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")
    writer = SummaryWriter(tensorboard_dir)

    # limit the number of steps for each episode
    # IMPORTANT: for load balance / spark-sim we automatically do this by setting
    # the number of stream jobs
    if not args.use_proper_time_limits:
        envs = make_vec_envs(env_name=args.env_name,
                             seed=args.seed,
                             num_processes=args.num_processes,
                             log_dir=log_dir,
                             device=device,
                             allow_early_resets=False,
                             args=args)
    else:
        envs = make_vec_envs(env_name=args.env_name,
                             seed=args.seed,
                             num_processes=args.num_processes,
                             log_dir=log_dir,
                             device=device,
                             allow_early_resets=True,
                             max_episode_steps=args.max_episode_steps,
                             args=args)

    # create actor critic
    actor_critic = Policy(envs.observation_space.shape,
                          envs.action_space,
                          base_kwargs={'recurrent': args.recurrent_policy})
    # if the resume directory is provided, then directly load that checkpoint
    if args.resume_dir is not None:
        print("=> Resuming from checkpoint: {}".format(args.resume_dir))
        actor_critic = torch.load(args.resume_dir, map_location='cpu')[0]
    actor_critic.to(device)

    # expert for imitation learning
    if args.use_imitation_learning:
        expert = LeastWorkAgent()
    else:
        expert = None

    if args.algo == 'a2c':
        agent = algorithms.A2C_ACKTR(actor_critic,
                                     args.value_loss_coef,
                                     args.entropy_coef,
                                     lr=args.lr,
                                     eps=args.eps,
                                     alpha=args.alpha,
                                     max_grad_norm=args.max_grad_norm,
                                     expert=expert,
                                     il_coef=args.il_coef)
    elif args.algo == 'ppo':
        agent = algorithms.PPO(actor_critic,
                               args.clip_param,
                               args.ppo_epoch,
                               args.num_mini_batch,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               max_grad_norm=args.max_grad_norm,
                               expert=expert,
                               il_coef=args.il_coef)
    elif args.algo == 'acktr':
        agent = algorithms.A2C_ACKTR(actor_critic,
                                     args.value_loss_coef,
                                     args.entropy_coef,
                                     acktr=True)
    elif args.algo == 'mib_a2c':
        agent = algorithms.MIB_A2C(actor_critic,
                                   args.entropy_coef,
                                   lr=args.lr,
                                   adapt_lr=args.adapt_lr,
                                   num_inner_steps=args.num_inner_steps,
                                   max_grad_norm=args.max_grad_norm,
                                   expert=expert,
                                   il_coef=args.il_coef)
    elif args.algo == 'mib_ppo':
        agent = algorithms.MIB_PPO(actor_critic=actor_critic,
                                   clip_param=args.clip_param,
                                   ppo_epoch=args.ppo_epoch,
                                   num_mini_batch=args.num_mini_batch,
                                   entropy_coef=args.entropy_coef,
                                   lr=args.lr,
                                   adapt_lr=args.adapt_lr,
                                   num_inner_steps=args.num_inner_steps,
                                   max_grad_norm=args.max_grad_norm,
                                   expert=expert,
                                   il_coef=args.il_coef)
    elif args.algo == 'lacie_a2c':
        agent = algorithms.LACIE_A2C(actor_critic=actor_critic,
                                     value_coef=args.value_loss_coef,
                                     entropy_coef=args.entropy_coef,
                                     regularize_coef=args.regularize_coef,
                                     lr=args.lr,
                                     eps=args.eps,
                                     alpha=args.alpha,
                                     max_grad_norm=args.max_grad_norm,
                                     expert=expert,
                                     il_coef=args.il_coef,
                                     num_cpc_steps=args.lacie_num_iter,
                                     cpc_lr=args.cpc_lr)
    elif args.algo == 'lacie_a2c_memory':
        lacie_buffer = LacieStorage(args.num_steps,
                                    envs.observation_space.shape,
                                    envs.action_space,
                                    max_size=args.lacie_buffer_size,
                                    batch_size=args.lacie_batch_size,
                                    n_processes=args.num_processes)
        lacie_buffer.to(device)
        agent = algorithms.LACIE_A2C_Memory(
            actor_critic=actor_critic,
            value_coef=args.value_loss_coef,
            entropy_coef=args.entropy_coef,
            regularize_coef=args.regularize_coef,
            lr=args.lr,
            eps=args.eps,
            alpha=args.alpha,
            max_grad_norm=args.max_grad_norm,
            expert=expert,
            il_coef=args.il_coef,
            num_cpc_steps=args.lacie_num_iter,
            lacie_batch_size=args.lacie_batch_size,
            lacie_buffer=lacie_buffer,
            use_memory_to_pred_weights=args.use_memory_to_pred_weights,
            cpc_lr=args.cpc_lr)
    elif args.algo == 'lacie_ppo':
        agent = algorithms.LACIE_PPO(actor_critic,
                                     args.clip_param,
                                     args.ppo_epoch,
                                     args.num_mini_batch,
                                     args.value_loss_coef,
                                     args.entropy_coef,
                                     regularize_coef=args.regularize_coef,
                                     lr=args.lr,
                                     eps=args.eps,
                                     max_grad_norm=args.max_grad_norm,
                                     expert=expert,
                                     il_coef=args.il_coef,
                                     cpc_lr=args.cpc_lr)
    elif args.algo == 'lacie_ppo_memory':
        lacie_buffer = LacieStorage(args.num_steps,
                                    envs.observation_space.shape,
                                    envs.action_space,
                                    max_size=args.lacie_buffer_size,
                                    batch_size=args.lacie_batch_size,
                                    n_processes=args.num_processes)
        lacie_buffer.to(device)
        agent = algorithms.LACIE_PPO_Memory(
            actor_critic,
            args.clip_param,
            args.ppo_epoch,
            args.num_mini_batch,
            args.value_loss_coef,
            args.entropy_coef,
            regularize_coef=args.regularize_coef,
            lr=args.lr,
            eps=args.eps,
            max_grad_norm=args.max_grad_norm,
            expert=expert,
            il_coef=args.il_coef,
            num_cpc_steps=args.lacie_num_iter,
            lacie_batch_size=args.lacie_batch_size,
            lacie_buffer=lacie_buffer,
            use_memory_to_pred_weights=args.use_memory_to_pred_weights,
            cpc_lr=args.cpc_lr)
    else:
        raise ValueError("Not Implemented algorithm...")

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()

    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes

    # the gradient update interval to increase number of stream jobs
    curriculum_interval = int(num_updates / args.num_curriculum_time)

    for j in range(num_updates):
        random_seed = args.seed if args.fix_job_sequence else args.seed + j
        # if using the load_balance environment,
        # we have to gradually increase the number of stream jobs
        # if (args.env_name == 'load_balance') and ((j + 1) % curriculum_interval) == 0:
        #     args.num_stream_jobs = int(
        #         args.num_stream_jobs * args.num_stream_jobs_factor)

        #     # reconstruct environments to increase the number of stream jobs
        #     # also alter the random seed
        #     if not args.use_proper_time_limits:
        #         envs = make_vec_envs(env_name=args.env_name,
        #                              seed=random_seed,
        #                              num_processes=args.num_processes,
        #                              log_dir=log_dir,
        #                              device=device,
        #                              allow_early_resets=False,
        #                              args=args)
        #     else:
        #         envs = make_vec_envs(env_name=args.env_name,
        #                              seed=random_seed,
        #                              num_processes=args.num_processes,
        #                              log_dir=log_dir,
        #                              device=device,
        #                              allow_early_resets=True,
        #                              max_episode_steps=args.max_episode_steps,
        #                              args=args)

        #     print("Increase the number of stream jobs to " +
        #           str(args.num_stream_jobs))
        #     obs = envs.reset()
        #     rollouts.obs[0].copy_(obs)
        #     rollouts.to(device)

        # decrease learning rate linearly
        if args.use_linear_lr_decay:
            cur_lr = utils.update_linear_schedule(
                agent.optimizer, j, num_updates,
                agent.optimizer.lr if args.algo == "acktr" else args.lr)
            if args.algo.startswith('lacie'):
                cur_lr = utils.update_linear_schedule(agent.cpc_optimizer, j,
                                                      num_updates, args.cpc_lr)
        else:
            cur_lr = agent.optimizer.param_groups[0]["lr"]

        # Rolling out, collecting and storing SARS (State, action, reward, new state)
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            # TODO: park env does not support cuda tensor???
            obs, reward, done, infos = envs.step(action.cpu())
            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        results = agent.update(rollouts)

        rollouts.after_update()

        # SAVE trained model
        if (j % args.save_interval == 0
                or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            torch.save([
                actor_critic,
                getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
            ], os.path.join(save_path, args.env_name + ".pt"))

        # LOG TRAINING results
        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            print("=" * 90)
            print("Updates {}, num timesteps {}, FPS {}, LR: {}"
                  "\n=> Last {} training episodes: mean/median reward "
                  "{:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".format(
                      j, total_num_steps, int(total_num_steps / (end - start)),
                      cur_lr, len(episode_rewards), np.mean(episode_rewards),
                      np.median(episode_rewards), np.min(episode_rewards),
                      np.max(episode_rewards)))
            result_str = "=> "
            for k, v in results.items():
                result_str = result_str + "{}: {:.2f} ".format(k, v)
            print(result_str)

            writer.add_scalar("train/reward", np.mean(episode_rewards), j)
            for k, v in results.items():
                writer.add_scalar("train/" + k.replace(' ', '_'), v, j)

            plot(log_dir, 'load-balance', args.algo, args.num_env_steps)

        # EVALUATE performance of learned policy along with heuristic
        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            # alter the random seed
            eval_results = evaluate(actor_critic,
                                    args.env_name,
                                    seed=args.seed,
                                    num_processes=args.num_processes,
                                    eval_log_dir=eval_log_dir,
                                    device=device,
                                    env_args=args)
            writer.add_scalars(
                'eval/reward',
                {k: np.mean(v)
                 for k, v in eval_results.items()}, j)
            # plot(eval_log_dir, 'load-balance', args.algo,
            #     args.num_env_steps)

    writer.close()
Example #15
    parser.add_argument('--render', default=False, type=bool)
    parser.add_argument('--plotstyle', default=None, type=str)
    parser.add_argument('--path', default='', type=str)
    args = parser.parse_args()

    if 'train' in args.do:
        train(dataset_name=args.dataset_name,
              env_name=args.env_name,
              seed_min=args.seed_min,
              seed_nb=args.seed_nb,
              quantiles=args.quantiles,
              path=args.path)
    if 'test' in args.do:
        test(args.env_name,
             dataset_name=args.dataset_name,
             seed_min=args.seed_min,
             seed_nb=args.seed_nb,
             n_trajectories=args.number_trajectories,
             quantiles=args.quantiles,
             render=args.render,
             path=args.path)
    if 'plot' in args.do:
        from utils.plot import plot
        plot(args.env_name,
             args.dataset_name,
             seed_min=args.seed_min,
             seed_nb=args.seed_nb,
             quantiles=args.quantiles,
             plotstyle=args.plotstyle,
             path=args.path)
Example #16
from utils.plot import plot

parser = argparse.ArgumentParser(description=description)

parser.add_argument('-g', '--generations', metavar='N', type=int, help=generations_help)
parser.add_argument('--plot', nargs='+', type=str, help=plot_help, choices=choices_plot)

args = parser.parse_args()

if args.plot : args.plot = list(set(args.plot))
if args.generations is None: args.generations = 1

env = Environement(10,10,
	Blob(1,4,speed=12,size=3,energy=400,sense=1),
	Blob(9,0,speed=12,size=3,energy=400,sense=1, name='001'),
	Blob(8,3,speed=12,size=3,energy=400,sense=1, name='002'),
	Blob(1,4,speed=12,size=3,energy=400,sense=1, name='003'),
	Blob(9,0,speed=12,size=3,energy=400,sense=1, name='004'),
	Blob(8,6,speed=12,size=3,energy=400,sense=1, name='005'),
	Blob(1,4,speed=12,size=3,energy=400,sense=1, name='006'),
	Blob(9,4,speed=12,size=3,energy=400,sense=1, name='007'),
	Blob(8,9,speed=12,size=3,energy=400,sense=1, name='008')
)
print(env.blobs)
env.set_food()
env.draw_board()
env.simulate(args.generations)
env.print_blobs()

plot(env,args.plot)
Example #17
import matplotlib.pyplot as plt
import json
from collections import defaultdict
import numpy as np

from utils.plot import plot
from utils.italy_data import compute_provinces_confirmed_cases

ALIGN_AROUND = 20  # cases

if __name__ == "__main__":
    # Compute the number of cases for each country
    confirmed = compute_provinces_confirmed_cases()

    # Compute maximum number of cases we can align around: min (ALIGN_AROUND, x)
    # Take the second biggest one
    minimums = [sorted(v)[-2] for c, v in confirmed.items()]
    new_align_around = np.minimum(ALIGN_AROUND, np.min(minimums))

    # Compute the index for each country in order to align around the same number of cases
    align_indexes = defaultdict(list)
    for c, v in confirmed.items():
        dist = np.abs(np.array(v) - ALIGN_AROUND)
        align_indexes[c] = np.argmin(dist)

    plot(confirmed, align_indexes, ALIGN_AROUND, "cases")
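A quick worked illustration of the alignment step above (standalone, reusing the module's numpy import and ALIGN_AROUND = 20): the chosen index is the position whose case count is closest to ALIGN_AROUND.

toy_cases = [1, 3, 8, 19, 45, 90]
toy_index = np.argmin(np.abs(np.array(toy_cases) - ALIGN_AROUND))  # -> 3, since 19 is closest to 20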
Example #18
    #json.dump(agent_schedule, open('./results/agent_schedule.json', 'w'))
    print(agent_schedule)
    with open('./results/agent_schedule.json', 'w') as f:
        json.dump(agent_schedule, f)

    runner = ScheduleRunner(schedule=agent_schedule,
                            dag_data=edls.dag.data,
                            speed_setting=processor_speeds,
                            base_powers=edls.base_powers,
                            beta=1.0,
                            agent_system=False)
    runner.start()
    # print(runner.processor_times)
    print(f'Makespan: {runner.max_time}')
    print(f'Total Energy: {runner.task_energy + sum(runner.idle_energy)}')
    plot(runner.processor_times[:3], runner.max_time, 'edls_mod_gannt.png')

    # ------------ FOr PEFT
    """
    agent_schedule = json.load(open('./results/agent_schedule_peft.json'))
    keys = list(agent_schedule.keys())
    for task in keys:
        agent_schedule[int(task)] = agent_schedule[task]
    runner = ScheduleRunner(schedule=agent_schedule,
                            dag_data=edls.dag.data,
                            speed_setting=processor_speeds,
                            base_powers=edls.base_powers,
                            beta=1.0,
                            agent_system=False)
    runner.start()
    # print(runner.processor_times)
    """
Example #19
def main():
    parser = argparse.ArgumentParser(
        description='Eye Picture Classification Training With PyTorch')
    parser.add_argument(
        '--resume_model',
        default='/mnt/data/qyx_data/torch/saveModel/Eye_resnet18_test.pth',
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')

    parser.add_argument('--model',
                        default='resnet18',
                        type=str,
                        help='the train model')

    parser.add_argument('--start_epoch',
                        default=0,
                        type=int,
                        help='the start epoch of training')
    parser.add_argument('--max_epoch',
                        default=1,
                        type=int,
                        help='the epoch to end training')
    parser.add_argument('--log_step', default=20, type=int, help='log_step')
    parser.add_argument('--batchsize', default=128, type=int, help='batchsize')
    parser.add_argument('--pretrained',
                        default=1,
                        type=int,
                        help='if the model is pretrained')
    args = parser.parse_args()
    if not isinstance(args.pretrained, bool):
        if args.pretrained == 1:
            args.pretrained = True
        elif args.pretrained == 0:
            args.pretrained = False
    root = cfg.IMGROOT
    val_root = cfg.VALROOT

    if args.model == 'resnet18':
        model = make_model('resnet18',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'resnet101':
        model = make_model('resnet101',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'vgg16':
        model = make_model('vgg16',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'alexnet':
        model = make_model('alexnet',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'inception_v3':
        model = make_model('inception_v3',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'inceptionresnetv2':
        model = make_model('inceptionresnetv2',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'googlenet':
        model = make_model('googlenet',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
    elif args.model == 'densenet121':
        model = make_model('densenet121',
                           num_classes=cfg.num_classes,
                           pretrained=args.pretrained,
                           input_size=(cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))

    device = t.device("cuda" if t.cuda.is_available() else "cpu")

    logger = logging.getLogger("Eye")
    logger.setLevel(logging.DEBUG)
    fileHandler = logging.FileHandler(
        cfg.LOG + time.asctime(time.localtime(time.time())) + 'Eye_' +
        args.model + '.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)
    logger.info('Train Dataset uploaded!')
    logger.info('device:{}'.format(device))

    train_transform = train_augment(cfg.IMAGE_SIZE)
    train_data = Eye(img_root=root,
                     tag_root='dataset/train.txt',
                     transform=train_transform)
    data_len = train_data.__len__()
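    # Per-class sampling weights: data_len is divided by each class factor below, and
    # every sample then carries its class's weight, so classes with smaller factors
    # are drawn more often by the WeightedRandomSampler (6 * 3000 draws with replacement).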
    weight_prob = [data_len / w for w in [1, 6, 1, 1, 0.4, 0.8]]
    weight_list = [weight_prob[label] for data, label in train_data]
    train_sampler = WeightedRandomSampler(weights=weight_list,
                                          num_samples=6 * 3000,
                                          replacement=True)
    train_dataloader = DataLoader(train_data,
                                  batch_size=args.batchsize,
                                  drop_last=True,
                                  num_workers=8,
                                  sampler=train_sampler)

    val_transform = val_augment(cfg.IMAGE_SIZE)
    val_data = Eye(img_root=root,
                   tag_root='dataset/test.txt',
                   transform=val_transform)
    val_dataloader = DataLoader(val_data,
                                batch_size=args.batchsize,
                                shuffle=False,
                                drop_last=True,
                                num_workers=8)

    plot_util = plot()

    loss_list = []
    train_acc = []
    epoch_list = []
    val_acc = []
    iter_list = []
    val_losslist = []

    criterion = t.nn.CrossEntropyLoss()
    # criterion = FocalLossV1()

    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=cfg.WEIGHT_DECAY)
    model = nn.DataParallel(model)
    model = model.to(device)
    #summary(model,(3,cfg.IMAGE_SIZE,cfg.IMAGE_SIZE),batch_size=cfg.BATCHSIZE)
    # logger.info("Resume from the model {}".format(args.resume_model))
    # model.load_state_dict(t.load(args.resume_model))
    #model.train()

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(cfg.num_classes)
    loss_meter.reset()
    confusion_matrix.reset()

    for epoch in range(args.start_epoch, args.max_epoch):
        model = epoch_train(train_dataloader, val_dataloader, logger, epoch,
                            args, criterion, optimizer, model, device,
                            loss_meter, confusion_matrix, train_acc, loss_list,
                            val_acc, iter_list, val_losslist)
        epoch_list.append(epoch)
        # adjust the learning rate
        if epoch_list[-1] in lr_step:
            step_index = lr_step.index(epoch_list[-1]) + 1
            adjust_learning_rate(optimizer, cfg.GAMMA, step_index)
    #plot
    if os.path.exists('/mnt/data/qyx_data/torch/log/' + args.model + '_' +
                      str(args.pretrained)):
        plot_util.plot_cm_matrix(confusion_matrix,
                                 savepath='/mnt/data/qyx_data/torch/log/' +
                                 args.model + '_' + str(args.pretrained) +
                                 '/cm_matrix.png')
        plot_util.plot_accuracy(savepath='/mnt/data/qyx_data/torch/log/' +
                                args.model + '_' + str(args.pretrained) +
                                '/train_acc.png',
                                epoch=epoch_list,
                                train_accuracy=train_acc,
                                val_accuracy=val_acc)
        plot_util.plot_loss(savepath='/mnt/data/qyx_data/torch/log/' +
                            args.model + '_' + str(args.pretrained) +
                            '/loss.png',
                            iters=iter_list,
                            epoch=epoch_list,
                            loss=loss_list,
                            val_loss=val_losslist,
                            title='loss')
    else:
        os.mkdir('/mnt/data/qyx_data/torch/log/' + args.model + '_' +
                 str(args.pretrained))
        plot_util.plot_cm_matrix(confusion_matrix,
                                 savepath='/mnt/data/qyx_data/torch/log/' +
                                 args.model + '_' + str(args.pretrained) +
                                 '/cm_matrix.png')
        plot_util.plot_accuracy(savepath='/mnt/data/qyx_data/torch/log/' +
                                args.model + '_' + str(args.pretrained) +
                                '/train_acc.png',
                                epoch=epoch_list,
                                train_accuracy=train_acc,
                                val_accuracy=val_acc)
        plot_util.plot_loss(savepath='/mnt/data/qyx_data/torch/log/' +
                            args.model + '_' + str(args.pretrained) +
                            '/train_loss.png',
                            iters=iter_list,
                            epoch=epoch_list,
                            loss=loss_list,
                            val_loss=val_losslist,
                            title='loss')
Example #20

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        for iteration in xrange(TOTAL_ITER_NUM):
            # _, lossV, _trainY, _predict = sess.run([discOptimizer, loss, trainY, predict], feed_dict = {
            #     train_status: True
            #     })
            _, lossV, _trainY, _predict = sess.run([discOptimizer, loss, batch_label, predict])
            _label = np.argmax(_trainY, axis=1)
            _accuracy = np.mean(_label == _predict)
            plot.plot('train cross entropy', lossV)
            plot.plot('train accuracy', _accuracy)

            if iteration % 50 == 49:
                dev_accuracy = []
                dev_cross_entropy = []
                for eval_idx in xrange(EVAL_ITER_NUM):
                    # eval_loss_v, _trainY, _predict = sess.run([loss, trainY, predict], feed_dict={train_status: False})
                    eval_loss_v, _trainY, _predict = sess.run([loss, batch_eval_label, predict_eval])
                    _label = np.argmax(_trainY, axis=1)
                    _accuracy = np.mean(_label == _predict)
                    dev_accuracy.append(_accuracy)
                    dev_cross_entropy.append(eval_loss_v)
                plot.plot('dev accuracy', np.mean(dev_accuracy))
                plot.plot('dev cross entropy', np.mean(dev_cross_entropy))
Example #21
            # ensure equal usage of fake samples
            entropy1_fake = entropy1(D_fake)
            entropy1_fake.backward(mone)

            G_cost = entropy2_fake + entropy1_fake
            optimizerG.step()


            D_cost = D_cost.cpu().data.numpy()
            G_cost = G_cost.cpu().data.numpy()
            entorpy2_real = entorpy2_real.cpu().data.numpy()
            entorpy2_fake = entorpy2_fake.cpu().data.numpy()

            #monitoring the loss
            plot('errD', D_cost, iter_idx)
            # plot('time', time.time() - start_time, iter_idx)
            plot('errG', G_cost, iter_idx)
            plot('errD_real', entorpy2_real, iter_idx)
            plot('errD_fake', entorpy2_fake, iter_idx)


            # Save plot every  iter
            flush(os.path.join(opt.results_dir, opt.name))

            # Write losses to logs 
            log_writer.writerow([D_cost[0],G_cost[0],entorpy2_real[0],entorpy2_fake[0]])

            print "iter%d[epoch %d]\t %s %.4f \t %s %.4f \t %s %.4f \t %s %.4f" % (iter_idx, epoch,
                                                         'errD', D_cost,
                                                         'errG', G_cost,
Example #22
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='{0} (area = {1:0.2f})'
             ''.format(disease_class[i], roc_auc[i]))
 
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate',fontsize=15)
plt.ylabel('True Positive Rate',fontsize=15)
plt.title('ROC of model',fontsize=15)
plt.legend(loc="lower right")
plt.savefig('/mnt/data/qyx_data/torch/test_model/'+args.model+'_'+str(args.pretrained)+'/ROC.png')

cm_value = confusion_matrix.value()
plot_util = plot()
print(cm_value)
plot_util.plot_cm_matrix(cm_matrix=confusion_matrix,savepath='/mnt/data/qyx_data/torch/test_model/'+args.model+'_'+str(args.pretrained)+'/cm_matrix.png')
correct = 0
sensitive=dict()
specificity=dict()
for i in range(6):
    correct+=cm_value[i][i]
    sensitive[i] = cm_value[i][i]/cm_value.sum(axis=1)[i]
    specificity[i] = (cm_value.sum(axis=1)[i]-cm_value[i][i])/((cm_value.sum(axis=0)[i]-cm_value[i][i])+(cm_value.sum(axis=1)[i]-cm_value[i][i]))
accuracy = 100.*(correct)/(cm_value.sum())
print('accuracy={}'.format(accuracy))
print(sensitive)
print(specificity)

Example #23
def A2C_experiment(env, batch_size, max_frames, log_dir):
    log_dir = log_dir+"A2C/"

    try:
        os.makedirs(log_dir)
    except OSError:
        files = glob.glob(os.path.join(log_dir, '*.monitor.csv'))
        for f in files:
            os.remove(f)

    config = Config()

    # a2c control
    config.num_agents = 16
    config.rollout = 5

    # misc agent variables
    config.GAMMA = 0.99
    config.LR = 7e-4
    config.entropy_loss_weight = 0.01
    config.value_loss_weight = 0.5

    # batch size
    config.BATCH_SIZE = batch_size

    # Number of updates in 10000000 frames
    # config.MAX_FRAMES = int(1e7 / config.num_agents / config.rollout)
    config.MAX_FRAMES = int(max_frames / config.num_agents / config.rollout)
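    # e.g. max_frames=1e7 with num_agents=16 and rollout=5 gives int(1e7 / 16 / 5) = 125000 updates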

    # training loop
    seed = 1

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    torch.set_num_threads(1)
    #monitor = GPUMonitor()
    env_id = env
    envs = [make_env_a2c_atari(env_id, seed, i, log_dir) for i in range(config.num_agents)]
    envs = SubprocVecEnv(envs) if config.num_agents > 1 else DummyVecEnv(envs)

    obs_shape = envs.observation_space.shape
    obs_shape = (obs_shape[0] * 4, *obs_shape[1:])

    model = Model(log_dir, env=envs, config=config)

    current_obs = torch.zeros(config.num_agents, *obs_shape,
                              device=config.device, dtype=torch.float)

    def update_current_obs(obs):
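        # Roll the 4-frame stack: shift the existing frames' channels toward the front
        # (dropping the oldest frame) and write the newest observation into the last slot.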
        shape_dim0 = envs.observation_space.shape[0]
        obs = torch.from_numpy(obs.astype(np.float32)).to(config.device)
        current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
        current_obs[:, -shape_dim0:] = obs

    obs = envs.reset()
    update_current_obs(obs)

    model.rollouts.observations[0].copy_(current_obs)

    episode_rewards = np.zeros(config.num_agents, dtype=np.float64)
    final_rewards = np.zeros(config.num_agents, dtype=np.float64)

    start = timer()
    print_step = 1
    print_threshold = 10
    #os.remove('./log/A2C/logs.csv')
    for frame_idx in range(1, config.MAX_FRAMES + 1):
        for step in range(config.rollout):
            with torch.no_grad():
                values, actions, action_log_prob = model.get_action(model.rollouts.observations[step])
            cpu_actions = actions.view(-1).cpu().numpy()

            obs, reward, done, _ = envs.step(cpu_actions)

            episode_rewards += reward
            masks = 1. - done.astype(np.float32)
            final_rewards *= masks
            final_rewards += (1. - masks) * episode_rewards
            episode_rewards *= masks

            rewards = torch.from_numpy(reward.astype(np.float32)).view(-1, 1).to(config.device)
            masks = torch.from_numpy(masks).to(config.device).view(-1, 1)

            # Zero the frame stacks of environments that just terminated, then append the new frames.
            current_obs *= masks.view(-1, 1, 1, 1)
            update_current_obs(obs)

            model.rollouts.insert(current_obs, actions.view(-1, 1), action_log_prob, values, rewards, masks)

        with torch.no_grad():
            next_value = model.get_values(model.rollouts.observations[-1])

        model.rollouts.compute_returns(next_value, config.GAMMA)

        value_loss, action_loss, dist_entropy = model.update(model.rollouts)

        model.rollouts.after_update()

        if frame_idx % 100 == 0:
            try:
                clear_output()
                end = timer()
                total_num_steps = (frame_idx + 1) * config.num_agents * config.rollout
                #df = pd.DataFrame({'frame': frame_idx, 'timesteps': total_num_steps, 'fps': int(total_num_steps / (end - start)),
                #                   'mean reward': np.mean(final_rewards), 'median reward': np.median(final_rewards),
                #                   'min reward': np.min(final_rewards), 'max rewards': np.max(final_rewards),
                #                   'entropy': dist_entropy, 'value loss': value_loss, 'action loss': action_loss})
                #if not os.path.isfile('./log/A2C/logs.csv'):
                #    df.to_csv('./log/A2C/logs.csv', header='column_names')
                #else:
                #    df.to_csv('./log/A2C/logs.csv', mode='a', header=False)
                #with open("./log/A2C/logs.txt", "a") as myfile:
                #    myfile.write(
                #        "Frame {}, Num Timesteps {}, FPS {},"
                #        "Mean/Median Reward {:.1f}/{:.1f}, Min/Max Reward {:.1f}/{:.1f},"
                #        "Entropy {:.5f}, Value Loss {:.5f}, Policy Loss {:.5f}".
                #     format(frame_idx, total_num_steps,
                #            int(total_num_steps / (end - start)),
                #            np.mean(final_rewards),
                #            np.median(final_rewards),
                #            np.min(final_rewards),
                #            np.max(final_rewards), dist_entropy,
                #            value_loss, action_loss))
                plot(log_dir, env_id, 'A2C',
                     config.MAX_FRAMES * config.num_agents * config.rollout)

                dtime = int(timer() - start)
                plot_gpu(log_dir, env_id, 'A2C', config.MAX_FRAMES * config.num_agents * config.rollout, bin_size=10,
                         smooth=1, time=timedelta(seconds=dtime))
            except IOError:
                pass

    model.save_w()
    envs.close()
    return
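
model.rollouts.compute_returns(next_value, config.GAMMA) is provided by the rollout
storage and is not shown in this snippet. A minimal sketch of the bootstrapped
discounted-return computation it is assumed to perform (the function name, argument
order and tensor shapes below are assumptions, not the original API):

import torch

def compute_returns(rewards, masks, next_value, gamma):
    # rewards, masks: (rollout_len, num_agents, 1); next_value: (num_agents, 1)
    returns = torch.zeros(rewards.size(0) + 1, *rewards.size()[1:])
    returns[-1] = next_value  # bootstrap from the critic's estimate of the last state
    for step in reversed(range(rewards.size(0))):
        # masks[step] == 0 at episode boundaries, so returns never leak across episodes
        returns[step] = rewards[step] + gamma * masks[step] * returns[step + 1]
    return returns[:-1]

With config.rollout = 5, each return is at most a 5-step reward-to-go plus a value
bootstrap, truncated wherever a mask of 0 marks the end of an episode.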
Exemple #24
0
    # print(type(epsilon))
    action = model.get_action(observation, epsilon)
    prev_observation = observation
    observation, reward, done, _ = env.step(action)
    # None marks a terminal transition for the learner (no bootstrapping from a next state).
    observation = None if done else observation
    model.update(prev_observation, action, reward, observation, frame_idx)
    episode_reward += reward

    if done:
        model.finish_nstep()
        observation = env.reset()
        model.save_reward(episode_reward)
        episode_reward = 0
        if np.mean(model.rewards[-10:]) > 20:
            plot(frame_idx, model.rewards, model.losses,
                 model.sigma_parameter_mag,
                 timedelta(seconds=int(timer() - start)))
            save_plot(frame_idx,
                      model.rewards,
                      model.losses,
                      model.sigma_parameter_mag,
                      timedelta(seconds=int(timer() - start)),
                      nstep=model.nsteps,
                      name=agent_name)
            log_content = "达到20提前结束,结束轮次," + str(frame_idx)
            print(log_content)
            with open(log_file, "a", encoding='utf-8') as lf:
                lf.write(log_content + "\n")
                lf.close()
            break
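
# NOTE: model.get_action() is not defined in this fragment. The helper below is a
# hypothetical sketch of the epsilon-greedy selection such a method typically performs;
# q_net and num_actions are assumed names, not part of the original code.
import random
import torch

def epsilon_greedy_action(q_net, observation, epsilon, num_actions):
    # Explore with probability epsilon, otherwise act greedily w.r.t. the Q-network.
    if random.random() < epsilon:
        return random.randrange(num_actions)
    with torch.no_grad():
        obs = torch.as_tensor(observation, dtype=torch.float32).unsqueeze(0)
        return int(q_net(obs).argmax(dim=1).item())
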
def main():
    parser = argparse.ArgumentParser(description='Eye Picture Classification Training With PyTorch')
    parser.add_argument('--resume_model',
                        default='/mnt/data/qyx_data/torch/saveModel/Eye_resnet18_test.pth',
                        type=str,
                        help='Path of the checkpoint state_dict file to load when resuming training')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        help='Resume training from the checkpoint (leave as None to train from scratch)')

    parser.add_argument('--model',default='resnet18',type=str,help='name of the model architecture to train')

    parser.add_argument('--start_epoch', default=0, type=int,help='epoch to start training from')
    parser.add_argument('--max_epoch', default=5, type=int, help='epoch at which training stops')
    args = parser.parse_args()
    


    root=cfg.IMGROOT
    val_root=cfg.VALROOT
    
    if args.model == 'resnet18':
        model = models.resnet18(pretrained=True)
        # Replacing conv1 with an identically-specified layer re-initializes its pretrained weights.
        model.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, 7)
    elif args.model == 'vgg16':
        model = make_model('vgg16_bn',num_classes=7,pretrained=True,input_size = (224,224))
    elif args.model =='alexnet':
        model = make_model('alexnet',num_classes=7,pretrained=True,input_size = (224,224)) 
    elif args.model =='inception_v3':
        model = make_model('inception_v3',num_classes=7,pretrained=True,input_size = (224,224))
    elif args.model =='inceptionresnetv2':
        model = make_model('inceptionresnetv2',num_classes=7,pretrained=True,input_size = (224,224)) 
    elif args.model =='googlenet':
        model = make_model('googlenet',num_classes=7,pretrained=True,input_size = (224,224)) 
    elif args.model =='densenet121':
        model = make_model('densenet121',num_classes=7,pretrained=True,input_size = (224,224))
    else:
        raise ValueError('unsupported model: {}'.format(args.model))
    device = t.device("cuda" if t.cuda.is_available() else "cpu")

    logger = logging.getLogger("Eye")
    logger.setLevel(logging.DEBUG)
    fileHandler = logging.FileHandler('Eye_'+args.model+'.log')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)
    logger.info('Train Dataset uploaded!')
    logger.info('device:{}'.format(device))
    
    plot_util=plot()

    loss_list=[]
    train_acc=[]
    epoch_list=[]
    val_acc=[]

    criterion=t.nn.CrossEntropyLoss()
    #criterion=FocalLossV1()

    optimizer=t.optim.Adam(model.parameters(),lr=lr, weight_decay=cfg.WEIGHT_DECAY)
    model = nn.DataParallel(model)
    model = model.to(device)
    summary(model,(3,cfg.IMAGE_SIZE,cfg.IMAGE_SIZE),batch_size=cfg.BATCHSIZE)
    # logger.info("Resume from the model {}".format(args.resume_model))
    # model.load_state_dict(t.load(args.resume_model))
    model.train()
    # NOTE: this list stores cfg.K references to the same model object, not K independent copies.
    k_model = []
    for i in range(cfg.K):
        k_model.append(model)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    loss_meter.reset()
    confusion_matrix.reset()

    for epoch in range(args.start_epoch,args.max_epoch):
        k_model = k_fold(logger,cfg.K,root,val_root,epoch,args,criterion,optimizer,k_model,device,loss_meter,confusion_matrix,train_acc,loss_list,val_acc)
        epoch_list.append(epoch)
        # adjust the learning rate
        if epoch_list[-1] in lr_step:
            step_index = lr_step.index(epoch_list[-1]) + 1
            adjust_learning_rate(optimizer, cfg.GAMMA, step_index)
    #plot
    plot_util.plot_cm_matrix(confusion_matrix,savepath='/mnt/data/qyx_data/torch/'+args.model+' cm_matrix.png')
    plot_util.plot_accuracy(savepath='/mnt/data/qyx_data/torch/'+args.model+' train_acc.png',epoch=epoch_list,train_accuracy=train_acc,val_accuracy=val_acc)
    plot_util.plot_loss(savepath='/mnt/data/qyx_data/torch/'+args.model+' train_loss.png',iters=epoch_list,loss=loss_list,title='train_loss')
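
adjust_learning_rate is imported from elsewhere and not shown in this snippet. A
plausible sketch of a step-decay schedule consistent with the call
adjust_learning_rate(optimizer, cfg.GAMMA, step_index) above (the base_lr default is
an assumption, not the original code):

def adjust_learning_rate(optimizer, gamma, step_index, base_lr=1e-3):
    # Step decay: lr = base_lr * gamma ** step_index, applied to every parameter group.
    new_lr = base_lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    return new_lr

Combined with the lr_step check in the epoch loop, this shrinks the learning rate by a
factor of cfg.GAMMA each time training reaches one of the scheduled epochs.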