Example #1
 def test_swallowing_exception(self):
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         writer.exceptions_to_ignore = (NotImplementedError, KeyError)
         with summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
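For orientation: summary_writer_context and SummaryWriterContext come from the project under test and are not defined in these snippets. Below is a minimal sketch of the pattern the tests exercise (a stack of writers plus a shared step counter); every name and behavior here is an assumption for illustration, not the real implementation.

from contextlib import contextmanager

class SummaryWriterContext:
    _writers = []      # stack of active writers (assumed)
    _global_step = 0   # shared step counter (assumed)

    @classmethod
    def increase_global_step(cls):
        cls._global_step += 1

    @classmethod
    def add_scalar(cls, tag, value):
        if not cls._writers:
            return  # no active writer: logging is a no-op
        writer = cls._writers[-1]
        try:
            writer.add_scalar(tag, value, global_step=cls._global_step)
        except getattr(writer, "exceptions_to_ignore", ()):
            pass  # configured exception types are swallowed

@contextmanager
def summary_writer_context(writer):
    SummaryWriterContext._writers.append(writer)
    try:
        yield writer
    finally:
        SummaryWriterContext._writers.pop()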
Example #2
class TensorBoardReporting(ReportingHook):
    """Log results to tensorboard.

    Writes tensorboard logs to the directory specified in the tensorboard
    section of `mead-settings`; otherwise it defaults to `runs`.
    """
    def __init__(self, **kwargs):
        super(TensorBoardReporting, self).__init__(**kwargs)
        from tensorboardX import SummaryWriter
        # Base dir is often the dir created to save the model into
        base_dir = kwargs.get('base_dir', '.')
        log_dir = os.path.expanduser(kwargs.get('log_dir', 'runs'))
        if not os.path.isabs(log_dir):
            log_dir = os.path.join(base_dir, log_dir)
        # Run dir is the name of an individual run
        run_dir = kwargs.get('run_dir')
        pid = str(os.getpid())
        run_dir = '{}-{}'.format(run_dir, pid) if run_dir is not None else pid
        log_dir = os.path.join(log_dir, run_dir)
        flush_secs = int(kwargs.get('flush_secs', 2))
        self._log = SummaryWriter(log_dir, flush_secs=flush_secs)

    def step(self, metrics, tick, phase, tick_type=None, **kwargs):
        tick_type = ReportingHook._infer_tick_type(phase, tick_type)
        for metric in metrics.keys():
            name = "{}/{}/{}".format(phase, tick_type, metric)
            self._log.add_scalar(name, metrics[metric], tick)
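A hypothetical usage of the hook above; the constructor kwargs and the metrics dict are illustrative assumptions based on the __init__ and step() signatures, and _infer_tick_type presumably returns the tick_type when one is given.

hook = TensorBoardReporting(base_dir='./model-dir', log_dir='runs', run_dir='exp')
hook.step({'avg_loss': 0.42, 'acc': 0.91}, tick=100, phase='Train', tick_type='STEP')
# scalars land under "Train/STEP/avg_loss" and "Train/STEP/acc" at step 100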
Example #3
    def __init__(self, log_dir, params, up_factor):
        """
        Args:
            log_dir: (str) The path to the folder where the summary files are
                going to be written. The summary object creates `train` and
                `val` subfolders to store the summary files.
            params: (train.utils.Params) The parameters loaded from the
                parameters.json file.
            up_factor: (int) The upscale factor that indicates how much the
                scores maps need to be upscaled to match the original scale
                (used when superposing the embeddings and score maps to the
                input images).

        Attributes:
            writer_train: (tensorboardX.writer.SummaryWriter) The tensorboardX
                writer that writes the training information.
            writer_val: (tensorboardX.writer.SummaryWriter) The tensorboardX
                writer that writes the validation information.
            epoch: (int) Stores the current epoch.
            ref_sz: (int) The size in pixels of the reference image.
            srch_sz: (int) The size in pixels of the search image.
            up_factor: (int) The upscale factor. See Args.

        """
        # We use two different summary writers so we can plot both curves in
        # the same plot, as suggested in https://www.quora.com/How-do-you-plot-training-and-validation-loss-on-the-same-graph-using-TensorFlow%E2%80%99s-TensorBoard
        self.writer_train = SummaryWriter(join(log_dir, 'train'))
        self.writer_val = SummaryWriter(join(log_dir, 'val'))
        self.epoch = None
        self.ref_sz = params.reference_sz
        self.srch_sz = params.search_sz
        self.up_factor = up_factor
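To illustrate the two-writer trick from the comment above: logging the same tag through both writers makes TensorBoard overlay the curves on one chart. A standalone sketch with made-up values:

from os.path import join
from tensorboardX import SummaryWriter

log_dir = 'logs_example'  # hypothetical
writer_train = SummaryWriter(join(log_dir, 'train'))
writer_val = SummaryWriter(join(log_dir, 'val'))
for epoch, (train_loss, val_loss) in enumerate([(0.9, 1.0), (0.6, 0.8)]):
    # same tag via both writers, so TensorBoard overlays the two curves
    writer_train.add_scalar('loss', train_loss, epoch)
    writer_val.add_scalar('loss', val_loss, epoch)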
Example #4
    def test_add_custom_scalars(self):
        with TemporaryDirectory() as tmp_dir:
            writer = SummaryWriter(tmp_dir)
            writer.add_custom_scalars = MagicMock()
            with summary_writer_context(writer):
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["a", "b"], category="cat", title="title"
                )
                with self.assertRaisesRegex(
                    AssertionError, r"Title \(title\) is already in category \(cat\)"
                ):
                    SummaryWriterContext.add_custom_scalars_multilinechart(
                        ["c", "d"], category="cat", title="title"
                    )
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["e", "f"], category="cat", title="title2"
                )
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["g", "h"], category="cat2", title="title"
                )

            SummaryWriterContext.add_custom_scalars(writer)
            writer.add_custom_scalars.assert_called_once_with(
                {
                    "cat": {
                        "title": ["Multiline", ["a", "b"]],
                        "title2": ["Multiline", ["e", "f"]],
                    },
                    "cat2": {"title": ["Multiline", ["g", "h"]]},
                }
            )
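For reference, the dict asserted above follows tensorboardX's custom-scalars layout format, {category: {chart title: [chart type, [scalar tags]]}}, which is what SummaryWriter.add_custom_scalars expects. A standalone sketch with made-up tags:

layout = {"losses": {"td": ["Multiline", ["loss/td_mean", "loss/td_max"]]}}
writer.add_custom_scalars(layout)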
Example #5
 def test_not_swallowing_exception(self):
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         with self.assertRaisesRegex(
             NotImplementedError, "test"
         ), summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
Example #6
 def test_writing(self):
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock()
         with summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
         writer.add_scalar.assert_called_once_with(
             "test", torch.ones(1), global_step=0
         )
Example #7
    def log_to_tensorboard(self, writer: SummaryWriter, epoch: int) -> None:
        def none_to_zero(x: Optional[float]) -> float:
            if x is None or math.isnan(x):
                return 0.0
            return x

        for name, value in [
            ("Training/td_loss", self.get_recent_td_loss()),
            ("Training/reward_loss", self.get_recent_reward_loss()),
        ]:
            writer.add_scalar(name, none_to_zero(value), epoch)
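The none_to_zero helper keeps TensorBoard from choking on missing or NaN losses. A standalone restatement with illustrative checks:

import math
from typing import Optional

def none_to_zero(x: Optional[float]) -> float:  # same helper, lifted out for illustration
    if x is None or math.isnan(x):
        return 0.0
    return x

assert none_to_zero(None) == 0.0
assert none_to_zero(float('nan')) == 0.0
assert none_to_zero(1.5) == 1.5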
Example #8
def one_stage_train(myModel, data_reader_trn, my_optimizer,
                    loss_criterion, snapshot_dir, log_dir,
                    i_iter, start_epoch, best_val_accuracy=0, data_reader_eval=None,
                    scheduler=None):
    report_interval = cfg.training_parameters.report_interval
    snapshot_interval = cfg.training_parameters.snapshot_interval
    max_iter = cfg.training_parameters.max_iter

    avg_accuracy = 0
    accuracy_decay = 0.99
    best_epoch = 0
    writer = SummaryWriter(log_dir)
    best_iter = i_iter
    iepoch = start_epoch
    snapshot_timer = Timer('m')
    report_timer = Timer('s')

    while i_iter < max_iter:
        iepoch += 1
        for i, batch in enumerate(data_reader_trn):
            i_iter += 1
            if i_iter > max_iter:
                break

            scheduler.step(i_iter)

            my_optimizer.zero_grad()
            add_graph = False
            scores, total_loss, n_sample = compute_a_batch(batch, myModel, eval_mode=False,
                                                           loss_criterion=loss_criterion,
                                                           add_graph=add_graph, log_dir=log_dir)
            total_loss.backward()
            accuracy = scores / n_sample
            avg_accuracy += (1 - accuracy_decay) * (accuracy - avg_accuracy)

            clip_gradients(myModel, i_iter, writer)
            my_optimizer.step()

            if i_iter % report_interval == 0:
                save_a_report(i_iter, total_loss.item(), accuracy, avg_accuracy, report_timer,
                              writer, data_reader_eval, myModel, loss_criterion)

            if i_iter % snapshot_interval == 0 or i_iter == max_iter:
                best_val_accuracy, best_epoch, best_iter = save_a_snapshot(
                    snapshot_dir, i_iter, iepoch, myModel, my_optimizer,
                    loss_criterion, best_val_accuracy, best_epoch, best_iter,
                    snapshot_timer, data_reader_eval)

    writer.export_scalars_to_json(os.path.join(log_dir, "all_scalars.json"))
    writer.close()
    print("best_acc:%.6f after epoch: %d/%d at iter %d" % (best_val_accuracy, best_epoch, iepoch, best_iter))
    sys.stdout.flush()
Example #9
class TBVisualizer:
    def __init__(self, opt):
        self._opt = opt
        self._save_path = os.path.join(opt.checkpoints_dir, opt.name)

        self._log_path = os.path.join(self._save_path, 'loss_log2.txt')
        self._tb_path = os.path.join(self._save_path, 'summary.json')
        self._writer = SummaryWriter(self._save_path)

        with open(self._log_path, "a") as log_file:
            now = time.strftime("%c")
            log_file.write('================ Training Loss (%s) ================\n' % now)

    def __del__(self):
        self._writer.close()

    def display_current_results(self, visuals, it, is_train, save_visuals=False):
        for label, image_numpy in visuals.items():
            sum_name = '{}/{}'.format('Train' if is_train else 'Test', label)
            self._writer.add_image(sum_name, image_numpy, it)

            if save_visuals:
                util.save_image(image_numpy,
                                os.path.join(self._opt.checkpoints_dir, self._opt.name,
                                             'event_imgs', sum_name, '%08d.png' % it))

        self._writer.export_scalars_to_json(self._tb_path)

    def plot_scalars(self, scalars, it, is_train):
        for label, scalar in scalars.items():
            sum_name = '{}/{}'.format('Train' if is_train else 'Test', label)
            self._writer.add_scalar(sum_name, scalar, it)

    def print_current_train_errors(self, epoch, i, iters_per_epoch, errors, t, visuals_were_stored):
        log_time = time.strftime("[%d/%m/%Y %H:%M:%S]")
        visuals_info = "v" if visuals_were_stored else ""
        message = '%s (T%s, epoch: %d, it: %d/%d, t/smpl: %.3fs) ' % (log_time, visuals_info, epoch, i, iters_per_epoch, t)
        for k, v in errors.items():
            message += '%s:%.3f ' % (k, v)

        print(message)
        with open(self._log_path, "a") as log_file:
            log_file.write('%s\n' % message)

    def print_current_validate_errors(self, epoch, errors, t):
        log_time = time.strftime("[%d/%m/%Y %H:%M:%S]")
        message = '%s (V, epoch: %d, time_to_val: %ds) ' % (log_time, epoch, t)
        for k, v in errors.items():
            message += '%s:%.3f ' % (k, v)

        print(message)
        with open(self._log_path, "a") as log_file:
            log_file.write('%s\n' % message)

    def save_images(self, visuals):
        for label, image_numpy in visuals.items():
            image_name = '%s.png' % label
            save_path = os.path.join(self._save_path, "samples", image_name)
            util.save_image(image_numpy, save_path)
Example #10
 def test_global_step(self):
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock()
         with summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
             SummaryWriterContext.increase_global_step()
             SummaryWriterContext.add_scalar("test", torch.zeros(1))
         writer.add_scalar.assert_has_calls(
             [
                 call("test", torch.ones(1), global_step=0),
                 call("test", torch.zeros(1), global_step=1),
             ]
         )
         self.assertEqual(2, len(writer.add_scalar.mock_calls))
Example #11
    def setup(self):
        """Setups TensorBoard logger."""

        def replace_loggers():
            # Replace all log_* methods with dummy _nop
            self.log_metrics = self._nop
            self.log_scalar = self._nop
            self.log_activations = self._nop
            self.log_gradients = self._nop

        # No log_dir given, bail out
        if not self.log_dir:
            replace_loggers()
            return

        # Detect tensorboard
        try:
            from tensorboardX import SummaryWriter
        except ImportError:
            replace_loggers()
            return
        else:
            self.available = True

            # Construct full folder path
            self.log_dir = pathlib.Path(self.log_dir).expanduser()
            self.log_dir = self.log_dir / self.subfolder / self.exp_id
            self.log_dir.mkdir(parents=True, exist_ok=True)

            # Set up summary writer
            self.writer = SummaryWriter(self.log_dir)
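The _nop method referenced above is not shown here; presumably it is a do-nothing sink so that logging calls remain safe when no log_dir is set or tensorboardX is unavailable, along these lines (an assumed sketch):

    def _nop(self, *args, **kwargs):
        # accept any logging call and silently discard it
        return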
Example #12
 def test_writing_stack(self):
     with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
         writer1 = SummaryWriter(tmp_dir1)
         writer1.add_scalar = MagicMock()
         writer2 = SummaryWriter(tmp_dir2)
         writer2.add_scalar = MagicMock()
         with summary_writer_context(writer1):
             with summary_writer_context(writer2):
                 SummaryWriterContext.add_scalar("test2", torch.ones(1))
             SummaryWriterContext.add_scalar("test1", torch.zeros(1))
         writer1.add_scalar.assert_called_once_with(
             "test1", torch.zeros(1), global_step=0
         )
         writer2.add_scalar.assert_called_once_with(
             "test2", torch.ones(1), global_step=0
         )
Example #13
    def __init__(self, opt):
        self.opt = opt
        print('> training arguments:')
        for arg in vars(opt):
            print('>>> {0}: {1}'.format(arg, getattr(opt, arg)))
# >>> model_name: lstm
# >>> dataset: twitter
# >>> optimizer: <class 'torch.optim.adam.Adam'>
# >>> initializer: <function xavier_uniform_ at 0x10858b510>
# >>> learning_rate: 0.001
# >>> dropout: 0
# >>> num_epoch: 20
# >>> batch_size: 128
# >>> log_step: 5
# >>> logdir: tmp_log
# >>> embed_dim: 100
# >>> hidden_dim: 200
# >>> max_seq_len: 80
# >>> polarities_dim: 3
# >>> hops: 3
# >>> device: cpu
# >>> model_class: <class 'models.lstm.LSTM'>
# >>> inputs_cols: ['text_raw_indices']

        absa_dataset = ABSADatesetReader(dataset=opt.dataset, embed_dim=opt.embed_dim, max_seq_len=opt.max_seq_len)
        self.train_data_loader = DataLoader(dataset=absa_dataset.train_data, batch_size=opt.batch_size, shuffle=True)
        self.test_data_loader = DataLoader(dataset=absa_dataset.test_data, batch_size=len(absa_dataset.test_data), shuffle=False)
        self.writer = SummaryWriter(log_dir=opt.logdir)

        self.model = opt.model_class(absa_dataset.embedding_matrix, opt).to(opt.device)  # embedding_matrix only stores the vector for each index; the dictionary itself is not stored
        self.reset_parameters()
Example #14
class TensorBoard(Callback):

    # TODO: add option to write images; find fix for graph

    def __init__(self, log_dir, update_frequency=10):
        super(TensorBoard, self).__init__()
        self.log_dir = log_dir
        self.writer = None
        self.update_frequency = update_frequency

    def on_train_begin(self, **_):
        self.writer = SummaryWriter(os.path.join(self.log_dir, str(datetime.datetime.now())))
        rndm_input = torch.autograd.Variable(torch.rand(1, *self.model.input_shape), requires_grad=True).to(self.logger['device'])
        # fwd_pass = self.model(rndm_input)
        self.writer.add_graph(self.model, rndm_input)
        return self

    def on_epoch_end(self, **_):
        if (self.logger['epoch'] % self.update_frequency) == 0:
            epoch_metrics = self.logger['epoch_metrics'][self.logger['epoch']]
            for e_metric, e_metric_dct in epoch_metrics.items():
                for e_metric_split, e_metric_val in e_metric_dct.items():
                    self.writer.add_scalar('{}/{}'.format(e_metric_split, e_metric), e_metric_val, self.logger['epoch'])
            for name, param in self.model.named_parameters():
                self.writer.add_histogram(name.replace('.', '/'), param.clone().cpu().data.numpy(), self.logger['epoch'])
        return self

    def on_train_end(self, **_):
        return self.writer.close()
Example #15
 def __init__(self, log_file_dir=None):
     self.counter = 0
     self.epi_counter = 0
     self.step_counter = 0
     self.u_stats = dict()
     self.path_info = defaultdict(list)
     self.extra_info = dict()
     self.scores = []
     self.tstart = time.time()
     self.writer = SummaryWriter(log_file_dir)
Example #16
 def run(self):
     self.writer = SummaryWriter(os.path.join(self.env.model_dir, self.env.args.run))
     while True:
         name, kwargs = self.queue.get()
         if name is None:
             break
         func = getattr(self, 'summary_' + name)
         try:
             func(**kwargs)
         except Exception:
             traceback.print_exc()
Example #17
 def __init__(self, learn:Learner, base_dir:Path, name:str, loss_iters:int=25, hist_iters:int=500, stats_iters:int=100):
     super().__init__(learn=learn)
     self.base_dir, self.name, self.loss_iters, self.hist_iters, self.stats_iters = base_dir, name, loss_iters, hist_iters, stats_iters
     log_dir = base_dir/name
     self.tbwriter = SummaryWriter(log_dir=str(log_dir))
     self.hist_writer = HistogramTBWriter()
     self.stats_writer = ModelStatsTBWriter()
     self.graph_writer = GraphTBWriter()
     self.data = None
     self.metrics_root = '/metrics/'
     self._update_batches_if_needed()
Example #18
 def __init__(self, gpu='0'):
     # Device configuration
     self.__device = torch.device('cuda:'+gpu if torch.cuda.is_available() else 'cpu')
     self.__writer = SummaryWriter('logs')
     self.__model = CNNDriver()
     # Set model to train mode
     self.__model.train()
     print(self.__model)
     self.__writer.add_graph(self.__model, torch.rand(10, 3, 66, 200))
     # Put model on GPU
     self.__model = self.__model.to(self.__device)
Example #19
    def __init__(self, opt):
        self._opt = opt
        self._save_path = os.path.join(opt.checkpoints_dir, opt.name)

        self._log_path = os.path.join(self._save_path, 'loss_log2.txt')
        self._tb_path = os.path.join(self._save_path, 'summary.json')
        self._writer = SummaryWriter(self._save_path)

        with open(self._log_path, "a") as log_file:
            now = time.strftime("%c")
            log_file.write('================ Training Loss (%s) ================\n' % now)
Example #20
    def log_to_tensorboard(self, writer: SummaryWriter, epoch: int) -> None:
        def none_to_zero(x: Optional[float]) -> float:
            if x is None or math.isnan(x):
                return 0.0
            return x

        for name, value in [
            ("Reward_CPE/Direct Method Reward", self.direct_method.normalized),
            ("Reward_CPE/IPS Reward", self.inverse_propensity.normalized),
            ("Reward_CPE/Doubly Robust Reward", self.doubly_robust.normalized),
            (
                "Value_CPE/Sequential Doubly Robust",
                self.sequential_doubly_robust.normalized,
            ),
            (
                "Value_CPE/Weighted Doubly Robust",
                self.weighted_doubly_robust.normalized,
            ),
            ("Value_CPE/MAGIC Estimator", self.magic.normalized),
        ]:
            writer.add_scalar(name, none_to_zero(value), epoch)
Example #21
    def train(self, epoch_to_restore=0):
        g = Generator(self.nb_channels_first_layer, self.dim)

        if epoch_to_restore > 0:
            filename_model = os.path.join(self.dir_models, 'epoch_{}.pth'.format(epoch_to_restore))
            g.load_state_dict(torch.load(filename_model))
        else:
            g.apply(weights_init)

        g.cuda()
        g.train()

        dataset = EmbeddingsImagesDataset(self.dir_z_train, self.dir_x_train)
        dataloader = DataLoader(dataset, self.batch_size, shuffle=True, num_workers=4, pin_memory=True)
        fixed_dataloader = DataLoader(dataset, 16)
        fixed_batch = next(iter(fixed_dataloader))

        criterion = torch.nn.L1Loss()

        optimizer = optim.Adam(g.parameters())
        writer = SummaryWriter(self.dir_logs)

        try:
            epoch = epoch_to_restore
            while True:
                g.train()
                for _ in range(self.nb_epochs_to_save):
                    epoch += 1

                    for idx_batch, current_batch in enumerate(tqdm(dataloader)):
                        g.zero_grad()
                        x = Variable(current_batch['x']).type(torch.FloatTensor).cuda()
                        z = Variable(current_batch['z']).type(torch.FloatTensor).cuda()
                        g_z = g.forward(z)

                        loss = criterion(g_z, x)
                        loss.backward()
                        optimizer.step()

                    writer.add_scalar('train_loss', loss, epoch)

                z = Variable(fixed_batch['z']).type(torch.FloatTensor).cuda()
                g.eval()
                g_z = g.forward(z)
                images = make_grid(g_z.data[:16], nrow=4, normalize=True)
                writer.add_image('generations', images, epoch)
                filename = os.path.join(self.dir_models, 'epoch_{}.pth'.format(epoch))
                torch.save(g.state_dict(), filename)

        finally:
            print('[*] Closing Writer.')
            writer.close()
Example #22
 def __init__(self, **kwargs):
     super(TensorBoardReporting, self).__init__(**kwargs)
     from tensorboardX import SummaryWriter
     # Base dir is often the dir created to save the model into
     base_dir = kwargs.get('base_dir', '.')
     log_dir = os.path.expanduser(kwargs.get('log_dir', 'runs'))
     if not os.path.isabs(log_dir):
         log_dir = os.path.join(base_dir, log_dir)
     # Run dir is the name of an individual run
     run_dir = kwargs.get('run_dir')
     pid = str(os.getpid())
     run_dir = '{}-{}'.format(run_dir, pid) if run_dir is not None else pid
     log_dir = os.path.join(log_dir, run_dir)
     flush_secs = int(kwargs.get('flush_secs', 2))
     self._log = SummaryWriter(log_dir, flush_secs=flush_secs)
Example #23
class TensorBoard(Callback):
    def __init__(self, logdir):
        super().__init__()
        self.logdir = logdir
        self.writer = None

    def on_train_begin(self):
        os.makedirs(self.logdir, exist_ok=True)
        self.writer = SummaryWriter(self.logdir)

    def on_epoch_end(self, epoch):
        for k, v in self.metrics_collection.train_metrics.items():
            self.writer.add_scalar('train/{}'.format(k), float(v), global_step=epoch)

        for k, v in self.metrics_collection.val_metrics.items():
            self.writer.add_scalar('val/{}'.format(k), float(v), global_step=epoch)

        for idx, param_group in enumerate(self.estimator.optimizer.param_groups):
            lr = param_group['lr']
            self.writer.add_scalar('group{}/lr'.format(idx), float(lr), global_step=epoch)

    def on_train_end(self):
        self.writer.close()
Example #24
    def __init__(self, config):
        self.config = config
        self.logger = logging.getLogger("STGAN")
        self.logger.info("Creating STGAN architecture...")

        self.G = Generator(len(self.config.attrs), self.config.g_conv_dim, self.config.g_layers, self.config.shortcut_layers, use_stu=self.config.use_stu, one_more_conv=self.config.one_more_conv)
        self.D = Discriminator(self.config.image_size, len(self.config.attrs), self.config.d_conv_dim, self.config.d_fc_dim, self.config.d_layers)

        self.data_loader = globals()['{}_loader'.format(self.config.dataset)](
            self.config.data_root, self.config.mode, self.config.attrs,
            self.config.crop_size, self.config.image_size, self.config.batch_size)

        self.current_iteration = 0
        self.cuda = torch.cuda.is_available() and self.config.cuda

        if self.cuda:
            self.device = torch.device("cuda")
            self.logger.info("Operation will be on *****GPU-CUDA***** ")
            print_cuda_statistics()
        else:
            self.device = torch.device("cpu")
            self.logger.info("Operation will be on *****CPU***** ")

        self.writer = SummaryWriter(log_dir=self.config.summary_dir)
Example #25
    train_obs_v = torch.FloatTensor(train_obs)
    train_act_v = torch.LongTensor(train_act)
    return train_obs_v, train_act_v, reward_bound, reward_mean


if __name__ == "__main__":
    env = DiscreteOneHotWrapper(gym.make("FrozenLake-v0"))
    # env = gym.wrappers.Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    net = Net(obs_size, HIDDEN_SIZE, n_actions)
    objective = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=net.parameters(), lr=0.01)
    writer = SummaryWriter(comment="-frozenlake-naive")

    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        obs_v, acts_v, reward_b, reward_m = filter_batch(batch, PERCENTILE)
        optimizer.zero_grad()
        action_scores_v = net(obs_v)
        loss_v = objective(action_scores_v, acts_v)
        loss_v.backward()
        optimizer.step()
        print("%d: loss=%.3f, reward_mean=%.1f, reward_bound=%.1f" %
              (iter_no, loss_v.item(), reward_m, reward_b))
        writer.add_scalar("loss", loss_v.item(), iter_no)
        writer.add_scalar("reward_bound", reward_b, iter_no)
        writer.add_scalar("reward_mean", reward_m, iter_no)
        if reward_m > 0.8:
            print("Solved!")
Example #26
def get_pnp_solver():
    from inference.detector import CuboidPNPSolver
    from inference.cuboid import Cuboid3d
    from shared_code import load_params, get_config_options
    params = load_params()

    matrix_camera, dist_coeffs, config_detect = get_config_options(params)

    for model in params['weights']:
        pnp_solver = CuboidPNPSolver(model, matrix_camera, Cuboid3d(params['dimensions'][model]), dist_coeffs=dist_coeffs)

    return pnp_solver, config_detect


writer = SummaryWriter()
pnp_solver, config = get_pnp_solver()
for epoch in range(1, opt.epochs + 1):

    if trainingdata is not None:
        _runnetwork(epoch, trainingdata, True, writer, pnp_solver, config)

    if opt.datatest != "":
        _runnetwork(epoch, testingdata, False, writer, pnp_solver, config)
        if opt.data == "":
            break  # lets get out of this if we are only testing
    try:
        torch.save(net.state_dict(), '{}/net_{}_{}.pth'.format(opt.outf, opt.namefile, epoch))
    except Exception:
        pass
Example #27
def main(args):
    # initialization
    print("Input arguments:")
    for key, val in vars(args).items():
        print("{:16} {}".format(key, val))

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    writer = SummaryWriter(logdir=os.path.join(args.log_dir, args.method))

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True

    # conduct seg network
    seg_model = get_model(num_classes=args.num_classes)

    saved_state_dict = torch.load(args.restore_from)
    new_params = seg_model.state_dict().copy()

    if args.init:
        for i in saved_state_dict:
            i_parts = i.split('.')
            if not i_parts[0] == 'fc':
                new_params['encoder.' + '.'.join(i_parts[:])] = saved_state_dict[i]
        seg_model.load_state_dict(new_params)
        print('loading params w/o fc')
    else:
        seg_model.load_state_dict(saved_state_dict)
        print('loading params all')

    model = DataParallelModel(seg_model)
    model.float()
    model.cuda()

    # define dataloader
    train_loader = data.DataLoader(DataGenerator(root=args.root, list_path=args.lst,
                                                    crop_size=args.crop_size, training=True),
                                   batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = data.DataLoader(DataGenerator(root=args.val_root, list_path=args.val_lst,
                                                  crop_size=args.crop_size, training=False),
                                 batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=True)

    # define criterion & optimizer
    criterion = ABRLovaszLoss(ignore_index=args.ignore_label, only_present=True, cls_p=args.num_classes, cls_h=args.hbody_cls, cls_f=args.fbody_cls)
    criterion = DataParallelCriterion(criterion).cuda()

    optimizer = optim.SGD(
        [{'params': filter(lambda p: p.requires_grad, seg_model.parameters()), 'lr': args.learning_rate}],
        lr=args.learning_rate, momentum=0.9, weight_decay=5e-4)

    # key points
    best_val_mIoU = 0
    best_val_pixAcc = 0
    start = time.time()

    for epoch in range(0, args.epochs):
        print('\n{} | {}'.format(epoch, args.epochs - 1))
        # training
        _ = train(model, train_loader, epoch, criterion, optimizer, writer)

        # validation
        if epoch % 10 == 0 or epoch > args.epochs * 0.8:
            val_pixacc, val_miou = validation(model, val_loader, epoch, writer)
            # save model
            if val_pixacc > best_val_pixAcc:
                best_val_pixAcc = val_pixacc
            if val_miou > best_val_mIoU:
                best_val_mIoU = val_miou
                model_dir = os.path.join(args.snapshot_dir, args.method + '_miou.pth')
                torch.save(seg_model.state_dict(), model_dir)
                print('Model saved to %s' % model_dir)

    os.rename(model_dir, os.path.join(args.snapshot_dir, args.method + '_miou'+str(best_val_mIoU)+'.pth'))
    print('Complete using', time.time() - start, 'seconds')
    print('Best pixAcc: {} | Best mIoU: {}'.format(best_val_pixAcc, best_val_mIoU))
Example #28
 def start(self):
     self.writer = SummaryWriter(logdir=self.log_path)
Example #29
def train(epoch_num,
          model,
          train_dataloader,
          dev_dataloader,
          optimizer,
          criterion,
          label_list,
          out_model_file,
          log_dir,
          print_step,
          data_type='word'):

    model.train()
    writer = SummaryWriter(log_dir=log_dir + '/' +
                           time.strftime('%H:%M:%S', time.gmtime()))

    global_step = 0
    best_dev_loss = float('inf')

    for epoch in range(int(epoch_num)):
        print(f'---------------- Epoch: {epoch+1:02} ----------')

        epoch_loss = 0
        train_steps = 0

        all_preds = np.array([], dtype=int)
        all_labels = np.array([], dtype=int)

        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):

            optimizer.zero_grad()

            if data_type == 'word':
                logits = model(batch.text)
            elif data_type == 'highway':
                logits = model(batch.text_word, batch.text_char)

            loss = criterion(logits.view(-1, len(label_list)), batch.label)

            labels = batch.label.detach().cpu().numpy()
            preds = np.argmax(logits.detach().cpu().numpy(), axis=1)

            loss.backward()
            optimizer.step()
            global_step += 1

            epoch_loss += loss.item()
            train_steps += 1

            all_preds = np.append(all_preds, preds)
            all_labels = np.append(all_labels, labels)

            if global_step % print_step == 0:

                train_loss = epoch_loss / train_steps
                train_acc, train_report = classifiction_metric(
                    all_preds, all_labels, label_list)

                dev_loss, dev_acc, dev_report = evaluate(
                    model, dev_dataloader, criterion, label_list, data_type)
                c = global_step // print_step

                writer.add_scalar("loss/train", train_loss, c)
                writer.add_scalar("loss/dev", dev_loss, c)

                writer.add_scalar("acc/train", train_acc, c)
                writer.add_scalar("acc/dev", dev_acc, c)

                for label in label_list:
                    writer.add_scalar(label + ":" + "f1/train",
                                      train_report[label]['f1-score'], c)
                    writer.add_scalar(label + ":" + "f1/dev",
                                      dev_report[label]['f1-score'], c)

                print_list = ['macro avg', 'weighted avg']
                for label in print_list:
                    writer.add_scalar(label + ":" + "f1/train",
                                      train_report[label]['f1-score'], c)
                    writer.add_scalar(label + ":" + "f1/dev",
                                      dev_report[label]['f1-score'], c)

                if dev_loss < best_dev_loss:
                    best_dev_loss = dev_loss
                    torch.save(model.state_dict(), out_model_file)

                model.train()

    writer.close()
Example #30
def initialize_and_train(task_config, trainset, augmentset, validset, testset,
                         uset, uset_aug, hp, run_tag):
    """The train process.

    Args:
        task_config (dictionary): the configuration of the task
        trainset (SnippextDataset): the training set
        augmentset (SnippextDataset): the augmented training set
        validset (SnippextDataset): the validation set
        testset (SnippextDataset): the test set
        uset (SnippextDataset): the unlabeled dataset
        uset_aug (SnippextDataset): the unlabeled dataset, augmented
        hp (Namespace): the parsed hyperparameters
        run_tag (string): the tag of the run (for logging purpose)

    Returns:
        None
    """
    padder = SnippextDataset.pad

    # iterators for dev/test set
    valid_iter = data.DataLoader(dataset=validset,
                                 batch_size=hp.batch_size * 4,
                                 shuffle=False,
                                 num_workers=0,
                                 collate_fn=padder)
    test_iter = data.DataLoader(dataset=testset,
                                batch_size=hp.batch_size * 4,
                                shuffle=False,
                                num_workers=0,
                                collate_fn=padder)

    # initialize model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cpu':
        model = MultiTaskNet([task_config],
                             device,
                             hp.finetuning,
                             bert_path=hp.bert_path)
        optimizer = AdamW(model.parameters(), lr=hp.lr)
    else:
        model = MultiTaskNet([task_config],
                             device,
                             hp.finetuning,
                             bert_path=hp.bert_path).cuda()
        optimizer = AdamW(model.parameters(), lr=hp.lr)
        model, optimizer = amp.initialize(model, optimizer, opt_level='O2')

    # learning rate scheduler
    num_steps = (len(trainset) // hp.batch_size * 2) * hp.n_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_steps // 10,
                                                num_training_steps=num_steps)

    # create logging
    if not os.path.exists(hp.logdir):
        os.makedirs(hp.logdir)
    writer = SummaryWriter(log_dir=hp.logdir)

    # start training
    best_dev_f1 = best_test_f1 = 0.0
    for epoch in range(1, hp.n_epochs + 1):
        train(model,
              trainset,
              augmentset,
              uset,
              uset_aug,
              optimizer,
              scheduler=scheduler,
              batch_size=hp.batch_size,
              num_aug=hp.num_aug,
              alpha=hp.alpha,
              alpha_aug=hp.alpha_aug,
              u_lambda=hp.u_lambda,
              fp16=hp.fp16)

        print(f"=========eval at epoch={epoch}=========")
        dev_f1, test_f1 = eval_on_task(epoch, model, task_config['name'],
                                       valid_iter, validset, test_iter,
                                       testset, writer, run_tag)

        if hp.save_model:
            if dev_f1 > best_dev_f1:
                best_dev_f1 = dev_f1
                torch.save(model.state_dict(), run_tag + '_dev.pt')
            if test_f1 > best_test_f1:
                best_test_f1 = test_f1
                torch.save(model.state_dict(), run_tag + '_test.pt')

    writer.close()
Example #31
class DQN(object):
    # Assign one task to one VM at a time
    def __init__(self, task_dim, vms, vm_dim):
        self.task_dim = task_dim  # task dimensionality
        self.vms = vms  # number of VMs
        self.vm_dim = vm_dim  # VM dimensionality

        self.s_task_dim = self.task_dim  # task state dimensionality
        self.s_vm_dim = self.vms * self.vm_dim  # VM state dimensionality
        self.a_dim = self.vms  # action space: one action per VM

        self.lr = 0.003  # learning rate
        self.batch_size = 32  # 128
        self.epsilon = 0.95
        self.epsilon_decay = 0.997
        self.epsilon_min = 0.1
        self.step = 0

        self.eval_net = QNet_v1(self.s_task_dim, self.s_vm_dim, self.a_dim)
        self.eval_net.apply(self.weights_init)
        self.target_net = QNet_v1(self.s_task_dim, self.s_vm_dim, self.a_dim)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(),
                                          lr=self.lr)

        self.hard_update(self.target_net, self.eval_net)  # initialize both nets with the same weights

        self.loss_f = nn.MSELoss()

        try:
            shutil.rmtree('dqn/logs/')  # recursively delete the old log folder
        except Exception:
            print("no existing logs directory found")
        self.writer = SummaryWriter("dqn/logs/")
        dummy_input = Variable(torch.rand(5, self.s_task_dim + self.s_vm_dim))
        with SummaryWriter(logdir="dqn/logs/graph", comment="Q_net") as w:
            w.add_graph(self.eval_net, (dummy_input,))

    # Given a batch of states, choose one action per state
    def choose_action(self, s_list):
        if self.epsilon > self.epsilon_min:  # decay epsilon down to its floor
            self.epsilon *= self.epsilon_decay
        if np.random.uniform() > self.epsilon:  # np.random.uniform() returns a random float in [0, 1)
            self.eval_net.eval()
            actions_value = self.eval_net(torch.from_numpy(s_list).float())
            # Original approach: directly pick the action with the largest Q-value
            # actions = torch.max(actions_value, 1)[1].data.numpy()
            # Boltzmann action selection: sample actions according to their probabilities
            actions_pro_value = torch.softmax(actions_value, dim=1).data.numpy()  # softmax turns Q-values into probabilities
            actions = []  # stores the chosen action values
            indexs = [i for i in range(self.a_dim)]
            for line in actions_pro_value:
                actions.append(np.random.choice(indexs, p=line.ravel()).tolist())  # sample an action by probability
            actions = np.array(actions)
        else:
            # Random fallback: pick a random VM index for each state in s_list
            actions = np.random.randint(0, self.vms, size=[1, len(s_list)])[0]

        # The code below makes the VM assignments more balanced
        adict = {}
        for i, num in enumerate(actions):
            if num not in adict:
                adict[num] = 1
            elif adict[num] > 2 and np.random.uniform() < adict[num] / 4:
                # if a VM already holds more than 2 tasks, reassign this task
                # to a random VM with the probability above
                actions[i] = np.random.randint(self.vms)  # range: [0, self.vms)
                adict[num] += 1
            else:
                adict[num] += 1
        return actions

    def learn(self):
        # Update the target net
        if self.step % TARGET_REPLACE_ITER == 0:
            self.hard_update(self.target_net, self.eval_net)

        # Train the Q-network
        self.eval_net.train()
        q_eval = self.eval_net(self.bstate).gather(1, self.baction)  # shape (batch, 1); gather picks the Q-value of each taken action
        self.target_net.eval()
        q_next = self.target_net(self.bstate_).detach()  # the target net needs no gradients
        q_target = self.breward + GAMMA * q_next.max(1)[0].view(self.batch_size, 1)  # shape (batch, 1)
        loss = self.loss_f(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Log to TensorBoard every 10 steps
        if self.step % 10 == 0:
            self.writer.add_scalar('Q-value', q_eval.detach().numpy()[0], self.step)
            self.writer.add_scalar('Loss', loss.detach().numpy(), self.step)

        return loss.detach().numpy()

    def store_memory(self, state_all, action_all, reward_all):
        indexs = np.random.choice(len(state_all[:-1]), size=self.batch_size)

        self.bstate = torch.from_numpy(state_all[indexs, :]).float()
        self.bstate_ = torch.from_numpy(state_all[indexs + 1, :]).float()
        self.baction = torch.LongTensor(action_all[indexs, :])
        self.breward = torch.from_numpy(reward_all[indexs, :]).float()  # larger reward values are better

    # Hard update: copy all weights from eval_net into target_net
    def hard_update(self, target_net, eval_net):
        target_net.load_state_dict(eval_net.state_dict())

    # Initialize network parameters
    def weights_init(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm1d):  # batch-norm layer initialization
            nn.init.uniform_(m.weight)  # initialize the weight as U(0, 1)
            nn.init.constant_(m.bias, 0)
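A hypothetical driver loop for the class above. TARGET_REPLACE_ITER and GAMMA are module-level constants that learn() assumes (example values below), and the dummy arrays stand in for the scheduling simulator's history; all of it is an illustrative assumption, not code from the original project.

import numpy as np

TARGET_REPLACE_ITER, GAMMA = 100, 0.9  # assumed constants used by learn()

dqn = DQN(task_dim=4, vms=20, vm_dim=6)
# dummy history: 200 steps of states, actions, and rewards (shapes assumed)
state_all = np.random.rand(200, dqn.s_task_dim + dqn.s_vm_dim).astype(np.float32)
action_all = np.random.randint(0, dqn.vms, size=(200, 1))
reward_all = np.random.rand(200, 1).astype(np.float32)

for step in range(100):
    dqn.step = step
    dqn.store_memory(state_all, action_all, reward_all)  # samples a minibatch
    loss = dqn.learn()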
Example #32
def main():
    args = parse_args()
    reset_config(config, args)

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')
    
    time_stamp = tb_log_dir.split('_')[-1]
    new_folder = os.path.join(final_output_dir, time_stamp)
    os.makedirs(new_folder)

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.'+config.MODEL.NAME+'.get_pose_net_second_deconv')(
        config, is_train=True
    ).cuda()
    second_deconv = eval('models.'+config.MODEL.NAME+'.get_second_deconv')(
        config#, pretrained='output/coco/pose_resnet_50/256x192_d256x3_adam_lr1e-3/2021-02-15-03-49/model_best.pth.tar'
    ).cuda()

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', config.MODEL.NAME + '.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # dump_input = torch.rand((config.TRAIN.BATCH_SIZE,
    #                          3,
    #                          config.MODEL.IMAGE_SIZE[1],
    #                          config.MODEL.IMAGE_SIZE[0]))
    # writer_dict['writer'].add_graph(model, (dump_input, ), verbose=False)

    gpus = [int(i) for i in config.GPUS.split(',')]
    # model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = JointsMSELoss(
        use_target_weight=config.LOSS.USE_TARGET_WEIGHT, use_gain_loss=False
    ).cuda()

    # optimizer = get_optimizer(config, model)
    second_deconv_optimizer = get_optimizer(config, second_deconv)

    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #     optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR
    # )
    second_deconv_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        second_deconv_optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR
    )

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = eval('dataset.'+config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TRAIN_SET,
        True,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    )
    valid_dataset = eval('dataset.'+config.DATASET.DATASET)(
        config,
        config.DATASET.ROOT,
        config.DATASET.TEST_SET,
        False,
        transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE*len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=True
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE*len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True
    )

    best_perf = 0.0
    best_model = False
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # lr_scheduler.step()
        second_deconv_lr_scheduler.step()
        
        # train for one epoch
        train(config, train_loader, model, second_deconv, criterion, second_deconv_optimizer, epoch,
              new_folder, tb_log_dir, writer_dict)
        
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, valid_dataset, model, second_deconv,
                                  criterion, new_folder, tb_log_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(new_folder))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': get_model_name(config),
            'state_dict': second_deconv.state_dict(),
            'perf': perf_indicator,
            'optimizer': second_deconv_optimizer.state_dict(),
        }, best_model, new_folder)

    final_model_state_file = os.path.join(new_folder,
                                          'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.state_dict(), final_model_state_file)  # model is not wrapped in DataParallel here, so no .module
    writer_dict['writer'].close()
Example #33
parser.add_argument('--log-directory',
                    type=str,
                    default='/home/sungwonlyu/experiment/gan',
                    metavar='N',
                    help='log directory')
parser.add_argument(
    '--parameters',
    type=list,
    default=[784, 400, 10, 10],
    metavar='N',
    help='gan parameters [input_size, hidden_size, latent_size, L]')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

writer = SummaryWriter(args.log_directory + '/' + args.time_stamp + '/')

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

train_loader = dataloader.train_loader('mnist', args.data_directory,
                                       args.batch_size)

input_size, hidden_size, latent_size, L = args.parameters

if args.load_model != '000000':
    gan = torch.load(args.log_directory + '/' + args.load_model + '/gan.pt')
else:
    gan = model2.GAN()
Example #34
        return self.net(x)


def calc_qvals(rewards):
    res = []
    sum_r = 0.0
    for r in reversed(rewards):
        sum_r *= GAMMA
        sum_r += r
        res.append(sum_r)
    return list(reversed(res))
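A quick worked example of the discounted-return helper above, assuming GAMMA = 0.99 (the constant is defined elsewhere in the original file):

GAMMA = 0.99  # assumed value
print(calc_qvals([1.0, 1.0, 1.0]))
# -> [2.9701, 1.99, 1.0]; the first step's return is 1 + 0.99 * (1 + 0.99 * 1)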


if __name__ == "__main__":
    env = gym.make("CartPole-v0")
    writer = SummaryWriter(comment="-cartpole-reinforce")

    net = PGN(env.observation_space.shape[0], env.action_space.n)
    print(net)

    agent = ptan.agent.PolicyAgent(net, preprocessor=ptan.agent.float32_preprocessor,
                                   apply_softmax=True)
    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, gamma=GAMMA)

    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

    total_rewards = []
    step_idx = 0
    done_episodes = 0

    batch_episodes = 0
Example #35
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    # monkey patch fix for cifar100
    if args.data == 'cifar100':
        model.fc = nn.Linear(model.fc.weight.size(1), 100)
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()

    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")

            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("module.encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]

            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    if args.rank == 0:
        writer = SummaryWriter(logdir=args.save_dir)
    else:
        writer = None
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.data == 'cifar10':
        if os.path.exists(f'{data_path}/cifar10'):
            traindir = os.path.join(f'{data_path}/cifar10')
        else:
            traindir = '../cifar10'
    elif args.data == 'cifar100':
        if os.path.exists(f'{data_path}/cifar100'):
            traindir = os.path.join(f'{data_path}/cifar100')
        else:
            traindir = '../cifar100'
    valdir = traindir

    if args.data == 'cifar10':
        dataset_cls = datasets.CIFAR10
        normalize = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                        std=(0.2023, 0.1994, 0.2010))
    elif args.data == 'cifar100':
        dataset_cls = datasets.CIFAR100
        normalize = transforms.Normalize(mean=(0.5071, 0.4867, 0.4408),
                                        std=(0.2675, 0.2565, 0.2761))
    train_dataset = dataset_cls(
        traindir, train=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(
            train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        dataset_cls(valdir, train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args, writer)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, writer)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args, writer, epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                                                    and args.rank % ngpus_per_node == 0):
            if epoch % 10 == 0 or epoch == args.epochs - 1:
                save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, filename='{}/checkpoint.pt'.format(args.save_dir))
            if epoch == args.start_epoch:
                sanity_check(model.state_dict(), args.pretrained)
Example #36
    print(args)
    
    param = {'env': args.env,
             'batch_size': 32,
             'lr': 0.0001,
             'GAMMA': 0.95,
             'replay_buffer': 500000,
             'end_eps': 0.1,
             'exp_length': 2000000}
    param['version'] = ", ".join([ "{}:{}".format(key,val) for key, val in param.items()]) + " "+str(datetime.datetime.now())[:16]
    print(param['version'])

    memory = utils.ReplayMemory(param['replay_buffer'])
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    eps = utils.EpsilonDecay(start_eps=1.0, end_eps=param['end_eps'], length=param['exp_length'])
    writer = SummaryWriter(log_dir="tensorboard/" + param['version'])
    checkpoint = utils.CheckpointIfBetter(param, device)

    env = wrap_deepmind(gym.make(param['env']), frame_stack = True)
    dqn = model.DQN(num_actions = env.action_space.n).to(device)
    target_dqn = copy.deepcopy(dqn)
    
    def dqn_epsilon_agent(state, net=dqn, th=0.05):
        if random.random() > th:
            yhat = net(default_states_preprocessor(state))
            return int(yhat.argmax().cpu().numpy())
        else:
            return env.action_space.sample()

    optimizer = optim.Adam(dqn.parameters(), lr=param['lr'])
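default_states_preprocessor is referenced by dqn_epsilon_agent above but not defined in the snippet; a plausible sketch, assuming observations are stacked-frame arrays that need a batch dimension and conversion to a float tensor on the training device:

def default_states_preprocessor(state):
    # Convert one observation (e.g. stacked LazyFrames) into a batched
    # float tensor; device is the torch.device created above.
    state_np = np.asarray(state, dtype=np.float32)
    return torch.from_numpy(state_np).unsqueeze(0).to(device)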
            n2_win += 1
        elif r > 0.5:
            n1_win += 1
    return n1_win / (n1_win + n2_win)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable CUDA")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    saves_path = os.path.join("saves", args.name)
    os.makedirs(saves_path, exist_ok=True)
    writer = SummaryWriter(comment="-" + args.name)

    net = model.Net(input_shape=model.OBS_SHAPE, actions_n=game.GAME_COLS).to(device)
    best_net = ptan.agent.TargetNet(net)
    print(net)

    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)

    replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)
    mcts_store = mcts.MCTS()
    step_idx = 0
    best_idx = 0

    with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
        while True:
            t = time.time()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.bn = nn.BatchNorm2d(20)

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.relu(x) + F.relu(-x)
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = self.bn(x)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x


dummy_input = torch.rand(13, 1, 28, 28)

model = Net1()

with SummaryWriter(log_dir='test_tensorboard') as writer:
    # for epoch in range(100):
    #     writer.add_scalar('scalar/test',np.random.rand(),epoch)
    #     writer.add_scalar('scalar/test',epoch*np.sin(epoch))
    #     writer.add_scalars('scalar/0/scalars_test',{'xsinx':epoch*np.sin(epoch)},epoch)
    #     writer.add_scalars('scalar/0/scalars_test',{'xcosx': epoch * np.cos(epoch)}, epoch)
    #     writer.add_scalars('scalar/1/scalars_test', {'xsinx': epoch * np.sin(epoch)}, epoch)
    #     writer.add_scalars('scalar/1/scalars_test', {'xcosx': epoch * np.cos(epoch)}, epoch)
    writer.add_graph(model, (dummy_input, ))
    writer.close()
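Note that the __init__ of Net1 is cut off above; for the forward pass shown to work on 28x28 inputs, layers like the following would have to exist (a hypothetical reconstruction, not the original code):

# Hypothetical layers implied by the forward pass: two conv/pool stages on a
# 28x28 input leave 20 channels of 4x4, matching x.view(-1, 320).
#     self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
#     self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
#     self.conv2_drop = nn.Dropout2d()

Once the graph has been written, it can be inspected with:

tensorboard --logdir test_tensorboard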
    parser = argparse.ArgumentParser(description="config")
    parser.add_argument(
        "--config",
        nargs="?",
        type=str,
        help="Configuration file to use"
    )

    args = parser.parse_args()

    with open(args.config) as fp:
        cfg = yaml.safe_load(fp)  # safe_load avoids arbitrary object construction

    # Pick the first run index whose directory does not exist yet
    index = 0
    while True:
        logdir = os.path.join('runs', cfg["training"]["logdir"], str(index))
        if not os.path.exists(logdir):
            break
        index += 1
    writer = SummaryWriter(log_dir=logdir)

    print('RUNDIR: {}'.format(logdir))
    shutil.copy(args.config, logdir)

    logger = get_logger(logdir)
    logger.info('Let the games begin')

    train(cfg, writer, logger)
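get_logger is not defined in this snippet; a minimal sketch, assuming it should log both to stdout and to a file inside the freshly created run directory:

def get_logger(logdir):
    import logging
    logger = logging.getLogger('train')
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler(os.path.join(logdir, 'run.log'))
    file_handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
    logger.addHandler(file_handler)
    logger.addHandler(logging.StreamHandler())
    return logger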
Beispiel #40
0
def train(args, return_early=False):

    writer = SummaryWriter(args.log_dir)    
    envs = utils.make_parallel_envs(args) 
    master = setup_master(args) 
    # used at evaluation time
    eval_master, eval_env = setup_master(args, return_env=True) 
    obs = envs.reset() # shape - num_processes x num_agents x obs_dim
    master.initialize_obs(obs)
    n = len(master.all_agents)
    episode_rewards = torch.zeros([args.num_processes, n], device=args.device)
    final_rewards = torch.zeros([args.num_processes, n], device=args.device)


    start = datetime.datetime.now()
    for j in range(args.num_updates):

        for step in range(args.num_steps):
            with torch.no_grad():
                actions_list = master.act(step)
            agent_actions = np.transpose(np.array(actions_list),(1,0,2))
            obs, reward, done, info = envs.step(agent_actions)
            reward = torch.from_numpy(np.stack(reward)).float().to(args.device)
            episode_rewards += reward
            masks = torch.FloatTensor(1-1.0*done).to(args.device)
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            master.update_rollout(obs, reward, masks)

        master.wrap_horizon()
        return_vals = master.update()
        value_loss = return_vals[:, 0]
        action_loss = return_vals[:, 1]
        dist_entropy = return_vals[:, 2]
        master.after_update()

        if j%args.save_interval == 0 and not args.test:
            savedict = {'models': [agent.actor_critic.state_dict() for agent in master.all_agents]}
            ob_rms = (None, None) if envs.ob_rms is None else (envs.ob_rms[0].mean, envs.ob_rms[0].var)
            savedict['ob_rms'] = ob_rms
            savedir = args.save_dir+'/ep'+str(j)+'.pt'
            torch.save(savedict, savedir)

        total_num_steps = (j + 1) * args.num_processes * args.num_steps

        if j%args.log_interval == 0:
            end = datetime.datetime.now()
            seconds = (end-start).total_seconds()
            mean_reward = final_rewards.mean(dim=0).cpu().numpy()
            print("Updates {} | Num timesteps {} | Time {} | FPS {}\nMean reward {}\nEntropy {:.4f} Value loss {:.4f} Policy loss {:.4f}\n".
                  format(j, total_num_steps, str(end-start), int(total_num_steps / seconds), 
                  mean_reward, dist_entropy[0], value_loss[0], action_loss[0]))
            if not args.test:
                for idx in range(n):
                    writer.add_scalar('agent'+str(idx)+'/training_reward', mean_reward[idx], j)

                writer.add_scalar('all/value_loss', value_loss[0], j)
                writer.add_scalar('all/action_loss', action_loss[0], j)
                writer.add_scalar('all/dist_entropy', dist_entropy[0], j)

        if args.eval_interval is not None and j%args.eval_interval==0:
            ob_rms = (None, None) if envs.ob_rms is None else (envs.ob_rms[0].mean, envs.ob_rms[0].var)
            print('===========================================================================================')
            _, eval_perstep_rewards, final_min_dists, num_success, eval_episode_len = evaluate(args, None, master.all_policies,
                                                                                               ob_rms=ob_rms, env=eval_env,
                                                                                               master=eval_master)
            print('Evaluation {:d} | Mean per-step reward {:.2f}'.format(j//args.eval_interval, eval_perstep_rewards.mean()))
            print('Num success {:d}/{:d} | Episode Length {:.2f}'.format(num_success, args.num_eval_episodes, eval_episode_len))
            if final_min_dists:
                print('Final_dists_mean {}'.format(np.stack(final_min_dists).mean(0)))
                print('Final_dists_var {}'.format(np.stack(final_min_dists).var(0)))
            print('===========================================================================================\n')

            if not args.test:
                writer.add_scalar('all/eval_success', 100.0*num_success/args.num_eval_episodes, j)
                writer.add_scalar('all/episode_length', eval_episode_len, j)
                for idx in range(n):
                    writer.add_scalar('agent'+str(idx)+'/eval_per_step_reward', eval_perstep_rewards.mean(0)[idx], j)
                    if final_min_dists:
                        writer.add_scalar('agent'+str(idx)+'/eval_min_dist', np.stack(final_min_dists).mean(0)[idx], j)

            curriculum_success_thres = 0.9
            if return_early and num_success*1./args.num_eval_episodes > curriculum_success_thres:
                savedict = {'models': [agent.actor_critic.state_dict() for agent in master.all_agents]}
                ob_rms = (None, None) if envs.ob_rms is None else (envs.ob_rms[0].mean, envs.ob_rms[0].var)
                savedict['ob_rms'] = ob_rms
                savedir = args.save_dir+'/ep'+str(j)+'.pt'
                torch.save(savedict, savedir)
                print('===========================================================================================\n')
                print('{} agents: training complete. Breaking.\n'.format(args.num_agents))
                print('===========================================================================================\n')
                break

    writer.close()
    if return_early:
        return savedir
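The reward bookkeeping in the step loop above relies on masks: episode_rewards accumulates while an episode runs, and when done flips a mask to 0 the running total is moved into final_rewards and reset. A tiny self-contained illustration of the same pattern (values are made up):

import torch

episode_rewards = torch.tensor([1.0, 2.0])
final_rewards = torch.zeros(2)
done = torch.tensor([1.0, 0.0])                  # env 0 just finished
masks = 1.0 - done
final_rewards *= masks                           # clear slots of envs that just finished
final_rewards += (1 - masks) * episode_rewards   # write their completed episode totals
episode_rewards *= masks                         # reset those accumulators
# final_rewards is now tensor([1., 0.]) and episode_rewards is tensor([0., 2.])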
Beispiel #41
0
                        type=int,
                        default=256,
                        help='The number of RNN units in the encoder. 2x this '
                        'number of RNN units will be used in the decoder')

    parser.add_argument('--embedding_size',
                        type=int,
                        default=128,
                        help='Embedding size used in both encoder and decoder')

    parser.add_argument(
        '--max_length',
        type=int,
        default=200,
        help='Sequences will be padded or truncated to this size.')

    args = parser.parse_args()

    writer = SummaryWriter('./logs/%s_%s' %
                           (args.model_name, str(int(time.time()))))
    if args.scheduled_teacher_forcing:
        schedule = np.arange(1.0, 0.0, -1.0 / args.epochs)
    else:
        schedule = np.ones(args.epochs) * args.teacher_forcing_fraction

    main(args.model_name, args.use_cuda, args.batch_size, schedule,
         args.keep_prob, args.val_size, args.lr, args.decoder_type,
         args.vocab_limit, args.hidden_size, args.embedding_size,
         args.max_length)
    # main(str(int(time.time())), args.use_cuda, args.batch_size, schedule, args.keep_prob, args.val_size, args.lr, args.decoder_type, args.vocab_limit, args.hidden_size, args.embedding_size, args.max_length)
    pass
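The schedule computed above holds one teacher-forcing fraction per epoch; a sketch of how a training loop might consume it (main's internals are not shown here, so these names are illustrative):

for epoch, teacher_forcing_fraction in enumerate(schedule):
    # With --scheduled_teacher_forcing the fraction decays linearly from 1.0
    # towards 0.0; otherwise it stays at args.teacher_forcing_fraction.
    use_teacher_forcing = np.random.rand() < teacher_forcing_fraction
    # feed the gold token to the decoder when use_teacher_forcing is True,
    # otherwise feed the model's own previous prediction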


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    parser.add_argument("-m", "--model", required=True, help="File with model to load")
    args = parser.parse_args()

    saves_path = os.path.join("saves", "02_env_" + args.name)
    os.makedirs(saves_path, exist_ok=True)

    envs = [common.make_env() for _ in range(NUM_ENVS)]
    writer = SummaryWriter(comment="-02_env_" + args.name)

    net = common.AtariA2C(envs[0].observation_space.shape, envs[0].action_space.n)
    net_em = i2a.EnvironmentModel(envs[0].observation_space.shape, envs[0].action_space.n)
    net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage))
    if args.cuda:
        net.cuda()
        net_em.cuda()
    print(net_em)
    optimizer = optim.Adam(net_em.parameters(), lr=LEARNING_RATE)

    step_idx = 0
    best_loss = np.inf
    with ptan.common.utils.TBMeanTracker(writer, batch_size=100) as tb_tracker:
        for mb_obs, mb_obs_next, mb_actions, mb_rewards, done_rewards, done_steps in iterate_batches(envs, net, cuda=args.cuda):
            if len(done_rewards) > 0:
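The snippet cuts off here; as background, ptan's TBMeanTracker used above buffers tracked scalars and writes their mean to the SummaryWriter every batch_size calls, which smooths noisy per-step values. A minimal standalone illustration:

import numpy as np
import ptan.common.utils
from tensorboardX import SummaryWriter

demo_writer = SummaryWriter(comment="-tbmean-demo")
with ptan.common.utils.TBMeanTracker(demo_writer, batch_size=10) as tracker:
    for step in range(100):
        tracker.track("noisy_metric", np.random.rand(), step)  # mean of every 10 values is written
demo_writer.close()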
Beispiel #43
0
    def train(self,
              train_dataset,
              output_dir,
              show_running_loss=True,
              eval_df=None):
        """
        Trains the model on train_dataset.

        Utility function to be used by the train_model() method. Not intended to be used directly.
        """

        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args

        tb_writer = SummaryWriter()
        train_sampler = RandomSampler(train_dataset)
        train_dataloader = DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args["train_batch_size"])

        t_total = len(train_dataloader) // args[
            "gradient_accumulation_steps"] * args["num_train_epochs"]

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [{
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args["weight_decay"]
        }, {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        }]

        warmup_steps = math.ceil(t_total * args["warmup_ratio"])
        args["warmup_steps"] = warmup_steps if args[
            "warmup_steps"] == 0 else args["warmup_steps"]

        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args["learning_rate"],
                          eps=args["adam_epsilon"])
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args["warmup_steps"],
            num_training_steps=t_total)

        if args["fp16"]:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args["fp16_opt_level"])

        if args["n_gpu"] > 1:
            model = torch.nn.DataParallel(model)

        global_step = 0
        tr_loss, logging_loss = 0.0, 0.0
        model.zero_grad()
        train_iterator = trange(int(args["num_train_epochs"]),
                                desc="Epoch",
                                disable=args['silent'])

        model.train()
        for _ in train_iterator:
            # epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(
                    tqdm(train_dataloader,
                         desc="Current iteration",
                         disable=args['silent'])):
                batch = tuple(t.to(self.device) for t in batch)

                inputs = self._get_inputs_dict(batch)
                if self.sliding_window:
                    outputs = model(inputs)
                else:
                    outputs = model(**inputs)
                # model outputs are always tuple in pytorch-transformers (see doc)
                loss = outputs[0]
                if show_running_loss:
                    print("\rRunning loss: %f" % loss, end="")

                if args['n_gpu'] > 1:
                    loss = loss.mean(
                    )  # mean() to average on multi-gpu parallel training
                if args["gradient_accumulation_steps"] > 1:
                    loss = loss / args["gradient_accumulation_steps"]

                if args["fp16"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args["max_grad_norm"])
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args["max_grad_norm"])

                tr_loss += loss.item()
                if (step + 1) % args["gradient_accumulation_steps"] == 0:
                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    model.zero_grad()
                    global_step += 1

                    if args["logging_steps"] > 0 and global_step % args[
                            "logging_steps"] == 0:
                        # Log metrics
                        if args['evaluate_during_training']:
                            # Only evaluate when single GPU otherwise metrics may not average well
                            results, _, _ = self.eval_model(eval_df,
                                                            verbose=True)
                            for key, value in results.items():
                                tb_writer.add_scalar('eval_{}'.format(key),
                                                     value, global_step)
                        tb_writer.add_scalar("lr",
                                             scheduler.get_lr()[0],
                                             global_step)
                        tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                             args["logging_steps"],
                                             global_step)
                        logging_loss = tr_loss

                    if args["save_steps"] > 0 and global_step % args[
                            "save_steps"] == 0:
                        # Save model checkpoint
                        output_dir_current = os.path.join(
                            output_dir, "checkpoint-{}".format(global_step))

                        if not os.path.exists(output_dir_current):
                            os.makedirs(output_dir_current)

                        # Take care of distributed/parallel training
                        model_to_save = model.module if hasattr(
                            model, "module") else model
                        model_to_save.save_pretrained(output_dir_current)
                        self.tokenizer.save_pretrained(output_dir_current)

        return global_step, tr_loss / global_step
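The optimizer_grouped_parameters pattern used above is worth isolating: weight decay is applied to every parameter except biases and LayerNorm weights. A compact standalone version of the same grouping (the function name is ours):

def group_parameters_for_weight_decay(model, weight_decay):
    no_decay = ["bias", "LayerNorm.weight"]
    decay_params = [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)]
    no_decay_params = [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)]
    return [
        {"params": decay_params, "weight_decay": weight_decay},
        {"params": no_decay_params, "weight_decay": 0.0},  # biases/LayerNorm: no decay
    ]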
  model = nn.DataParallel(model)
#load trained model if needed
#model.load_state_dict(torch.load('/workspace/1.pth'))
print('Model created.')

epochs = 50
lr = 0.0001
batch_size = 64

depth_dataset = DepthDataset(traincsv=traincsv, root_dir='./content/',
                             transform=transforms.Compose([Augmentation(0.5), ToTensor()]))
train_loader = DataLoader(depth_dataset, batch_size, shuffle=True)
l1_criterion = nn.L1Loss()

optimizer = torch.optim.Adam(model.parameters(), lr)
writer = SummaryWriter()
# Start training...
for epoch in range(epochs):
    batch_time = AverageMeter()
    losses = AverageMeter()
    N = len(train_loader)

    # Switch to train mode
    model.train()

    end = time.time()

    for i, sample_batched in enumerate(train_loader):
        optimizer.zero_grad()

        #Prepare sample and target
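AverageMeter is instantiated in the loop above but not defined in the snippet; a minimal sketch of the usual implementation (running value, sum, count, and average):

class AverageMeter(object):
    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count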
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank,
                                                          find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d",
                   args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility (even between python 2 and 3)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
        for step, (batch,lengths) in enumerate(epoch_iterator):
            inputs, labels = mask_tokens(batch, tokenizer, args) if args.mlm else (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            max_len = len(inputs[0])
            mask = (torch.arange(max_len).expand(len(lengths), max_len) < lengths.unsqueeze(1)).long().to(args.device)
            # print(inputs)
            # print(mask)
            outputs = model(inputs, masked_lm_labels=labels) if args.mlm else model(inputs, mask, labels=labels)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
            # print(outputs)
            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:  # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
                    tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                    tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                    logger.info("Saving model checkpoint to %s", output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
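The mask construction above is the standard padding-mask trick: entry (b, i) is 1 exactly when i < lengths[b]. A tiny worked example:

import torch

lengths = torch.tensor([3, 1])
max_len = 4
mask = (torch.arange(max_len).expand(len(lengths), max_len)
        < lengths.unsqueeze(1)).long()
# mask is tensor([[1, 1, 1, 0],
#                 [1, 0, 0, 0]])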
def train(model, train_loader, val_loader, optimizer, num_epochs,
          path_to_save_best_weights):
    model.train()

    log_softmax = nn.LogSoftmax(dim=1)  # Use for NLLLoss()
    softmax = nn.Softmax(dim=1)

    # weights = [1.0,1.0,1.0,1.0,1.0, 0.0]
    # class_weights = torch.FloatTensor(weights).to(device)
    criterion_nlloss = nn.NLLLoss()  #(weight=class_weights)
    metrics_evaluator = PerformanceMetricsEvaluator()

    to_tensor = transforms.ToTensor()

    writer = SummaryWriter('runs/unet')

    since = time.time()

    best_model_weights = model.state_dict()
    best_IoU = 0.0
    best_val_loss = float('inf')

    curr_val_loss = 0.0
    curr_training_loss = 0.0
    curr_training_IoU = 0.0
    curr_val_IoU = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:

            if phase == 'train':
                # scheduler.step(best_val_loss)
                model.train()
                data_loader = train_loader
            else:
                model.eval()
                data_loader = val_loader

            running_loss = 0.0
            running_IoU = 0

            # Iterate over data.
            ind = 0
            for imgs, masks in tqdm(data_loader):
                imgs = imgs.to(device)
                masks = masks.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                logits = model(imgs)
                log_softmax_logits = log_softmax(logits)
                loss = criterion_nlloss(log_softmax_logits, masks)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # ================================================================== #
                #                        Tensorboard Logging                         #
                # ================================================================== #

                unet_softmax_collapsed = softmax(logits)
                unet_softmax_collapsed = np.argmax(
                    unet_softmax_collapsed.detach().cpu(), axis=1)

                if ind % 10 == 0:
                    if phase == 'val':
                        img_name = 'ValidationEpoch: {}'.format(str(epoch))
                    else:
                        img_name = 'TrainingEpoch: {}'.format(str(epoch))

                    rgb_prediction = unet_softmax_collapsed.repeat(3, 1,
                                                                   1).float()
                    rgb_prediction = np.moveaxis(rgb_prediction.numpy(), 0, -1)
                    converted_img = img_to_visible(rgb_prediction)
                    converted_img = torch.unsqueeze(to_tensor(converted_img),
                                                    0)
                    # converted_img = np.moveaxis(converted_img, -1, 0)
                    masks_changed = masks.detach().cpu()
                    masks_changed = masks_changed.repeat(3, 1, 1).float()
                    masks_changed = np.moveaxis(masks_changed.numpy(), 0, -1)
                    masks_changed = img_to_visible(masks_changed)
                    masks_changed = torch.unsqueeze(to_tensor(masks_changed),
                                                    0)

                    # print(np.unique(converted_img, return_counts=True))
                    third_tensor = torch.cat(
                        (converted_img, imgs.detach().cpu(), masks_changed),
                        -1)
                    writer.add_image(
                        img_name,
                        # vutils.make_grid([
                        # imgs.detach().cpu(),
                        # rgb_prediction
                        third_tensor,
                        # ]),
                        epoch)

                # statistics
                running_loss += loss.detach().item()
                running_IoU += metrics_evaluator.mean_IU(
                    unet_softmax_collapsed.numpy()[0],
                    masks.cpu().numpy()[0])
                ind += 1
            epoch_loss = running_loss / len(data_loader)
            epoch_IoU = running_IoU / len(data_loader)

            print('{} Loss: {:.4f} IoU: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_IoU))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_val_loss:  # TODO add IoU
                best_val_loss = epoch_loss
                best_IoU = epoch_IoU
                best_model_weights = model.state_dict()

            if phase == 'val':
                # print(optimizer.param_groups[0]['lr'])
                curr_val_loss = epoch_loss
                curr_val_IoU = epoch_IoU
            else:
                curr_training_loss = epoch_loss
                curr_training_IoU = epoch_IoU

        writer.add_scalars('TrainValIoU', {
            'trainIoU': curr_training_IoU,
            'validationIoU': curr_val_IoU
        }, epoch)
        writer.add_scalars('TrainValLoss', {
            'trainLoss': curr_training_loss,
            'validationLoss': curr_val_loss
        }, epoch)
    # Saving best model
    torch.save(
        best_model_weights,
        os.path.join(path_to_save_best_weights,
                     'unet{:.2f}.pth'.format(best_val_loss)))

    # Show the timing and final statistics
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:.4f}'.format(best_val_loss))  # TODO add IoU
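PerformanceMetricsEvaluator.mean_IU is not defined in this snippet; a minimal sketch with the assumed semantics (per-class intersection-over-union, averaged over classes that appear in either map):

import numpy as np

def mean_iou(pred, target, num_classes):
    ious = []
    for c in range(num_classes):
        intersection = np.logical_and(pred == c, target == c).sum()
        union = np.logical_or(pred == c, target == c).sum()
        if union > 0:
            ious.append(intersection / union)
    return float(np.mean(ious)) if ious else 0.0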
Beispiel #47
0
        weight_decay=_C.OPTIM.WEIGHT_DECAY,
    )
    lr_scheduler = optim.lr_scheduler.LambdaLR(  # type: ignore
        optimizer,
        lr_lambda=lambda iteration: 1 - iteration / _C.OPTIM.NUM_ITERATIONS)
    # COS scheduler
    # lr_scheduler = optim.lr_scheduler.LambdaLR(  # type: ignore
    #     optimizer, lr_lambda=lambda iteration: np.cos(iteration / _C.OPTIM.NUM_ITERATIONS * np.pi / 2)
    # )

    # --------------------------------------------------------------------------------------------
    #  BEFORE TRAINING STARTS
    # --------------------------------------------------------------------------------------------

    # Tensorboard summary writer for logging losses and metrics.
    tensorboard_writer = SummaryWriter(logdir=_A.serialization_dir)

    # Checkpoint manager to serialize checkpoints periodically while training and keep track of
    # best performing checkpoint.
    checkpoint_manager = CheckpointManager(model,
                                           optimizer,
                                           _A.serialization_dir,
                                           mode="max")

    # Evaluator submits predictions to EvalAI and retrieves results.
    evaluator = NocapsEvaluator(phase="val")

    # Load checkpoint to resume training from there if specified.
    # Infer iteration number through file name (it's hacky but very simple), so don't rename
    # saved checkpoints if you intend to continue training.
    if _A.start_from_checkpoint != "":
Beispiel #48
0
from torch.backends import cudnn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader

import utils.transforms as extended_transforms
from datasets import voc
from models import *
from utils import check_mkdir, evaluate, AverageMeter, CrossEntropyLoss2d

sys.path.append("../..")

cudnn.benchmark = True

ckpt_path = '../../ckpt'
exp_name = 'voc-fcn8s'
writer = SummaryWriter(os.path.join(ckpt_path, 'exp', exp_name))

args = {
    'epoch_num': 300,
    'lr': 1e-10,
    'weight_decay': 1e-4,
    'momentum': 0.95,
    'lr_patience': 100,  # large patience denotes fixed lr
    'snapshot': '',  # empty string denotes learning from scratch
    'print_freq': 20,
    'val_save_to_img_file': False,
    'val_img_sample_rate': 0.1  # randomly sample some validation results to display
}
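Given the ReduceLROnPlateau import and the lr_patience entry above, the training script presumably reduces the learning rate when validation loss plateaus; a sketch of the wiring (optimizer construction is assumed):

scheduler = ReduceLROnPlateau(optimizer, mode='min',
                              patience=args['lr_patience'])
# after each validation pass:
#     scheduler.step(val_loss)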

Beispiel #49
0
class Logger:
    def __init__(self, model_name, data_name):
        self.model_name = model_name
        self.data_name = data_name

        self.comment = '{}_{}'.format(model_name, data_name)
        self.data_subdir = '{}/{}'.format(model_name, data_name)

        # TensorBoard
        self.writer = SummaryWriter(comment=self.comment)

    def log(self, d_error, g_error, epoch, n_batch, num_batches):

        # var_class = torch.autograd.variable.Variable
        if isinstance(d_error, torch.autograd.Variable):
            d_error = d_error.data.cpu().numpy()
        if isinstance(g_error, torch.autograd.Variable):
            g_error = g_error.data.cpu().numpy()

        step = Logger._step(epoch, n_batch, num_batches)
        self.writer.add_scalar('{}/D_error'.format(self.comment), d_error,
                               step)
        self.writer.add_scalar('{}/G_error'.format(self.comment), g_error,
                               step)

    def log_images(self,
                   images,
                   num_images,
                   epoch,
                   n_batch,
                   num_batches,
                   format='NCHW',
                   normalize=True):
        '''
        input images are expected in format (NCHW)
        '''
        if isinstance(images, np.ndarray):
            images = torch.from_numpy(images)

        if format == 'NHWC':
            images = images.transpose(1, 3)

        step = Logger._step(epoch, n_batch, num_batches)
        img_name = '{}/images{}'.format(self.comment, '')

        # Make horizontal grid from image tensor
        horizontal_grid = vutils.make_grid(images,
                                           normalize=normalize,
                                           scale_each=True)
        # Make vertical grid from image tensor
        nrows = int(np.sqrt(num_images))
        grid = vutils.make_grid(images,
                                nrow=nrows,
                                normalize=True,
                                scale_each=True)

        # Add horizontal images to tensorboard
        self.writer.add_image(img_name, horizontal_grid, step)

        # Save plots
        self.save_torch_images(horizontal_grid, grid, epoch, n_batch)

    def save_torch_images(self,
                          horizontal_grid,
                          grid,
                          epoch,
                          n_batch,
                          plot_horizontal=True):
        out_dir = './data/images/{}'.format(self.data_subdir)
        Logger._make_dir(out_dir)

        # Plot and save horizontal
        fig = plt.figure(figsize=(16, 16))
        plt.imshow(np.moveaxis(horizontal_grid.numpy(), 0, -1))
        plt.axis('off')
        if plot_horizontal:
            display.display(plt.gcf())
        self._save_images(fig, epoch, n_batch, 'hori')
        plt.close()

        # Save squared
        fig = plt.figure()
        plt.imshow(np.moveaxis(grid.numpy(), 0, -1))
        plt.axis('off')
        self._save_images(fig, epoch, n_batch)
        plt.close()

    def _save_images(self, fig, epoch, n_batch, comment=''):
        out_dir = './data/images/{}'.format(self.data_subdir)
        Logger._make_dir(out_dir)
        fig.savefig('{}/{}_epoch_{}_batch_{}.png'.format(
            out_dir, comment, epoch, n_batch))

    def display_status(self, epoch, num_epochs, n_batch, num_batches, d_error,
                       g_error, d_pred_real, d_pred_fake):

        # var_class = torch.autograd.variable.Variable
        if isinstance(d_error, torch.autograd.Variable):
            d_error = d_error.data.cpu().numpy()
        if isinstance(g_error, torch.autograd.Variable):
            g_error = g_error.data.cpu().numpy()
        if isinstance(d_pred_real, torch.autograd.Variable):
            d_pred_real = d_pred_real.data
        if isinstance(d_pred_fake, torch.autograd.Variable):
            d_pred_fake = d_pred_fake.data

        print('Epoch: [{}/{}], Batch Num: [{}/{}]'.format(
            epoch, num_epochs, n_batch, num_batches))
        print('Discriminator Loss: {:.4f}, Generator Loss: {:.4f}'.format(
            d_error, g_error))
        print('D(x): {:.4f}, D(G(z)): {:.4f}'.format(d_pred_real.mean(),
                                                     d_pred_fake.mean()))

    def save_models(self, generator, discriminator, epoch):
        out_dir = './data/models/{}'.format(self.data_subdir)
        Logger._make_dir(out_dir)
        torch.save(generator.state_dict(),
                   '{}/G_epoch_{}'.format(out_dir, epoch))
        torch.save(discriminator.state_dict(),
                   '{}/D_epoch_{}'.format(out_dir, epoch))

    def close(self):
        self.writer.close()

    # Private Functionality

    @staticmethod
    def _step(epoch, n_batch, num_batches):
        return epoch * num_batches + n_batch

    @staticmethod
    def _make_dir(directory):
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
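A short usage sketch for the Logger class above (model/data names and error values are illustrative):

logger = Logger(model_name='DCGAN', data_name='MNIST')
d_error, g_error = torch.tensor(0.7), torch.tensor(1.3)
logger.log(d_error, g_error, epoch=0, n_batch=10, num_batches=600)
logger.close()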
Beispiel #50
0

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable cuda")
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    make_env = lambda: ptan.common.wrappers.wrap_dqn(
        gym.make("PongNoFrameskip-v4"))
    envs = [make_env() for _ in range(NUM_ENVS)]
    writer = SummaryWriter(comment="-pong-a2c_" + args.name)

    net = AtariA2C(envs[0].observation_space.shape,
                   envs[0].action_space.n).to(device)
    print(net)

    agent = ptan.agent.PolicyAgent(lambda x: net(x)[0],
                                   apply_softmax=True,
                                   device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, gamma=GAMMA, steps_count=REWARD_STEPS)

    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    batch = []
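The snippet ends here; as background, ExperienceSourceFirstLast as configured above yields one record per step containing the first state, the chosen action, the REWARD_STEPS-discounted reward, and the last state (None at episode boundaries). A plausible continuation, not the source's code (BATCH_SIZE is assumed):

    for step_idx, exp in enumerate(exp_source):
        batch.append(exp)
        # exp.state, exp.action, exp.reward, exp.last_state are available here
        if len(batch) < BATCH_SIZE:
            continue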
Beispiel #51
0
def repeat_eval_ckpt(root_result_dir, ckpt_dir):
    root_result_dir = os.path.join(root_result_dir, 'eval',
                                   'eval_all_' + args.extra_tag)
    os.makedirs(root_result_dir, exist_ok=True)

    log_file = os.path.join(root_result_dir,
                            'log_eval_all_%s.txt' % cfg.TEST.SPLIT)
    logger = create_logger(log_file)
    logger.info('**********************Start logging**********************')

    # save config
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    save_config_to_file(cfg, logger=logger)

    # create dataloader & network
    test_loader = create_dataloader(logger)
    model = PointRCNN(num_classes=test_loader.dataset.num_class,
                      use_xyz=True,
                      mode='TEST')
    model.cuda()

    # copy important files to backup
    backup_dir = os.path.join(root_result_dir, 'backup_files')
    os.makedirs(backup_dir, exist_ok=True)
    os.system('cp *.py %s/' % backup_dir)
    os.system('cp ../lib/net/*.py %s/' % backup_dir)
    os.system('cp ../lib/datasets/kitti_rcnn_dataset.py %s/' % backup_dir)

    # evaluated ckpt record
    ckpt_record_file = os.path.join(root_result_dir,
                                    'eval_list_%s.txt' % cfg.TEST.SPLIT)
    with open(ckpt_record_file, 'a'):
        pass

    # tensorboard log
    tb_log = SummaryWriter(
        log_dir=os.path.join(root_result_dir, 'tensorboard_%s' %
                             cfg.TEST.SPLIT))

    while True:
        # check whether there is checkpoint which is not evaluated
        cur_epoch_id, cur_ckpt = get_no_evaluated_ckpt(ckpt_dir,
                                                       ckpt_record_file)
        if cur_epoch_id == -1 or int(float(cur_epoch_id)) < args.start_epoch:
            wait_second = 30
            print('Wait %s second for next check: %s' %
                  (wait_second, ckpt_dir))
            time.sleep(wait_second)
            continue

        # load checkpoint
        train_utils.load_checkpoint(model, filename=cur_ckpt)

        # start evaluation
        cur_result_dir = os.path.join(root_result_dir,
                                      'epoch_%s' % cur_epoch_id,
                                      cfg.TEST.SPLIT)
        tb_dict = eval_one_epoch(model, test_loader, cur_epoch_id,
                                 cur_result_dir, logger)

        step = int(float(cur_epoch_id))
        if step == float(cur_epoch_id):
            for key, val in tb_dict.items():
                tb_log.add_scalar(key, val, step)

        # record this epoch which has been evaluated
        with open(ckpt_record_file, 'a') as f:
            print('%s' % cur_epoch_id, file=f)
        logger.info('Epoch %s has been evaluated' % cur_epoch_id)
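get_no_evaluated_ckpt is not shown above; a sketch of what it plausibly does, assuming checkpoints are named checkpoint_epoch_<id>.pth (the naming is our assumption):

def get_no_evaluated_ckpt_sketch(ckpt_dir, ckpt_record_file):
    import glob
    import re
    with open(ckpt_record_file, 'r') as f:
        evaluated = {line.strip() for line in f if line.strip()}
    for ckpt in sorted(glob.glob(os.path.join(ckpt_dir, 'checkpoint_epoch_*.pth'))):
        epoch_id = re.findall(r'checkpoint_epoch_(.+)\.pth', ckpt)[0]
        if epoch_id not in evaluated:
            return epoch_id, ckpt
    return -1, None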
Beispiel #52
0
from options import read_options
import numpy as np
import torch
import random
from evaluator import evaluate_synonym_ranking_prediction
from losses import TripletLoss
from tensorboardX import SummaryWriter
from test_semantic_classes import obtain_semantic_classes
from predictor import pair_prediction, pair_prediction_with_pair_feature


if __name__ == '__main__':
    args = read_options()

    # Add TensorBoard Writer
    writer = SummaryWriter(log_dir=None, comment=args.comment)

    # Initialize random seed
    random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)
    if args.device_id != -1:
        torch.cuda.manual_seed_all(args.random_seed)
        torch.backends.cudnn.deterministic = True
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        torch.set_default_tensor_type(torch.FloatTensor)
    torch.set_printoptions(precision=9)
    torch.set_num_threads(1)

    # Load command line options
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 14:48:00 2019

@author: ben
"""

import math
from tensorboardX import SummaryWriter

import torch.nn as nn
import torch
import numpy as np

writer = SummaryWriter()

# Visualise some functions

funcs = {"sin": math.sin, "cos": math.cos, "tan": math.tan}
for angle in range(-360, 360):
    angle_rad = angle * math.pi / 180
    for name, fun in funcs.items():
        val = fun(angle_rad)
        writer.add_scalar(name, val, angle)

# Visualise a model

class ANN(nn.Module):
    
    def __init__(self, in_dim, out_dim):
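The ANN class is truncated above; a hypothetical completion that would let its graph be logged with add_graph (illustrative only, not the original code):

class ANNSketch(nn.Module):
    def __init__(self, in_dim, out_dim):
        super(ANNSketch, self).__init__()
        self.fc1 = nn.Linear(in_dim, 64)
        self.fc2 = nn.Linear(64, out_dim)

    def forward(self, x):
        return self.fc2(torch.relu(self.fc1(x)))

# e.g. writer.add_graph(ANNSketch(10, 2), torch.rand(1, 10))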
Beispiel #54
0
    checkpoint = torch.load('model_best_checkpoint_resnet50.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    # ------------------------------------ step 3/4 : define the loss function, optimizer, etc. -------------------------
    lr_init = 0.0001
    lr_stepsize = 20
    weight_decay = 0.001
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=lr_init,
                           weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=lr_stepsize,
                                                gamma=0.1)

    writer = SummaryWriter('runs/resnet50')
    # ------------------------------------ step 4/4 : training -----------------------------------------
    best_prec1 = 0
    for epoch in range(epochs):
        scheduler.step()
        train(train_loader, model, criterion, optimizer, epoch, writer)
        # evaluate on the validation set
        valid_prec1, valid_prec5 = validate(valid_loader,
                                            model,
                                            criterion,
                                            epoch,
                                            writer,
                                            phase="VAL")
        is_best = valid_prec1 > best_prec1
        best_prec1 = max(valid_prec1, best_prec1)
        save_checkpoint(
Beispiel #55
0
# from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
# from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
from evaluator import Evaluator
from ddpg import DDPG
from util import *
from tensorboardX import SummaryWriter
from observation_processor import queue
from multi import fastenv

# from llll import Subprocess

gym.undo_logger_setup()

import time

writer = SummaryWriter()

def train(num_iterations, agent, env, evaluate, bullet):
    fenv = fastenv(env, args.action_repeat, args.vis, args.atari)
    window_length = args.window_length
    validate_interval = args.validate_interval
    save_interval = args.save_interval
    max_episode_length = args.max_episode_length // args.action_repeat
    debug = args.debug
    visualize = args.vis
    traintimes = args.traintimes
    output = args.output
    resume = args.resume
    validate_episodes = args.validate_episodes

    if resume is not None:
Beispiel #56
0
class H2C(nn.Module):
    def __init__(self, nfeat, nhid, nout, dropout, alpha, nheads, cuda,
                 learning_rate_H, learning_rate_L, momentum, gamma):
        super(H2C, self).__init__()

        self.HigherActor, self.HigherCriticor = HigherActionModule(
            cuda), HigherActionModule(cuda)
        self.LowerActor = LowerActionModule(nfeat, nhid, nout, dropout, alpha,
                                            nheads, cuda)

        self.learning_rate_H = learning_rate_H
        self.learning_rate_L = learning_rate_L
        self.alpha = 1
        self.lamda = 2
        self.momentum = momentum
        self.gamma = gamma
        self.explore = 0.8  # initial exploration rate

        self.optimizer_H = optim.Adam(self.HigherActor.parameters(),
                                      self.learning_rate_H,
                                      betas=[0.9, 0.999])
        self.optimizer_L = optim.SGD(self.LowerActor.parameters(),
                                     self.learning_rate_L, self.momentum)

        self.loss_func_L = nn.CosineEmbeddingLoss()
        self.loss_func_H = nn.MSELoss()

        self.update_count_H = 0
        self.update_count_L = 0
        # initialize the recorders
        path = "./logging/400_500_12/"
        self.writer_L = SummaryWriter(
            path + 'lower/loss/' +
            time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))
        self.writer_H = SummaryWriter(
            path + 'higher/loss/' +
            time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))
        self.writer_WR = SummaryWriter(
            path + 'reward/whole/' +
            time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))
        self.writer_R = SummaryWriter(
            path + 'reward/step/' +
            time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))
        self.writer_RA = SummaryWriter(
            path + 'reward/idle/' +
            time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))

    def load_model(self, model_path):

        if os.path.exists(model_path):
            print("load model!")
            check_point = torch.load(model_path)
            self.HigherActor.load_state_dict(check_point['HigherActor'])
            self.LowerActor.load_state_dict(check_point['LowerActor'])
            self.HigherCriticor.load_state_dict(check_point['HigherCriticor'])
            self.HigherActor.chosen_goals = check_point['chosen_goals']

    def save_model(self, model_path):

        if not os.path.exists("./trained_model/"):
            os.mkdir("./trained_model/")
        check_point = {
            'HigherActor': self.HigherActor.state_dict(),
            'LowerActor': self.LowerActor.state_dict(),
            'HigherCriticor': self.HigherCriticor.state_dict(),
            'chosen_goals': self.HigherActor.chosen_goals,
        }
        torch.save(check_point, model_path)

    def reshape_input(self, inputs):

        results = []
        for node in inputs.keys():
            results.append(inputs[node])

        return results

    def state_reshape(self, state):
        # reshape private state into same dimension
        state_final = []
        batch_size = len(state)
        for batch in range(batch_size):
            state_final_one_batch = []
            state_one_batch = state[batch]
            for item in state_one_batch:
                state_node = self.reshape_for_input_step(item)
                state_final_one_batch.append(state_node)

            state_final.append(state_final_one_batch)
        return Variable(torch.FloatTensor(state_final).permute(1, 0, 2, 3, 4),
                        requires_grad=True)

    def reshape_for_input_step(self, state_private):
        # Cap the number of ambulances considered at NUM_AMBS
        amb_count = min(len(state_private), NUM_AMBS)
        final_state = np.zeros((NUM_AMBS, 10))
        if amb_count != 0:
            final_state[:amb_count, :] = state_private[:amb_count]
        final_state = final_state.reshape((1, NUM_AMBS, 10))
        return final_state

    def select_higher_action(self, state_now, bufferhigher, t, goal, flag):
        # Select the optimal distribution state as the higher-level action from
        # the replay buffer during training. Two selection patterns are possible;
        # for online training, select the goal rather than the state.
        update_count = bufferhigher.counts()
        if bufferhigher.counts() == 0:
            return goal
        else:
            state, reward, new_state, done, T, next_T, real_goal = bufferhigher.getBatch(
                update_count)

            states, values = self.HigherActor(
                Variable(
                    torch.cat([
                        torch.FloatTensor(state_now).view(-1, NUM_NODES),
                        torch.FloatTensor(t).view(-1, NUM_TIMESTEP)
                    ],
                              dim=1).repeat(
                                  torch.FloatTensor(real_goal).shape[0],
                                  1).cuda().view(-1, NUM_NODES + NUM_TIMESTEP,
                                                 1)),
                Variable(
                    torch.FloatTensor(real_goal).cuda().view(-1, NUM_NODES,
                                                             1)))
            states2, values2 = self.HigherActor(
                Variable(
                    torch.cat([
                        torch.FloatTensor(state_now).view(-1, NUM_NODES),
                        torch.FloatTensor(t).view(-1, NUM_TIMESTEP)
                    ],
                              dim=1).repeat(
                                  torch.FloatTensor(state).shape[0],
                                  1).cuda().view(-1, NUM_NODES + NUM_TIMESTEP,
                                                 1)),
                Variable(
                    torch.FloatTensor(state).cuda().view(-1, NUM_NODES, 1)))
            max_index = values.max(0)[1].data[0].cpu().numpy().tolist()
            max_index_2 = values2.max(0)[1].data[0].cpu().numpy().tolist()

            if np.random.random(1) > 0:  # note: this is (almost) always true
                goal = state[max_index_2]
            else:
                goal = real_goal[max_index]

            if flag:  #flag=1 if online training
                goal = real_goal[max_index]

            return goal

    def update_learningrate(self):

        self.explore = self.explore * 1.01

    def select_lower_action(self, state_pri, state_order, adj, idle_num, goal,
                            n_round, online_times, flag, T):
        # preprocesing for private state
        state_node = [state_pri]
        state_pri = self.state_reshape(state_node)
        # output the probability of actions for agents in each node
        value_list = self.LowerActor(state_pri, state_order, goal, adj, T)
        #select the optimal actions
        action = value_list.max(2)[1].permute(2, 1,
                                              0).data.cpu().numpy().tolist()
        # cut out the idle agents' actions (both branches of the original
        # conditional were identical, so a plain slice suffices)
        action_list = [action[i][:idle_num[i]] for i in range(len(idle_num))]
        # explore with probability 1 - self.explore
        if n_round < online_times:
            for j in range(len(action_list)):
                for z in range(len(action_list[j])):
                    if np.random.random(1) >= self.explore:
                        action_list[j][z] = [
                            np.random.choice(range(NUM_ACTIONS), 1).item()
                        ]

        return action_list

    def update_actor(self, buffers, batch_size, adj):
        state_pri, idle_driver, action, goal, state_higher, next_state_higher, reward, done, T, next_T, order_num, next_goal = buffers.getBatch(
            batch_size)
        #sample one batch of tuple to update
        loss = self.update_higher_actor(state_higher, reward,
                                        next_state_higher, done, T, next_T,
                                        goal, next_goal)
        self.update_lower_actor(state_pri, order_num, idle_driver, action,
                                goal, state_higher, next_state_higher, adj,
                                loss, batch_size, T)

        if self.update_count_L % 288 == 0:
            self.update_learningrate()

    def update_higher_actor(self, state, reward, new_state, done, T, next_T,
                            goal, next_goal):

        state = torch.FloatTensor(state).cuda().view(-1, NUM_NODES, 1)
        reward = Variable(torch.FloatTensor(reward).cuda().view(-1, 1),
                          requires_grad=True)
        done = Variable(torch.FloatTensor(done).cuda().view(-1, 1),
                        requires_grad=True)
        next_state = Variable(torch.FloatTensor(new_state).cuda().view(
            -1, NUM_NODES, 1),
                              requires_grad=True)
        GAMMA = torch.Tensor(np.ones(
            (next_state.size(0), 1)) * self.gamma).cuda()

        T = torch.FloatTensor(T).cuda().view(-1, NUM_TIMESTEP, 1)
        next_T = torch.FloatTensor(next_T).cuda().view(-1, NUM_TIMESTEP, 1)

        state = Variable(torch.cat([state, T],
                                   dim=1).view(-1, NUM_TIMESTEP + NUM_NODES,
                                               1),
                         requires_grad=True)
        new_state = Variable(torch.cat([next_state, next_T],
                                       dim=1).view(-1,
                                                   NUM_TIMESTEP + NUM_NODES,
                                                   1),
                             requires_grad=True)

        goal = Variable(torch.FloatTensor(goal).cuda().view(-1, NUM_NODES, 1),
                        requires_grad=True)
        next_goal = Variable(torch.FloatTensor(next_goal).cuda().view(
            -1, NUM_NODES, 1),
                             requires_grad=True)

        predict_state, value = self.HigherActor(state, goal)
        predict_next_state, next_value = self.HigherCriticor(
            new_state, next_goal)

        # `done` is assumed to already be a continuation mask (1 = non-terminal);
        # otherwise the bootstrap term would need scaling by (1 - done)
        expect_value = reward + torch.mul(torch.mul(GAMMA, next_value), done)
        TD_ERROR = expect_value - value
        predict_state = predict_state.view(-1, NUM_NODES, 1)
        target = torch.ones_like(reward).cuda()

        loss_value = self.loss_func_H(value, expect_value)
        loss_state = self.loss_func_L(predict_state, next_state, target)
        loss = loss_state * self.alpha + loss_value

        self.writer_H.add_scalar('loss/value_loss_higher', loss,
                                 self.update_count_H)
        self.writer_H.add_scalar('loss/value_loss_value', loss_value,
                                 self.update_count_H)
        self.writer_H.add_scalar('loss/value_loss_state', loss_state,
                                 self.update_count_H)

        self.optimizer_H.zero_grad()
        loss.backward()
        self.optimizer_H.step()
        self.update_count_H += 1

        # hard-update the target (critic) network every 50 updates
        if self.update_count_H % 50 == 0:
            self.HigherCriticor.load_state_dict(self.HigherActor.state_dict())

        return TD_ERROR

    def update_lower_actor(self, state_pri, order_num, idle_driver, action,
                           goal, state_higher, next_state_higher, adj,
                           loss_higher, batch_size, T):

        state_pri = self.state_reshape(state_pri)
        value_list = self.LowerActor(state_pri, order_num, goal, adj,
                                     T).permute(0, 1, 3, 2)

        # express both the achieved transition and the goal as state deltas
        next_state_higher = np.array(next_state_higher) - np.array(
            state_higher)
        goal = np.array(goal) - np.array(state_higher)

        next_state_higher_vec = Variable(
            torch.FloatTensor(next_state_higher).cuda().view(-1, NUM_NODES, 1),
            requires_grad=True)
        goal_vec = Variable(torch.FloatTensor(goal).cuda().view(
            -1, NUM_NODES, 1),
                            requires_grad=True)
        target = torch.ones_like(goal_vec).cuda()[:, 1, :].view(-1, 1)
        grad_loss = self.loss_func_L(next_state_higher_vec, goal_vec, target)

        # build a mask that selects the log-probability of each taken action
        action_space = []
        for batch in range(batch_size):
            action_space_batch = []
            action_batch = action[batch]
            for act_node in action_batch:
                action_space_node = np.zeros(NUM_AMBS * NUM_ACTIONS)
                for i in range(len(act_node)):
                    action_space_node[i * NUM_ACTIONS + act_node[i][0]] += 1
                action_space_node = [x == 1 for x in action_space_node]
                action_space_batch.append(action_space_node)
            action_space.append(action_space_batch)
        # note: newer PyTorch versions expect masks of dtype torch.bool here
        action_space = torch.ByteTensor(action_space).cuda()
        action_space = action_space.squeeze(2)

        log_action = torch.log(value_list).view(-1, NUM_NODES,
                                                NUM_AMBS * NUM_ACTIONS)

        # policy-gradient-style loss: weight each taken action's log-prob by
        # the higher-level TD error minus the goal-alignment penalty
        loss = -1 * (
            log_action.masked_select(action_space) *
            (loss_higher.detach() - self.lamda * grad_loss.detach())).mean()

        self.writer_L.add_scalar('loss/value_loss_lower', loss,
                                 self.update_count_L)
        self.writer_L.add_scalar('loss/value_loss_lower_TD',
                                 loss_higher.mean(), self.update_count_L)
        self.writer_L.add_scalar('loss/value_loss_lower_GRAD', grad_loss,
                                 self.update_count_L)

        self.optimizer_L.zero_grad()
        loss.backward()
        self.optimizer_L.step()
        self.update_count_L += 1
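
Both updates above pass a target tensor of ones as the third argument to self.loss_func_L, which matches the (input1, input2, target) signature of losses such as nn.CosineEmbeddingLoss; the listing never shows how loss_func_L is constructed. A minimal sketch of that directional goal loss, under the assumption that it is a cosine-embedding loss:

import torch
import torch.nn as nn

# Assumption: loss_func_L behaves like nn.CosineEmbeddingLoss; only its call
# sites appear in the listing above, not its construction.
loss_fn = nn.CosineEmbeddingLoss()

state_delta = torch.randn(4, 8)   # e.g. flattened s_{t+1} - s_t per sample
goal = torch.randn(4, 8)          # goal direction from the higher actor
target = torch.ones(4)            # +1 pulls the two directions together

# With target = +1 the loss is mean(1 - cos(state_delta, goal)), so minimizing
# it aligns the achieved state change with the emitted goal.
loss = loss_fn(state_delta, goal, target)
print(loss.item())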
def main():
  cfg = Config()

  # Redirect logs to both console and file.
  if cfg.log_to_file:
    ReDirectSTD(cfg.stdout_file, 'stdout', False)
    ReDirectSTD(cfg.stderr_file, 'stderr', False)

  # Lazily create SummaryWriter
  writer = None

  TVT, TMO = set_devices(cfg.sys_device_ids)

  if cfg.seed is not None:
    set_seed(cfg.seed)

  # Dump the configurations to log.
  import pprint
  print('-' * 60)
  print('cfg.__dict__')
  pprint.pprint(cfg.__dict__)
  print('-' * 60)

  ###########
  # Dataset #
  ###########

  train_set = create_dataset(**cfg.train_set_kwargs)

  test_sets = []
  test_set_names = []
  if cfg.dataset == 'combined':
    for name in ['market1501', 'cuhk03', 'duke']:
      cfg.test_set_kwargs['name'] = name
      test_sets.append(create_dataset(**cfg.test_set_kwargs))
      test_set_names.append(name)
  else:
    test_sets.append(create_dataset(**cfg.test_set_kwargs))
    test_set_names.append(cfg.dataset)

  ###########
  # Models  #
  ###########

  model = Model(local_conv_out_channels=cfg.local_conv_out_channels,
                num_classes=len(train_set.ids2labels))
  # Model wrapper
  model_w = DataParallel(model)

  #############################
  # Criteria and Optimizers   #
  #############################

  id_criterion = nn.CrossEntropyLoss()
  g_tri_loss = TripletLoss(margin=cfg.global_margin)
  l_tri_loss = TripletLoss(margin=cfg.local_margin)

  optimizer = optim.Adam(model.parameters(),
                         lr=cfg.base_lr,
                         weight_decay=cfg.weight_decay)

  # Bind them together just to save some code in the following usage.
  modules_optims = [model, optimizer]

  ################################
  # May Resume Models and Optims #
  ################################

  if cfg.resume:
    resume_ep, scores = load_ckpt(modules_optims, cfg.ckpt_file)

  # May transfer models and optimizers to the specified device. Transferring
  # the optimizer as well copes with loading a checkpoint onto a new device.
  TMO(modules_optims)

  ########
  # Test #
  ########

  def test(load_model_weight=False):
    if load_model_weight:
      if cfg.model_weight_file != '':
        map_location = (lambda storage, loc: storage)
        sd = torch.load(cfg.model_weight_file, map_location=map_location)
        load_state_dict(model, sd)
        print('Loaded model weights from {}'.format(cfg.model_weight_file))
      else:
        load_ckpt(modules_optims, cfg.ckpt_file)

    use_local_distance = (cfg.l_loss_weight > 0) \
                         and cfg.local_dist_own_hard_sample

    for test_set, name in zip(test_sets, test_set_names):
      test_set.set_feat_func(ExtractFeature(model_w, TVT))
      print('\n=========> Test on dataset: {} <=========\n'.format(name))
      test_set.eval(
        normalize_feat=cfg.normalize_feature,
        use_local_distance=use_local_distance)

  if cfg.only_test:
    test(load_model_weight=True)
    return

  ############
  # Training #
  ############

  start_ep = resume_ep if cfg.resume else 0
  for ep in range(start_ep, cfg.total_epochs):

    # Adjust Learning Rate
    if cfg.lr_decay_type == 'exp':
      adjust_lr_exp(
        optimizer,
        cfg.base_lr,
        ep + 1,
        cfg.total_epochs,
        cfg.exp_decay_at_epoch)
    else:
      adjust_lr_staircase(
        optimizer,
        cfg.base_lr,
        ep + 1,
        cfg.staircase_decay_at_epochs,
        cfg.staircase_decay_multiply_factor)

    may_set_mode(modules_optims, 'train')

    g_prec_meter = AverageMeter()
    g_m_meter = AverageMeter()
    g_dist_ap_meter = AverageMeter()
    g_dist_an_meter = AverageMeter()
    g_loss_meter = AverageMeter()

    l_prec_meter = AverageMeter()
    l_m_meter = AverageMeter()
    l_dist_ap_meter = AverageMeter()
    l_dist_an_meter = AverageMeter()
    l_loss_meter = AverageMeter()

    id_loss_meter = AverageMeter()

    loss_meter = AverageMeter()

    ep_st = time.time()
    step = 0
    epoch_done = False
    while not epoch_done:

      step += 1
      step_st = time.time()

      ims, im_names, labels, mirrored, epoch_done = train_set.next_batch()

      ims_var = Variable(TVT(torch.from_numpy(ims).float()))
      labels_t = TVT(torch.from_numpy(labels).long())
      labels_var = Variable(labels_t)

      global_feat, local_feat, logits = model_w(ims_var)

      g_loss, p_inds, n_inds, g_dist_ap, g_dist_an, g_dist_mat = global_loss(
        g_tri_loss, global_feat, labels_t,
        normalize_feature=cfg.normalize_feature)

      if cfg.l_loss_weight == 0:
        l_loss = 0
      elif cfg.local_dist_own_hard_sample:
        # Let local distance find its own hard samples.
        l_loss, l_dist_ap, l_dist_an, _ = local_loss(
          l_tri_loss, local_feat, None, None, labels_t,
          normalize_feature=cfg.normalize_feature)
      else:
        l_loss, l_dist_ap, l_dist_an = local_loss(
          l_tri_loss, local_feat, p_inds, n_inds, labels_t,
          normalize_feature=cfg.normalize_feature)

      id_loss = 0
      if cfg.id_loss_weight > 0:
        id_loss = id_criterion(logits, labels_var)

      loss = g_loss * cfg.g_loss_weight \
             + l_loss * cfg.l_loss_weight \
             + id_loss * cfg.id_loss_weight

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      ############
      # Step Log #
      ############

      # precision
      g_prec = (g_dist_an > g_dist_ap).data.float().mean()
      # the proportion of triplets that satisfy the margin
      g_m = (g_dist_an > g_dist_ap + cfg.global_margin).data.float().mean()
      g_d_ap = g_dist_ap.data.mean()
      g_d_an = g_dist_an.data.mean()

      g_prec_meter.update(g_prec)
      g_m_meter.update(g_m)
      g_dist_ap_meter.update(g_d_ap)
      g_dist_an_meter.update(g_d_an)
      g_loss_meter.update(to_scalar(g_loss))

      if cfg.l_loss_weight > 0:
        # precision
        l_prec = (l_dist_an > l_dist_ap).data.float().mean()
        # the proportion of triplets that satisfy the margin
        l_m = (l_dist_an > l_dist_ap + cfg.local_margin).data.float().mean()
        l_d_ap = l_dist_ap.data.mean()
        l_d_an = l_dist_an.data.mean()

        l_prec_meter.update(l_prec)
        l_m_meter.update(l_m)
        l_dist_ap_meter.update(l_d_ap)
        l_dist_an_meter.update(l_d_an)
        l_loss_meter.update(to_scalar(l_loss))

      if cfg.id_loss_weight > 0:
        id_loss_meter.update(to_scalar(id_loss))

      loss_meter.update(to_scalar(loss))

      if step % cfg.log_steps == 0:
        time_log = '\tStep {}/Ep {}, {:.2f}s'.format(
          step, ep + 1, time.time() - step_st, )

        if cfg.g_loss_weight > 0:
          g_log = (', gp {:.2%}, gm {:.2%}, '
                   'gd_ap {:.4f}, gd_an {:.4f}, '
                   'gL {:.4f}'.format(
            g_prec_meter.val, g_m_meter.val,
            g_dist_ap_meter.val, g_dist_an_meter.val,
            g_loss_meter.val, ))
        else:
          g_log = ''

        if cfg.l_loss_weight > 0:
          l_log = (', lp {:.2%}, lm {:.2%}, '
                   'ld_ap {:.4f}, ld_an {:.4f}, '
                   'lL {:.4f}'.format(
            l_prec_meter.val, l_m_meter.val,
            l_dist_ap_meter.val, l_dist_an_meter.val,
            l_loss_meter.val, ))
        else:
          l_log = ''

        if cfg.id_loss_weight > 0:
          id_log = (', idL {:.4f}'.format(id_loss_meter.val))
        else:
          id_log = ''

        total_loss_log = ', loss {:.4f}'.format(loss_meter.val)

        log = time_log + \
              g_log + l_log + id_log + \
              total_loss_log
        print(log)

    #############
    # Epoch Log #
    #############

    time_log = 'Ep {}, {:.2f}s'.format(ep + 1, time.time() - ep_st, )

    if cfg.g_loss_weight > 0:
      g_log = (', gp {:.2%}, gm {:.2%}, '
               'gd_ap {:.4f}, gd_an {:.4f}, '
               'gL {:.4f}'.format(
        g_prec_meter.avg, g_m_meter.avg,
        g_dist_ap_meter.avg, g_dist_an_meter.avg,
        g_loss_meter.avg, ))
    else:
      g_log = ''

    if cfg.l_loss_weight > 0:
      l_log = (', lp {:.2%}, lm {:.2%}, '
               'ld_ap {:.4f}, ld_an {:.4f}, '
               'lL {:.4f}'.format(
        l_prec_meter.avg, l_m_meter.avg,
        l_dist_ap_meter.avg, l_dist_an_meter.avg,
        l_loss_meter.avg, ))
    else:
      l_log = ''

    if cfg.id_loss_weight > 0:
      id_log = (', idL {:.4f}'.format(id_loss_meter.avg))
    else:
      id_log = ''

    total_loss_log = ', loss {:.4f}'.format(loss_meter.avg)

    log = time_log + \
          g_log + l_log + id_log + \
          total_loss_log
    print(log)

    # Log to TensorBoard

    if cfg.log_to_file:
      if writer is None:
        writer = SummaryWriter(log_dir=osp.join(cfg.exp_dir, 'tensorboard'))
      writer.add_scalars(
        'loss',
        dict(global_loss=g_loss_meter.avg,
             local_loss=l_loss_meter.avg,
             id_loss=id_loss_meter.avg,
             loss=loss_meter.avg, ),
        ep)
      writer.add_scalars(
        'tri_precision',
        dict(global_precision=g_prec_meter.avg,
             local_precision=l_prec_meter.avg, ),
        ep)
      writer.add_scalars(
        'satisfy_margin',
        dict(global_satisfy_margin=g_m_meter.avg,
             local_satisfy_margin=l_m_meter.avg, ),
        ep)
      writer.add_scalars(
        'global_dist',
        dict(global_dist_ap=g_dist_ap_meter.avg,
             global_dist_an=g_dist_an_meter.avg, ),
        ep)
      writer.add_scalars(
        'local_dist',
        dict(local_dist_ap=l_dist_ap_meter.avg,
             local_dist_an=l_dist_an_meter.avg, ),
        ep)

    # save ckpt
    if cfg.log_to_file:
      save_ckpt(modules_optims, ep + 1, 0, cfg.ckpt_file)

  ########
  # Test #
  ########

  test(load_model_weight=False)
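
The epoch loop above calls adjust_lr_exp and adjust_lr_staircase, neither of which is defined in this listing. A minimal sketch of the exponential variant, assuming the common scheme of holding base_lr until exp_decay_at_epoch and then decaying it smoothly to roughly base_lr * 1e-3 at the final epoch:

def adjust_lr_exp_sketch(optimizer, base_lr, ep, total_ep, start_decay_at_ep):
  # Hypothetical helper: hold base_lr for the first start_decay_at_ep epochs,
  # then decay exponentially so the lr shrinks ~1000x by the last epoch.
  if ep < start_decay_at_ep:
    lr = base_lr
  else:
    lr = base_lr * (0.001 ** (float(ep + 1 - start_decay_at_ep)
                              / (total_ep + 1 - start_decay_at_ep)))
  for g in optimizer.param_groups:
    g['lr'] = lr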
Beispiel #58
0
def test(env):

    n_test_round = 20
    step_higher = 12

    model_path = "./trained_model/model_400_50_12checkpoint.pth"
    writer = SummaryWriter(
        './logging/400_500_12/reward/test/' +
        time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(time.time())))
    model = H2C(nfeat=128,
                nhid=100,
                nout=100,
                dropout=0.1,
                alpha=0.05,
                nheads=2,
                cuda=True,
                learning_rate_H=0.005,
                learning_rate_L=0.01,
                momentum=0.78,
                gamma=0.8)
    model.load_model(model_path=model_path)
    public_state_last_round = np.random.randint(1, 20, size=(NUM_TIMESTEP))

    for j in range(n_test_round):

        print("round {}".format(j))

        env.reset()
        env.step_ini()
        state_private = env.get_observation_private()
        state_statistic = env.get_observation_statistic()

        select_higher_action_T = [
            env.StatetoOneHot([i], NUM_TIMESTEP) for i in range(NUM_TIMESTEP)
        ]

        for i in range(NUM_TIMESTEP - 1):
            # print(state_private)
            if i % 30 == 0:
                print("step {}".format(i))

            T = select_higher_action_T[i]
            next_T = select_higher_action_T[i + 1]

            if i % step_higher == 0:
                goal = model.HigherActor.chosen_goals[i // step_higher]

            joint_action = model.select_lower_action(
                state_private['state_pri'],
                state_private['order_num'],
                env.adj,
                state_private['idle_driver'],
                goal,
                j,
                online_times=0,
                flag=False,
                T=T)

            reward, next_state_pri, next_state_sta, done = env.step(
                joint_action)
            state_private = next_state_pri
            state_statistic = next_state_sta

        whole_reward = sum(env.final_response_rate['finish']) / sum(
            env.final_response_rate['all'])
        writer.add_scalar('Whole_value/reward', whole_reward, j)
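
The test loop above precomputes a one-hot encoding of every timestep with env.StatetoOneHot, whose implementation is not part of this listing. A minimal sketch of what such an encoder could look like (the helper name and the NUM_TIMESTEP value here are placeholders):

import numpy as np

NUM_TIMESTEP = 144  # placeholder; the listing defines this constant elsewhere

def state_to_one_hot_sketch(indices, size):
    # hypothetical stand-in for env.StatetoOneHot: a vector of length `size`
    # with ones at the given indices (one-hot for a single index)
    vec = np.zeros(size, dtype=np.float32)
    vec[indices] = 1.0
    return vec

# mirrors select_higher_action_T in the test() above
T_table = [state_to_one_hot_sketch([i], NUM_TIMESTEP)
           for i in range(NUM_TIMESTEP)]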
    train_obs_v = torch.FloatTensor(train_obs)
    train_act_v = torch.LongTensor(train_act)
    return train_obs_v, train_act_v, reward_bound, reward_mean


if __name__ == "__main__":
    env = DiscreteOneHotWrapper(gym.make("FrozenLake-v0"))
    # env = gym.wrappers.Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    net = Net(obs_size, HIDDEN_SIZE, n_actions)
    objective = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=net.parameters(), lr=0.01)
    writer = SummaryWriter(comment="-frozenlake-naive")

    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        obs_v, acts_v, reward_b, reward_m = filter_batch(batch, PERCENTILE)
        optimizer.zero_grad()
        action_scores_v = net(obs_v)
        loss_v = objective(action_scores_v, acts_v)
        loss_v.backward()
        optimizer.step()
        print("%d: loss=%.3f, reward_mean=%.1f, reward_bound=%.1f" % (
            iter_no, loss_v.item(), reward_m, reward_b))
        writer.add_scalar("loss", loss_v.item(), iter_no)
        writer.add_scalar("reward_bound", reward_b, iter_no)
        writer.add_scalar("reward_mean", reward_m, iter_no)
        if reward_m > 0.8:
            print("Solved!")
            break
    writer.close()
def main():
    env = gym.make(env_name)
    env.seed(500)
    torch.manual_seed(500)

    # num_inputs = env.observation_space.shape[0]
    num_inputs = 2  # partial observability: keep only 2 of the 4 state dims
    num_actions = env.action_space.n
    print('state size:', num_inputs)
    print('action size:', num_actions)

    online_net = DRQN(num_inputs, num_actions)
    target_net = DRQN(num_inputs, num_actions)
    update_target_model(online_net, target_net)

    optimizer = optim.Adam(online_net.parameters(), lr=lr)
    N_EPISODES = 5000
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, N_EPISODES)
    writer = SummaryWriter('logs')

    online_net.to(device)
    target_net.to(device)
    online_net.train()
    target_net.train()
    memory = Memory(replay_memory_capacity)
    running_score = 0
    epsilon = 1.0
    steps = 0
    loss = 0

    for e in range(N_EPISODES):
        done = False

        score = 0
        state = env.reset()
        state = state_to_partial_observability(state)
        state = torch.Tensor(state).to(device)

        hidden = None

        while not done:
            steps += 1

            # print(state.type(), hidden)
            action, hidden = get_action(state, target_net, epsilon, env,
                                        hidden)
            next_state, reward, done, _ = env.step(action)

            next_state = state_to_partial_observability(next_state)
            next_state = torch.Tensor(next_state).to(device)

            mask = 0 if done else 1
            reward = reward if not done or score == 499 else -1

            memory.push(state, next_state, action, reward, mask)

            score += reward
            state = next_state

            if steps > initial_exploration and len(memory) > batch_size:
                epsilon -= 0.00005
                epsilon = max(epsilon, 0.01)

                batch = memory.sample(batch_size)
                loss = DRQN.train_model(online_net, target_net, optimizer,
                                        batch)

                if steps % update_target == 0:
                    update_target_model(online_net, target_net)
                # scheduler.step()

        score = score if score == 500.0 else score + 1
        if running_score == 0:
            running_score = score
        else:
            running_score = 0.99 * running_score + 0.01 * score
        if e % log_interval == 0:
            print('{} episode | score: {:.2f} | loss: {:.5f} | epsilon: {:.2f}'
                  .format(e, running_score, loss, epsilon))
            writer.add_scalar('log/score', float(running_score), e)
            writer.add_scalar('log/loss', float(loss), e)

        if running_score > goal_score:
            break
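
The DRQN loop above uses update_target_model and get_action without defining them. A minimal sketch under the usual conventions; the recurrent network's (state, hidden) -> (q_values, hidden) forward signature is an assumption:

import random

def update_target_model_sketch(online_net, target_net):
    # hard update: copy the online network's weights into the target network
    target_net.load_state_dict(online_net.state_dict())

def get_action_sketch(state, net, epsilon, env, hidden):
    # epsilon-greedy over a recurrent Q-network; the hidden state is threaded
    # through so the policy can condition on the observation history
    q_values, hidden = net(state.unsqueeze(0).unsqueeze(0), hidden)
    if random.random() <= epsilon:
        return env.action_space.sample(), hidden
    return q_values.squeeze().argmax().item(), hidden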