Exemple #1
0
class LogROCCallback(object):
    """Publish per-class ROC curve images to TensorBoard.

    The ROC images must already exist on disk (they are produced by
    'eval_metric.py'); this callback only reads each 'roc_<class>.png'
    and appends it to the event file.

    Parameters
    ----------
    logging_dir : str
        Directory where the TensorBoard event file is created.
    prefix : str
        Tag prefix for every logged image.
    roc_path : str
        Directory holding the ROC image files.
    class_names : list[str]
        Names of the classes whose ROC images are logged.
    """

    def __init__(self, logging_dir=None, prefix='val', roc_path=None,
                 class_names=None):
        self.prefix = prefix
        self.roc_path = roc_path
        self.class_names = class_names
        try:
            # Imported lazily so construction merely logs a hint when the
            # tensorboard package is missing.
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Add each class's ROC image to TensorBoard, skipping absent files."""
        for class_name in self.class_names:
            image_file = os.path.join(self.roc_path,
                                      'roc_' + class_name + '.png')
            if not os.path.exists(image_file):
                continue
            self.summary_writer.add_image(self.prefix + '_' + class_name,
                                          scipy.misc.imread(image_file))
Exemple #2
0
class LogDistributionsCallback(object):
    """Log weight distributions to TensorBoard (deprecated).

    Deprecated because it consumes too much time; the faster way is to use
    "ParseLogCallback" with an 'iter_monitor' flag.

    This callback works almost the same as `callback.Speedometer`, but
    writes a TensorBoard event file for visualization.

    Parameters
    ----------
    logging_dir : str
        Directory where the TensorBoard event file is created.
    prefix : str
        Optional prefix prepended to every histogram tag.
    layers_list : list[str]
        Layers to be tracked; when None, nothing is logged.
    """
    def __init__(self, logging_dir, prefix=None, layers_list=None):
        self.prefix = prefix
        self.layers_list = layers_list
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log layers' distributions in TensorBoard."""
        if param.locals is None:
            return
        # TODO - implement layer to choose from..
        # Loop-invariant guard hoisted out of the loop: without a layer
        # list there is nothing to log at all (original skipped every
        # iteration one by one).
        if self.layers_list is None:
            return
        # .items() replaces the Python-2-only .iteritems().
        for name, value in param.locals['arg_params'].items():
            if self.prefix is not None:
                name = '%s-%s' % (self.prefix, name)
            self.summary_writer.add_histogram(name, value.asnumpy().flatten())
class LogROCCallback(object):
    """Save ROC graphs periodically in TensorBoard.

    Writes a TensorBoard event file holding the ROC graph of every epoch.
    Can only run after 'eval_metric.py', which is responsible for creating
    the graph images on disk.

    Parameters
    ----------
    logging_dir : str
        Where the TensorBoard event file will be created.
    prefix : str
        Tag prefix for the logged images.
    roc_path : str
        Directory containing the 'roc_<class_name>.png' files.
    class_names : list[str]
        List of class names.
    """

    def __init__(self, logging_dir=None, prefix='val', roc_path=None, class_names=None):
        self.prefix = prefix
        self.roc_path = roc_path
        self.class_names = class_names
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Log one ROC image per class, ignoring images that do not exist."""
        for class_name in self.class_names:
            graph_file = os.path.join(self.roc_path, 'roc_' + class_name + '.png')
            if os.path.exists(graph_file):
                image = scipy.misc.imread(graph_file)
                self.summary_writer.add_image(self.prefix + '_' + class_name, image)
class LogDistributionsCallback(object):
    """Log weight distributions to TensorBoard (deprecated).

    Deprecated because it consumes too much time; the faster way is to use
    "ParseLogCallback" with an 'iter_monitor' flag.

    Works almost like `callback.Speedometer`, but writes a TensorBoard
    event file for visualization.

    Parameters
    ----------
    logging_dir : str
        Directory where the TensorBoard event file is created.
    prefix : str
        Optional prefix prepended to every histogram tag.
    layers_list : list[str]
        Layers to be tracked; when None, nothing is logged.
    """
    def __init__(self, logging_dir, prefix=None, layers_list=None):
        self.prefix = prefix
        self.layers_list = layers_list
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log layers' distributions in TensorBoard."""
        if param.locals is None:
            return
        # TODO - implement layer to choose from..
        # Invariant check hoisted out of the loop; nothing is logged
        # without a layer list (original skipped every item one by one).
        if self.layers_list is None:
            return
        # .items() replaces the Python-2-only .iteritems().
        for name, value in param.locals['arg_params'].items():
            if self.prefix is not None:
                name = '%s-%s' % (self.prefix, name)
            self.summary_writer.add_histogram(name, value.asnumpy().flatten())
Exemple #5
0
 def __init__(self, logging_dir, prefix=None):
     """Store the tag prefix and open a TensorBoard event writer.

     Parameters
     ----------
     logging_dir : str
         Directory where the TensorBoard event file is created.
     prefix : str, optional
         Prefix prepended to metric names when logging.
     """
     self.prefix = prefix
     try:
         # Lazy import: construction still succeeds (with a logged hint)
         # when the tensorboard package is missing.
         from tensorboard import SummaryWriter
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         # self.summary_writer stays unset on failure; later uses of it
         # will raise AttributeError.
         logging.error('You can install tensorboard via `pip install tensorboard`.')
class ParseLogCallback(object):
    """Parse a monitor log file and stream its contents to TensorBoard.

    1. Logs monitored layers' values as histograms. MXNet's "monitor"
       module prints the monitored layers to a log file (their values are
       computed asynchronously); this callback re-reads that file every
       `iter_monitor` batches and forwards new 'Batch' lines.
    2. Logs the training loss as a scalar every `frequent` batches.

    Currently does not support resuming training.

    Parameters
    ----------
    dist_logging_dir : str
        Event-file directory for the histogram (distribution) writer.
    scalar_logging_dir : str
        Event-file directory for the scalar writer.
    logfile_path : str
        Path of the monitor log file to parse.
    batch_size : int
        Stored for callers; not used inside this class.
    iter_monitor : int
        Parse the log file every `iter_monitor` batches; 0 disables it.
    frequent : int
        Log the training loss every `frequent` batches; None disables it.
    prefix : str
        Prefix prepended to scalar metric names.
    """
    def __init__(self, dist_logging_dir=None, scalar_logging_dir=None,
                 logfile_path=None, batch_size=None, iter_monitor=0,
                 frequent=None, prefix='ssd'):
        self.scalar_logging_dir = scalar_logging_dir
        self.dist_logging_dir = dist_logging_dir
        self.logfile_path = logfile_path
        self.batch_size = batch_size
        self.iter_monitor = iter_monitor
        self.frequent = frequent
        self.prefix = prefix
        self.batch = 0
        self.line_idx = 0
        try:
            from tensorboard import SummaryWriter
            self.dist_summary_writer = SummaryWriter(dist_logging_dir)
            self.scalar_summary_writer = SummaryWriter(scalar_logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to parse a log file and add params to TensorBoard."""
        # Save distributions from the monitor output log; lines consumed on
        # previous calls are skipped via self.line_idx.
        if self.iter_monitor != 0 and self.batch % self.iter_monitor == 0:
            with open(self.logfile_path) as fp:
                for _ in range(self.line_idx):
                    next(fp)  # py3 built-in next(); fp.next() is py2-only
                for line in fp:
                    if line.startswith('Batch'):
                        fields = [x for x in line.split(' ') if x]
                        layer_name = fields[2]
                        layer_value = np.array(
                            float(fields[3].split('\t')[0])).flatten()
                        if np.isfinite(layer_value):
                            self.dist_summary_writer.add_histogram(
                                layer_name, layer_value)
                    self.line_idx += 1

        # Save training loss. The `frequent is not None` guard avoids a
        # TypeError on the modulo with the default frequent=None (scalar
        # logging is simply disabled in that case).
        if self.frequent is not None and self.batch % self.frequent == 0:
            if param.eval_metric is None:
                return
            name_value = param.eval_metric.get_name_value()
            for name, value in name_value:
                if self.prefix is not None:
                    name = '%s-%s' % (self.prefix, name)
                self.scalar_summary_writer.add_scalar(
                    name, value, global_step=self.batch)
        self.batch += 1
 def __init__(self, logging_dir, score_store=False, prefix=None):
     """Initialize step counter, score-store flag and a TensorBoard writer.

     Parameters
     ----------
     logging_dir : str
         Directory where the TensorBoard event file is created.
     score_store : bool
         Flag stored for later use (its semantics are not visible in
         this chunk).
     prefix : str, optional
         Prefix prepended to logged tag names.
     """
     self.prefix = prefix
     self.step = 0
     self.score_store = score_store
     try:
         # NOTE(review): unlike the sibling __init__s there is no local
         # `from tensorboard import SummaryWriter` here; if SummaryWriter
         # is a module-level name, a missing package surfaces as NameError
         # and this ImportError handler never fires -- confirm the
         # module-level import exists.
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         logging.error(
             'You can install tensorboard via `pip install tensorboard`.')
Exemple #8
0
 def __init__(self, logging_dir=None, prefix='val', roc_path=None, class_names=None):
     """Record ROC-logging settings and open a TensorBoard writer.

     Parameters
     ----------
     logging_dir : str
         Directory where the TensorBoard event file is created.
     prefix : str
         Tag prefix used when logging.
     roc_path : str
         Directory expected to contain the ROC image files.
     class_names : list[str]
         Class names whose ROC graphs will be logged.
     """
     self.prefix = prefix
     self.roc_path = roc_path
     self.class_names = class_names
     try:
         # Lazy import: construction succeeds (with a logged hint) even
         # when the tensorboard package is not installed.
         from tensorboard import SummaryWriter
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         logging.error('You can install tensorboard via `pip install tensorboard`.')
Exemple #9
0
class LogMetricsCallback(object):
    """Log metrics periodically in TensorBoard.

    Works much like `callback.Speedometer`, but writes a TensorBoard event
    file for visualization. For more usage see
    https://github.com/dmlc/tensorboard

    Parameters
    ----------
    logging_dir : str
        TensorBoard event file directory. Afterwards, run
        `tensorboard --logdir=path/to/logs` to launch the visualization.
    prefix : str
        Prefix for a metric name of `scalar` value. Useful to keep train
        and eval curves apart when they share a metric name, since
        TensorBoard overlays curves with the same tag.

    Examples
    --------
    >>> # log train and eval metrics under different directories.
    >>> training_log = 'logs/train'
    >>> evaluation_log = 'logs/eval'
    >>> # each train/eval metric pair shares a name; add a prefix to split them.
    >>> batch_end_callbacks = [mx.contrib.tensorboard.LogMetricsCallback(training_log)]
    >>> eval_end_callbacks = [mx.contrib.tensorboard.LogMetricsCallback(evaluation_log)]
    >>> model.fit(train,
    >>>     ...
    >>>     batch_end_callback = batch_end_callbacks,
    >>>     eval_end_callback  = eval_end_callbacks)
    >>> # Then use `tensorboard --logdir=logs/` to launch TensorBoard.
    """

    def __init__(self, logging_dir, prefix=None):
        self.prefix = prefix
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log training speed and metrics in TensorBoard."""
        if param.eval_metric is None:
            return
        for name, value in param.eval_metric.get_name_value():
            tag = name if self.prefix is None else '%s-%s' % (self.prefix, name)
            self.summary_writer.add_scalar(tag, value, global_step=param.epoch)
Exemple #10
0
class LogMetricsCallback(object):
    """Log metrics periodically in TensorBoard.

    Works much like `callback.Speedometer`, but writes a TensorBoard event
    file for visualization. For more usage see
    https://github.com/dmlc/tensorboard

    Parameters
    ----------
    logging_dir : str
        TensorBoard event file directory. Afterwards, run
        `tensorboard --logdir=path/to/logs` to launch the visualization.
    prefix : str
        Prefix for a metric name of `scalar` value; lets TensorBoard keep
        train and eval curves apart when they share a metric name.

    Examples
    --------
    >>> # log train and eval metrics under different directories.
    >>> training_log = 'logs/train'
    >>> evaluation_log = 'logs/eval'
    >>> # each train/eval metric pair shares a name; add a prefix to split them.
    >>> batch_end_callbacks = [mx.contrib.tensorboard.LogMetricsCallback(training_log)]
    >>> eval_end_callbacks = [mx.contrib.tensorboard.LogMetricsCallback(evaluation_log)]
    >>> model.fit(train,
    >>>     ...
    >>>     batch_end_callback = batch_end_callbacks,
    >>>     eval_end_callback  = eval_end_callbacks)
    >>> # Then use `tensorboard --logdir=logs/` to launch TensorBoard.
    """

    def __init__(self, logging_dir, prefix=None):
        self.prefix = prefix
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log training speed and metrics in TensorBoard."""
        if param.eval_metric is None:
            return
        # No global_step is passed here: the writer assigns steps itself.
        for name, value in param.eval_metric.get_name_value():
            tag = name if self.prefix is None else '%s-%s' % (self.prefix, name)
            self.summary_writer.add_scalar(tag, value)
Exemple #11
0
def main():
    """Train a uNet segmentation model on the car dataset.

    Builds the data loader, model and optimizer from module-level config
    (`args`, ROOT/TRAIN/MASK, NUM_CLASS), optionally resumes from a
    checkpoint, then runs the epoch loop and checkpoints the best loss.
    """
    # pin_memory/num_workers only make sense when transferring to GPU.
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    CarSet = CarDataSet(ROOT, TRAIN, MASK)
    # split train val 
    # train_idx, valid_idx = augmented_train_valid_split(CarSet, test_size = 0.15,shuffle = True ,random_seed=args.seed)
    # train_sampler = SubsetRandomSampler(train_idx)
    # val_samper = SubsetRandomSampler(valid_idx)
    
    train_loader = DataLoader(CarSet,
                            #   sampler=train_sampler,
                              shuffle=True,
                              batch_size=args.batch_size,
                              **kwargs)
    # val_loader = DataLoader(CarSet,
    #                         sampler=val_samper,
    #                         batch_size=2,
    #                         **kwargs)
    model = uNet(NUM_CLASS)
    if args.cuda:
        model.cuda()
    optimizer=optim.Adam(model.parameters(),lr=args.lr,betas=(0.9, 0.999))
    # One TensorBoard run directory per day (e.g. 'logs/August-14').
    writer=SummaryWriter('logs/'+datetime.now().strftime('%B-%d'))
    best_loss=1e+5
    iters=0
    # resume training 
    if args.resume:
        model,optimizer,args.start_epoch,best_loss,iters = resume(args.resume,model)

    for epoch in range(args.start_epoch ,args.epochs):
        adjust_lr(optimizer,epoch,decay=5)
        t1=time.time()  # NOTE(review): t1 is never read afterwards
        loss, iters = train(epoch,
                            model,
                            optimizer,
                            train_loader,
                            writer,
                            iters)
        is_best = loss < best_loss
        best_loss = min(best_loss, loss)
        # NOTE(review): the optimizer object itself is checkpointed here,
        # not optimizer.state_dict() -- confirm resume() expects that.
        state={
            'epoch':epoch,
            'state_dict':model.state_dict(),
            'optimizer':optimizer,
            'loss':best_loss,
            'iters': iters,
        }
        save_checkpoint(state, is_best)
    writer.close()
Exemple #12
0
    def add_loss(self, prefix, **losses):
        """Log each named loss value as a scalar under tag `prefix`.

        A separate SummaryWriter (with its own subdirectory under
        self.log_dir) is created lazily per loss name, so every loss
        gets its own event file; the step comes from self.next().
        """
        for name, value in losses.items():
            if name not in self.writers:
                # Renamed from `dir`, which shadowed the builtin.
                writer_dir = os.path.join(self.log_dir, name)
                self.writers[name] = SummaryWriter(writer_dir)

            self.writers[name].add_scalar(prefix, value, self.next())
Exemple #13
0
 def __init__(self, logging_dir, prefix=None):
     """Store the metric-name prefix and open a TensorBoard event writer.

     Parameters
     ----------
     logging_dir : str
         Directory where the TensorBoard event file is created.
     prefix : str, optional
         Prefix prepended to metric names when logging.
     """
     self.prefix = prefix
     try:
         # Lazy import keeps construction working (with a logged hint)
         # when the tensorboard package is missing.
         from tensorboard import SummaryWriter
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         logging.error('You can install tensorboard via `pip install tensorboard`.')
Exemple #14
0
def main():
    """Train an autoencoder on the dsprites dataset and log to TensorBoard."""

    # parameters
    epoch_num = 60
    batch_size = 128
    ana_freq = 10   # run analysis every `ana_freq` epochs
    gpu = -1        # -1 selects CPU

    # set logger
    logging.config.fileConfig('./log/log.conf')
    logger = getLogger(__name__)

    logger.info('file = {}'.format(__file__))
    logger.info('epoch_num = {}'.format(epoch_num))
    logger.info('batch_size = {}'.format(batch_size))
    logger.info('ana_freq = {}'.format(ana_freq))
    logger.info('gpu = {}'.format(gpu))

    # set writer: one timestamped TensorBoard run directory per invocation
    writer = SummaryWriter('results/' + datetime.now().strftime('%B%d  %H:%M:%S'))

    # read data (hard-coded local path; alternatives kept below)
    data_obj = dataset.data_dsprites.DspritesDataset(db_path='/Users/yamada/lab/dat/dsprites')
    # data_obj = dataset.data_celeba.CelebADataset(db_path='./dataset/celebA', data_size=1000)
    # data_obj = dataset.data_mnist.MnistDataset()
    data_obj.train_size = 200  # adjust train data size for speed
    data_obj.test_size = 20

    # model and optimizer
    model = ae.AE(data_obj)
    opt = chainer.optimizers.Adam()

    trainer = Trainer(model=model, optimizer=opt, writer=writer, gpu=gpu)
    trainer.fit(data_obj, epoch_num=epoch_num, batch_size=batch_size, ana_freq=ana_freq)
Exemple #15
0
class Logger():
    """Thin SummaryWriter wrapper with an auto-incrementing index per key."""

    def __init__(self, root):
        self.writer = SummaryWriter(root)
        self.last_indexes = defaultdict(int)

    def scalar(self, key, value, index=None):
        """Log one scalar; with index=None, use (and advance) the key's counter.

        Note: the counter is advanced even when an explicit index is given.
        """
        if index is None:
            index = self.last_indexes[key]
        self.last_indexes[key] += 1
        self.writer.add_scalar(key, to_numeric(value), index)

    def from_stats(self, key_value_dictionary, index=None):
        """Log every (key, value) pair of a stats dictionary."""
        for key, value in key_value_dictionary.items():
            self.scalar(key, value, index)
Exemple #16
0
    def __init__(self, logging_dir=None, prefix='val', images_path=None,
                 class_names=None, batch_size=None, mean_pixels=None, det_thresh=0.5):
        """Set up detection-visualization state and a TensorBoard writer.

        Parameters
        ----------
        logging_dir : str
            Directory where the TensorBoard event file is created.
        prefix : str
            Tag prefix for logged entries.
        images_path : str
            Directory for images; created here if it does not exist.
        class_names : list[str]
            Class names used to label detections.
        batch_size : int
            Batch size of the evaluated data; stored for later use.
        mean_pixels : sequence
            Mean pixel values, stored for later use (exact layout is not
            visible in this chunk -- confirm against the caller).
        det_thresh : float
            Minimum detection score, stored for later filtering.
        """

        self.logging_dir = logging_dir
        self.prefix = prefix
        # os.mkdir (not makedirs): fails if the parent directory is missing.
        if not os.path.exists(images_path):
            os.mkdir(images_path)
        self.images_path = images_path
        self.class_names = class_names
        self.batch_size = batch_size
        self.mean_pixels = mean_pixels
        self.det_thresh = det_thresh
        try:
            # Lazy import so a missing tensorboard package only logs a hint.
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')
 def __init__(self, dist_logging_dir=None, scalar_logging_dir=None,
              logfile_path=None, batch_size=None, iter_monitor=0,
              frequent=None, prefix='ssd'):
     """Store log-parsing state and open two TensorBoard writers.

     Parameters
     ----------
     dist_logging_dir : str
         Event-file directory for histogram (distribution) summaries.
     scalar_logging_dir : str
         Event-file directory for scalar summaries.
     logfile_path : str
         Path of the monitor log file that will be parsed.
     batch_size : int
         Batch size; stored for use elsewhere.
     iter_monitor : int
         Parse the log every `iter_monitor` batches (0 disables).
     frequent : int
         Scalar-logging interval in batches.
     prefix : str
         Prefix prepended to scalar metric names.
     """
     self.scalar_logging_dir = scalar_logging_dir
     self.dist_logging_dir = dist_logging_dir
     self.logfile_path = logfile_path
     self.batch_size = batch_size
     self.iter_monitor = iter_monitor
     self.frequent = frequent
     self.prefix = prefix
     # Running counters: batches seen and log-file lines already consumed.
     self.batch = 0
     self.line_idx = 0
     try:
         from tensorboard import SummaryWriter
         self.dist_summary_writer = SummaryWriter(dist_logging_dir)
         self.scalar_summary_writer = SummaryWriter(scalar_logging_dir)
     except ImportError:
         logging.error('You can install tensorboard via `pip install tensorboard`.')
 def __init__(self, logging_dir=None, prefix='val', roc_path=None, class_names=None):
     """Record ROC-logging settings and open a TensorBoard writer.

     Parameters
     ----------
     logging_dir : str
         Directory where the TensorBoard event file is created.
     prefix : str
         Tag prefix used when logging.
     roc_path : str
         Directory expected to contain the ROC image files.
     class_names : list[str]
         Class names whose ROC graphs will be logged.
     """
     self.prefix = prefix
     self.roc_path = roc_path
     self.class_names = class_names
     try:
         # Lazy import so a missing tensorboard package only logs a hint.
         from tensorboard import SummaryWriter
         self.summary_writer = SummaryWriter(logging_dir)
     except ImportError:
         logging.error('You can install tensorboard via `pip install tensorboard`.')
Exemple #19
0
 def __init__(self, dist_logging_dir=None, scalar_logging_dir=None,
              logfile_path=None, batch_size=None, iter_monitor=0,
              frequent=None, prefix='ssd'):
     """Store log-parsing configuration and open two TensorBoard writers.

     Parameters
     ----------
     dist_logging_dir : str
         Event-file directory for histogram (distribution) summaries.
     scalar_logging_dir : str
         Event-file directory for scalar summaries.
     logfile_path : str
         Path of the monitor log file that will be parsed.
     batch_size : int
         Batch size; stored for use elsewhere.
     iter_monitor : int
         Parse the log every `iter_monitor` batches (0 disables).
     frequent : int
         Scalar-logging interval in batches.
     prefix : str
         Prefix prepended to scalar metric names.
     """
     self.scalar_logging_dir = scalar_logging_dir
     self.dist_logging_dir = dist_logging_dir
     self.logfile_path = logfile_path
     self.batch_size = batch_size
     self.iter_monitor = iter_monitor
     self.frequent = frequent
     self.prefix = prefix
     # Running counters: batches seen and log-file lines already consumed.
     self.batch = 0
     self.line_idx = 0
     try:
         from tensorboard import SummaryWriter
         self.dist_summary_writer = SummaryWriter(dist_logging_dir)
         self.scalar_summary_writer = SummaryWriter(scalar_logging_dir)
     except ImportError:
         logging.error('You can install tensorboard via `pip install tensorboard`.')
class LogMetricsCallback(object):
    """Log (name, value) metric pairs to TensorBoard, one step per call."""

    def __init__(self, logging_dir, prefix=None):
        self.prefix = prefix
        self.itr = 0  # internal step counter, advanced once per call
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, name_value):
        """Callback to log training speed and metrics in TensorBoard."""
        if name_value is None:
            return
        for name, value in name_value:
            tag = name if self.prefix is None else '%s-%s' % (self.prefix, name)
            self.summary_writer.add_scalar(tag, value, self.itr)
        self.itr += 1
Exemple #21
0
def main():
    """Set up an A3C agent for CartPole, dump the network graph, then train."""

    env_name = 'CartPole-v0'
    env = gym.make(env_name)
    action_space = env.action_space.n
    observation_space = env.observation_space.low.shape
    # set logger
    logging.config.fileConfig('./log/log.conf')
    logger = logging.getLogger(__name__)
    logger.info('START')

    # set network model (shared across the async workers)
    shared_model = A3CFFSoftmaxFFF(observation_space, action_space)
    # set optimizer
    opt = RMSpropAsync(lr=LEARNING_RATE , alpha=0.99 , eps=RMSPROP_EPS)
    opt.setup(shared_model)
    opt.add_hook(chainer.optimizer.GradientClipping(40))

    # Run one forward pass just to record the computation graph, then close
    # the writer before training starts.
    writer = SummaryWriter('results/' + datetime.datetime.now().strftime('%B%d  %H:%M:%S'))
    state = env.reset()
    state = chainer.Variable(np.expand_dims(np.array(state).astype(np.float32), axis=0))
    pi, v = shared_model.get_pi_and_v(state)
    writer.add_graph([pi, v])
    writer.close()

    async_train(env_name, shared_model, opt, phi)

    logger.info('END')
Exemple #22
0
def build_report_manager(opt):
    """Build a ReportMgr, attaching a timestamped TensorBoard writer if enabled."""
    writer = None
    if opt.tensorboard:
        # Imported lazily so tensorboard is only required when requested.
        from tensorboard import SummaryWriter
        log_dir = opt.tensorboard_log_dir + \
            datetime.now().strftime("/%b-%d_%H-%M-%S")
        writer = SummaryWriter(log_dir, comment="Unmt")

    return ReportMgr(opt.report_every,
                     start_time=-1,
                     tensorboard_writer=writer)
Exemple #23
0
class LogMetricsCallback(object):
    """Log metrics periodically in TensorBoard.

    Works much like `callback.Speedometer`, but writes a TensorBoard event
    file for visualization. For more usage see
    https://github.com/dmlc/tensorboard

    Parameters
    ----------
    logging_dir : str
        TensorBoard event file directory. Afterwards, run
        `tensorboard --logdir=path/to/logs` to launch the visualization.
    prefix : str
        Prefix for a metric name of `scalar` value; lets TensorBoard keep
        train and eval curves apart when they share a metric name.
    global_step : int
        Step value attached to every logged scalar.
    """

    def __init__(self, logging_dir, prefix=None, global_step=100):
        self.prefix = prefix
        self.global_step = global_step
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log training speed and metrics in TensorBoard."""
        if param.eval_metric is None:
            return
        for name, value in param.eval_metric.get_name_value():
            tag = name if self.prefix is None else '%s-%s' % (self.prefix, name)
            # NOTE(review): self.global_step is fixed at construction, so
            # every call logs at the same step -- confirm this is intended.
            self.summary_writer.add_scalar(tag, value,
                                           global_step=self.global_step)
def main():
    """Train a convolutional autoencoder on dsprites, saving on interrupt."""

    # parameters
    epoch_num = 400
    batch_size = 128
    ana_freq = 5   # run analysis every `ana_freq` epochs
    gpu = -1       # -1 selects CPU

    # set logger
    logging.config.fileConfig('./log/log.conf')
    logger = getLogger(__name__)

    logger.info('file = {}'.format(__file__))
    logger.info('epoch_num = {}'.format(epoch_num))
    logger.info('batch_size = {}'.format(batch_size))
    logger.info('ana_freq = {}'.format(ana_freq))
    logger.info('gpu = {}'.format(gpu))

    # set writer: one timestamped TensorBoard run directory per invocation
    current_time = datetime.now().strftime('%B%d  %H:%M:%S')
    head = './results/' + current_time
    writer = SummaryWriter(head)

    # read data (hard-coded path; alternatives kept below)
    # data_obj = dataset.data_dsprites.DspritesDataset(db_path='/Users/yamada/lab/dat/dsprites')
    data_obj = dataset.data_dsprites.DspritesDataset(
        db_path='/home/masanori_yamada/lab/dat/dsprites')
    # data_obj = dataset.data_celeba.CelebADataset(db_path='/home/masanori_yamada / lab / dat / celeba / syorizumi', data_size=200000)
    # data_obj = dataset.data_celeba.CelebADataset(db_path='/Users/yamada/lab/dat/celeba/syorizumi', data_size=200)
    # data_obj = dataset.data_mnist.MnistDataset()
    # data_obj.train_size = 10 #00  # adjust train data size for speed
    data_obj.test_size = 16

    # model and optimizer
    model = cae.CAE(data_obj)
    opt = chainer.optimizers.Adam()

    trainer = Trainer(model=model, optimizer=opt, writer=writer, gpu=gpu)
    try:
        trainer.fit(data_obj,
                    epoch_num=epoch_num,
                    batch_size=batch_size,
                    ana_freq=ana_freq,
                    noise_type='salt')
    except KeyboardInterrupt:
        # NOTE(review): the model is saved only on Ctrl-C; a run that
        # finishes normally is not saved here -- confirm fit() saves it.
        trainer.save(head)
    def __init__(self, logging_dir=None, prefix='val', images_path=None,
                 class_names=None, batch_size=None, mean_pixels=None, det_thresh=0.5):
        """Set up detection-visualization state and a TensorBoard writer.

        Parameters
        ----------
        logging_dir : str
            Directory where the TensorBoard event file is created.
        prefix : str
            Tag prefix for logged entries.
        images_path : str
            Directory for images; created here if it does not exist.
        class_names : list[str]
            Class names used to label detections.
        batch_size : int
            Batch size of the evaluated data; stored for later use.
        mean_pixels : sequence
            Mean pixel values, stored for later use (exact layout is not
            visible in this chunk -- confirm against the caller).
        det_thresh : float
            Minimum detection score, stored for later filtering.
        """

        self.logging_dir = logging_dir
        self.prefix = prefix
        # os.mkdir (not makedirs): fails if the parent directory is missing.
        if not os.path.exists(images_path):
            os.mkdir(images_path)
        self.images_path = images_path
        self.class_names = class_names
        self.batch_size = batch_size
        self.mean_pixels = mean_pixels
        self.det_thresh = det_thresh
        try:
            # Lazy import so a missing tensorboard package only logs a hint.
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')
Exemple #26
0
def main():
    """Parse CLI args, configure logging/TensorBoard, and build data loaders.

    Populates the module-level globals used by the rest of the training
    script (`args`, `tb_writer`, `logger`, ...).
    """
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)

    # Mirror the 'global' log to a file when requested.
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # Dummy() is a no-op stand-in so later tb_writer calls need no guards.
    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)
Exemple #27
0
def main(args):
    """Evaluate the best saved VGG checkpoint on the test split.

    Creates the log directory and TensorBoard writer, redirects stdout to a
    log file, loads 'model_best.pth.tar' and runs the Evaluator on the test
    loader.
    """

    mkdir_if_missing(args.logs_dir)
    writer = SummaryWriter(args.logs_dir)

    # Everything printed below also lands in test_log.txt.
    sys.stdout = Logger(osp.join(args.logs_dir, 'test_log.txt'))
    print(args)

    cudnn.benchmark = True

    # create data loaders
    data_dir = args.data_dir
    dataset, num_class, test_loader = \
        get_data(
            args.dataset, data_dir,
            args.crop_w, args.crop_h,
            args.batch_size, args.workers)

    # create model
    model = VGGNet(
        args.depth, with_bn=True, pretrained=True, num_class=num_class,
        input_size=(args.crop_w, args.crop_h))
    model = model.cuda()

    # load from checkpoint (always the best model, not the latest)
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    model.load_state_dict(checkpoint['state_dict'])
    args.start_epoch = checkpoint['epoch']
    best_recall5 = checkpoint['best_recall5']
    print("=> get epoch {}  best top5 recall {:.1%}".format(args.start_epoch, best_recall5))

    # create trainer
    evaluator = Evaluator(model)

    # test
    print('Test with best model:')
    evaluator.test(test_loader)
Exemple #28
0
import torchvision.transforms as standard_transforms
import torchvision.utils as vutils
from tensorboard import SummaryWriter
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

import utils.joint_transforms as joint_transforms
import utils.transforms as extended_transforms
from datasets import voc
from models import *
from utils import check_mkdir, evaluate, AverageMeter, CrossEntropyLoss2d

ckpt_path = '../../ckpt'  # root folder for checkpoints and experiments
exp_name = 'voc-psp_net'
# Event files are written under <ckpt_path>/exp/<exp_name>.
writer = SummaryWriter(os.path.join(ckpt_path, 'exp', exp_name))

args = {
    'train_batch_size': 1,
    'lr': 1e-2 / sqrt(16 / 4),
    'lr_decay': 0.9,
    'max_iter': 3e4,
    'longer_size': 512,
    'crop_size': 473,
    'stride_rate': 2 / 3.,
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'snapshot': '',
    'print_freq': 10,
    'val_save_to_img_file': True,
    'val_img_sample_rate': 0.01,  # randomly sample some validation results to display,
Exemple #29
0
from torch.utils.data import DataLoader

import utils.simul_transforms as simul_transforms
import utils.transforms as expanded_transforms
from config import ckpt_path
from datasets.cityscapes import CityScapes
from datasets.cityscapes.config import num_classes, ignored_label
from datasets.cityscapes.utils import colorize_mask
from models import FCN8ResNet
from utils.io import rmrf_mkdir
from utils.loss import CrossEntropyLoss2d
from utils.training import calculate_mean_iu

cudnn.benchmark = True  # auto-tune conv algorithms for fixed input sizes
exp_name = 'fcn8resnet_cityscapes224*448'
writer = SummaryWriter('exp/' + exp_name)
pil_to_tensor = standard_transforms.ToTensor()
# Best-so-far validation record; corr_* hold values from that best epoch.
train_record = {'best_val_loss': 1e20, 'corr_mean_iu': 0, 'corr_epoch': -1}

# Training hyperparameters for FCN8-ResNet on Cityscapes.
train_args = {
    'batch_size': 16,
    'epoch_num':
    800,  # I stop training only when val loss doesn't seem to decrease anymore, so just set a large value.
    'pretrained_lr': 1e-6,  # used for the pretrained layers of model
    'new_lr': 1e-6,  # used for the newly added layers of model
    'weight_decay': 5e-4,
    'snapshot':
    'epoch_184_loss_0.8953_mean_iu_0.3923_lr_0.00001000.pth',  # empty string denotes initial training, otherwise it should be a string of snapshot name
    'print_freq': 50,
    'input_size': (224, 448),  # (height, width)
}
Exemple #30
0
def main(args):
    """Train and evaluate a RANet re-id model end to end.

    Builds the data loaders, model, OIM criterion and optimizer from
    ``args``, optionally resumes from a checkpoint, then alternates one
    training epoch with validation/test evaluation, logging scalars to
    TensorBoard and checkpointing the best model by validation top-1.
    Finally reloads the best checkpoint and runs a last test pass.
    """
    writer = SummaryWriter(args.logs_dir)

    # Mirror stdout into a persistent log file under the logs directory.
    sys.stdout = Logger(osp.join(args.logs_dir, 'train_log.txt'))
    print(args)

    # Seed all RNGs (numpy, CPU torch, all CUDA devices) for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True

    # Create data loaders
    data_dir = osp.join(args.data_dir, args.dataset)
    dataset, num_classes, dim_featx, dim_featy, train_loader, val_loader, test_loader = \
        get_data(
            args.dataset, data_dir, args.data_type,
            args.batch_size, args.workers, args.combine_trainval,
            head_feat_dir=args.head_feat_dir,
            face_feat_dir=args.face_feat_dir,
            body_feat_dir=args.body_feat_dir,
            upperbody_feat_dir=args.upperbody_feat_dir)

    # Create model
    # 4 presumably matches the four feature streams passed to get_data
    # (head/face/body/upperbody) -- TODO confirm against RANet's signature.
    model = RANet(4, num_features=dim_featx)
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch']
        best_top1 = checkpoint['best_top1']
        print("=> start epoch {}  best top1 {:.1%}".format(
            args.start_epoch, best_top1))
    else:
        best_top1 = 0

    # Criterion
    criterion = OIM4bLoss(dim_featy,
                          num_classes,
                          scalar=args.oim_scalar,
                          momentum=args.oim_momentum)
    # Presumably pre-fills the OIM lookup table from the training set --
    # verify against OIM4bLoss.init_lut.
    criterion.init_lut(train_loader)
    criterion.cuda()

    # Optimizer
    if args.optimizer == 'sgd':
        param_groups = model.parameters()
        optimizer = torch.optim.SGD(param_groups,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError("Cannot recognize optimizer type:", args.optimizer)

    # Evaluator and Trainer
    evaluator = RAEvaluator(model)
    trainer = RATrainer(model, criterion)

    # Schedule learning rate
    def adjust_lr(epoch):
        # SGD: step decay x0.1 every 20 epochs.
        # Adam: flat for 50 epochs, then decayed by the formula below.
        if args.optimizer == 'sgd':
            lr = args.lr * (0.1**(epoch // 20))
        elif args.optimizer == 'adam':
            lr = args.lr if epoch <= 50 else \
                args.lr * (0.01 ** (epoch - 50) / 30)
        else:
            raise ValueError("Cannot recognize optimizer type:",
                             args.optimizer)
        # Honor per-group LR multipliers if the param group defines one.
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # start training
    # Baseline evaluation before any training; these return values are only
    # printed -- both are overwritten inside the loop below.
    top1 = evaluator.evaluate(val_loader, print_summary=True)
    test_top1 = evaluator.test(test_loader,
                               dataset.gallery,
                               dataset.query,
                               print_summary=True)
    for epoch in range(args.start_epoch, args.epochs):
        adjust_lr(epoch)
        loss, prec = trainer.train(epoch,
                                   train_loader,
                                   optimizer,
                                   print_freq=1)
        writer.add_scalar('Train loss', loss, epoch + 1)
        writer.add_scalar('Train accuracy', prec, epoch + 1)

        top1 = evaluator.evaluate(val_loader, print_summary=False)
        writer.add_scalar('Val accuracy', top1, epoch + 1)
        # NOTE(review): the test set is evaluated in both gallery/query
        # directions, but only the second call's result is kept and logged;
        # the first call is effectively print-only.
        test_top1 = evaluator.test(test_loader,
                                   dataset.gallery,
                                   dataset.query,
                                   print_summary=True)
        test_top1 = evaluator.test(test_loader,
                                   dataset.query,
                                   dataset.gallery,
                                   print_summary=True)
        writer.add_scalar('Test accuracy', test_top1, epoch + 1)

        # Checkpoint every epoch; best-by-val-top1 is tracked via `is_best`.
        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'epoch': epoch + 1,
                'best_top1': best_top1,
            },
            is_best,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))

        print('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}{}\n'.
              format(epoch, top1, best_top1, ' *' if is_best else ''))

    # final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    model.load_state_dict(checkpoint['state_dict'])
    evaluator.test(test_loader, dataset.gallery, dataset.query)
Exemple #31
0
    def __init__(self,
                 model: Model,
                 optimizer: torch.optim.Optimizer,
                 iterator: DataIterator,
                 train_dataset: Dataset,
                 validation_dataset: Optional[Dataset] = None,
                 patience: int = 2,
                 validation_metric: str = "-loss",
                 num_epochs: int = 20,
                 serialization_dir: Optional[str] = None,
                 cuda_device: int = -1,
                 grad_norm: Optional[float] = None,
                 grad_clipping: Optional[float] = None,
                 learning_rate_scheduler: Optional[PytorchLRScheduler] = None,
                 no_tqdm: bool = False) -> None:
        """
        Parameters
        ----------
        model : ``Model``, required.
            An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
            their ``forward`` method returns a dictionary with a "loss" key, containing a
            scalar tensor representing the loss function to be optimized.
        optimizer : ``torch.optim.Optimizer``, required.
            An instance of a Pytorch Optimizer, instantiated with the parameters of the
            model to be optimized.
        iterator : ``DataIterator``, required.
            A method for iterating over a ``Dataset``, yielding padded indexed batches.
        train_dataset : ``Dataset``, required.
            A ``Dataset`` to train on. The dataset should have already been indexed.
        validation_dataset : ``Dataset``, optional, (default = None).
            A ``Dataset`` to evaluate on. The dataset should have already been indexed.
        patience : int, optional (default=2)
            Number of epochs to be patient before early stopping.
        validation_metric : str, optional (default="-loss")
            Validation metric to measure for whether to stop training using patience
            and whether to serialize an ``is_best`` model each epoch. The metric name
            must be prepended with either "+" or "-", which specifies whether the metric
            is an increasing or decreasing function.
        num_epochs : int, optional (default = 20)
            Number of training epochs.
        serialization_dir : str, optional (default=None)
            Path to directory for saving and loading model files. Models will not be saved if
            this parameter is not passed.
        cuda_device : int, optional (default = -1)
            An integer specifying the CUDA device to use. If -1, the CPU is used.
            Multi-gpu training is not currently supported, but will be once the
            Pytorch DataParallel API stabilises.
        grad_norm : float, optional, (default = None).
            If provided, gradient norms will be rescaled to have a maximum of this value.
        grad_clipping : ``float``, optional (default = ``None``).
            If provided, gradients will be clipped `during the backward pass` to have an (absolute)
            maximum of this value.  If you are getting ``NaNs`` in your gradients during training
            that are not solved by using ``grad_norm``, you may need this.
        learning_rate_scheduler : PytorchLRScheduler, optional, (default = None)
            A Pytorch learning rate scheduler. The learning rate will be decayed with respect to
            this schedule at the end of each epoch. If you use
            :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`, this will use the ``validation_metric``
            provided to determine if learning has plateaued.
        no_tqdm : ``bool``, optional (default=False)
            We use ``tqdm`` for logging, which will print a nice progress bar that updates in place
            after every batch.  This is nice if you're running training on a local shell, but can
            cause problems with log files from, e.g., a docker image running on kubernetes.  If
            ``no_tqdm`` is ``True``, we will not use tqdm, and instead log batch statistics using
            ``logger.info``, outputting a line at most every 10 seconds.
        """
        self._model = model
        self._iterator = iterator
        self._optimizer = optimizer
        self._train_dataset = train_dataset
        self._validation_dataset = validation_dataset

        self._patience = patience
        self._num_epochs = num_epochs
        self._serialization_dir = serialization_dir
        self._cuda_device = cuda_device
        self._grad_norm = grad_norm
        self._grad_clipping = grad_clipping
        self._learning_rate_scheduler = learning_rate_scheduler

        # The leading "+"/"-" encodes the metric's direction; strip it and
        # remember whether smaller values are better.
        increase_or_decrease = validation_metric[0]
        if increase_or_decrease not in ["+", "-"]:
            raise ConfigurationError(
                "Validation metrics must specify whether they should increase "
                "or decrease by pre-pending the metric name with a +/-.")
        self._validation_metric = validation_metric[1:]
        self._validation_metric_decreases = increase_or_decrease == "-"
        self._no_tqdm = no_tqdm

        if self._cuda_device >= 0:
            #self._model = self._model.cuda(self._cuda_device)
            # NOTE(review): this moves the model to the *default* CUDA device,
            # not necessarily `cuda_device` (see the commented-out line above).
            self._model = self._model.cuda()

        self._log_interval = 10  # seconds
        self._summary_interval = 100  # num batches between logging to tensorboard

        self._last_log = 0.0  # time of last logging

        # Separate train/validation event streams so TensorBoard can overlay them.
        if serialization_dir is not None:
            train_log = SummaryWriter(
                os.path.join(serialization_dir, "log", "train"))
            validation_log = SummaryWriter(
                os.path.join(serialization_dir, "log", "validation"))
            self._tensorboard = TensorboardWriter(train_log, validation_log)
        else:
            self._tensorboard = TensorboardWriter()
class LogDetectionsCallback(object):
    """Log evaluation images with ground-truth and detected boxes to TensorBoard.

    For each image in an evaluation batch, draws the ground-truth boxes (red)
    and the detections whose score exceeds ``det_thresh`` (one random color per
    class), saves the rendered figure under ``images_path`` and adds it to
    TensorBoard as an image summary.

    Parameters
    ----------
    logging_dir : str
        directory where the TensorBoard event file is created
    prefix : str
        tag prefix for summaries (default 'val'); currently stored but unused
    images_path : str
        directory where rendered detection images are saved (created if missing)
    class_names : list[str]
        class names used to label detections
    batch_size : int
        evaluation batch size, used together with the batch's pad to strip padding
    mean_pixels : sequence of 3 floats
        per-channel mean that was subtracted in preprocessing; added back for display
    det_thresh : float
        minimum detection score for a box to be drawn
    """
    def __init__(self, logging_dir=None, prefix='val', images_path=None,
                 class_names=None, batch_size=None, mean_pixels=None, det_thresh=0.5):

        self.logging_dir = logging_dir
        self.prefix = prefix
        # Create the dump directory for rendered figures if needed.
        if not os.path.exists(images_path):
            os.mkdir(images_path)
        self.images_path = images_path
        self.class_names = class_names
        self.batch_size = batch_size
        self.mean_pixels = mean_pixels
        self.det_thresh = det_thresh
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error('You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log detections and gt-boxes as an image in TensorBoard."""
        # Nothing to log on this invocation.
        if param.locals is None:
            return

        result = []
        # `pad` is used below to strip the samples appended to fill the last
        # batch -- presumably mxnet's DataBatch padding; verify against caller.
        pad = param.locals['eval_batch'].pad
        images = param.locals['eval_batch'].data[0][0:self.batch_size-pad].asnumpy()
        labels = param.locals['eval_batch'].label[0][0:self.batch_size - pad].asnumpy()
        outputs = [out[0:out.shape[0] - pad] for out in param.locals['self'].get_outputs()]
        # 'det' variable can be in different positions depending with train/test symbols
        if len(outputs) > 1:
            det_idx = [idx for idx,f in enumerate(param.locals['self'].output_names) if f.startswith('det')][0]
            detections = outputs[det_idx].asnumpy()
        else:
            detections = outputs[0].asnumpy()
        for i in range(detections.shape[0]):
            # Keep only valid rows (class id >= 0) for detections and labels.
            det = detections[i, :, :]
            det = det[np.where(det[:, 0] >= 0)[0]]
            label = labels[i,:,:]
            label = label[np.where(label[:, 0] >= 0)[0]]
            # Undo mean subtraction and convert CHW -> HWC, swapping channel
            # order for display.
            img = images[i,:,:,:] + np.reshape(self.mean_pixels, (3,1,1))
            img = img.astype(np.uint8)
            img = img.transpose([1,2,0])
            img[:, :, (0, 1, 2)] = img[:, :, (2, 1, 0)]
            self._visualize_detection_and_labels(img, det, label=label,
                                                 classes=self.class_names, thresh=self.det_thresh,
                                                 plt_path=os.path.join(self.images_path, 'image'+str(i)+'.png'))
            # save to tensorboard
            # NOTE(review): scipy.misc.imread was removed in SciPy >= 1.2;
            # modern SciPy needs imageio.imread here instead.
            img_det_graph = scipy.misc.imread(os.path.join(self.images_path, 'image'+str(i)+'.png'))
            self.summary_writer.add_image('image'+str(i)+'.png', img_det_graph)
        return result

    def _visualize_detection_and_labels(self, img, dets, label, classes=[], thresh=None, plt_path=None):
        """
        visualize detections in one image

        Parameters:
        ----------
        img : numpy.array
            image, in bgr format
        dets : numpy.array
            ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...])
            each row is one object
        label : numpy.array
            ground-truth boxes in the same [id, x1, y1, x2, y2] layout
        classes : tuple or list of str
            class names
        thresh : float
            score threshold
        plt_path : str
            file path the rendered figure is saved to
        """
        fig = plt.figure()
        plt.imshow(img)
        height = img.shape[0]
        width = img.shape[1]
        colors = dict()
        # Visualize ground-truth boxes
        gt_color = (1.0, 0.0, 0.0)
        for i in range(label.shape[0]):
            cls_id = int(label[i, 0])
            if cls_id >= 0:
                # Box coordinates are stored normalized; scale to pixels.
                xmin = int(label[i, 1] * width)
                ymin = int(label[i, 2] * height)
                xmax = int(label[i, 3] * width)
                ymax = int(label[i, 4] * height)
                rect = plt.Rectangle((xmin, ymin), xmax - xmin,
                                     ymax - ymin, fill=False,
                                     edgecolor=gt_color,
                                     linewidth=2)
                plt.gca().add_patch(rect)
                # NOTE(review): class_name is computed but unused below --
                # the ground-truth box is always labeled 'gt'.
                class_name = str(cls_id)
                if classes and len(classes) > cls_id:
                    class_name = classes[cls_id]
                plt.gca().text(xmin, ymin - 2,
                               'gt',
                               bbox=dict(facecolor=gt_color, alpha=0.5),
                               fontsize=8, color='white')
        # visualize predictions
        for i in range(dets.shape[0]):
            cls_id = int(dets[i, 0])
            if cls_id >= 0:
                score = dets[i, 1]
                if score > thresh:
                    # One stable random color per class for this figure.
                    if cls_id not in colors:
                        colors[cls_id] = (random.random(), random.random(), random.random())
                    xmin = int(dets[i, 2] * width)
                    ymin = int(dets[i, 3] * height)
                    xmax = int(dets[i, 4] * width)
                    ymax = int(dets[i, 5] * height)
                    rect = plt.Rectangle((xmin, ymin), xmax - xmin,
                                         ymax - ymin, fill=False,
                                         edgecolor=colors[cls_id],
                                         linewidth=3.5)
                    plt.gca().add_patch(rect)
                    class_name = str(cls_id)
                    if classes and len(classes) > cls_id:
                        class_name = classes[cls_id]
                    plt.gca().text(xmin, ymin - 2,
                                    '{:s} {:.3f}'.format(class_name, score),
                                    bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                                    fontsize=8, color='white')
        plt.savefig(plt_path)
        plt.close(fig)
def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, decay):
    """Run the training loop for `num_epoch` epochs, logging to TensorBoard.

    Relies on module-level globals: ``args`` (for ``tblog_dir``), ``module``
    (an already-bound mxnet Module), ``train_iter``/``val_iter`` data
    iterators, ``loss_metric`` and ``SimpleLRScheduler``.  Saves a checkpoint
    after every epoch and applies exponential learning-rate decay
    (``learning_rate * decay ** n_epoch``).
    """
    summary_writer = SummaryWriter(args.tblog_dir)
    lr_scheduler = SimpleLRScheduler(learning_rate)
    optimizer_params = {'lr_scheduler': lr_scheduler}
    module.init_params()
    module.init_optimizer(kvstore=kvstore,
                          optimizer=optimizer,
                          optimizer_params=optimizer_params)
    n_epoch = 0
    while True:
        if n_epoch >= num_epoch:
            break
        train_iter.reset()
        val_iter.reset()
        loss_metric.reset()
        # Training pass: forward + backward + parameter update per batch.
        for n_batch, data_batch in enumerate(train_iter):
            module.forward_backward(data_batch)
            module.update()
            module.update_metric(loss_metric, data_batch.label)
            loss_metric.get_batch_log(n_batch)
        train_acc, train_loss, train_recon_err = loss_metric.get_name_value()
        loss_metric.reset()
        # Validation pass: forward only, no parameter updates.
        for n_batch, data_batch in enumerate(val_iter):
            module.forward(data_batch)
            module.update_metric(loss_metric, data_batch.label)
            loss_metric.get_batch_log(n_batch)
        val_acc, val_loss, val_recon_err = loss_metric.get_name_value()

        summary_writer.add_scalar('train_acc', train_acc, n_epoch)
        summary_writer.add_scalar('train_loss', train_loss, n_epoch)
        summary_writer.add_scalar('train_recon_err', train_recon_err, n_epoch)
        summary_writer.add_scalar('val_acc', val_acc, n_epoch)
        summary_writer.add_scalar('val_loss', val_loss, n_epoch)
        summary_writer.add_scalar('val_recon_err', val_recon_err, n_epoch)

        print('Epoch[%d] train acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, train_acc, train_loss, train_recon_err))
        print('Epoch[%d] val acc: %.4f loss: %.6f recon_err: %.6f' % (n_epoch, val_acc, val_loss, val_recon_err))
        print('SAVE CHECKPOINT')

        module.save_checkpoint(prefix=model_prefix, epoch=n_epoch)
        n_epoch += 1
        # Decay the learning rate for the *next* epoch.
        lr_scheduler.learning_rate = learning_rate * (decay ** n_epoch)
Exemple #34
0
class LogDetectionsCallback(object):
    """Log evaluation images with ground-truth and detected boxes to TensorBoard.

    For every image in an evaluation batch, the ground-truth boxes are drawn
    in red and detections scoring above ``det_thresh`` in a random per-class
    color; each rendered figure is written under ``images_path`` and pushed to
    TensorBoard as an image summary.
    """

    def __init__(self,
                 logging_dir=None,
                 prefix='val',
                 images_path=None,
                 class_names=None,
                 batch_size=None,
                 mean_pixels=None,
                 det_thresh=0.5):

        self.logging_dir = logging_dir
        self.prefix = prefix
        # Make sure the dump directory for rendered figures exists.
        if not os.path.exists(images_path):
            os.mkdir(images_path)
        self.images_path = images_path
        self.class_names = class_names
        self.batch_size = batch_size
        self.mean_pixels = mean_pixels
        self.det_thresh = det_thresh
        try:
            from tensorboard import SummaryWriter
            self.summary_writer = SummaryWriter(logging_dir)
        except ImportError:
            logging.error(
                'You can install tensorboard via `pip install tensorboard`.')

    def __call__(self, param):
        """Callback to log detections and gt-boxes as an image in TensorBoard."""
        if param.locals is None:
            return

        result = []
        batch = param.locals['eval_batch']
        pad = batch.pad
        keep = self.batch_size - pad
        images = batch.data[0][0:keep].asnumpy()
        labels = batch.label[0][0:keep].asnumpy()
        outputs = [o[0:o.shape[0] - pad]
                   for o in param.locals['self'].get_outputs()]
        # 'det' variable can be in different positions depending with train/test symbols
        if len(outputs) > 1:
            names = param.locals['self'].output_names
            det_idx = next(pos for pos, nm in enumerate(names)
                           if nm.startswith('det'))
            detections = outputs[det_idx].asnumpy()
        else:
            detections = outputs[0].asnumpy()
        for idx in range(detections.shape[0]):
            # Drop padded rows (class id < 0) from detections and labels.
            det = detections[idx]
            det = det[det[:, 0] >= 0]
            gt = labels[idx]
            gt = gt[gt[:, 0] >= 0]
            # Undo mean subtraction, go CHW -> HWC and swap channel order.
            frame = images[idx] + np.reshape(self.mean_pixels, (3, 1, 1))
            frame = frame.astype(np.uint8).transpose([1, 2, 0])
            frame[:, :, (0, 1, 2)] = frame[:, :, (2, 1, 0)]
            out_png = os.path.join(self.images_path,
                                   'image' + str(idx) + '.png')
            self._visualize_detection_and_labels(frame,
                                                 det,
                                                 label=gt,
                                                 classes=self.class_names,
                                                 thresh=self.det_thresh,
                                                 plt_path=out_png)
            # save to tensorboard
            rendered = scipy.misc.imread(out_png)
            self.summary_writer.add_image('image' + str(idx) + '.png',
                                          rendered)
        return result

    def _visualize_detection_and_labels(self,
                                        img,
                                        dets,
                                        label,
                                        classes=[],
                                        thresh=None,
                                        plt_path=None):
        """
        visualize detections in one image
        Parameters:
        ----------
        img : numpy.array
            image, in bgr format
        dets : numpy.array
            ssd detections, numpy.array([[id, score, x1, y1, x2, y2]...])
            each row is one object
        classes : tuple or list of str
            class names
        thresh : float
            score threshold
        """
        fig = plt.figure()
        plt.imshow(img)
        height, width = img.shape[0], img.shape[1]
        palette = dict()
        gt_color = (1.0, 0.0, 0.0)
        axes = plt.gca()
        # Ground-truth boxes: red outline, fixed 'gt' label.
        for row in label:
            cls_id = int(row[0])
            if cls_id < 0:
                continue
            xmin, ymin = int(row[1] * width), int(row[2] * height)
            xmax, ymax = int(row[3] * width), int(row[4] * height)
            axes.add_patch(plt.Rectangle((xmin, ymin),
                                         xmax - xmin,
                                         ymax - ymin,
                                         fill=False,
                                         edgecolor=gt_color,
                                         linewidth=2))
            axes.text(xmin,
                      ymin - 2,
                      'gt',
                      bbox=dict(facecolor=gt_color, alpha=0.5),
                      fontsize=8,
                      color='white')
        # Predictions: one random color per class, labeled "<name> <score>".
        for row in dets:
            cls_id = int(row[0])
            if cls_id < 0:
                continue
            score = row[1]
            if score <= thresh:
                continue
            if cls_id not in palette:
                palette[cls_id] = (random.random(), random.random(),
                                   random.random())
            xmin, ymin = int(row[2] * width), int(row[3] * height)
            xmax, ymax = int(row[4] * width), int(row[5] * height)
            axes.add_patch(plt.Rectangle((xmin, ymin),
                                         xmax - xmin,
                                         ymax - ymin,
                                         fill=False,
                                         edgecolor=palette[cls_id],
                                         linewidth=3.5))
            if classes and len(classes) > cls_id:
                shown = classes[cls_id]
            else:
                shown = str(cls_id)
            axes.text(xmin,
                      ymin - 2,
                      '{:s} {:.3f}'.format(shown, score),
                      bbox=dict(facecolor=palette[cls_id],
                                alpha=0.5),
                      fontsize=8,
                      color='white')
        plt.savefig(plt_path)
        plt.close(fig)
Exemple #35
0
from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
from tensorboard import SummaryWriter
writer = SummaryWriter('runs')  # TensorBoard events go under ./runs
# Command-line options for the GAN training script.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset',
                    required=True,
                    help='cifar10 | lsun | imagenet | folder | lfw ')
parser.add_argument('--dataroot', required=True, help='path to dataset')
parser.add_argument('--workers',
                    type=int,
                    help='number of data loading workers',
                    default=2)
parser.add_argument('--batchSize',
                    type=int,
                    default=64,
                    help='input batch size')
parser.add_argument('--imageSize',
                    type=int,
    n_minibatches_validation = np.ceil(len(data_validation) / batch_size)

    # Fix the number of epochs to train for
    n_epochs = 10

    threshold = 0.01 * 10 ** (-21)

    # -------------------------------------------------------------------------
    # SET UP A LOGGER FOR TENSORBOARD VISUALIZATION
    # -------------------------------------------------------------------------

    run_start = datetime.datetime.now()
    log_name = [run_start, distances, sample_size, initial_lr, threshold]
    log_name_formatted = '[{:%Y-%m-%d_%H:%M}]-[{}]-[{}]-[lr_{:.1e}]-'\
                         '[thresh_{:.2e}]'.format(*log_name)
    writer = SummaryWriter(log_dir='logs/{}'.format(log_name_formatted))
    writer.add_text(tag='Description',
                    text_string='(Description missing.)')

    # -------------------------------------------------------------------------
    # TRAIN THE NET FOR THE GIVEN NUMBER OF EPOCHS
    # -------------------------------------------------------------------------

    print('\nStart training: Training on {} examples, validating on {} '
          'examples\n'.format(len(data_train), len(data_validation)))

    # -------------------------------------------------------------------------

    for epoch in range(n_epochs):

        print('Epoch {}/{}'.format(epoch+1, n_epochs))
Exemple #37
0
from torch.autograd.variable import Variable
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import os
from tensorboard import SummaryWriter
from datetime import datetime
from torchvision.utils import make_grid
from torch.nn import functional as F
import numpy as np
import matplotlib.pyplot as plt

check_root = './generate/draw_noattn'  # output directory for generated samples

# NOTE(review): destructively clears ALL previous TensorBoard runs on startup.
os.system('rm -rf ./runs/*')
writer = SummaryWriter('./runs/' + datetime.now().strftime('%B%d  %H:%M:%S'))

if not os.path.exists(check_root):
    os.mkdir(check_root)

batch_size = 512  # picture
#batch_size = 300 # cartoon
seq_len = 20  # number of sequential steps passed to the model
img_size = 64

enc_hidden_size = 800
dec_hidden_size = 1600
nz = 100  # presumably the latent dimension (by naming convention) -- confirm

# `draw` is defined elsewhere in this project; builds the model on the GPU.
model = draw(seq_len)
model.cuda()
Exemple #38
0
    def __init__(self,
                 model: Model,
                 optimizer: torch.optim.Optimizer,
                 iterator: DataIterator,
                 train_dataset: Iterable[Instance],
                 validation_dataset: Optional[Iterable[Instance]] = None,
                 patience: int = 2,
                 validation_metric: str = "-loss",
                 num_epochs: int = 20,
                 serialization_dir: Optional[str] = None,
                 num_serialized_models_to_keep: Optional[int] = None,
                 keep_serialized_model_every_num_seconds: Optional[int] = None,
                 model_save_interval: Optional[float] = None,
                 cuda_device: Union[int, List[int]] = -1,
                 grad_norm: Optional[float] = None,
                 grad_clipping: Optional[float] = None,
                 learning_rate_scheduler: Optional[PytorchLRScheduler] = None,
                 histogram_interval: Optional[int] = None) -> None:
        """
        Parameters
        ----------
        model : ``Model``, required.
            An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
            their ``forward`` method returns a dictionary with a "loss" key, containing a
            scalar tensor representing the loss function to be optimized.
        optimizer : ``torch.optim.Optimizer``, required.
            An instance of a Pytorch Optimizer, instantiated with the parameters of the
            model to be optimized.
        iterator : ``DataIterator``, required.
            A method for iterating over a ``Dataset``, yielding padded indexed batches.
        train_dataset : ``Dataset``, required.
            A ``Dataset`` to train on. The dataset should have already been indexed.
        validation_dataset : ``Dataset``, optional, (default = None).
            A ``Dataset`` to evaluate on. The dataset should have already been indexed.
        patience : int, optional (default=2)
            Number of epochs to be patient before early stopping.
        validation_metric : str, optional (default="-loss")
            Validation metric to measure for whether to stop training using patience
            and whether to serialize an ``is_best`` model each epoch. The metric name
            must be prepended with either "+" or "-", which specifies whether the metric
            is an increasing or decreasing function.
        num_epochs : int, optional (default = 20)
            Number of training epochs.
        serialization_dir : str, optional (default=None)
            Path to directory for saving and loading model files. Models will not be saved if
            this parameter is not passed.
        num_serialized_models_to_keep : ``int``, optional (default=None)
            Number of previous model checkpoints to retain.  Default is to keep all checkpoints.
        keep_serialized_model_every_num_seconds : ``int``, optional (default=None)
            If num_serialized_models_to_keep is not None, then occasionally it's useful to
            save models at a given interval in addition to the last num_serialized_models_to_keep.
            To do so, specify keep_serialized_model_every_num_seconds as the number of seconds
            between permanently saved checkpoints.  Note that this option is only used if
            num_serialized_models_to_keep is not None, otherwise all checkpoints are kept.
        model_save_interval : ``float``, optional (default=None)
            If provided, then serialize models every ``model_save_interval``
            seconds within single epochs.  In all cases, models are also saved
            at the end of every epoch if ``serialization_dir`` is provided.
        cuda_device : ``int``, optional (default = -1)
            An integer specifying the CUDA device to use. If -1, the CPU is used.
        grad_norm : ``float``, optional, (default = None).
            If provided, gradient norms will be rescaled to have a maximum of this value.
        grad_clipping : ``float``, optional (default = ``None``).
            If provided, gradients will be clipped `during the backward pass` to have an (absolute)
            maximum of this value.  If you are getting ``NaNs`` in your gradients during training
            that are not solved by using ``grad_norm``, you may need this.
        learning_rate_scheduler : ``PytorchLRScheduler``, optional, (default = None)
            A Pytorch learning rate scheduler. The learning rate will be decayed with respect to
            this schedule at the end of each epoch. If you use
            :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`, this will use the ``validation_metric``
            provided to determine if learning has plateaued.  To support updating the learning
            rate on every batch, this can optionally implement ``step_batch(batch_num)`` which
            updates the learning rate given the batch number.
        histogram_interval : ``int``, optional, (default = ``None``)
            If not None, then log histograms to tensorboard every ``histogram_interval`` batches.
            When this parameter is specified, the following additional logging is enabled:
                * Histograms of model parameters
                * The ratio of parameter update norm to parameter norm
                * Histogram of layer activations
            We log histograms of the parameters returned by
            ``model.get_parameters_for_histogram_tensorboard_logging``.
            The layer activations are logged for any modules in the ``Model`` that have
            the attribute ``should_log_activations`` set to ``True``.  Logging
            histograms requires a number of GPU-CPU copies during training and is typically
            slow, so we recommend logging histograms relatively infrequently.
            Note: only Modules that return tensors, tuples of tensors or dicts
            with tensors as values currently support activation logging.
        """
        self._model = model
        self._iterator = iterator
        self._optimizer = optimizer
        self._train_data = train_dataset
        self._validation_data = validation_dataset

        self._patience = patience
        self._num_epochs = num_epochs

        # Checkpointing state: paths of saved models plus the time of the last
        # "permanent" save, used by keep_serialized_model_every_num_seconds.
        self._serialization_dir = serialization_dir
        self._num_serialized_models_to_keep = num_serialized_models_to_keep
        self._keep_serialized_model_every_num_seconds = keep_serialized_model_every_num_seconds
        self._serialized_paths: List[Any] = []
        self._last_permanent_saved_checkpoint_time = time.time()
        self._model_save_interval = model_save_interval

        self._grad_norm = grad_norm
        self._batch_grad_norm = None
        self._grad_clipping = grad_clipping
        self._learning_rate_scheduler = learning_rate_scheduler

        # The metric string encodes its direction in the first character.
        increase_or_decrease = validation_metric[0]
        if increase_or_decrease not in ["+", "-"]:
            raise ConfigurationError(
                "Validation metrics must specify whether they should increase "
                "or decrease by pre-pending the metric name with a +/-.")
        self._validation_metric = validation_metric[1:]
        self._validation_metric_decreases = increase_or_decrease == "-"

        if not isinstance(cuda_device, int) and not isinstance(
                cuda_device, list):
            raise ConfigurationError(
                "Expected an int or list for cuda_device, got {}".format(
                    cuda_device))

        if isinstance(cuda_device, list):
            logger.warning(
                "WARNING: Multiple GPU support is experimental not recommended for use. "
                "In some cases it may lead to incorrect results or undefined behavior."
            )
            self._multiple_gpu = True
            self._cuda_devices = cuda_device
            # data_parallel will take care of transferring to cuda devices,
            # so the iterator keeps data on CPU.
            self._iterator_device = -1
        else:
            self._multiple_gpu = False
            self._cuda_devices = [cuda_device]
            self._iterator_device = cuda_device

        if self._cuda_devices[0] != -1:
            self._model = self._model.cuda(self._cuda_devices[0])

        self._log_interval = 10  # seconds
        self._summary_interval = 100  # num batches between logging to tensorboard
        self._histogram_interval = histogram_interval
        self._log_histograms_this_batch = False
        self._batch_num_total = 0

        self._last_log = 0.0  # time of last logging

        # Separate train/validation event files so TensorBoard can overlay them.
        if serialization_dir is not None:
            train_log = SummaryWriter(
                os.path.join(serialization_dir, "log", "train"))
            validation_log = SummaryWriter(
                os.path.join(serialization_dir, "log", "validation"))
            self._tensorboard = TensorboardWriter(train_log, validation_log)
        else:
            self._tensorboard = TensorboardWriter()
Exemple #39
0
        i = F.dropout(i, 0.35)
        i = F.log_softmax(i)
        return i


def classification_accuracy(out, labels):
    """Return the fraction of rows whose arg-max prediction equals the label."""
    predictions = torch.max(out, 1)[1]
    n_correct = torch.sum(predictions == labels).float()
    return n_correct / len(predictions)


# Instantiate the network (defined elsewhere in this file) and move it to the GPU.
net = Net().cuda()

# TensorBoard writer in a timestamped run directory.
writer = SummaryWriter('runs/' + datetime.now().strftime('%B%d  %H:%M:%S'))
# Record the model graph by tracing a dummy forward pass of shape (1, 75, features_size).
writer.add_graph(
    net,
    net(Variable(torch.rand(1, 75, features_size), requires_grad=True).cuda()))

# NOTE(review): input_len=75 here matches the dummy input's second dimension above;
# MyDataset is defined elsewhere — verify the expected sample shapes against it.
loader = DataLoader(MyDataset("data/text_1", input_len=75, output_len=1),
                    batch_size=64,
                    shuffle=True)

# net = net.cuda()
optimizer = Adam(params=net.parameters(), lr=0.001)

# NLLLoss — presumably the net emits log-probabilities (e.g. via log_softmax); confirm.
loss = nn.NLLLoss()
batch_number = len(loader)
num_epochs = 500
Exemple #40
0
def do_training(args, module, data_train, data_val, begin_epoch=0):
    """Run the training/validation loop for a speech-to-text module.

    Reads all hyper-parameters from ``args.config`` (ConfigParser-style),
    trains ``module`` for up to ``num_epoch`` epochs, logs CER/loss to
    TensorBoard, and writes checkpoints at configured intervals.

    Parameters
    ----------
    args : object
        Carries ``config`` plus whatever ``parse_contexts`` and
        ``get_checkpoint_path`` need.
    module : module-like (presumably an MXNet Module — confirm)
        Must support bind/init_params/init_optimizer/forward/backward/
        update/update_metric/save_checkpoint.
    data_train, data_val : iterable data sources
        Must expose ``provide_data``/``provide_label`` and ``reset()``.
    begin_epoch : int, optional (default=0)
        Epoch to resume from; parameters are only initialized when it is 0
        and mode is 'train'.
    """
    from distutils.dir_util import mkpath
    from log_util import LogUtil

    log = LogUtil().getlogger()
    # Ensure the checkpoint directory exists before any save happens.
    mkpath(os.path.dirname(get_checkpoint_path(args)))

    seq_len = args.config.get('arch', 'max_t_count')
    batch_size = args.config.getint('common', 'batch_size')
    save_checkpoint_every_n_epoch = args.config.getint('common', 'save_checkpoint_every_n_epoch')
    save_checkpoint_every_n_batch = args.config.getint('common', 'save_checkpoint_every_n_batch')
    enable_logging_train_metric = args.config.getboolean('train', 'enable_logging_train_metric')
    enable_logging_validation_metric = args.config.getboolean('train', 'enable_logging_validation_metric')

    contexts = parse_contexts(args)
    num_gpu = len(contexts)
    # Two metrics: eval_metric accumulates per-epoch validation CER,
    # loss_metric tracks training loss/CER within the epoch.
    eval_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, seq_length=seq_len,is_logging=enable_logging_validation_metric,is_epoch_end=True)
    # tensorboard setting
    loss_metric = STTMetric(batch_size=batch_size, num_gpu=num_gpu, seq_length=seq_len,is_logging=enable_logging_train_metric,is_epoch_end=False)

    optimizer = args.config.get('train', 'optimizer')
    momentum = args.config.getfloat('train', 'momentum')
    learning_rate = args.config.getfloat('train', 'learning_rate')
    learning_rate_annealing = args.config.getfloat('train', 'learning_rate_annealing')

    mode = args.config.get('common', 'mode')
    num_epoch = args.config.getint('train', 'num_epoch')
    clip_gradient = args.config.getfloat('train', 'clip_gradient')
    weight_decay = args.config.getfloat('train', 'weight_decay')
    save_optimizer_states = args.config.getboolean('train', 'save_optimizer_states')
    show_every = args.config.getint('train', 'show_every')
    n_epoch=begin_epoch

    # A configured value of 0 means "no gradient clipping".
    if clip_gradient == 0:
        clip_gradient = None

    module.bind(data_shapes=data_train.provide_data,
                label_shapes=data_train.provide_label,
                for_training=True)

    # Only initialize fresh parameters when starting from scratch.
    if begin_epoch == 0 and mode == 'train':
        module.init_params(initializer=get_initializer(args))


    lr_scheduler = SimpleLRScheduler(learning_rate=learning_rate)

    def reset_optimizer(force_init=False):
        # (Re)create the optimizer; only 'sgd' and 'adam' are supported.
        if optimizer == "sgd":
            module.init_optimizer(kvstore='device',
                                  optimizer=optimizer,
                                  optimizer_params={'lr_scheduler': lr_scheduler,
                                                    'momentum': momentum,
                                                    'clip_gradient': clip_gradient,
                                                    'wd': weight_decay},
                                  force_init=force_init)
        elif optimizer == "adam":
            module.init_optimizer(kvstore='device',
                                  optimizer=optimizer,
                                  optimizer_params={'lr_scheduler': lr_scheduler,
                                                    #'momentum': momentum,
                                                    'clip_gradient': clip_gradient,
                                                    'wd': weight_decay},
                                  force_init=force_init)
        else:
            raise Exception('Supported optimizers are sgd and adam. If you want to implement others define them in train.py')
    if mode == "train":
        reset_optimizer(force_init=True)
    else:
        reset_optimizer(force_init=False)

    #tensorboard setting
    tblog_dir = args.config.get('common', 'tensorboard_log_dir')
    summary_writer = SummaryWriter(tblog_dir)
    while True:

        if n_epoch >= num_epoch:
            break

        loss_metric.reset()
        log.info('---------train---------')
        for nbatch, data_batch in enumerate(data_train):

            module.forward_backward(data_batch)
            module.update()
            # tensorboard setting
            # Only score every `show_every` batches to keep training fast.
            if (nbatch + 1) % show_every == 0:
                module.update_metric(loss_metric, data_batch.label)
            #summary_writer.add_scalar('loss batch', loss_metric.get_batch_loss(), nbatch)
            if (nbatch+1) % save_checkpoint_every_n_batch == 0:
                log.info('Epoch[%d] Batch[%d] SAVE CHECKPOINT', n_epoch, nbatch)
                module.save_checkpoint(prefix=get_checkpoint_path(args)+"n_epoch"+str(n_epoch)+"n_batch", epoch=(int((nbatch+1)/save_checkpoint_every_n_batch)-1), save_optimizer_states=save_optimizer_states)
        # commented for Libri_sample data set to see only train cer
        log.info('---------validation---------')
        data_val.reset()
        eval_metric.reset()
        for nbatch, data_batch in enumerate(data_val):
            # when is_train = False it leads to high cer when batch_norm
            module.forward(data_batch, is_train=True)
            module.update_metric(eval_metric, data_batch.label)

        # tensorboard setting
        val_cer, val_n_label, val_l_dist, _ = eval_metric.get_name_value()
        log.info("Epoch[%d] val cer=%f (%d / %d)", n_epoch, val_cer, int(val_n_label - val_l_dist), val_n_label)
        curr_acc = val_cer
        summary_writer.add_scalar('CER validation', val_cer, n_epoch)
        assert curr_acc is not None, 'cannot find Acc_exclude_padding in eval metric'

        data_train.reset()

        # tensorboard setting
        train_cer, train_n_label, train_l_dist, train_ctc_loss = loss_metric.get_name_value()
        summary_writer.add_scalar('loss epoch', train_ctc_loss, n_epoch)
        summary_writer.add_scalar('CER train', train_cer, n_epoch)

        # save checkpoints
        if n_epoch % save_checkpoint_every_n_epoch == 0:
            log.info('Epoch[%d] SAVE CHECKPOINT', n_epoch)
            module.save_checkpoint(prefix=get_checkpoint_path(args), epoch=n_epoch, save_optimizer_states=save_optimizer_states)

        n_epoch += 1

        # NOTE(review): `learning_rate` is loop-invariant, so this sets the SAME
        # value (initial_lr / annealing) every epoch — the LR is decayed once,
        # not compounded. Confirm whether `/=` (compounding decay) was intended.
        lr_scheduler.learning_rate=learning_rate/learning_rate_annealing

    log.info('FINISH')