Example #1
import torch
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from fine_tune_config import *

## If you want to keep track of your network on TensorBoard, set USE_TENSORBOARD to 1 in the config file.

if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        cc.remove_experiment(EXP_NAME)
    except ValueError:  # no such experiment on the server
        pass
    foo = cc.create_experiment(EXP_NAME)
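
fine_tune_config is the project's config module and is not shown here; a minimal sketch of the flags this script reads (the values are placeholders, not from the original):

# fine_tune_config.py -- assumed contents, placeholder values
USE_TENSORBOARD = 1                # 1 to log to the Crayon/TensorBoard server
TENSORBOARD_SERVER = "localhost"   # hostname of the Crayon server
EXP_NAME = "fine_tune_experiment"  # experiment name shown in TensorBoard
GPU_MODE = 1                       # 1 to run on the GPU
CUDA_DEVICE = 0                    # index passed to torch.cuda.set_device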

## If you want to use the GPU, set GPU_MODE to 1 in the config file.

use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0

### SECTION 2 - data loading and shuffling/augmentation/normalization: all handled by torch automatically.
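
The example is cut off after this header; a minimal sketch of the torchvision pipeline such a section usually contains (DATA_DIR and BATCH_SIZE are assumed config values, not from the original):

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
}
dsets = {x: datasets.ImageFolder(os.path.join(DATA_DIR, x), data_transforms[x])
         for x in ['train', 'val']}
dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=BATCH_SIZE,
                                               shuffle=(x == 'train'), num_workers=4)
                for x in ['train', 'val']}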
Example #2
net.train()

params = list(net.parameters())
# optimizer = torch.optim.Adam(params[-8:], lr=lr)
optimizer = torch.optim.SGD(params[8:],
                            lr=lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# tensorboard
use_tensorboard = use_tensorboard and CrayonClient is not None
if use_tensorboard:
    cc = CrayonClient(hostname='127.0.0.1')
    if remove_all_log:
        cc.remove_all_experiments()
    if exp_name is None:
        exp_name = datetime.now().strftime('vgg16_%m-%d_%H-%M')
        exp = cc.create_experiment(exp_name)
    else:
        exp = cc.open_experiment(exp_name)

# training
train_loss = 0
tp, tf, fg, bg = 0., 0., 0, 0
step_cnt = 0
re_cnt = False
t = Timer()
t.tic()
# create directories for plots, models & evaluation
plots_dir = f"{args.base_plots_dir}/{exp_name}"
logger.info(f"For plotting using dir: {plots_dir}")
plots_dir = create_directory(plots_dir, delete_if_exists=True)

save_dir = f"{args.base_models_dir}/{exp_name}"
logger.info(f"For model saving using dir: {save_dir}")
save_dir = create_directory(save_dir, delete_if_exists=True)

export_dir = f"{args.base_evaluation_dir}/{exp_name}"
logger.info(f"For exporting final evaluation using dir: {export_dir}")
export_dir = create_directory(export_dir, delete_if_exists=True)

# Connect to server & start experiment
ccexp = crayon_create_experiment(exp_name, CrayonClient())

# seed
logger.info(f"Using seed: {args.numpy_seed}")
np.random.seed(args.numpy_seed)

# metrics to send to tensorboard
mets = {"mean_discrepancy": 0.0, "identity_loss": 0.0}
logger.info(f"Metrics which will be logged: {mets.keys()}")

# initialization & training iterations
N_init = args.n_init
logger.info(f"Will train initialization for {N_init} iterations")

N_train = args.n_train
logger.info(f"Will train transport map for {N_train} iterations")
print(b.shape)
data = b
data = data.transpose()

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="10.150.6.120")

try:
    cc.remove_experiment("AnalyzeConv4")
except ValueError:
    pass

try:
    OMIE = cc.create_experiment("AnalyzeConv4")
except ValueError:
    pass

##
## noise level one
## dimension 2
## z is extracted separately
Example #5
import torch
import numpy as np
import torch.nn as nn
from FCDensenet import FCDensenet
from utils import train, valid, test
from pycrayon import CrayonClient

cc = CrayonClient(hostname="localhost")
data = torch.load('polyp_data.pth')

f = "FCDensenet.pth"
FCDensenet_experiment = cc.create_experiment("FCDensenet_experiment")

batch_size = 2
n_tr_batch = len(data[0]) // batch_size
n_va_batch = len(data[2]) // batch_size
n_te_batch = len(data[4]) // batch_size

n_epochs = 500
n_c = 2
k = 16
cuda = True

if cuda:
    model = FCDensenet(n_c, k, nn.ReLU(inplace=True)).cuda()
else:
    model = FCDensenet(n_c, k, nn.ReLU(inplace=True))

optimizer = torch.optim.RMSprop(model.parameters())
criterion = nn.CrossEntropyLoss()
Example #6
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
mean = np.array([1, 2, 3, 1])
cov = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
# = np.random.multivariate_normal(mean,cov,5000)

from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="10.150.6.120")
cc.remove_experiment("OMIE_5")
OMIE = cc.create_experiment("OMIE_5")
###
### noise level one
### dimension 2
### z is extracted separately
###
input_size = 4
hidden_size = 8
hidden_size_ = 3
num_classes = 1

num_epochs = 9
learning_rate = 0.0001
debug_mode = True
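
The network this fragment trains is not included; a minimal sketch of an MLP matching the sizes above (the class name Net is hypothetical):

class Net(nn.Module):
    # 4 -> 8 -> 3 -> 1, per input_size/hidden_size/hidden_size_/num_classes above
    def __init__(self, input_size, hidden_size, hidden_size_, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size_)
        self.fc3 = nn.Linear(hidden_size_, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

net = Net(input_size, hidden_size, hidden_size_, num_classes)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)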
Example #7
            loss_score, ans_loss_score, dis_loss_score, acc_score = validate_bireader(net, dev_loader, params)
            print("validation loss = {0:.10}, validation accuracy = {1:.5}".
                  format(loss_score, acc_score))
            print("answerer loss = {0:.10}, discrim. loss = {1:.10}".
                  format(ans_loss_score, dis_loss_score))
            loss_score, ans_loss_score, dis_loss_score, acc_score = validate_bireader(net, test_loader, params)
            print("validation loss = {0:.10}, validation accuracy = {1:.5}".
                  format(loss_score, acc_score))
            print("answerer loss = {0:.10}, discrim. loss = {1:.10}".
                  format(ans_loss_score, dis_loss_score))

        else:

            if arg.log:
                # crayon client
                cc = CrayonClient(hostname="localhost", port=8889)
                existing = len(cc.get_experiment_names())
                ce = cc.create_experiment("run_{0}".format(existing), zip_file=None)

            print("now training...")
            train_1 = pd.read_pickle("../input_data/train_{0}.pkl".format(params['lang']))
            train_2 = pd.read_pickle("../input_data/train_{0}.pkl".format(params['lang2']))
            train_loader = tud.DataLoader(BiQADataset(train_1, train_2, nlp_1, nlp_2,
                                                      rev_dic_1, rev_dic_2, relabel=params['relabel'],
                                                      l2_supersample=params['l2_supersample']),
                                          batch_size=params['batch_size'],
                                          pin_memory=True, num_workers=3,
                                          shuffle=True)

            dev_1 = pd.read_pickle("../input_data/dev_{0}.pkl".format(params['lang']))
            dev_2 = pd.read_pickle("../input_data/dev_{0}.pkl".format(params['lang2']))
Example #8
def __init__(self, opt):
    self.opt = opt
    self.nums = opt.nums  # nums was undefined in the original; presumably an option
    self.istrain = opt.train
    self.cc = CrayonClient(hostname="localhost") if opt.cc else opt.cc
    self.cuda = opt.cuda
Example #9
class _baseMuitlModel(object):
    '''Base model combining netG and netD into a GAN model

    @Params:
    - opt: options for configuring the GAN model
    - train: train or test
    - nums: how many netGs
    - cc: crayon client or not
    - cuda: use cuda or not
    '''
    def __init__(self, opt):
        self.opt = opt
        self.nums = opt.nums  # nums was undefined in the original; presumably an option
        self.istrain = opt.train
        self.cc = CrayonClient(hostname="localhost") if opt.cc else opt.cc
        self.cuda = opt.cuda

    def create_tensorboard(self):
        '''create TensorBoard experiments (the Crayon server runs in Docker)
        '''
        if self.cc:
            self.cc.remove_all_experiments()
            self.D_exp = create_sigle_experiment(self.cc, 'D_loss')
            self.G_exps = []
            for i in range(self.nums):
                G_loss_experiment_name = 'G_loss_{}'.format(i)
                G_exp = create_sigle_experiment(self.cc, G_loss_experiment_name)
                self.G_exps.append(G_exp)

    def draft_data(self, input):
        '''copy a batch from the dataset loader into X/Z
        '''
        pass

    def backward_D(self):
        '''backward pass for netD
        '''
        pass

    def train(self):
        '''train gans
        '''
        pass

    def test(self):
        '''test gans
        '''
        pass

    def save_network(self, it, savepath):
        '''save checkpoints of netG and netD in savepath

        @Params:
        - it: number of iterations
        - savepath: directory in which to save network parameters
        '''
        torch.save(self.netG.state_dict(),
                   '%s/netG_epoch_%d.pth' % (savepath, it))
        torch.save(self.netD.state_dict(),
                   '%s/netD_epoch_%d.pth' % (savepath, it))

    def load_networkG(self, g_network_path):
        '''load network parameters of netG

        @Params:
        - g_network_path: the path of netG
        '''
        self.netG.load_state_dict(torch.load(g_network_path))

    def load_networkD(self, d_network_path):
        '''load network parameters of netD

        @Params:
        - d_network_path: the path of netD
        '''
        self.netD.load_state_dict(torch.load(d_network_path))

    def save_image(self, fake, it, savepath):
        '''save the output of netG as an image

        @Params:
        - fake: the output of netG
        - it: number of iterations
        - savepath: directory in which to save the image
        '''
        vutils.save_image(fake.data,
                          '%s/fake_samples_epoch_%03d.png' % (savepath, it))
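
create_sigle_experiment, used in create_tensorboard above (and again in Example #15), is never defined in these fragments; a minimal sketch, assuming a plain create suffices because remove_all_experiments() already cleared the server:

def create_sigle_experiment(cc, exp_name):
    # The caller wipes all experiments first, so no name collision is possible here.
    return cc.create_experiment(exp_name)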
Example #10
net.cuda()
net.train()
print('load net succ...')

# optimizer
start_epoch = 0
lr = cfg.init_learning_rate
optimizer = torch.optim.SGD(net.parameters(),
                            lr=lr,
                            momentum=cfg.momentum,
                            weight_decay=cfg.weight_decay)

# tensorboard
use_tensorboard = cfg.use_tensorboard and CrayonClient is not None
if use_tensorboard:
    cc = CrayonClient(hostname='127.0.0.1')
    # if remove_all_log:
    #     cc.remove_all_experiments()
    if start_epoch == 0:
        exp = cc.create_experiment(cfg.exp_name)
    else:
        exp = cc.open_experiment(cfg.exp_name)

train_loss = 0
t = Timer()
for step in range(start_epoch * imdb.batch_per_epoch,
                  cfg.max_epoch * imdb.batch_per_epoch):
    t.tic()
    # batch
    batch = imdb.next_batch()
    im = batch['images']
Example #11
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from RAN import restorator, discirminator
from patch_wise import patch

from pycrayon import CrayonClient
import time

cc = CrayonClient(hostname="localhost", port=8889)

for name in ('d_real_error', 'd_fake_error', 'g_error'):
    try:
        cc.remove_experiment(name)
    except ValueError:  # no such experiment on the server
        pass
d_real_errorC = cc.create_experiment('d_real_error')
d_fake_errorC = cc.create_experiment('d_fake_error')
g_errorC = cc.create_experiment('g_error')


def extract(v):
    return v.data.storage().tolist()

print('Starting my Restoration Adversarial Net...')

torch.manual_seed(123)
torch.cuda.manual_seed(123)

patchSize = 64
patches = patch()
Example #12
def main():
    config = DefaultConfigs()
    train_input_root = os.path.join(config.data)
    train_labels_file = 'labels.csv'

    if not os.path.exists(config.output):
        os.makedirs(config.output)
    output_base = config.output

    exp_name = '-'.join([
        datetime.now().strftime("%Y%m%d-%H%M%S"), config.model,
        str(config.img_size), 'f' + str(config.fold)
    ])
    mask_exp_name = '-'.join(
        [config.model,
         str(config.img_size), 'f' + str(config.fold)])
    mask_exp_name = glob.glob(
        os.path.join(output_base, 'train', '*' + mask_exp_name))
    if config.resume and mask_exp_name:
        output_dir = mask_exp_name[0]  # glob returns a list; use the matching dir
    else:
        output_dir = get_outdir(output_base, 'train', exp_name)

    batch_size = config.batch_size
    test_batch_size = config.test_batch_size
    num_epochs = config.epochs
    img_type = config.image_type
    img_size = (config.img_size, config.img_size)
    num_classes = get_tags_size(config.labels)

    torch.manual_seed(config.seed)

    dataset_train = HumanDataset(
        train_input_root,
        train_labels_file,
        train=True,
        multi_label=config.multi_label,
        img_type=img_type,
        img_size=img_size,
        fold=config.fold,
    )

    #sampler = WeightedRandomOverSampler(dataset_train.get_sample_weights())

    loader_train = data.DataLoader(
        dataset_train,
        batch_size=batch_size,
        shuffle=True,
        #sampler=sampler,
        num_workers=config.num_processes)

    dataset_eval = HumanDataset(
        train_input_root,
        train_labels_file,
        train=False,
        multi_label=config.multi_label,
        img_type=img_type,
        img_size=img_size,
        test_aug=config.tta,
        fold=config.fold,
    )

    loader_eval = data.DataLoader(dataset_eval,
                                  batch_size=test_batch_size,
                                  shuffle=False,
                                  num_workers=config.num_processes)

    #    model = model_factory.create_model(
    #        config.model,
    #        pretrained=True,
    #        num_classes=num_classes,
    #        drop_rate=config.drop,
    #        global_pool=config.gp)

    model = get_net(config.model, num_classes, config.drop, config.channels)

    if not config.no_cuda:
        if config.num_gpu > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=list(range(
                                              config.num_gpu))).cuda()
        else:
            model.cuda()

    if config.opt.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.lr,
                              momentum=config.momentum,
                              weight_decay=config.weight_decay)
    elif config.opt.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.lr,
                               weight_decay=config.weight_decay)
    elif config.opt.lower() == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=config.lr,
                                   weight_decay=config.weight_decay)
    elif config.opt.lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.lr,
                                  alpha=0.9,
                                  momentum=config.momentum,
                                  weight_decay=config.weight_decay)
    elif config.opt.lower() == 'yellowfin':
        optimizer = YFOptimizer(model.parameters(),
                                lr=config.lr,
                                weight_decay=config.weight_decay,
                                clip_thresh=2)
    else:
        assert False, "Invalid optimizer"

    if not config.decay_epochs:
        lr_scheduler = ReduceLROnPlateau(optimizer, patience=8)
    else:
        lr_scheduler = None

    if config.class_weights:
        class_weights = torch.from_numpy(
            dataset_train.get_class_weights()).float()
        class_weights_norm = class_weights / class_weights.sum()
        if not config.no_cuda:
            class_weights = class_weights.cuda()
            class_weights_norm = class_weights_norm.cuda()
    else:
        class_weights = None
        class_weights_norm = None

    if config.loss.lower() == 'nll':
        #assert not args.multi_label and 'Cannot use crossentropy with multi-label target.'
        loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)
    elif config.loss.lower() == 'mlsm':
        assert config.multi_label
        loss_fn = torch.nn.MultiLabelSoftMarginLoss(weight=class_weights)
    else:
        assert False, "Invalid loss function"

    if not config.no_cuda:
        loss_fn = loss_fn.cuda()

    # optionally resume from a checkpoint
    start_epoch = 1
    if config.resume:
        if os.path.isfile(config.resume):
            print("=> loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            config.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                config.resume, checkpoint['epoch']))
            start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))
            exit(-1)

    use_tensorboard = not config.no_tb and CrayonClient is not None
    if use_tensorboard:
        hostname = '127.0.0.1'
        port = 8889
        host_port = config.tbh.split(':')[:2]
        if len(host_port) == 1:
            hostname = host_port[0]
        elif len(host_port) >= 2:
            hostname, port = host_port[0], int(host_port[1])
        try:
            cc = CrayonClient(hostname=hostname, port=port)
            try:
                cc.remove_experiment(exp_name)
            except ValueError:
                pass
            exp = cc.create_experiment(exp_name)
        except Exception as e:
            exp = None
            print(
                "Error (%s) connecting to Tensorboard/Crayon server. Giving up..."
                % str(e))
    else:
        exp = None

    # Optional fine-tune of only the final classifier weights for specified number of epochs (or part of)
    if not config.resume and config.ft_epochs > 0.:
        if config.opt.lower() == 'adam':
            finetune_optimizer = optim.Adam(model.get_fc().parameters(),
                                            lr=config.ft_lr,
                                            weight_decay=config.weight_decay)
        else:
            finetune_optimizer = optim.SGD(model.get_fc().parameters(),
                                           lr=config.ft_lr,
                                           momentum=config.momentum,
                                           weight_decay=config.weight_decay)

        finetune_epochs_int = int(np.ceil(config.ft_epochs))
        finetune_final_batches = int(
            np.ceil((1 - (finetune_epochs_int - config.ft_epochs)) *
                    len(loader_train)))
        print(finetune_epochs_int, finetune_final_batches)
        for fepoch in range(1, finetune_epochs_int + 1):
            if fepoch == finetune_epochs_int and finetune_final_batches:
                batch_limit = finetune_final_batches
            else:
                batch_limit = 0
            train_epoch(fepoch,
                        model,
                        loader_train,
                        finetune_optimizer,
                        loss_fn,
                        config,
                        class_weights_norm,
                        output_dir,
                        batch_limit=batch_limit)
            step = fepoch * len(loader_train)
            score, _ = validate(step, model, loader_eval, loss_fn, config, 0.3,
                                output_dir)

    score_metric = 'f2'
    best_loss = None
    best_f2 = None
    threshold = 0.2
    try:
        for epoch in range(start_epoch, num_epochs + 1):
            if config.decay_epochs:
                adjust_learning_rate(optimizer,
                                     epoch,
                                     initial_lr=config.lr,
                                     decay_epochs=config.decay_epochs)

            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        loss_fn,
                                        config,
                                        class_weights_norm,
                                        output_dir,
                                        exp=exp)

            step = epoch * len(loader_train)
            eval_metrics, latest_threshold = validate(step,
                                                      model,
                                                      loader_eval,
                                                      loss_fn,
                                                      config,
                                                      threshold,
                                                      output_dir,
                                                      exp=exp)

            if lr_scheduler is not None:
                lr_scheduler.step(eval_metrics['eval_loss'])

            rowd = OrderedDict(epoch=epoch)
            rowd.update(train_metrics)
            rowd.update(eval_metrics)
            with open(os.path.join(output_dir, 'summary.csv'), mode='a') as cf:
                dw = csv.DictWriter(cf, fieldnames=rowd.keys())
                if best_loss is None:  # first iteration (epoch == 1 can't be used)
                    dw.writeheader()
                dw.writerow(rowd)

            best = False
            if best_loss is None or eval_metrics['eval_loss'] < best_loss[1]:
                best_loss = (epoch, eval_metrics['eval_loss'])
                if score_metric == 'loss':
                    best = True
            if best_f2 is None or eval_metrics['eval_f2'] > best_f2[1]:
                best_f2 = (epoch, eval_metrics['eval_f2'])
                if score_metric == 'f2':
                    best = True

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': config.model,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'threshold': latest_threshold,
                    'config': config
                },
                is_best=best,
                filename=os.path.join(config.checkpoint_path,
                                      'checkpoint-%d.pth.tar' % epoch),
                output_dir=output_dir)

    except KeyboardInterrupt:
        pass
    print('*** Best loss: {0} (epoch {1})'.format(best_loss[1], best_loss[0]))
    print('*** Best f2: {0} (epoch {1})'.format(best_f2[1], best_f2[0]))
Example #13
import argparse
parser = argparse.ArgumentParser(description='Lets win charades')
parser.add_argument('-name', type=str, required=False, default="No name provided", help='Name of experiment')
parser.add_argument('-resume', type=str, required=False, default=None, help='Path to resume model')

args = parser.parse_args()
print(args.name)

if config.USE_GPU:
    torch.cuda.set_device(config.TORCH_DEVICE)
cc = None
if config.LOG:
    from pycrayon import CrayonClient
    os.system('')
    cc = CrayonClient(hostname="server_machine_address")

from models.inflated_inception_attention import InceptionAttention
net = InceptionAttention()

from config import *
from utils import *

actionClassifier = getActionClassifier() 

device_ids=[0, 1, 2, 3]
def parallel(var, device_ids=[0, 1, 2, 3]):
    return torch.nn.DataParallel(var, device_ids=device_ids)

# Resume training from pretrained model
resume_epoch = 0
Example #14
        h = self.bn(h)
        for f in self.res:
            h = f(h)
        y = self.fc(h)
        return y


if __name__ == '__main__':
    # GPU flag
    gpu_fg = util.gpuCheck(sys.argv)
    if gpu_fg >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu_fg >= 0 else np

    # initialize pycrayon
    cc = CrayonClient(hostname="192.168.1.90", port=8889)
    # delete this experiment from the server
    try:
        cc.remove_experiment("ResNet train")
        cc.remove_experiment("ResNet test")
    except ValueError:
        pass

    # create a new experiment
    try:
        tb_res_train = cc.create_experiment("ResNet train")
        tb_res_test = cc.create_experiment("ResNet test")
    except ValueError:
        tb_res_train = cc.open_experiment("ResNet train")
        tb_res_test = cc.open_experiment("ResNet test")
Example #15
z_dim = 100
h_dim = 128
x_dim_w, x_dim_h = train_loader.dataset.train_data.size()[1:3]
x_dim = x_dim_w * x_dim_h
train_size = train_loader.dataset.train_data.size()[0]
y_dim = 10
lr = 1e-3
cnt = 0
display_cnt = 100
iter = 2
nets_num = 10

cuda = False
netD_continue_train = True

cc = CrayonClient(hostname="localhost")
cc.remove_all_experiments()
D_exp = create_sigle_experiment(cc, 'D_loss')
D_preb_real = create_sigle_experiment(cc, 'preb_real')
D_preb_fake = create_sigle_experiment(cc, 'preb_fake')
G_exps = create_experiments(cc, 10)

netG_indeps = create_nets(config['G'][2], z_dim, nets_num)
netG_share = build_netG(config['G'][3], h_dim)
netD = build_netD(config['D'][2], x_dim)

print(netG_indeps)
print(netG_share)

init_network(netG_share)
init_network(netG_indeps)
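
create_experiments, called above with a count, is another undefined project helper; a minimal sketch, assuming it builds one numbered experiment per generator on top of create_sigle_experiment:

def create_experiments(cc, nums):
    # One experiment per generator: G_loss_0 ... G_loss_{nums-1}.
    return [create_sigle_experiment(cc, 'G_loss_{}'.format(i)) for i in range(nums)]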
Example #16
        ######################################
        y = self.fc8(h)
        ######################################

        return y


if __name__ == '__main__':
    # GPU flag
    gpu_fg = util.gpuCheck(sys.argv)
    if gpu_fg >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if gpu_fg >= 0 else np

    # initialize pycrayon
    cc = CrayonClient(hostname="192.168.1.198", port=8889)
    # delete this experiment from the server
    try:
        cc.remove_experiment("AlexNet train (Adam)")
        cc.remove_experiment("AlexNet test (Adam)")
    except ValueError:
        pass

    # create a new experiment
    try:
        tb_alex_train = cc.create_experiment("AlexNet train (Adam)")
        tb_alex_test = cc.create_experiment("AlexNet test (Adam)")
    except ValueError:
        tb_alex_train = cc.open_experiment("AlexNet train (Adam)")
        tb_alex_test = cc.open_experiment("AlexNet test (Adam)")
Example #17
    def parse(self):

        opt = self.gather_options()
        # opt.isTrain = self.isTrain   # train or test

        model = opt.model_name
        dataset_name = opt.dataset  # dataset name - used for saving the model file
        exp = 'v7-{}-{}-{}/'.format(dataset_name, model, datetime.now().strftime('exp-%m-%d_%H-%M'))
        expr_dir = './saved_models/{}/'.format(exp) #model files are saved here

        opt.crop_size = list(map(int, opt.crop_size.split('x')))

        if opt.save_model_para and not os.path.exists(expr_dir):
            makedirs('./saved_models')
            makedirs(expr_dir)
            makedirs(expr_dir+'./sup/')

        else:
            expr_dir = './temp1/'
            makedirs(expr_dir)
            if not os.path.exists(expr_dir+'./sup/'):
                os.mkdir(expr_dir+'./sup/')

        opt.expr_dir = expr_dir

        logger = logging.getLogger()
        fh = logging.FileHandler("{0}/{1}.log".format(expr_dir, 'log'), mode='w')
        fh.setFormatter(logging.Formatter(fmt="%(asctime)s  %(message)s", datefmt="%d-%H:%M"))
        logger.addHandler(fh)
        opt.logger = logger

        self.opt = opt

        # Tensorboard config
        use_tensorboard = opt.use_tensorboard
        remove_all_log = False   # remove all historical experiments in TensorBoard
        use_tensorboard = use_tensorboard and CrayonClient is not None
        self.vis_exp = None

        if use_tensorboard:
            cc = CrayonClient(hostname='8.8.8.8', port=7879)
            if remove_all_log:
                cc.remove_all_experiments()
            random.seed(time.time())
            vis_exp_name = exp + str(random.random())
            opt.vis_exp_name = vis_exp_name
            self.vis_exp = cc.create_experiment(vis_exp_name)

        import socket
        hostname = socket.gethostname()

        # set gpu ids
        str_ids = opt.gpus.split(',')
        opt.gpus = []
        for str_id in str_ids:
            id = int(str_id)
            if id >= 0:
                opt.gpus.append(id)
        if len(opt.gpus) > 0:
            torch.cuda.set_device(opt.gpus[0])

        self.opt = opt
        self.print_options(opt)

        return self.opt
Example #18
class CrayonWrapper:
    """
    Wraps PyCrayon (https://github.com/torrvision/crayon), a language-agnostic interface to TensorBoard.
    """
    def __init__(self,
                 name,
                 runs_distributed,
                 runs_cluster,
                 chief_handle,
                 path_log_storage=None,
                 crayon_server_address="localhost"):
        self._name = name
        self._path_log_storage = path_log_storage
        if path_log_storage is not None:
            create_dir_if_not_exist(path_log_storage)

        self._chief_handle = chief_handle
        self._crayon = CrayonClient(hostname=crayon_server_address)
        self._experiments = {}
        self.clear()
        # dict of exps containing dict of graph names containing lists of {step: val} dicts
        self._custom_logs = {}

        self._ray = MaybeRay(runs_distributed=runs_distributed,
                             runs_cluster=runs_cluster)

    @property
    def name(self):
        return self._name

    @property
    def path_log_storage(self):
        return self._path_log_storage

    def clear(self):
        """
        Does NOT clear crayon's internal experiment logs and files.
        """
        self._experiments = {}

    def export_all(self, iter_nr):
        """
        Exports all logs of the current run in Tensorboard's format and as json files.
        """
        if self._path_log_storage is not None:
            path_crayon = ospj(self._path_log_storage, str(self._name),
                               str(iter_nr), "crayon")
            path_json = ospj(self._path_log_storage, str(self._name),
                             str(iter_nr), "as_json")
            create_dir_if_not_exist(path=path_crayon)
            create_dir_if_not_exist(path=path_json)
            for e in self._experiments.values():
                e.to_zip(filename=ospj(path_crayon, e.xp_name + ".zip"))
                write_dict_to_file_json(dictionary=self._custom_logs,
                                        _dir=path_json,
                                        file_name="logs")

    def update_from_log_buffer(self):
        """
        Pulls newly added logs from the chief onto whatever worker CrayonWrapper runs on. It then adds all these new
        logs to Tensorboard (i.e. PyCrayon's docker container)
        """
        new_v, exp_names = self._get_new_vals()

        for e in exp_names:
            if e not in self._experiments.keys():
                self._custom_logs[e] = {}
                try:
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)
                except ValueError:
                    self._crayon.remove_experiment(xp_name=e)
                    self._experiments[e] = self._crayon.create_experiment(
                        xp_name=e)

        for name, vals_dict in new_v.items():
            for graph_name, data_points in vals_dict.items():

                for data_point in data_points:
                    step = int(data_point[0])
                    val = data_point[1]

                    self._experiments[name].add_scalar_value(name=graph_name,
                                                             step=step,
                                                             value=val)
                    if graph_name not in self._custom_logs[name].keys():
                        self._custom_logs[name][graph_name] = []

                    self._custom_logs[name][graph_name].append({step: val})

    def _get_new_vals(self):
        """
        Returns:
            dict: Pulls and returns newly added logs from the chief onto whatever worker CrayonWrapper runs on.
        """
        return self._ray.get(
            self._ray.remote(self._chief_handle.get_new_values))
Example #19
def remove_all_experiments(hostname, port):
    '''
    DANGER: don't use this, unless you're sure
    '''
    tb = CrayonClient(hostname=hostname, port=port)
    tb.remove_all_experiments()
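
A hedged usage example; note that pointing this at a live Crayon server deletes every experiment it hosts:

if __name__ == '__main__':
    # Only run interactively and on purpose.
    remove_all_experiments(hostname='localhost', port=8889)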
Example #20
File: run.py Project: we1l1n/atis
def train(model, data, params):
    """ Trains a model.

    Inputs:
        model (ATISModel): The model to train.
        data (ATISData): The data that is used to train.
        params (namespace): Training parameters.
    """
    # Get the training batches.
    log = Logger(os.path.join(params.logdir, params.logfile), "w")
    num_train_original = atis_data.num_utterances(data.train_data)
    log.put("Original number of training utterances:\t"
            + str(num_train_original))

    eval_fn = evaluate_utterance_sample
    trainbatch_fn = data.get_utterance_batches
    trainsample_fn = data.get_random_utterances
    validsample_fn = data.get_all_utterances
    batch_size = params.batch_size
    if params.interaction_level:
        batch_size = 1
        eval_fn = evaluate_interaction_sample
        trainbatch_fn = data.get_interaction_batches
        trainsample_fn = data.get_random_interactions
        validsample_fn = data.get_all_interactions

    maximum_output_length = params.train_maximum_sql_length
    train_batches = trainbatch_fn(batch_size,
                                  max_output_length=maximum_output_length,
                                  randomize=not params.deterministic)

    if params.num_train >= 0:
        train_batches = train_batches[:params.num_train]

    training_sample = trainsample_fn(params.train_evaluation_size,
                                     max_output_length=maximum_output_length)
    valid_examples = validsample_fn(data.valid_data,
                                    max_output_length=maximum_output_length)

    num_train_examples = sum([len(batch) for batch in train_batches])
    num_steps_per_epoch = len(train_batches)

    log.put(
        "Actual number of used training examples:\t" +
        str(num_train_examples))
    log.put("(Shortened by output limit of " +
            str(maximum_output_length) +
            ")")
    log.put("Number of steps per epoch:\t" + str(num_steps_per_epoch))
    log.put("Batch size:\t" + str(batch_size))

    print(
        "Kept " +
        str(num_train_examples) +
        "/" +
        str(num_train_original) +
        " examples")
    print(
        "Batch size of " +
        str(batch_size) +
        " gives " +
        str(num_steps_per_epoch) +
        " steps per epoch")

    # Keeping track of things during training.
    epochs = 0
    patience = params.initial_patience
    learning_rate_coefficient = 1.
    previous_epoch_loss = float('inf')
    maximum_validation_accuracy = 0.
    maximum_string_accuracy = 0.
    crayon = CrayonClient(hostname="localhost")
    experiment = crayon.create_experiment(params.logdir)

    countdown = int(patience)

    keep_training = True
    while keep_training:
        log.put("Epoch:\t" + str(epochs))
        model.set_dropout(params.dropout_amount)
        model.set_learning_rate(
            learning_rate_coefficient *
            params.initial_learning_rate)

        # Run a training step.
        if params.interaction_level:
            epoch_loss = train_epoch_with_interactions(
                train_batches,
                params,
                model,
                randomize=not params.deterministic)
        else:
            epoch_loss = train_epoch_with_utterances(
                train_batches,
                model,
                randomize=not params.deterministic)

        log.put("train epoch loss:\t" + str(epoch_loss))
        experiment.add_scalar_value("train_loss", epoch_loss, step=epochs)

        model.set_dropout(0.)

        # Run an evaluation step on a sample of the training data.
        train_eval_results = eval_fn(training_sample,
                                     model,
                                     params.train_maximum_sql_length,
                                     "train-eval",
                                     gold_forcing=True,
                                     metrics=TRAIN_EVAL_METRICS)[0]

        for name, value in train_eval_results.items():
            log.put(
                "train final gold-passing " +
                name.name +
                ":\t" +
                "%.2f" %
                value)
            experiment.add_scalar_value(
                "train_gold_" + name.name, value, step=epochs)

        # Run an evaluation step on the validation set.
        valid_eval_results = eval_fn(valid_examples,
                                     model,
                                     "valid-eval",
                                     gold_forcing=True,
                                     metrics=VALID_EVAL_METRICS)[0]
        for name, value in valid_eval_results.items():
            log.put("valid gold-passing " + name.name + ":\t" + "%.2f" % value)
            experiment.add_scalar_value(
                "valid_gold_" + name.name, value, step=epochs)

        valid_loss = valid_eval_results[Metrics.LOSS]
        valid_token_accuracy = valid_eval_results[Metrics.TOKEN_ACCURACY]
        string_accuracy = valid_eval_results[Metrics.STRING_ACCURACY]

        if valid_loss > previous_epoch_loss:
            learning_rate_coefficient *= params.learning_rate_ratio
            log.put(
                "learning rate coefficient:\t" +
                str(learning_rate_coefficient))
        experiment.add_scalar_value(
            "learning_rate",
            learning_rate_coefficient,
            step=epochs)
        previous_epoch_loss = valid_loss
        saved = False
        if valid_token_accuracy > maximum_validation_accuracy:
            saved = True
            maximum_validation_accuracy = valid_token_accuracy
            patience = patience * params.patience_ratio
            countdown = int(patience)
            last_save_file = os.path.join(params.logdir, "save_" + str(epochs))
            model.save(last_save_file)

            log.put("maximum accuracy:\t" + str(maximum_validation_accuracy))
            log.put("patience:\t" + str(patience))
            log.put("save file:\t" + str(last_save_file))
        if not saved and string_accuracy > maximum_string_accuracy:
            maximum_string_accuracy = string_accuracy
            log.put(
                "maximum string accuracy:\t" +
                str(maximum_string_accuracy))
            last_save_file = os.path.join(params.logdir, "save_" + str(epochs))
            model.save(last_save_file)

        send_slack_message(
            username=params.logdir,
            message="Epoch " +
            str(epochs) +
            ": " +
            str(string_accuracy) +
            " validation accuracy; countdown is " +
            str(countdown),
            channel="models")

        if countdown <= 0:
            keep_training = False

        countdown -= 1
        log.put("countdown:\t" + str(countdown))
        experiment.add_scalar_value("countdown", countdown, step=epochs)
        log.put("")

        epochs += 1

    log.put("Finished training!")
    send_slack_message(username=params.logdir,
                       message="Done training!!",
                       channel="@alsuhr")
    log.close()

    return last_save_file
Example #21
import os

import scipy.stats as st
import numpy as np
from torch import cuda
from pycrayon import CrayonClient

from pavooc.scoring.feature_extraction import extract_features, \
    split_test_train_valid, normalize_features
from pavooc.scoring.azimuth_dataset import load_dataset
from pavooc.scoring.dataloader import DataLoader
from pavooc.config import BATCH_SIZE, WEIGHTS_DIR, \
    CONSERVATION_FEATURES_FILE, SCALER_FILE, DATADIR

if cuda.is_available():
    import torch.backends.cudnn as cudnn
    cudnn.benchmark = True

try:
    crayon = CrayonClient(hostname="localhost", port=8889)
except (ValueError, RuntimeError):
    crayon = None

try:
    os.mkdir(WEIGHTS_DIR)
except FileExistsError:
    pass


def to_np(x):
    return x.data.cpu().numpy()


def _init_model(feature_length, model_class, loss, learning_rate):
    model = model_class(feature_length)
Example #22
    return parser.parse_args()


if __name__ == '__main__':
    args = __pars_args__()

    master_net = DFP_Network(
        (args.env_size**2) * 3,  # observation_size = env_size^2 * 3 (battle ground * colors)
        num_offset=len(args.offset),
        a_size=args.action_space,
        num_measurements=args.num_measurements,
        is_master=True)
    master_net.share_memory()
    cc = CrayonClient(hostname="localhost")
    # cc.remove_all_experiments()

    processes = []
    # p = mp.Process(target=work, args=(0, args, master_net, exp_buff, optimizer))      eval net
    # p.start()
    # processes.append(p)

    for rank in range(0, args.num_processes):
        # for rank in range(0, 1):
        p = mp.Process(target=work, args=(rank, args, master_net, cc, None))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
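
The work target handed to each process is not part of this fragment; a minimal sketch of the per-process logging it implies (train_step and args.max_steps are hypothetical):

def work(rank, args, master_net, cc, optimizer):
    # One Crayon experiment per worker so the loss curves don't interleave.
    exp = cc.create_experiment('worker_{}'.format(rank))
    for step in range(args.max_steps):       # hypothetical option
        loss = train_step(master_net, args)  # hypothetical training step
        exp.add_scalar_value('loss', loss, step=step)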
Example #23
                if rnd <= 0: break
            res.append(i)
            cw = i
            if cw == stop: break
            if nchars and len(res) > nchars: break
        return res

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('corpus', help='Path to the corpus file.')
    parser.add_argument('crayserver', help='Server location for crayon.')
    parser.add_argument('expname', help='Experiment name')
    args = parser.parse_args()

    # Connect to the server
    cc = CrayonClient(hostname=args.crayserver)

    #Create a new experiment
    myexp = cc.create_experiment(args.expname)

    train = util.CharsCorpusReader(args.corpus, begin="<s>")
    vocab = util.Vocab.from_corpus(train)
    
    VOCAB_SIZE = vocab.size()

    model = dy.ParameterCollection()
    trainer = dy.SimpleSGDTrainer(model)

    #lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.SimpleRNNBuilder)
    lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=dy.LSTMBuilder)
Example #24
import torch
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from fine_tuning_config_file import *

## If you want to keep track of your network on TensorBoard, set USE_TENSORBOARD to 1 in the config file.

if USE_TENSORBOARD:
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=TENSORBOARD_SERVER)
    try:
        cc.remove_experiment(EXP_NAME)
    except ValueError:
        pass
    foo = cc.create_experiment(EXP_NAME)


## If you want to use the GPU, set GPU_MODE to 1 in the config file.

use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

count = 0
Example #25
def eval_loop(counter, args, shared_model, model_eval):
    try:
        SEC_PER_DAY = 24 * 60 * 60

        env = build_env(args.type,
                        args,
                        treat_life_lost_as_terminal=False,
                        max_time=5 * 60)
        model = copy.deepcopy(shared_model)
        model.eval()

        # Create a new experiment
        vis = visdom.Visdom(env='A3C:' + args.name)

        cc = CrayonClient()
        names = cc.get_experiment_names()
        summaries = []
        for idx in range(args.n_eval):
            name = "{} [{}]".format(args.name, idx + 1)
            if name in names:
                cc.remove_experiment(name)
            summaries.append(cc.create_experiment(name))

        max_reward = None
        save_condition = args.save_intervel

        rewards = []
        start_time = time.time()
        while True:
            # Sync with the shared model
            model.load_state_dict(shared_model.state_dict())

            restart = False
            eval_start_time, eval_start_step = time.time(), counter.value
            results = []
            for i in range(args.n_eval):
                model.reset_state()
                results.append(model_eval(model, env, vis=(vis, i + 1, 60)))
                if env.exceed_max:
                    restart = True
                    env.reset()
                    break
                env.reset()

            if restart:
                continue

            eval_end_time, eval_end_step = time.time(), counter.value
            results = EvalResult(*zip(*results))
            rewards.append((counter.value, results.reward))

            local_max_reward = np.max(results.reward)
            if max_reward is None or max_reward < local_max_reward:
                max_reward = local_max_reward

            if local_max_reward >= max_reward:
                # Save model
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'best_model.pth'))

            time_since_start = eval_end_time - start_time
            day = time_since_start // SEC_PER_DAY
            time_since_start %= SEC_PER_DAY

            seconds_to_finish = (args.n_steps - eval_end_step) / (
                eval_end_step - eval_start_step) * (eval_end_time -
                                                    eval_start_time)
            days_to_finish = seconds_to_finish // SEC_PER_DAY
            seconds_to_finish %= SEC_PER_DAY
            print("STEP:[{}|{}], Time: {}d {}, Finish in {}d {}".format(
                counter.value, args.n_steps, '%02d' % day,
                time.strftime("%Hh %Mm %Ss", time.gmtime(time_since_start)),
                '%02d' % days_to_finish,
                time.strftime("%Hh %Mm %Ss", time.gmtime(seconds_to_finish))))
            print(
                '\tMax reward: {}, avg_reward: {}, std_reward: {}, min_reward: {}, max_reward: {}'
                .format(max_reward, np.mean(results.reward),
                        np.std(results.reward), np.min(results.reward),
                        local_max_reward))

            # Plot
            for summary, reward in zip(summaries, results.reward):
                summary.add_scalar_value('reward',
                                         reward,
                                         step=eval_start_step)

            if counter.value > save_condition or counter.value >= args.n_steps:
                save_condition += args.save_intervel
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path,
                                 'model_iter_{}.pth'.format(counter.value)))
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'model_latest.pth'))

                with open(os.path.join(args.save_path, 'rewards'), 'a+') as f:
                    for record in rewards:
                        f.write('{}: {}\n'.format(record[0], record[1]))
                del rewards[:]

            if counter.value >= args.n_steps:
                print('Evaluator Finished !!!')
                break
    except KeyboardInterrupt:
        torch.save(shared_model.state_dict(),
                   os.path.join(args.model_path, 'model_latest.pth'))
        raise
Example #26
                'label': prob
            },
                                     ignore_index=True)  # for proba

    df_pred['id'].astype(int)
    return df_pred


if __name__ == '__main__':

    # tensorboad
    use_tensorboard = False
    # use_tensorboard = True and CrayonClient is not None

    if use_tensorboard:
        cc = CrayonClient(hostname='192.168.0.3')  # hostname only; pycrayon adds the http:// scheme itself
        # cc.remove_all_experiments()

    trainloader, valloader, trainset, valset, classes, class_to_idx, num_to_class, df = loadDB(
        args)
    print('Classes {}'.format(classes))
    models = ['senet']
    for i in range(1, 5):
        for m in models:
            runId = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
            fixSeed(args)
            model = selectModel(args, m)
            recorder = RecorderMeter(args.epochs)  # epoc is updated
            model_name = (type(model).__name__)

            exp_name = datetime.datetime.now().strftime(model_name + '_' +
Example #27
def main():
    data_path = args.data
    model_name = args.model
    save_dir = args.save
    hidden_size = args.hidden_size
    pmnist = args.pmnist
    batch_size = args.batch_size
    max_iter = args.max_iter
    use_gpu = args.gpu

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    if pmnist:
        perm = torch.randperm(784)
    else:
        perm = torch.arange(0, 784).long()
    train_dataset = datasets.MNIST(root=data_path,
                                   train=True,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       transform_flatten,
                                       partial(transform_permute, perm=perm)
                                   ]),
                                   download=True)
    valid_dataset = datasets.MNIST(root=data_path,
                                   train=False,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       transform_flatten,
                                       partial(transform_permute, perm=perm)
                                   ]),
                                   download=True)

    tb_client = CrayonClient()
    tb_xp_name = '{}-{}'.format(datetime.now().strftime("%y%m%d-%H%M%S"),
                                save_dir)
    tb_xp_train = tb_client.create_experiment('{}/train'.format(tb_xp_name))
    tb_xp_valid = tb_client.create_experiment('{}/valid'.format(tb_xp_name))

    if model_name == 'bnlstm':
        model = LSTM(cell_class=BNLSTMCell,
                     input_size=1,
                     hidden_size=hidden_size,
                     batch_first=True,
                     max_length=784)
    elif model_name == 'lstm':
        model = LSTM(cell_class=LSTMCell,
                     input_size=1,
                     hidden_size=hidden_size,
                     batch_first=True)
    else:
        raise ValueError
    fc = nn.Linear(in_features=hidden_size, out_features=10)
    loss_fn = nn.CrossEntropyLoss()
    params = list(model.parameters()) + list(fc.parameters())
    optimizer = optim.RMSprop(params=params, lr=1e-3, momentum=0.9)

    def compute_loss_accuracy(data, label):
        hx = None
        if not pmnist:
            h0 = Variable(
                data.data.new(1, data.size(0), hidden_size).normal_(0, 0.1))
            c0 = Variable(
                data.data.new(1, data.size(0), hidden_size).normal_(0, 0.1))
            hx = (h0, c0)
        _, (h_n, _) = model(input_=data, hx=hx)
        logits = fc(h_n[0])
        loss = loss_fn(input=logits, target=label)
        accuracy = (logits.max(1)[1] == label).float().mean()
        return loss, accuracy

    if use_gpu:
        model.cuda()
        fc.cuda()

    iter_cnt = 0
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True)
    while iter_cnt < max_iter:
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True)
        for train_batch in train_loader:
            train_data, train_label = train_batch
            train_data = Variable(train_data)
            train_label = Variable(train_label)
            if use_gpu:
                train_data = train_data.cuda()
                train_label = train_label.cuda()
            model.train(True)
            model.zero_grad()
            train_loss, train_accuracy = compute_loss_accuracy(
                data=train_data, label=train_label)
            train_loss.backward()
            clip_grad_norm(parameters=params, max_norm=1)
            optimizer.step()
            tb_xp_train.add_scalar_dict(
                data={'loss': train_loss.data[0],
                      'accuracy': train_accuracy.data[0]},
                step=iter_cnt)

            if iter_cnt % 50 == 49:
                for valid_batch in valid_loader:
                    valid_data, valid_label = valid_batch
                    # Dirty, but I don't see a cleaner way to grab a single batch
                    break
                valid_data = Variable(valid_data, volatile=True)
                valid_label = Variable(valid_label, volatile=True)
                if use_gpu:
                    valid_data = valid_data.cuda()
                    valid_label = valid_label.cuda()
                model.train(False)
                valid_loss, valid_accuracy = compute_loss_accuracy(
                    data=valid_data, label=valid_label)
                tb_xp_valid.add_scalar_dict(
                    data={'loss': valid_loss.data[0],
                          'accuracy': valid_accuracy.data[0]},
                    step=iter_cnt)
                save_path = '{}/{}'.format(save_dir, iter_cnt)
                torch.save(model, save_path)
            iter_cnt += 1
            if iter_cnt == max_iter:
                break
Example #28
from pycrayon import CrayonClient
import time

# Connect to the server
cc = CrayonClient(hostname="127.0.0.1")

# Create a new experiment
foo = cc.create_experiment("foo")

# Send some scalar values to the server
foo.add_scalar_value("accuracy", 0, wall_time=11.3)
foo.add_scalar_value("accuracy", 4, wall_time=12.3)
# You can force the time and step values
foo.add_scalar_value("accuracy", 6, wall_time=13.3, step=4)

# Get the data sent to the server
foo.get_scalar_values("accuracy")
# >> [[11.3, 0, 0.0], [12.3, 1, 4.0], [13.3, 4, 6.0]]

# backup this experiment as a zip file
filename = foo.to_zip()

# delete this experiment from the server
cc.remove_experiment("foo")
# using the `foo` object from now on will result in an error

# Create a new experiment based on foo's backup
bar = cc.create_experiment("bar", zip_file=filename)

# Get the name of all scalar plots in this experiment
bar.get_scalar_names()
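
Beyond single scalars, pycrayon experiments also accept a dict of scalars in one call (the pattern Example #27 uses) and, assuming the histogram API from the pycrayon README, raw values the server bins for you:

# Log several named scalars at the same step with one call
bar.add_scalar_dict(data={"loss": 0.42, "accuracy": 0.87}, step=1)

# Histograms: with tobuild=True the server builds the histogram from raw values
bar.add_histogram_value("weights", [0.1, 0.2, 0.2, 0.3], tobuild=True, step=1)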
Example #29
if torch.cuda.is_available() and not opt.gpuid:
    print("WARNING: You have a CUDA device, should run with -gpuid 0")

if opt.gpuid:
    cuda.set_device(opt.gpuid[0])
    if opt.seed > 0:
        torch.cuda.manual_seed(opt.seed)

if len(opt.gpuid) > 1:
    sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
    sys.exit(1)

# Set up the Crayon logging server.
if opt.exp_host != "":
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=opt.exp_host)

    experiments = cc.get_experiment_names()
    print(experiments)
    if opt.exp in experiments:
        cc.remove_experiment(opt.exp)
    experiment = cc.create_experiment(opt.exp)


def report_func(epoch, batch, num_batches, start_time, lr, report_stats):
    """
    This is the user-defined batch-level traing progress
    report function.

    Args:
        epoch(int): current epoch count.
Example #30
def main():
    parser = argparse.ArgumentParser(description='mcnn worldexp.')
    parser.add_argument('--preload', type=int, default=1)
    parser.add_argument('--data', type=str, default="/mnt/m2/mzcc/crowd_data/worldexpo", help='train, test, etc')
    args = parser.parse_args()
    method = 'mcnn'
    dataset_name = 'worldexpo'
    output_dir = './saved_models/'

    data_path = args.data
    train_path = data_path+'/train_frame'
    train_gt_path = data_path+'/train_dmap'
    train_mask_path = os.path.join(data_path,'train_roi')
    val_path = data_path+'/test_frame'
    val_gt_path = data_path+'/test_dmap'
    val_mask_path = os.path.join(data_path, 'test_roi')

    # training configuration
    start_step = 0
    end_step = 3000
    lr = 0.000001
    momentum = 0.9
    disp_interval = 500
    log_interval = 250


    # Tensorboard config
    use_tensorboard = False
    save_exp_name = method + '_' + dataset_name + '_' + 'v1'
    remove_all_log = False   # remove all historical experiments in TensorBoard
    exp_name = None # the previous experiment name in TensorBoard

    # ------------
    rand_seed = 64678
    if rand_seed is not None:
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        torch.cuda.manual_seed(rand_seed)


    # load net
    net = CrowdCounter()
    network.weights_normal_init(net, dev=0.01)
    # network.weights_xavier_init(net, gain=0.01)
    net.cuda()
    net.train()

    params = list(net.parameters())
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=lr)

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # tensorboard
    use_tensorboard = use_tensorboard and CrayonClient is not None
    if use_tensorboard:
        cc = CrayonClient(hostname='127.0.0.1')
        if remove_all_log:
            cc.remove_all_experiments()
        if exp_name is None:
            exp_name = save_exp_name
            exp = cc.create_experiment(exp_name)
        else:
            exp = cc.open_experiment(exp_name)

    # training
    train_loss = 0
    step_cnt = 0
    re_cnt = False
    t = Timer()
    t.tic()

    data_loader = ExrImageDataLoader(train_path, train_gt_path, mask_path=train_mask_path,
                                     shuffle=True, gt_downsample=True, pre_load=args.preload)
    data_loader_val = ExrImageDataLoader(val_path, val_gt_path, mask_path=val_mask_path,
                                         shuffle=False, gt_downsample=True, pre_load=False)
    best_mae = 10000000

    for epoch in range(start_step, end_step+1):
        step = -1
        train_loss = 0
        for blob in data_loader:
            step = step + 1
            im_data = blob['data']
            gt_data = blob['gt_density']
            mask = blob['mask']
            density_map = net(im_data, gt_data, mask=mask)
            loss = net.loss
            train_loss += loss.item()  # .data[0] in older PyTorch
            step_cnt += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % disp_interval == 0:
                print("current loss: {}".format(loss.item()))
                duration = t.toc(average=False)
                fps = step_cnt / duration
                gt_count = np.sum(gt_data)
                density_map = density_map.data.cpu().numpy()
                et_count = np.sum(density_map)
                utils.save_results(im_data,gt_data,density_map, output_dir)
                log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (epoch,
                    step, 1./fps, gt_count,et_count)
                log_print(log_text, color='green', attrs=['bold'])
                re_cnt = True


            if re_cnt:
                t.tic()
                re_cnt = False

        if (epoch % 2 == 0):
            save_name = os.path.join(output_dir, '{}_{}_{}.h5'.format(method,dataset_name,epoch))
            network.save_net(save_name, net)
            #calculate error on the validation dataset
            mae,mse = evaluate_model(save_name, data_loader_val)
            if mae < best_mae:
                best_mae = mae
                best_mse = mse
                best_model = '{}_{}_{}.h5'.format(method,dataset_name,epoch)
            log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch,mae,mse)
            log_print(log_text, color='green', attrs=['bold'])
            log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (best_mae,best_mse, best_model)
            log_print(log_text, color='green', attrs=['bold'])
            if use_tensorboard:
                exp.add_scalar_value('MAE', mae, step=epoch)
                exp.add_scalar_value('MSE', mse, step=epoch)
                exp.add_scalar_value('train_loss', train_loss/data_loader.get_num_samples(), step=epoch)
Example #31
if opt.seed > 0:
    torch.manual_seed(opt.seed)

if torch.cuda.is_available() and not opt.gpus:
    print("WARNING: You have a CUDA device, should run with -gpus 0")

opt.gpus = range(opt.gpus)
if opt.gpus:
    #cuda.set_device(opt.gpus)
    if opt.seed > 0:
        torch.cuda.manual_seed(opt.seed)

# Set up the Crayon logging server.
if opt.log_server != "":
    from pycrayon import CrayonClient
    cc = CrayonClient(hostname=opt.log_server)

    experiments = cc.get_experiment_names()
    print(experiments)
    if opt.experiment_name in experiments:
        cc.remove_experiment(opt.experiment_name)
    experiment = cc.create_experiment(opt.experiment_name)


def eval(model, criterion, data, fert_dict):
    stats = onmt.Loss.Statistics()
    model.eval()
    loss = onmt.Loss.MemoryEfficientLoss(opt,
                                         model.generator,
                                         criterion,
                                         eval=True,
Example #32
    print("WARNING: You have a CUDA device, should run with -gpuid 0")

if opt.gpuid:
    cuda.set_device(opt.gpuid[0])
    if opt.seed > 0:
        torch.cuda.manual_seed(opt.seed)

if len(opt.gpuid) > 1:
    sys.stderr.write("Sorry, multigpu isn't supported yet, coming soon!\n")
    sys.exit(1)

# Set up the Crayon logging server.
if opt.exp_host != "":
    from pycrayon import CrayonClient

    cc = CrayonClient(hostname=opt.exp_host)

    experiments = cc.get_experiment_names()
    print(experiments)
    if opt.exp in experiments:
        cc.remove_experiment(opt.exp)
    experiment = cc.create_experiment(opt.exp)

if opt.tensorboard:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter(
        opt.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S"),
        comment="Onmt")

progress_step = 0
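
For the tensorboardX branch above, the counterpart of Crayon's experiment.add_scalar_value is writer.add_scalar; a minimal sketch using the writer and progress_step defined above:

# Log a training-loss point at the current progress step
writer.add_scalar('train/loss', 0.42, global_step=progress_step)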