Example #1
    def __init__(self, vocab_size, device=DEVICE):
        super().__init__()
        self.vocab_size = vocab_size
        self.device = device
        self.embed = nn.Embedding(num_embeddings=vocab_size,
                                  embedding_dim=D_WORD).to(self.device)
        self.emb_dropout = nn.Dropout(P_DROP).to(self.device)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.rnn = nn.LSTM(
                input_size=D_WORD,
                hidden_size=D_HIDDEN // 2,  # bidirectional
                batch_first=True,
                dropout=P_DROP,
                bidirectional=True).to(self.device)
        # Initial cell and hidden state for each sequence
        hidden0_weights = torch.randn(D_HIDDEN // 2)
        self.hidden0 = nn.Parameter(hidden0_weights.to(self.device),
                                    requires_grad=True)
        cell0_weights = torch.randn(D_HIDDEN // 2)
        self.cell0 = nn.Parameter(cell0_weights.to(self.device),
                                  requires_grad=True)

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Text encoder params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
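The count_params helper itself never appears in this listing; the snippet below is only a minimal sketch, assuming (as Examples #1-#4, #7, #9 and #11 do) that it takes an nn.Module and returns separate trainable and non-trainable totals. It is not any of the projects' actual implementations.

def count_params(module):
    # Hypothetical sketch of the undisclosed helper: tally parameter elements
    # by whether they require gradients.
    trainable = sum(p.numel() for p in module.parameters() if p.requires_grad)
    non_trainable = sum(p.numel() for p in module.parameters() if not p.requires_grad)
    return trainable, non_trainable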
Example #2
    def __init__(self):
        super().__init__()
        self.img = nn.Sequential(conv3x3(D_GF, 3), nn.Tanh())

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Image output params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #3
    def __init__(self):
        super().__init__()
        self.encoder = downscale16_encoder_block()
        self.logit = DiscriminatorLogitBlock()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Discriminator64 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #4
    def __init__(self):
        super().__init__()
        self.downscale_encoder_16 = downscale16_encoder_block()
        self.downscale_encoder_32 = downscale2_encoder_block(
            D_DF * 8, D_DF * 16)
        self.encoder32 = conv3x3_LReLU(D_DF * 16, D_DF * 8)
        self.logit = DiscriminatorLogitBlock()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Discriminator128 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #5
def init_model(args, field, logger, world_size, device):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model)
    model = Model(field, args)
    params = get_trainable_params(model)
    num_param = count_params(params)
    logger.info(f'{args.model} has {num_param:,} trainable parameters')

    model.to(device)
    if world_size > 1:
        logger.info('Wrapping model for distributed training')
        model = DistributedDataParallel(model)

    model.params = params
    return model
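Examples #5, #8 and #10 follow a different convention: get_trainable_params returns a list of parameters and count_params then returns a single integer. The following is a sketch under that assumption, not the original helpers:

def get_trainable_params(model):
    # Hypothetical: keep only parameters that will receive gradients.
    return [p for p in model.parameters() if p.requires_grad]


def count_params(params):
    # Hypothetical: here the argument is the parameter list, not the module.
    return sum(p.numel() for p in params)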
Example #7
    def __init__(self, use_self_attention=False):
        super().__init__()
        self.residuals = nn.Sequential(
            *[Residual(D_GF * 2) for _ in range(RESIDUALS)])
        self.attn = Attention(D_GF, D_HIDDEN)
        self.upsample = upsample_block(D_GF * 2, D_GF)
        self.use_self_attention = use_self_attention

        if self.use_self_attention:
            self.self_attn = self_attn_block()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #8
def init_model(world_size):

    model = MultitaskQuestionAnsweringNetwork()
    if os.path.isfile('model.pth'):
        print('load pretrained model')
        model.load_state_dict(torch.load('model.pth'))
    else:
        print('new model')
    params = get_trainable_params(model)
    num_param = count_params(params)
    print(f'model has {num_param:,} parameters')
    if world_size > 1:
        print('Wrapping model for distributed training')
        model = DistributedDataParallel(model)
    model.params = params
    return model
Example #9
    def __init__(self):
        super().__init__()
        self.d_gf = D_GF * 16
        self.fc = nn.Sequential(
            nn.Linear(D_Z + D_COND, self.d_gf * 4 * 4 * 2, bias=False),
            nn.BatchNorm1d(self.d_gf * 4 * 4 * 2),
            nn.modules.activation.GLU(dim=1))

        self.upsample_steps = nn.Sequential(*[
            upsample_block(self.d_gf // (2**i), self.d_gf // (2**(i + 1)))
            for i in range(4)
        ])

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Generator0 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #10
def init_model(args, field, logger, world_size):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model)
    model = Model(field, args)
    # Instantiate the model
    params = get_trainable_params(model)
    num_param = count_params(params)
    # Count the number of model parameters
    logger.info(f'{args.model} has {num_param:,} parameters')

    if args.gpus[0] > -1:
        model.cuda()
        # GPU selection: with -1 (or lower) the model stays on the CPU
    if world_size > 1:
        logger.info('Wrapping model for distributed training')
        model = DistributedDataParallel(model)

    model.params = params
    return model
Example #11
    def __init__(self, device=DEVICE):
        super().__init__()
        self.device = device
        self.inception_model = torchvision.models.inception_v3(
            pretrained=True).to(self.device).eval()
        # Freeze Inception V3 parameters
        freeze_params_(self.inception_model)
        # 768: the dimension of mixed_6e layer's sub-regions (768 x 289 [number of sub-regions, 17 x 17])
        self.local_proj = conv1x1(768, D_HIDDEN).to(self.device)
        # 2048: the dimension of last average pool's output
        self.global_proj = nn.Linear(2048, D_HIDDEN).to(self.device)

        self.local_proj.weight.data.uniform_(-IMG_WEIGHT_INIT_RANGE,
                                             IMG_WEIGHT_INIT_RANGE)
        self.global_proj.weight.data.uniform_(-IMG_WEIGHT_INIT_RANGE,
                                              IMG_WEIGHT_INIT_RANGE)

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'Image encoder params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
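freeze_params_ is also left undefined in this listing; given the trailing-underscore (in-place) convention and the fact that the frozen Inception weights end up in the non-trainable count, it plausibly just disables gradients, roughly:

def freeze_params_(module):
    # Hypothetical in-place freeze; frozen parameters are then reported as
    # non_trainable by count_params.
    for p in module.parameters():
        p.requires_grad = False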
Example #12
File: nod.py Project: joeaortiz/nod
    def __init__(self,
                 embedding_dim,
                 input_dims,
                 hidden_dim,
                 num_slots,
                 encoder='cswm',
                 cnn_size='small',
                 decoder='broadcast',
                 trans_model='gnn',
                 identity_action=False,
                 residual=False,
                 canonical=False):
        super(NodModel, self).__init__()
        self.embedding_dim = embedding_dim
        self.input_dims = input_dims
        self.hidden_dims = hidden_dim

        self.num_slots = num_slots
        self.identity_action_flag = identity_action
        self.canonical = canonical

        if encoder == 'cswm':
            self.encoder = modules.EncoderCSWM(
                input_dims=self.input_dims,
                embedding_dim=self.embedding_dim,
                num_objects=self.num_slots,
                cnn_size=cnn_size)

        if trans_model == 'gnn':
            self.transition_model = modules.TransitionGNN(
                input_dim=self.embedding_dim,
                hidden_dim=512,
                action_dim=12,
                num_objects=self.num_slots,
                residual=residual)
        elif trans_model == 'attention':
            self.transition_model = attention.MultiHeadCondAttention(
                n_head=5,
                input_feature_dim=self.embedding_dim + 12,
                out_dim=self.embedding_dim,
                dim_k=128,
                dim_v=128)

        if decoder == 'broadcast':
            self.decoder = spd.BroadcastDecoder(
                latent_dim=self.embedding_dim,
                output_dim=4,  # 3 rgb channels and one mask
                hidden_channels=32,
                num_layers=4,
                img_dims=self.input_dims[1:],  # width and height of square image
                act_fn='elu')
        elif decoder == 'cnn':
            out_shape = self.input_dims
            out_shape[0] += 1
            self.decoder = modules.DecoderCNNMedium(
                input_dim=self.embedding_dim,
                hidden_dim=32,
                num_objects=self.num_slots,
                output_size=out_shape)

        print('Number of params in encoder ', util.count_params(self.encoder))
        print('Number of params in transition model ',
              util.count_params(self.transition_model))
        print('Number of params in decoder ', util.count_params(self.decoder))

        self.l2_loss = nn.MSELoss(reduction="mean")
Example #13
input_shape = obs['image1'].size()[1:]

model = nod.NodModel(
    embedding_dim=args.embedding_dim,
    input_dims=input_shape,
    hidden_dim=args.hidden_dim,
    num_slots=args.num_slots,
    encoder=args.encoder,
    cnn_size=args.cnn_size,
    trans_model=args.trans_model,
    decoder=args.decoder,
    identity_action=args.identity_action,
    residual=args.residual,
    canonical=args.canonical_rep)
model.to(device)
print('Number of parameters in model', util.count_params(model))

if args.checkpoint_path is not None:
    print("Loading model from %s" % args.checkpoint_path)
    util.custom_load(model, path=args.checkpoint_path)
else:
    print("Initialising random weights")
    model.apply(util.weights_init)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=args.learning_rate)

# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.4)

now = datetime.datetime.now()
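util.weights_init and util.custom_load are helpers from the nod project and are not reproduced in this listing; a plausible weights_init compatible with the model.apply(util.weights_init) call above could look like this (an illustrative assumption, not the project's code):

import torch.nn as nn

def weights_init(m):
    # Hypothetical Xavier initialisation, applied module by module via
    # nn.Module.apply as in the snippet above.
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)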
Example #14
def training(config, cla):
    g_global_step = tf.Variable(0, trainable=False, name=config['model']['type']+"_global_step")

    glr = config['optimizer']['lr']
    sess = tf.Session()

    # build model
    G = hparams.get_model(config['model']['type'])(config, sess)

    ## update params
    G_vars = [var for var in tf.trainable_variables() if config['model']['type'] in var.name]
    util.count_params(G_vars, config['model']['type'])
    util.total_params()

    g_learning_rate = tf.placeholder(tf.float32, [])

    g_ozer = hparams.get_optimizer(config['optimizer']['type'])(learn_rate=g_learning_rate)
    
    g_grad = g_ozer.compute_gradients(G.loss, G_vars)
    g_update = g_ozer.apply_gradients(g_grad, global_step=g_global_step)

    g_grad_fix = g_ozer.compute_gradients(G.loss_fix, G_vars)
    g_update_fix = g_ozer.apply_gradients(g_grad_fix, global_step=g_global_step)

    ## restore from checkpoint
    G_save_path = os.path.join(config['training']['path'], 'generat.ckpt')

    sess.run(tf.global_variables_initializer())

    G.load(G_save_path)

    history_file = os.path.join(config['training']['path'], 'history.txt')

    tr_dataset = get_dataset(config, 'tr')
    cv_dataset = get_dataset(config, 'cv')
    tr_next = tr_dataset.get_iterator()
    cv_next = cv_dataset.get_iterator()

    valid_best_sdr = float('-inf')
    valid_wait = 0

    if config['training']['perm_path'] != None:
        fixed_perm_list = util.read_pretrained_perm(config['training']['perm_path'], tr_dataset.file_base)

    last_step = sess.run(g_global_step)
    tr_audio_perm = {i:[] for i in range(20000)} if last_step == 0 else util.load_perm(config, 'tr', last_step, tr_dataset, 20000)

    for epoch in range(last_step//(20000//config['training']['batch_size'])+1, config['training']['num_epochs'] + 1):
        
        tr_loss = tr_size = tr_sdr = 0.0
        
        util.myprint(history_file, '-' * 20 + ' epoch {} '.format(epoch) + '-' * 20)

        ## training data initial
        if hasattr(tr_dataset, 'iterator'):
            sess.run(tr_dataset.iterator.initializer)
        else:
            tr_gen = tr_dataset.get_next()

        while True:
            try:
                feed_audio, audio_idx = sess.run(tr_next) if tr_next != None else next(tr_gen)

                if config['training']['pit'] == True:
                    g_loss, g_sdr, g_curr_step, _, g_perm_idx = sess.run(
                                            fetches=[G.loss, G.sdr, g_global_step, g_update, G.perm_idxs],
                                            feed_dict={G.audios: feed_audio, g_learning_rate: glr})

                elif config['training']['perm_path'] != None:
                    fixed_perm = np.take(fixed_perm_list, audio_idx, axis=0)
                    g_loss, g_sdr, g_curr_step, _, g_perm_idx = sess.run(
                                            fetches=[G.loss_fix, G.sdr_fix, g_global_step, g_update_fix, G.perm_idxs_fix],
                                            feed_dict={G.audios: feed_audio, g_learning_rate: glr, G.fixed_perm: fixed_perm})

                tr_loss += g_loss
                tr_sdr  += g_sdr
                tr_size += 1

                print('Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.
                      format(g_curr_step, config['training']['loss'], g_loss, g_sdr, glr), end='\r')

                # record label assignment
                for _i, _id in enumerate(audio_idx):
                    tr_audio_perm[_id].append(g_perm_idx[_i].tolist())

            except (tf.errors.OutOfRangeError, StopIteration):
                util.myprint(history_file, 'Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.
                            format(g_curr_step, config['training']['loss'], g_loss, g_sdr, glr))
                util.myprint(history_file, 'mean {} = {:5f} , mean sdr = {:5f}, lr = {}'.
                            format(config['training']['loss'], tr_loss/tr_size, tr_sdr/tr_size, glr))
                break

        ## valid iteration
        if hasattr(cv_dataset, 'iterator'):
            sess.run(cv_dataset.iterator.initializer)
        else:
            cv_gen = cv_dataset.get_next()

        cv_loss = cv_size = cv_sdr = 0.0
        while True:
            try:
                feed_audio, audio_idx = sess.run(cv_next) if cv_next != None else next(cv_gen)
                g_loss, g_sdr = sess.run(fetches=[G.loss, G.sdr], feed_dict={G.audios: feed_audio})

                cv_loss += g_loss
                cv_sdr  += g_sdr
                cv_size += 1

            except (tf.errors.OutOfRangeError, StopIteration):
                curr_loss = cv_loss/cv_size
                curr_sdr = cv_sdr/cv_size
                util.myprint(
                    history_file,
                    'Valid ' + config['training']['loss'] +
                    ' = {:5f}, sdr = {}'.format(curr_loss, curr_sdr))
                
                ## save model for every improve of the best valid score
                ## or last epoch
                if curr_sdr > valid_best_sdr or epoch == config['training']['num_epochs']:
                    util.myprint(history_file, 'Save Model')
                    valid_wait = 0
                    valid_best_sdr = curr_sdr
                    G.save(G_save_path, g_curr_step)

                else:
                    valid_wait += 1
                    if valid_wait == config['training']['half_lr_patience']:
                        glr /= 2; valid_wait = 0
                break

        util.write(os.path.join(config['training']['path'], 'tr_perm.csv'), tr_dataset.file_base, tr_audio_perm, epoch, config['training']['n_speaker'])
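Examples #14 and #15 use a TensorFlow 1.x variant in which util.count_params takes a variable list plus a scope name and util.total_params prints a running total; the sketch below is an assumption about that util module (it presumes fully defined static shapes), not its actual code:

import numpy as np

_totals = {}

def count_params(var_list, name):
    # Hypothetical TF1-style counter: sum the static shapes of the variables.
    n = sum(int(np.prod(v.get_shape().as_list())) for v in var_list)
    _totals[name] = n
    print('{}: {:,} parameters'.format(name, n))
    return n

def total_params():
    # Hypothetical: report everything counted so far.
    print('total: {:,} parameters'.format(sum(_totals.values())))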
Example #15
def training(config, cla):
    g_global_step = tf.Variable(0,
                                trainable=False,
                                name=config['model']['type'] + "_global_step")

    glr = config['optimizer']['lr']
    sess = tf.Session()

    # build model
    G = hparams.get_model(config['model']['type'])(config, sess)

    ## update params
    G_vars = [
        var for var in tf.trainable_variables()
        if config['model']['type'] in var.name
    ]
    util.count_params(G_vars, config['model']['type'])
    util.total_params()

    g_learning_rate = tf.placeholder(tf.float32, [])

    g_ozer = hparams.get_optimizer(
        config['optimizer']['type'])(learn_rate=g_learning_rate)
    g_grad = g_ozer.compute_gradients(G.loss, G_vars)
    g_update = g_ozer.apply_gradients(g_grad, global_step=g_global_step)

    ## restore from checkpoint
    G_save_path = os.path.join(config['training']['path'], 'generat.ckpt')

    sess.run(tf.global_variables_initializer())

    G.load(G_save_path)

    history_file = os.path.join(config['training']['path'], 'history.txt')

    tr_dataset = get_dataset(config, 'tr')
    cv_dataset = get_dataset(config, 'cv')
    tr_next = tr_dataset.get_iterator()
    cv_next = cv_dataset.get_iterator()

    valid_best_sdr = float('-inf')
    valid_wait = 0

    for epoch in range(1, config['training']['num_epochs'] + 1):
        tr_loss = tr_size = tr_sdr = 0.0

        util.myprint(history_file,
                     '-' * 20 + ' epoch {} '.format(epoch) + '-' * 20)

        ## training data initial
        if hasattr(tr_dataset, 'iterator'):
            sess.run(tr_dataset.iterator.initializer)
        else:
            tr_gen = tr_dataset.get_next()

        while True:
            try:
                feed_audio, audio_idx = sess.run(
                    tr_next) if tr_next != None else next(tr_gen)

                g_loss, g_sdr, g_curr_step, _ = sess.run(
                    fetches=[G.loss, G.sdr, g_global_step, g_update],
                    feed_dict={
                        G.audios: feed_audio,
                        g_learning_rate: glr
                    })
                tr_loss += g_loss
                tr_sdr += g_sdr
                tr_size += 1

                print('Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.format(
                    g_curr_step, config['training']['loss'], g_loss, g_sdr,
                    glr),
                      end='\r')

            except (tf.errors.OutOfRangeError, StopIteration):
                util.myprint(
                    history_file,
                    'Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.format(
                        g_curr_step, config['training']['loss'], g_loss, g_sdr,
                        glr))
                util.myprint(
                    history_file,
                    'mean {} = {:5f} , mean sdr = {:5f}, lr = {}'.format(
                        config['training']['loss'], tr_loss / tr_size,
                        tr_sdr / tr_size, glr))
                break

        ## valid iteration
        if hasattr(cv_dataset, 'iterator'):
            sess.run(cv_dataset.iterator.initializer)
        else:
            cv_gen = cv_dataset.get_next()

        cv_loss = cv_size = cv_sdr = 0.0
        while True:
            try:
                feed_audio, audio_idx = sess.run(
                    cv_next) if cv_next != None else next(cv_gen)
                g_loss, g_sdr = sess.run(fetches=[G.loss, G.sdr],
                                         feed_dict={G.audios: feed_audio})

                cv_loss += g_loss
                cv_sdr += g_sdr
                cv_size += 1

            except (tf.errors.OutOfRangeError, StopIteration):
                curr_loss = cv_loss / cv_size
                curr_sdr = cv_sdr / cv_size
                util.myprint(
                    history_file,
                    'Valid ' + config['training']['loss'] +
                    ' = {:5f}, sdr = {}'.format(curr_loss, curr_sdr))

                ## save model for every improve of the best valid score
                ## or last epoch
                if curr_sdr > valid_best_sdr or epoch == config['training'][
                        'num_epochs']:
                    util.myprint(history_file, 'Save Model')
                    valid_wait = 0
                    valid_best_sdr = curr_sdr
                    G.save(G_save_path, g_curr_step)

                else:
                    valid_wait += 1
                    if valid_wait == config['training']['half_lr_patience']:
                        glr /= 2
                        valid_wait = 0
                break
Example #16
def main():
    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    for tgt_idx, tgt in enumerate(dataset_targets[args.dataset]):
        print("Training a model for {}".format(tgt))

        # Load data
        root = args.dataset_path if args.dataset_path else dataset_paths[
            args.dataset]
        task_type = args.dataset_type if args.dataset_type else dataset_types[
            args.dataset]
        if args.resume:
            resume_dir = args.resume.format(dataset=args.dataset,
                                            model=args.model,
                                            layers=args.layers,
                                            feature=tgt)
        #end if
        Model_Class = model_dict[args.model]

        print("Preparing dataset")
        node_features, edge_features, target_features, task_type, train_loader, valid_loader, test_loader = read_dataset(
            args.dataset, root, args.batch_size, args.prefetch)

        # Define model and optimizer

        print('\tCreate model')
        hidden_state_size = args.hidden
        model = Model_Class(node_features=node_features,
                            edge_features=edge_features,
                            target_features=1,
                            hidden_features=hidden_state_size,
                            num_layers=args.layers,
                            dropout=0.5,
                            type=task_type,
                            s2s_processing_steps=args.s2s)
        print("#Parameters: {param_count}".format(
            param_count=count_params(model)))

        print('Optimizer')
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        criterion, evaluation, metric_name, metric_compare, metric_best = get_metric_by_task_type(
            task_type, target_features)

        print('Logger')
        logger = Logger(
            args.log_path.format(dataset=args.dataset,
                                 model=args.model,
                                 layers=args.layers,
                                 feature=tgt))

        lr_step = (args.lr - args.lr * args.lr_decay) / (
            args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

        # get the best checkpoint if available without training
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))

        print('Check cuda')
        if args.cuda:
            print('\t* Cuda')
            model = model.cuda()
            criterion = criterion.cuda()

        # Epoch for loop
        for epoch in range(0, args.epochs):
            try:
                if epoch > args.epochs * args.schedule[
                        0] and epoch < args.epochs * args.schedule[1]:
                    args.lr -= lr_step
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = args.lr
                #end if

                # train for one epoch
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      evaluation,
                      logger,
                      target_range=(tgt_idx, ),
                      tgt_name=tgt,
                      metric_name=metric_name,
                      cuda=args.cuda,
                      log_interval=args.log_interval)

                # evaluate on test set
                er1 = validate(valid_loader,
                               model,
                               criterion,
                               evaluation,
                               logger,
                               target_range=(tgt_idx, ),
                               tgt_name=tgt,
                               metric_name=metric_name,
                               cuda=args.cuda,
                               log_interval=args.log_interval)

                is_best = metric_compare(er1, best_er1)
                best_er1 = metric_best(er1, best_er1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_er1': best_er1,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best=is_best,
                    directory=resume_dir)

                # Logger step
                logger.log_value('learning_rate', args.lr).step()
            except KeyboardInterrupt:
                break
            #end try
        #end for

        # get the best checkpoint and test it with test set
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))
            #end if
        #end if

        # (For testing)
        validate(test_loader,
                 model,
                 criterion,
                 evaluation,
                 target_range=(tgt_idx, ),
                 tgt_name=tgt,
                 metric_name=metric_name,
                 cuda=args.cuda,
                 log_interval=args.log_interval)
Example #17
def go(options):

    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    SIZE = options.size
    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    TRAIN_SIZE = 60000 // BATCH_SIZE
    TEST_SIZE = 10000 // BATCH_SIZE

    CUDA = options.cuda

    results = {}  # collected across all models for the final pickle

    # for modelname in ['relu', 'sigmoid', 'relu-lambda', 'sigmoid-lambda', 'relu-sigloss', 'sigmoid-sigloss', 'bn-relu', 'relu-bn', 'sigmoid-bn']:
    for modelname in ['relu-lambda', 'relu', 'relu-bn']:  #, 'linear-bn']:

        print('testing model ', modelname)
        model = load_model(modelname, size=SIZE, mult=options.mult)
        print(util.count_params(model), ' parameters')

        if CUDA:
            model.cuda()

        criterion = nn.MSELoss(size_average=False)
        optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)

        accuracies = []  # per-epoch test loss for this model

        for e in tqdm.trange(EPOCHS):
            for i in range(TRAIN_SIZE):

                x = torch.rand(BATCH_SIZE, SIZE)
                if CUDA:
                    x = x.cuda()
                x = Variable(x)
                x.requires_grad = False

                optimizer.zero_grad()

                y = model(x)

                loss = criterion(y, x)

                if 'lambda' in modelname:
                    lloss = sum(loss_terms(model, x))

                    # print(loss.data[0], lloss.data[0])
                    loss = loss + options.lambd * lloss

                if 'sigloss' in modelname:
                    lterms = loss_terms(model, x)
                    lloss = (1.0 / len(lterms)) * sum(
                        [nn.functional.sigmoid(l) for l in lterms])
                    loss = loss + options.lambd * lloss

                loss.backward()
                optimizer.step()

                # w.add_scalar('normalization/mloss', mloss.data[0], i * BATCH_SIZE + e)
                # w.add_scalar('normalization/lloss', lloss.data[0], i * BATCH_SIZE + e)
                #
                # if i > 2:
                #     break

            sm = 0
            for i in range(TEST_SIZE):

                x = torch.rand(BATCH_SIZE, SIZE)
                if CUDA:
                    x = x.cuda()
                x = Variable(x)
                x.requires_grad = False

                y = model(x)
                loss = criterion(y, x)
                # print(loss.data[0] / BATCH_SIZE )

                sm += float(loss.data[0])

                # if i > 2:
                #     break

            accuracies.append(sm / (TEST_SIZE * BATCH_SIZE))

        # accuracies = np.asarray(accuracies)
        plt.plot(accuracies, label=modelname, marker=next(marker))
        results[modelname] = list(accuracies)

    plt.title('lambda ' + str(options.lambd))
    plt.legend()
    plt.savefig('loss-curves.pdf')

    pickle.dump(results, open('results.pkl', 'wb'))
Example #18
def go_learnrate(options):

    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    SHAPE = options.size
    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    CUDA = options.cuda

    w = SummaryWriter()

    results = {}  # collected across all learning rates for the final pickle

    for learnrate in [
            0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005,
            0.01, 0.05
    ]:

        print('testing learning rate ', learnrate)
        model = load_model('relu', False)
        print(util.count_params(model), ' parameters')

        if CUDA:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learnrate)

        accuracies = []  # per-epoch test accuracy for this learning rate

        for e in tqdm.trange(EPOCHS):
            for i, data in enumerate(trainloader, 0):

                # get the inputs
                inputs, labels = data

                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()

                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)

                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                # w.add_scalar('normalization/mloss', mloss.data[0], i * BATCH_SIZE + e)
                # w.add_scalar('normalization/lloss', lloss.data[0], i * BATCH_SIZE + e)
                #
                # if i > 2:
                #     break

            correct = 0
            total = 0
            for i, data in enumerate(testloader):

                inputs, labels = data

                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()

                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                outputs = model(inputs)

                _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels.data).sum()

                # if i > 2:
                #     break

            accuracies.append(correct / total)

        # accuracies = np.asarray(accuracies)
        plt.plot(accuracies, label=str(learnrate), marker=next(marker))
        results[str(learnrate)] = list(accuracies)

    plt.title('learning rates ')
    plt.legend()
    plt.savefig('loss-curves-lr.pdf')

    pickle.dump(results, open('results-lr.pkl', 'wb'))
Example #19
def go_learnrate(options):

    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    CUDA = options.cuda

    w = SummaryWriter()

    results = {}  # collected across all learning rates for the final pickle

    # Set up the dataset

    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train = torchvision.datasets.CIFAR10(root=options.data,
                                         train=True,
                                         download=True,
                                         transform=normalize)
    trainloader = torch.utils.data.DataLoader(train,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=2)
    test = torchvision.datasets.CIFAR10(root=options.data,
                                        train=False,
                                        download=True,
                                        transform=normalize)
    testloader = torch.utils.data.DataLoader(test,
                                             batch_size=BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    for learnrate in [
            0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005,
            0.01, 0.05
    ]:

        print('testing learning rate ', learnrate)
        model = load_model('relu', False)
        print(util.count_params(model), ' parameters')

        if CUDA:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learnrate)

        accuracies = []  # per-epoch test accuracy for this learning rate

        for e in tqdm.trange(EPOCHS):
            for i, data in enumerate(trainloader, 0):

                # get the inputs
                inputs, labels = data

                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()

                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)

                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                # w.add_scalar('normalization/mloss', mloss.data[0], i * BATCH_SIZE + e)
                # w.add_scalar('normalization/lloss', lloss.data[0], i * BATCH_SIZE + e)
                #
                # if i > 2:
                #     break

            correct = 0
            total = 0
            for i, data in enumerate(testloader):

                inputs, labels = data

                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()

                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                outputs = model(inputs)

                _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels.data).sum()

                # if i > 2:
                #     break

            accuracies.append(correct / total)

        # accuracies = np.asarray(accuracies)
        plt.plot(accuracies, label=str(learnrate), marker=next(marker))
        results[str(learnrate)] = list(accuracies)

    plt.title('learning rates ')
    plt.legend()
    plt.savefig('loss-curves-lr.pdf')

    pickle.dump(results, open('results-lr.pkl', 'wb'))