Code Example #1
File: train.py Project: jamie0725/Review-Generation
def train_fn(model):
    global_step = tf.train.create_global_step()

    trainable_vars = tf.trainable_variables()
    count_parameters(trainable_vars)

    optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate)

    rating_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_rating(v.name) and 'bias' not in v.name
    ])
    model.rating_loss = model.rating_loss + FLAGS.lambda_reg * rating_l2_loss

    review_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_review(v.name) and 'bias' not in v.name
    ])
    model.review_loss = model.review_loss + FLAGS.lambda_reg * review_l2_loss

    update_rating = optimizer.minimize(model.rating_loss,
                                       name='update_rating',
                                       global_step=global_step)
    update_review = optimizer.minimize(model.review_loss, name='update_review')

    return update_rating, update_review, global_step
Code Example #2
def train_fn(loss):
    '''
    Calculate gradients and update parameters based on the loss.
    '''
    # get all trainable variables
    trained_vars = tf.trainable_variables()

    # utility function that counts the model's parameters and prints them out
    count_parameters(trained_vars)

    # get gradients of the loss w.r.t. the trainable parameters
    gradients = tf.gradients(loss, trained_vars)

    # Gradient clipping (described in paper?): Clips values of multiple tensors by the ratio of the sum of their norms.
    clipped_grads, global_norm = tf.clip_by_global_norm(
        gradients, FLAGS.max_grad_norm)
    # save global norm
    tf.summary.scalar('global_grad_norm', global_norm)

    # Add gradients and vars to summary
    # for gradient, var in list(zip(clipped_grads, trained_vars)):
    #   if 'attention' in var.name:
    #     tf.summary.histogram(var.name + '/gradient', gradient)
    #     tf.summary.histogram(var.name, var)

    # Define optimizer
    # Returns and create (if necessary) the global step tensor.
    global_step = tf.train.get_or_create_global_step()
    # define the RMSProp optimizer; the paper uses a different optimizer: SGD with momentum 0.9
    optimizer = tf.train.RMSPropOptimizer(FLAGS.learning_rate)
    # get apply gradients operation
    train_op = optimizer.apply_gradients(zip(clipped_grads, trained_vars),
                                         name='train_op',
                                         global_step=global_step)
    return train_op, global_step
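
None of these snippets include the count_parameters helper itself; in the TF1-style examples it is called with a list of trainable variables. A minimal sketch of what such a helper might look like, assuming it simply sums the sizes of the variables it is given (the per-project utilities may print or format differently):

import numpy as np


def count_parameters(trainable_vars):
    # Sketch only: sums the number of scalars in each variable and prints a
    # small report. The actual helpers used by the projects above are not shown.
    total = 0
    for var in trainable_vars:
        n = int(np.prod(var.get_shape().as_list()))
        print('{:<60} {:>12,}'.format(var.name, n))
        total += n
    print('Total trainable parameters: {:,}'.format(total))
    return total
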
Code Example #3
File: train.py Project: jamie0725/Review-Generation
def train_fn(model):
    global_step = tf.train.create_global_step()

    trainable_vars = tf.trainable_variables()
    count_parameters(trainable_vars)

    optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1e-4)

    rating_l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars
                               if check_scope_rating(v.name) and 'bias' not in v.name])
    model.rating_loss = model.rating_loss + FLAGS.lambda_reg * rating_l2_loss

    review_l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars
                               if check_scope_review(v.name) and 'bias' not in v.name])
    model.review_loss = model.review_loss + FLAGS.lambda_reg * review_l2_loss

    # grad_rating = optimizer.compute_gradients(model.rating_loss)
    # grad_review = optimizer.compute_gradients(model.review_loss)
    # def ClipIfNotNone(grad):
    #         if grad is None:
    #             return grad
    #         return tf.clip_by_value(grad, -1., 1.)
    # clipped_grad_rating = [(ClipIfNotNone(grad), var) for grad, var in grad_rating]
    # clipeed_grad_review = [(ClipIfNotNone(grad), var) for grad, var in grad_review]
    # update_rating = optimizer.apply_gradients(clipped_grad_rating, name='update_rating', global_step=global_step)
    # update_review = optimizer.apply_gradients(clipeed_grad_review, name='update_review')
    update_rating = optimizer.minimize(
        model.rating_loss, name='update_rating', global_step=global_step)
    update_review = optimizer.minimize(model.review_loss, name='update_review')

    return update_rating, update_review, global_step
Code Example #4
def get_generater(args, config):
    # Import the model.
    model = __import__(config["model"])
    G = model.Generator(**config).to(config["device"])
    utils.count_parameters(G)

    utils.load_state_dict(G,
                          torch.load(args.weights_path),
                          strict=not args.not_strict)
    G.eval()
    return G
Code Example #5
File: model_iq.py Project: strategist922/dl_signal
    def __init__(self,
                 input_dims,
                 hidden_size,
                 embed_dim,
                 output_dim,
                 num_heads,
                 attn_dropout,
                 relu_dropout,
                 res_dropout,
                 out_dropout,
                 layers,
                 attn_mask=False,
                 src_mask=False,
                 tgt_mask=False):
        super(TransformerGenerationModel, self).__init__()
        [self.orig_d_a, self.orig_d_b] = input_dims
        assert self.orig_d_a == self.orig_d_b
        self.d_a, self.d_b = 512, 512
        final_out = embed_dim * 2
        h_out = hidden_size
        self.num_heads = num_heads
        self.layers = layers
        self.attn_dropout = attn_dropout
        self.relu_dropout = relu_dropout
        self.res_dropout = res_dropout
        self.attn_mask = attn_mask
        self.embed_dim = embed_dim

        self.d_a, self.d_b = 512, 512
        self.fc_a = nn.Linear(self.orig_d_a, self.d_a)
        self.fc_b = nn.Linear(self.orig_d_b, self.d_b)

        self.trans_encoder = self.get_encoder_network()
        self.trans_decoder = self.get_decoder_network()

        print("Encoder Model size: {0}".format(
            count_parameters(self.trans_encoder)))
        print("Decoder Model size: {0}".format(
            count_parameters(self.trans_decoder)))

        # Projection layers
        self.proj_enc = ComplexLinear(self.d_a, self.embed_dim)
        self.proj_dec = ComplexLinear(self.orig_d_a, self.embed_dim)

        self.out_fc1 = nn.Linear(final_out, h_out)

        self.out_fc2 = nn.Linear(h_out, output_dim)

        self.out_fc3 = nn.Linear(output_dim, 1000)

        self.out_dropout = nn.Dropout(out_dropout)
Code Example #6
def convert(logdir_train3, logdir_convert):
    # WARNING! Do not load net1 or net2
    # Load model net3 only
    net3_model = Net3()
    checkpoint_path = '{}/checkpoint.tar'.format(logdir_train3)
    checkpoint = torch.load(checkpoint_path)
    if checkpoint:
        net3_model.load_state_dict(checkpoint['model_state_dict'])

    # Create valid loader
    conversion_source_path = os.path.join(hp.quick_convert.data_path, 'test')
    conversion_source_set = Net3DataDir(conversion_source_path)
    conversion_source_loader = DataLoader(
        conversion_source_set,
        batch_size=hp.quick_convert.batch_size,
        shuffle=False,
        drop_last=False)

    # Run model: Warning! Only give net3_model here, not the other models.
    spectrogram_batch = quick_convert(net3_model, conversion_source_loader,
                                      logdir_convert)

    # logging
    net3_num_params = count_parameters(net3_model)
    logger.debug('Network 3 number of params: {}'.format(net3_num_params))
Code Example #7
def create_model(x, y, n_gpu, hparams):
    gen_logits = []
    gen_loss = []
    clf_loss = []
    tot_loss = []
    accuracy = []

    trainable_params = None
    for i in range(n_gpu):
        with tf.device("/gpu:%d" % i):
            results = model(hparams, x[i], y[i], reuse=(i != 0))

            gen_logits.append(results["gen_logits"])
            gen_loss.append(results["gen_loss"])
            clf_loss.append(results["clf_loss"])

            if hparams.clf:
                tot_loss.append(results["gen_loss"] + results["clf_loss"])
            else:
                tot_loss.append(results["gen_loss"])

            accuracy.append(results["accuracy"])

            if i == 0:
                trainable_params = tf.trainable_variables()
                print("trainable parameters:", count_parameters())

    return trainable_params, gen_logits, gen_loss, clf_loss, tot_loss, accuracy
Code Example #8
def main(args):
    logging.info('training on {} gpus'.format(torch.cuda.device_count()))
    logging.info('max tokens {} per gpu'.format(args.max_tokens))
    logging.info('max sentences {} per gpu'.format(args.max_sentences))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True

    tasks.set_ljspeech_hparams(args)
    logging.info("args = %s", args)

    saved_args, model_state_dict, epoch, global_step, optimizer_state_dict, best_valid_loss = utils.load(
        args.output_dir)
    if any([
            saved_args, model_state_dict, epoch, global_step,
            optimizer_state_dict
    ]):
        logging.info('Found existing checkpoint with epoch %d and updates %d',
                     epoch, global_step)

    if saved_args is not None:
        saved_args.__dict__.update(args.__dict__)
        args = saved_args
    task = tasks.LJSpeechTask(args)
    task.setup_task(model_state_dict, optimizer_state_dict)
    logging.info("param size = %d", utils.count_parameters(task.model))

    if args.max_epochs is not None:
        max_epochs = args.max_epochs
    else:
        max_epochs = float('inf')
    if args.max_updates is not None:
        max_updates = args.max_updates
    else:
        max_updates = float('inf')

    while epoch < max_epochs and global_step < max_updates:
        epoch += 1
        decoder_loss, stop_loss, loss, global_step = task.train(
            epoch=epoch, num_updates=global_step)
        logging.info(
            'train %d global step %d decoder loss %.6f stop loss %.6f total loss %.6f',
            epoch, global_step, decoder_loss, stop_loss, loss)
        decoder_loss, stop_loss, loss, fr, pcr, dfr = task.valid()
        logging.info(
            'valid %d global step %d decoder loss %.6f stop loss %.6f total loss %.6f fr %.6f pcr %.6f dfr %.6f',
            epoch, global_step, decoder_loss, stop_loss, loss, fr, pcr, dfr)
        is_best = False
        if loss < best_valid_loss:
            best_valid_loss = loss
            is_best = True
        if epoch % args.save_interval == 0:
            utils.save(args.output_dir, args, task.model, epoch, global_step,
                       task.optimizer, best_valid_loss, is_best)
Code Example #9
    def __init__(self, num_agents: int, num_trials: int, lr: float,
                 initial_agent: ESAgent, agent_class, env_name: str,
                 weights_std: float, seed: int, num_parallel: int, alpha: int,
                 num_gradients: int):
        self.num_parallel = num_parallel

        self.num_agents = num_agents
        self.num_trials = num_trials
        self.env_name = env_name

        self.agent = initial_agent
        self.seed = seed
        self.lr = lr

        self.centroid = deepcopy(initial_agent.policy)
        self.weights_std = weights_std
        self.num_parameters = count_parameters(self.centroid)
        self.num_gradients = num_gradients

        self.pertrubations_distr = distrib.Normal(
            torch.zeros(self.num_parameters), 1)

        self.grad_distr = distrib.Normal(torch.zeros(self.num_gradients), 1)

        self.alpha = alpha

        self.grads = Buffer(self.num_parameters, num_gradients)
Code Example #10
File: calculate_logits.py Project: thu-spmi/CAT
def single_worker(device, num_jobs, args, idx_beg=0):

    if idx_beg > 0 and num_jobs == 1:
        local_writers = [open(f"{args.output_dir}/decode.{args.nj}.ark", 'wb')]
    else:
        local_writers = [open(f"{args.output_dir}/decode.{i+1}.ark", 'wb')
                         for i in range(num_jobs)]

    inferset = InferDataset(args.input_scp)
    inferset.dataset = inferset.dataset[idx_beg:]

    testloader = DataLoader(
        inferset, batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    with open(args.config, 'r') as fi:
        configures = json.load(fi)

    model = build_model(args, configures, train=False)

    model = model.to(device)
    model.load_state_dict(torch.load(
        args.resume, map_location=device))
    model.eval()

    print("> Model built.")
    print("  Model size:{:.2f}M".format(
        utils.count_parameters(model)/1e6))

    cal_logit(model, testloader, device, local_writers)
Code Example #11
File: vae.py Project: dylanrandle/deepgen
def create_model():
    """ helper function to instantiate new model """
    ae = AutoEncoder()
    print('Loaded new AutoEncoder model')
    total_params = utils.count_parameters(ae)
    print(f'Model has {total_params} parameters')
    ae = ae.to(DEVICE)
    return ae
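
In the PyTorch examples, utils.count_parameters is usually a one-liner over model.parameters(); a minimal sketch, assuming it counts only trainable parameters (individual projects may also count frozen parameters or report other units):

def count_parameters(model):
    # Sketch: total number of scalar weights that will receive gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
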
Code Example #12
    def __init__(self,
                 net_config=None,
                 opt_config=None,
                 metric="",
                 GPU=0,
                 seed=None,
                 **kwargs):
        # Set logger.
        self.logger = get_module_logger("QuantTransformer")
        self.logger.info("QuantTransformer PyTorch version...")

        # set hyper-parameters.
        self.net_config = net_config or DEFAULT_NET_CONFIG
        self.opt_config = opt_config or DEFAULT_OPT_CONFIG
        self.metric = metric
        self.device = torch.device("cuda:{:}".format(
            GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
        self.seed = seed

        self.logger.info("Transformer parameters setting:"
                         "\nnet_config : {:}"
                         "\nopt_config : {:}"
                         "\nmetric     : {:}"
                         "\ndevice     : {:}"
                         "\nseed       : {:}".format(
                             self.net_config,
                             self.opt_config,
                             self.metric,
                             self.device,
                             self.seed,
                         ))

        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)
            torch.manual_seed(self.seed)
            if self.use_gpu:
                torch.cuda.manual_seed(self.seed)
                torch.cuda.manual_seed_all(self.seed)

        self.model = get_transformer(self.net_config)
        self.model.set_super_run_type(super_core.SuperRunMode.FullModel)
        self.logger.info("model: {:}".format(self.model))
        self.logger.info("model size: {:.3f} MB".format(
            count_parameters(self.model)))

        if self.opt_config["optimizer"] == "adam":
            self.train_optimizer = optim.Adam(self.model.parameters(),
                                              lr=self.opt_config["lr"])
        elif self.opt_config["optimizer"] == "sgd":
            self.train_optimizer = optim.SGD(self.model.parameters(),
                                             lr=self.opt_config["lr"])
        else:
            raise NotImplementedError(
                "optimizer {:} is not supported!".format(optimizer))

        self.fitted = False
        self.model.to(self.device)
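
Note that this example formats the return value of count_parameters as megabytes ("model size: {:.3f} MB"), so the helper is expected to return a size rather than a raw count. A hedged sketch of one such convention, assuming 32-bit float weights (other projects simply divide the raw count by 1e6 and label it MB); the name and behaviour here are assumptions, not the project's own helper:

def count_parameters_in_mb(model):
    # Hypothetical helper: assumes 4 bytes per float32 parameter.
    n_params = sum(p.numel() for p in model.parameters())
    return n_params * 4 / (1024 ** 2)
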
Code Example #13
def main():
    global best_valacc, best_epoch, best_state
    for epoch in range(1, args.epochs + 1):
        if args.unfreeze == epoch:
            model.freeze(False)
            if args.changeopt:
                if args.adamopt:
                    model.optimizer = torch.optim.Adam(
                        model.parameters(),
                        lr=0.005,
                        weight_decay=model.args.weight_decay)
                else:
                    model.optimizer = torch.optim.SGD(
                        model.parameters(),
                        lr=0.0005,
                        weight_decay=model.args.weight_decay)
        elif args.freeze == epoch:
            model.freeze(True)
        if args.dropout and args.unfreeze < epoch:
            if args.dropout_all:
                model.targeted_dropout_all()
            else:
                model.targeted_dropout()
        if args.make_hard == epoch:
            model.make_hard()
        print('Epoch {} {} {} {}'.format(epoch, count_parameters(model),
                                         count_parameters(model, 1e-6),
                                         count_parameters(model, 0)))
        model.train_(train_loader,
                     metric_saved,
                     args.n_classes,
                     save_target_on_leaves_=False,
                     hard=False,
                     mode='train')
        valacc = model.train_(valloader,
                              metric_saved,
                              args.n_classes,
                              hard=True,
                              mode='val')
        if valacc > best_valacc:
            best_state = model.state_dict()
            best_valacc = valacc
            best_epoch = epoch
            print('another best valacc:{:.2f}'.format(valacc))
            save_metric()
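
Here count_parameters is also called with a second positional argument (1e-6 and 0), which in this targeted-dropout setting presumably acts as a magnitude threshold, so the three printed values track how many weights remain above each level. That reading is an assumption; a sketch consistent with it:

def count_parameters(model, threshold=None):
    # Assumption: with a threshold, count only weights whose absolute value
    # exceeds it (e.g. threshold=0 counts the non-zero weights left after
    # targeted dropout); without one, count every trainable parameter.
    if threshold is None:
        return sum(p.numel() for p in model.parameters() if p.requires_grad)
    return sum(int((p.detach().abs() > threshold).sum()) for p in model.parameters())
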
Code Example #14
def test(ENV):

    print('Loading environment...\n')
    env = UnityEnvironment(file_name=ENV)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations  # get the current state (for each agent)

    print('Loading agent...\n')
    num_agents = len(env_info.agents)
    state_size, action_size = brain.vector_observation_space_size, brain.vector_action_space_size
    agent = Agent(num_agents=num_agents,
                  state_size=state_size,
                  action_size=action_size)
    print('Capacity of the Actor (# of parameters): ',
          count_parameters(agent.actor_local))
    print('Capacity of the Critic (# of parameters): ',
          count_parameters(agent.critic_local))

    scores = np.zeros(num_agents)  # initialize the score
    dones = False

    # Transfer Learning
    print('Transfer Learning into Agent...\n')
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.actor_local.eval()

    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))
    agent.critic_local.eval()

    # Play
    print('Playing...\n')
    while not np.any(dones):
        actions = agent.act(states)  # select an action (for each agent)
        actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
        env_info = env.step(actions)[
        brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done  # see if episode finished
        scores += rewards  # update the score (for each agent)
        states = next_states  # roll over states to next time step

    print("Score: {}".format(scores))
Code Example #15
def train_fn(loss):
    trained_vars = tf.trainable_variables()
    count_parameters(trained_vars)

    # Gradient clipping
    gradients = tf.gradients(loss, trained_vars)

    clipped_grads, global_norm = tf.clip_by_global_norm(
        gradients, FLAGS.max_grad_norm)
    tf.summary.scalar('global_grad_norm', global_norm)

    # Define optimizer
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.RMSPropOptimizer(FLAGS.learning_rate)
    train_op = optimizer.apply_gradients(zip(clipped_grads, trained_vars),
                                         name='train_op',
                                         global_step=global_step)
    return train_op, global_step
Code Example #16
def test(args, recon_model):
    """
    Performs evaluation of a pre-trained policy model.

    :param args: Argument object containing evaluation parameters.
    :param recon_model: reconstruction model.
    """
    model, policy_args = load_policy_model(
        pathlib.Path(args.policy_model_checkpoint))

    # Overwrite number of trajectories to test on
    policy_args.num_test_trajectories = args.num_test_trajectories
    if args.data_path is not None:  # Overwrite data path if provided
        policy_args.data_path = args.data_path

    # Logging of policy model
    logging.info(args)
    logging.info(recon_model)
    logging.info(model)
    if args.wandb:
        wandb.config.update(args)
        wandb.watch(model, log='all')
    # Initialise summary writer
    writer = SummaryWriter(log_dir=policy_args.run_dir / 'summary')

    # Parameter counting
    logging.info(
        'Reconstruction model parameters: total {}, of which {} trainable and {} untrainable'
        .format(count_parameters(recon_model),
                count_trainable_parameters(recon_model),
                count_untrainable_parameters(recon_model)))
    logging.info(
        'Policy model parameters: total {}, of which {} trainable and {} untrainable'
        .format(count_parameters(model), count_trainable_parameters(model),
                count_untrainable_parameters(model)))

    # Create data loader
    test_loader = create_data_loader(policy_args, 'test', shuffle=False)
    test_data_range_dict = create_data_range_dict(policy_args, test_loader)

    do_and_log_evaluation(policy_args, -1, recon_model, model, test_loader,
                          writer, 'Test', test_data_range_dict)

    writer.close()
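
This example splits the report into total, trainable, and untrainable parameters. Minimal sketches of the two extra helpers, assuming they follow the same numel-summing pattern as count_parameters (the project's actual implementations are not shown):

def count_trainable_parameters(model):
    # Parameters that will receive gradients during training.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def count_untrainable_parameters(model):
    # Frozen parameters, e.g. those of a fixed pre-trained reconstruction model.
    return sum(p.numel() for p in model.parameters() if not p.requires_grad)
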
Code Example #17
def test_lcg(a,
             c,
             m,
             n,
             bit_index=0,
             batch_size=128,
             num_batches=128,
             num_epochs=2):
    print("Initializing a model for bit {}...".format(bit_index))
    model = Model(n, 2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    data_source = LcgBitDataset(bit_index,
                                a=a,
                                c=c,
                                m=m,
                                n=n,
                                batch_size=batch_size,
                                num_batches=num_batches)
    data_loader = data.DataLoader(
        data_source, **{
            'batch_size': batch_size,
            'num_workers': 50
        })

    max_epochs = trange(num_epochs)
    for epoch in max_epochs:
        max_epochs.set_description(
            "Epoch {} - Generating data...".format(epoch + 1))
        iteration_count = 0
        for x, y in data_loader:
            optimizer.zero_grad()

            output = model(x)
            loss = criterion(output, y)
            loss.backward()

            optimizer.step()

            iteration_count += 1
            max_epochs.set_description("Epoch {} - Loss {:.3f}; {:.0%}".format(
                epoch + 1, loss.item(),
                iteration_count / data_source.num_batches))

    print("Benchmarking model against random guesser...")
    nn_guess_accuracy, random_guess_accuracy = utils.model_vs_random(
        model, data_loader)

    print("\n=========== Results ==============")
    print("     Net was correct: {:.2%}".format(nn_guess_accuracy))
    print("  Random was correct: {:.2%}".format(random_guess_accuracy))
    print("==================================")

    return nn_guess_accuracy, random_guess_accuracy, utils.count_parameters(
        model)
Code Example #18
 def __init__(self):
     super(Net, self).__init__()
     encoder = ConvNet()
     hooks, inp_szs, enc_szs = get_hooks(encoder.downsample)
     idxs = list(enc_szs.keys())
     x_sz = enc_szs[len(enc_szs) - 1]
     head = FeedForward(x_sz)
     layers = [encoder, head]
     [print(count_parameters(x)) for x in layers]
     self.layers = nn.Sequential(*layers)
Code Example #19
File: train.py Project: jiushishuai88/DLreaserch
def main():
    global args, config, last_epoch, best_prec, writer
    writer = SummaryWriter(log_dir=args.work_path + '/event')

    # Load the config file
    with open(args.work_path + '/config.yaml') as f:
        config = yaml.load(f)
    config = easydict.EasyDict(config)
    logger.info(config)
    # Build the model
    net = get_model(config)
    logger.info(net)
    logger.info("=====total parameters:" + str(utils.count_parameters(net)))
    device = 'cuda' if config.use_gpu else 'cpu'
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    net.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(),
                                config.lr_scheduler.base_lr,
                                momentum=config.optimize.momentum,
                                weight_decay=config.optimize.weight_decay,
                                nesterov=config.optimize.nesterov)
    last_epoch = -1
    best_prec = 0

    # Load a previously trained checkpoint to resume training
    if args.work_path:
        ckpt_file_name = args.work_path + '/' + config.ckpt_name + '.pth.tar'
        if args.resume:
            best_prec, last_epoch = utils.load_checkpoint(ckpt_file_name,
                                                          net,
                                                          optimizer=optimizer)

    # Set up the data transforms
    transform_train = transforms.Compose(utils.data_augmentation(config))
    transform_test = transforms.Compose(
        utils.data_augmentation(config, is_train=False))
    train_loader, test_loader = utils.get_data_loader(transform_train,
                                                      transform_test, config)
    logger.info("==============trian-test-file-pathL{}".format(config.dataset))
    logger.info("            =======  Training  =======\n")
    for epoch in range(last_epoch + 1, config.epochs):
        lr = utils.adjust_learning_rate(optimizer, epoch, config)
        writer.add_scalar('learning_rate', lr, epoch)
        train(train_loader, net, criterion, optimizer, epoch, device)
        if epoch == 0 or (
                epoch +
                1) % config.eval_freq == 0 or epoch == config.epochs - 1:
            test(test_loader, net, criterion, optimizer, epoch, device)
    writer.close()
    logger.info(
        "======== Training Finished.   best_test_acc: {:.3f}% ========".format(
            best_prec))
Code Example #20
File: model_iq.py Project: strategist922/dl_signal
    def __init__(self,
                 time_step,
                 input_dims,
                 hidden_size,
                 embed_dim,
                 output_dim,
                 num_heads,
                 attn_dropout,
                 relu_dropout,
                 res_dropout,
                 out_dropout,
                 layers,
                 attn_mask=False):
        """
        Construct a basic Transformer model.
        
        :param input_dims: The input dimensions of the various modalities.
        :param hidden_size: The hidden dimensions of the fc layer.
        :param embed_dim: The dimensions of the embedding layer.
        :param output_dim: The dimensions of the output (128 in MusicNet).
        :param num_heads: The number of heads to use in the multi-headed attention. 
        :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
        :param relu_dropout: The dropout for ReLU in residual block.
        :param res_dropout: The dropout of each residual block.
        :param out_dropout: The dropout of output layer.
        :param layers: The number of transformer blocks.
        :param attn_mask: A boolean indicating whether to use attention mask (for transformer decoder).
        """
        super(TransformerModel, self).__init__()
        [self.orig_d_a, self.orig_d_b] = input_dims
        assert self.orig_d_a == self.orig_d_b
        self.d_a, self.d_b = 512, 512
        final_out = embed_dim * 2
        h_out = hidden_size
        self.num_heads = num_heads
        self.layers = layers
        self.attn_dropout = attn_dropout
        self.relu_dropout = relu_dropout
        self.res_dropout = res_dropout
        self.attn_mask = attn_mask
        self.embed_dim = embed_dim

        # Transformer networks
        self.trans = self.get_network()
        print("Encoder Model size: {0}".format(count_parameters(self.trans)))
        self.fc_a = nn.Linear(self.orig_d_a, self.d_a)
        self.fc_b = nn.Linear(self.orig_d_b, self.d_b)
        # Projection layers
        self.proj = ComplexLinear(self.d_a, self.embed_dim)

        self.out_fc1 = nn.Linear(final_out, h_out)

        self.out_fc2 = nn.Linear(h_out, output_dim)

        self.out_dropout = nn.Dropout(out_dropout)
Code Example #21
File: pruner.py Project: puhsu/pruning
 def __init__(self, model, batch_per_epoch, config):
     """
     Initialize model pruner.
     """
     self.weights_count = utils.count_parameters(model)
     self.itr = 1
     self.pruners = []
     for name, weight in model.named_parameters():
         if 'bias' not in name:
             self.pruners.append(
                 WeightPruner(name, weight, batch_per_epoch, config[name]))
Code Example #22
 def __init__(self):
     super(Net2, self).__init__()
     c = 10
     downsample = ConvResBlock(1, c)
     x = torch.randn(1, 1, 28, 28)
     x.requires_grad_(False)
     x_sz = downsample(x).shape
     head = FeedForward(x_sz)
     layers = [downsample, head]
     [print(count_parameters(x)) for x in layers]
     self.layers = nn.Sequential(*layers)
Code Example #23
def trainGANs(n=100, datadir='data/gan/'):
    for i in range(n):
        try:
            os.mkdir(datadir + str(i))
        except FileExistsError:
            pass
        loader = data.MNIST(batch=128)
        model = SimpleGAN(28*28, zdim=64, hd=64, hg=64, lr=2e-4).cuda()
        print('Network: ' + str(i) + ', Params: ' + str(utils.count_parameters(model)))
        # model = DCGAN(zdim=16, h=4, lr=2e-4).cuda()
        trainer = MNISTTrainer(model, loader, datadir + str(i) + '/')
        trainer.train(epochs=100)
Code Example #24
def training_curves(models, y_data, settings, histories, smoothing=1):
    """function for building training curves"""
    epochs = np.arange(settings['epochs'])
    markers = ['.', '^']  # ok for two outputs only
    if len(models) == 1:
        f = plt.figure(figsize=(4 * len(models), 4 * len(models)))
    else:
        f = plt.figure(figsize=(10 * len(models) / 3, 10 * len(models)))
    for i, mod in enumerate(models):

        model_name = utils.get_model_name(mod, settings['dataset'])
        plt.subplot(1, len(models), i + 1)
        for j in range(len(y_data)):
            plt.plot(np.convolve(np.log(
                histories[model_name].history[y_data[j] + '_mse']),
                                 np.ones(smoothing) / smoothing,
                                 mode='valid'),
                     'k--',
                     alpha=0.5)
            plt.plot(np.convolve(np.log(
                histories[model_name].history['val_' + y_data[j] + '_mse']),
                                 np.ones(smoothing) / smoothing,
                                 mode='valid'),
                     'r--',
                     alpha=0.5)

            plt.plot(epochs[::smoothing],
                     np.convolve(np.log(
                         histories[model_name].history[y_data[j] + '_mse']),
                                 np.ones(smoothing) / smoothing,
                                 mode='valid')[::smoothing],
                     'k' + markers[j],
                     label=y_data[j],
                     alpha=0.5)
            plt.plot(epochs[::smoothing],
                     np.convolve(np.log(
                         histories[model_name].history['val_' + y_data[j] +
                                                       '_mse']),
                                 np.ones(smoothing) / smoothing,
                                 mode='valid')[::smoothing],
                     'r' + markers[j],
                     alpha=0.5)

        if i == 0:
            plt.legend(frameon=False)
        plt.xlabel('epochs')
        plt.ylabel('log MSE')
        plt.title(model_name +
                  ' ({} params)'.format(utils.count_parameters(mod)))
        plt.gca().set_aspect(1. / plt.gca().get_data_ratio())

    plt.tight_layout()
    return f
Code Example #25
    def __init__(self, epochs=NUM_EPOCHS):
        self.model = SingleNetwork()
        self.alpha = list(pd.read_pickle('class_weights.pkl').values())
        self.criterion = FocalLoss(gamma=2, alpha=self.alpha)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-1, weight_decay=1e-2)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10, eta_min=5e-5)
        self.epochs = epochs

        self.train_loader = torch.load('train_loader.pt')
        self.cv_loader = torch.load('cv_loader.pt')

        self.best_loss = 1e3
        print(count_parameters(self.model))
Code Example #26
 def __init__(self):
     super(ODENet, self).__init__()
     c = 64
     downsample = ConvResBlock(1, c)
     x = torch.randn(1, 1, 28, 28)
     x.requires_grad_(False)
     x_sz = downsample(x).shape
     self.feature_layers = ODEBlock(ODEfunc(x_sz[1]))
     head = FeedForward(x_sz)
     layers = [downsample, self.feature_layers, head]
     [print(count_parameters(x)) for x in layers]
     self.layers = nn.Sequential(*layers)
     print(self.layers)
Code Example #27
def main():
    set_random_seed(C.seed)

    summary_writer = SummaryWriter(C.log_dpath)

    train_iter, val_iter, test_iter, vocab = build_loaders(C)

    model = build_model(C, vocab)
    print("#params: ", count_parameters(model))
    model = model.cuda()

    optimizer = torch.optim.Adamax(model.parameters(),
                                   lr=C.lr,
                                   weight_decay=1e-5)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              C.epochs,
                                                              eta_min=0,
                                                              last_epoch=-1)

    best_val_scores = {'CIDEr': -1.}
    for e in range(1, C.epochs + 1):
        print()
        ckpt_fpath = C.ckpt_fpath_tpl.format(e)
        """ Train """
        teacher_forcing_ratio = get_teacher_forcing_ratio(
            C.decoder.max_teacher_forcing_ratio,
            C.decoder.min_teacher_forcing_ratio, e, C.epochs)
        train_loss = train(e, model, optimizer, train_iter, vocab,
                           teacher_forcing_ratio, C.CA_lambda, C.gradient_clip)
        log_train(C, summary_writer, e, train_loss, get_lr(optimizer),
                  teacher_forcing_ratio)
        lr_scheduler.step()
        """ Validation """
        val_loss = evaluate(model, val_iter, vocab, C.CA_lambda)
        val_scores, _, _, _ = score(model, val_iter, vocab)
        log_val(C, summary_writer, e, val_loss, val_scores)

        if val_scores['CIDEr'] > best_val_scores['CIDEr']:
            best_val_scores = val_scores
            best_epoch = e
            best_model = model

        print("Saving checkpoint at epoch={} to {}".format(e, ckpt_fpath))
        save_checkpoint(ckpt_fpath, e, model, optimizer)
    """ Test """
    test_scores, _, _, _ = score(best_model, test_iter, vocab)
    for metric in C.metrics:
        summary_writer.add_scalar("BEST SCORE/{}".format(metric),
                                  test_scores[metric], best_epoch)
    best_ckpt_fpath = C.ckpt_fpath_tpl.format("best")
    save_checkpoint(best_ckpt_fpath, best_epoch, best_model, optimizer)
Code Example #28
    def build_model(self):
        self.G = LineartoMel_real(F=self.F,
                                  melF_to_linearFs=self.melF_to_linearFs,
                                  nCH=self.config.nCH,
                                  w=self.config.convW,
                                  H=self.config.nMap_per_F,
                                  L=self.config.L_CNN,
                                  non_linear=self.config.non_linear,
                                  BN=self.config.complex_BN)  # the model currently in use
        G_name = 'LineartoMel_real'

        print('initialized enhancement model as ' + G_name)
        nParam = count_parameters(self.G)
        print('# trainable parameters = ' + str(nParam))
Code Example #29
File: vae.py Project: dylanrandle/deepgen
def load_model(model_path):
    """ helper function to load model if given path exists
        otherwise creates new object and returns it """
    if not os.path.exists(model_path):
        raise RuntimeError(f'Could not find provided model_path: {model_path}')

    ae = AutoEncoder()
    ae.load_state_dict(torch.load(model_path, map_location=DEVICE))
    print(f'Loaded existing model from {model_path}')

    total_params = utils.count_parameters(ae)
    print(f'Model has {total_params} parameters')
    ae = ae.to(DEVICE)
    return ae
Code Example #30
def train_transformer():
    if args.data == 'iq':
        input_size = int(3200 / (args.src_time_step + args.trg_time_step))
    else:
        input_size = 4096
    input_dim = int(input_size / 2)

    model = TransformerGenerationModel(
        ntokens=10000,  # TODO: wait for Paul's data
        # time_step=args.time_step,
        input_dims=[input_dim, input_dim],
        # proj_dims=args.modal_lengths,
        hidden_size=args.hidden_size,
        # output_dim=args.output_dim,
        num_heads=args.num_heads,
        attn_dropout=args.attn_dropout,
        relu_dropout=args.relu_dropout,
        res_dropout=args.res_dropout,
        layers=args.nlevels,
        horizons=args.nhorizons,
        attn_mask=args.attn_mask,
        crossmodal=args.crossmodal)
    if use_cuda:
        model = model.cuda()

    print("Model size: {0}".format(count_parameters(model)))

    optimizer = getattr(optim, args.optim)(model.parameters(),
                                           lr=args.lr,
                                           weight_decay=1e-7)
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  patience=2,
                                  factor=0.5,
                                  verbose=True)

    settings = {
        'model': model,
        'optimizer': optimizer,
        'criterion': criterion,
        'scheduler': scheduler,
        'input_size': input_size,
        'src_time_step': args.src_time_step,
        'trg_time_step': args.trg_time_step
    }
    return train_model(settings)