Example #1
 def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
              feat_dims, compute_dims, res_out_dims, res_blocks):
     super().__init__()
     if hp.input_type == 'raw':
         self.n_classes = 2
     elif hp.input_type == 'mixture':
         # mixture requires a multiple of 3; defaults to a 10-component mixture, i.e. 3 x 10 = 30
         self.n_classes = 30
     elif hp.input_type == 'mulaw':
         self.n_classes = hp.mulaw_quantize_channels
     elif hp.input_type == 'bits':
         self.n_classes = 2**bits
     else:
         raise ValueError("input_type: {hp.input_type} not supported")
     self.rnn_dims = rnn_dims
     self.aux_dims = res_out_dims // 4
     self.upsample = UpsampleNetwork(feat_dims, upsample_factors, compute_dims, 
                                     res_blocks, res_out_dims, pad)
     self.I = nn.Linear(feat_dims + self.aux_dims + 1, rnn_dims)
     self.rnn1 = nn.GRU(rnn_dims, rnn_dims, batch_first=True)
     self.rnn2 = nn.GRU(rnn_dims + self.aux_dims, rnn_dims, batch_first=True)
     self.fc1 = nn.Linear(rnn_dims + self.aux_dims, fc_dims)
     self.fc2 = nn.Linear(fc_dims + self.aux_dims, fc_dims)
     self.fc3 = nn.Linear(fc_dims, self.n_classes)
     num_params(self)
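Note: every example on this page feeds a model into a num_params helper. The exact implementation differs per repository (Example #1 calls it for its side effect, Example #9 divides the result by 1024**2 to report millions, and Example #6 even calls it with no argument), but a minimal PyTorch sketch that returns the number of trainable parameters, consistent with calls like num_params(self) above, could look like this (hypothetical, for orientation only):

import torch.nn as nn

def num_params(module: nn.Module) -> int:
    # Sum the element counts of all trainable parameters in the module.
    return sum(p.numel() for p in module.parameters() if p.requires_grad)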
Example #2
 def __init__(self, input_dims, output_dims, model_type):
     super().__init__()
     if model_type=='convnet':
         self.cnn = ConvNet(input_dims, output_dims)
     elif model_type=='resnet18':
         self.cnn = ResNet18(input_dims, output_dims)
     elif model_type=='resnet34':
         self.cnn = ResNet34(input_dims, output_dims)
     else:
         raise ValueError(f"model_type: {model_type} not supported")
     self.sigmoid = nn.Sigmoid()
     num_params(self)
Example #3
 def __init__(self, input_dims, output_dims, model_type):
     super().__init__()
     if model_type=='convnet':
         self.cnn = ConvNet(input_dims, output_dims)
     elif model_type=='mobilenet':
         self.cnn = MobileNet(input_dims, output_dims)
     elif model_type=='mobilenetv2':
         self.cnn = MobileNetv2(input_dims, output_dims)
     else:
         raise ValueError(f"model_type: {model_type} not supported")
     self.sigmoid = nn.Sigmoid()
     num_params(self)
Example #4
    def _summarize_best_genome(self, genomes_all):

        logger.info('*' * 50)
        logger.info(
            'n_models: {}, best_fitness: {:.3f}, controller_step: {}'.format(
                self.n_models, self.best_genome.fitness, self.controller_step))
        logger.info('best_genome:\n{}'.format(self.best_genome.model_string))

        if self.tb is not None:

            # best genome info
            self.tb.scalar_summary(f'best_genome/fitness',
                                   self.best_genome.fitness,
                                   self.controller_step)

            self.tb.text_summary(f'best_genome/model_string',
                                 self.best_genome.model_string,
                                 self.controller_step)

            # save best genome image
            fname = (f'{self.epoch:03d}-{self.controller_step:06d}-'
                     f'{self.best_genome.fitness:6.4f}-best_genome.png')
            path = os.path.join(args.model_dir, 'networks', fname)

            graph_deep_net(self.amb,
                           dp=self.profile,
                           show_disabled=False,
                           prune=True,
                           genome=self.best_genome,
                           fname=None,
                           save_file=path.replace('.png', ''))

            self.tb.image_summary('best_genome/sample', path,
                                  self.controller_step)

            # can't do this in AMB b/c you don't have the model
            self.tb.scalar_summary(f'best_genome/num_params',
                                   utils.num_params(self.best_model),
                                   self.controller_step)

            # generation summary
            fitnesses = [x[0].fitness for x in genomes_all]
            bp_iters = [x[-1] for x in genomes_all]

            self.tb.histogram_summary('generation/fitnesses', fitnesses,
                                      self.controller_step)
            self.tb.histogram_summary('generation/bp_iters', bp_iters,
                                      self.controller_step)

            self.tb.scalar_summary(f'generation/mean_bp_iters',
                                   np.mean(bp_iters), self.controller_step)

            # plot best genomes
            """
Example #5
 def param_trace(name, module, depth, max_depth=999, threshold=0):
     if depth > max_depth:
         return
     prefix = "  " * depth
     n_params = utils.num_params(module)
     if n_params > threshold:
         print("{:60s}\t{:10.2f}M".format(prefix + name,
                                          n_params / K / K))
     for n, m in module.named_children():
         if depth == 0:
             child_name = n
         else:
             child_name = "{}.{}".format(name, n)
         param_trace(child_name, m, depth + 1, max_depth, threshold)
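A hypothetical invocation of the recursive tracer above, assuming K = 1024 is defined in the enclosing scope (as in Example #9) and that utils.num_params returns an integer count:

import torch.nn as nn

K = 1024
model = nn.Sequential(nn.Linear(512, 256), nn.ReLU(), nn.Linear(256, 10))
param_trace("mlp", model, depth=0, max_depth=2)
# Prints every (sub)module whose parameter count exceeds the threshold, in millions.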
Example #6
            bias_initializer=tf.constant_initializer(np.copy(ib.pretrained_PZ[9])),
            inputs=x,
            units=num_classes,
            activation=None,
            # kernel_initializer=None,
            # bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_output),
        )
        print('Pretrained PZ_output loaded!')
    else:
        raise Exception("this does not get executed!")

    y = tf.nn.softmax(dense_out)
    print('denseOut\t', y.get_shape())

print('Model consists of ', utils.num_params(), 'trainable parameters.')
# %%
#########################################
###   SETTING VARIABLES TRAINABILITY  ###
#########################################

### STORING TRAINABLE VARIABLES
all_vars = tf.trainable_variables()
start_idx, end_idx = ib.what_is_trainable(all_vars)
to_train = all_vars[start_idx:end_idx]

print("and we will train: ")
for j in range(len(to_train)):
    print("## ", to_train[j])

####################################################
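Example #6 calls utils.num_params() with no argument; in a TF1-style graph the natural reading is a helper that sums the shapes of all trainable variables in the default graph. A plausible sketch (the repository's actual helper may differ):

import numpy as np
import tensorflow as tf

def num_params():
    # Total element count over all trainable variables in the default graph (TF1 API).
    return int(np.sum([np.prod(v.get_shape().as_list())
                       for v in tf.trainable_variables()]))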
Example #7
    num_classes = 2
    layers = [
        (10, None),
    ]
    net = HyperNet(
        num_channels,
        num_classes,
        layers,
        h=1e-1,
        verbose=False,
        clear_grad=True,
        classifier_type='conv3',
    ).to(device)
    print('\n### Model Statistics')
    print('Model Size: %8.1f mb' % utils.model_size(net))
    print('Number of Parameters: %9d' % utils.num_params(net))
    print(' ')

    nex = 4
    images = torch.randn((nex, num_channels, 16, 16, 16)).to(device)
    fwd_start = timer()
    Y_N, Y_Nm1 = net(images)
    fwd_time = timer() - fwd_start

    dYN = torch.randn_like(Y_N)
    get_optim = lambda net: torch.optim.Adam(net.parameters(), lr=1e-2)

    bwd_start = timer()
    Y0, Y1 = net.backward(Y_N, Y_Nm1, dYN, get_optim, False)
    bwd_time = timer() - bwd_start
Example #8
def main(summary):
    train_dataset, val_dataset = EEGDataset.from_config(
        validation_ratio=hp.validation_ratio,
        validation_seed=hp.validation_seed,
        dir_path='./data/prepared_eegs_mat_th5',
        data_sampling_freq=220,
        start_sampling_freq=1,
        end_sampling_freq=60,
        start_seq_len=32,
        num_channels=17,
        return_long=False)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=hp.batch_size,
                                  num_workers=0,
                                  drop_last=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=hp.batch_size,
                                num_workers=0,
                                drop_last=False,
                                pin_memory=True)
    network = cudize(
        Network(train_dataset.num_channels,
                bidirectional=False,
                contextualizer_num_layers=hp.contextualizer_num_layers,
                contextualizer_dropout=hp.contextualizer_dropout,
                use_transformer=hp.use_transformer,
                prediction_k=hp.prediction_k * (hp.prediction_loss_weight != 0.0),
                have_global=(hp.global_loss_weight != 0.0),
                have_local=(hp.local_loss_weight != 0.0),
                residual_encoder=hp.residual_encoder,
                sinc_encoder=hp.sinc_encoder))
    num_parameters = num_params(network)
    print('num_parameters', num_parameters)
    if hp.use_bert_adam:
        network_optimizer = BertAdam(network.parameters(),
                                     lr=hp.lr,
                                     weight_decay=hp.weight_decay,
                                     warmup=0.2,
                                     t_total=hp.epochs * len(train_dataloader),
                                     schedule='warmup_linear')
    else:
        network_optimizer = Adam(network.parameters(),
                                 lr=hp.lr,
                                 weight_decay=hp.weight_decay)
    if hp.use_scheduler:
        scheduler = ReduceLROnPlateau(network_optimizer,
                                      patience=3,
                                      verbose=True)
    best_val_loss = float('inf')
    for epoch in trange(hp.epochs):
        for training, data_loader in zip((False, True),
                                         (val_dataloader, train_dataloader)):
            if training:
                if epoch == hp.epochs - 1:
                    break
                network.train()
            else:
                network.eval()
            total_network_loss = 0.0
            total_prediction_loss = 0.0
            total_global_loss = 0.0
            total_local_loss = 0.0
            total_global_accuracy = 0.0
            total_local_accuracy = 0.0
            total_k_pred_acc = {}
            total_pred_acc = 0.0
            total_count = 0
            with torch.set_grad_enabled(training):
                for batch in data_loader:
                    x = cudize(batch['x'])
                    network_return = network.forward(x)
                    network_loss = hp.prediction_loss_weight * network_return.losses.prediction_
                    network_loss = network_loss + hp.global_loss_weight * network_return.losses.global_
                    network_loss = network_loss + hp.local_loss_weight * network_return.losses.local_

                    bs = x.size(0)
                    total_count += bs
                    total_network_loss += network_loss.item() * bs
                    total_prediction_loss += network_return.losses.prediction_.item() * bs
                    total_global_loss += network_return.losses.global_.item() * bs
                    total_local_loss += network_return.losses.local_.item() * bs

                    total_global_accuracy += network_return.accuracies.global_ * bs
                    total_local_accuracy += network_return.accuracies.local_ * bs
                    dict_add(total_k_pred_acc,
                             network_return.accuracies.prediction_, bs)
                    len_pred = len(network_return.accuracies.prediction_)
                    if len_pred > 0:
                        total_pred_acc += sum(
                            network_return.accuracies.prediction_.values()) / len_pred * bs

                    if training:
                        network_optimizer.zero_grad()
                        network_loss.backward()
                        network_optimizer.step()

            metrics = dict(net_loss=total_network_loss)
            if network.prediction_loss_network.k > 0 and hp.prediction_loss_weight != 0:
                metrics.update(
                    dict(avg_prediction_acc=total_pred_acc,
                         prediction_loss=total_prediction_loss,
                         k_prediction_acc=total_k_pred_acc))
            if hp.global_loss_weight != 0:
                metrics.update(
                    dict(global_loss=total_global_loss,
                         global_acc=total_global_accuracy))
            if hp.local_loss_weight != 0:
                metrics.update(
                    dict(local_loss=total_local_loss,
                         local_acc=total_local_accuracy))
            divide_dict(metrics, total_count)

            if not training and hp.use_scheduler:
                scheduler.step(metrics['net_loss'])
            if summary:
                print('train' if training else 'validation', epoch,
                      metrics['net_loss'])
            else:
                print('train' if training else 'validation', epoch)
                print(json.dumps(metrics, indent=4))
            if not training and (metrics['net_loss'] < best_val_loss):
                best_val_loss = metrics['net_loss']
                print('update best to', best_val_loss)
                torch.save(network.state_dict(), 'best_network.pth')
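The training loop above relies on dict_add and divide_dict helpers that are not shown. Hypothetical reconstructions consistent with how they are called (weighted accumulation per key, then in-place normalization of every value, including the nested k_prediction_acc dict):

def dict_add(acc, new, weight):
    # Accumulate new's values into acc, weighted by the batch size.
    for k, v in new.items():
        acc[k] = acc.get(k, 0.0) + v * weight

def divide_dict(d, count):
    # Divide every value by count, recursing into nested dicts, in place.
    for k, v in d.items():
        if isinstance(v, dict):
            divide_dict(v, count)
        else:
            d[k] = v / count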
Example #9
    L = len(valid_data)
    eval_batch = [valid_data[i] for i in range(L // 4 - 1, L, L // 4)]

    ### build model
    logger.info("### Build model ###")
    seq2seq = get_model(model_type, in_dim, out_dim, max_len, cfg['model'])
    seq2seq.cuda()

    ### init params
    # NOTE no bias init ...
    for p in seq2seq.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    K = 1024
    n_params = utils.num_params(seq2seq) / K / K
    logger.nofmt(seq2seq)
    logger.info("# of params = {:.1f} M".format(n_params))

    # parameter size tracing
    if args.param_tracing:
        # recursive tracing
        def param_trace(name, module, depth, max_depth=999, threshold=0):
            if depth > max_depth:
                return
            prefix = "  " * depth
            n_params = utils.num_params(module)
            if n_params > threshold:
                print("{:60s}\t{:10.2f}M".format(prefix + name,
                                                 n_params / K / K))
            for n, m in module.named_children():
Example #10
        (n, 'down'),
        (n, 'down'),
        (n, 'up'),
        (n, 'up'),
        (n, 'up'),
        (n, 'up'),
        (n, None),
    ]
    net = HyperNet(channels_in,
                   nClasses,
                   layers,
                   h=1e-2,
                   classifier_type='conv',
                   verbose=False)
    print('Model Size: %6.2f' % model_size(net))
    print('Number of Parameters: %d' % num_params(net))
    print('Number of Layers: %d' % (n * len(layers)))
    net = net.to(device)

    # Show scale captured

    class_weights = torch.tensor(train_dataset.CLASS_WEIGHTS)
    misfit = nn.CrossEntropyLoss(class_weights).to(device)

    best_val_acc = 0
    for epoch in range(num_epochs):

        if epoch % 100 == 0 and not epoch == 0:
            lr = lr / 10
        get_optim = lambda net: torch.optim.Adam(net.parameters(), lr=lr)
Example #11
    def fit(self, X, y, use_tensorboard=True):

        n_inputs = X.shape[1]
        n_outputs = self.profile.num_target_classes()
        if n_outputs == 0:
            n_outputs = 1

        self.neatcfg = utils.Config(n_inputs)
        self.amb = utils.AMB(self.neatcfg)  # for plotting deep networks
        self.set_model_traits()
        self.build_space()

        if use_tensorboard:
            self.tb = TensorBoardCallback(args.model_dir, self.amb)
        else:
            self.tb = None

        # self.traits.batch_size = 256

        idx_train, idx_val = self.traits.get_cv_split(X.values, y.values)
        idx_train, idx_val = to.LongTensor(idx_train), to.LongTensor(idx_val)

        logger.debug('train size: {}, val size: {}'.format(
            len(idx_train), len(idx_val)))

        X = X.values
        y = y.values
        X = to.Tensor(X.astype('float32'))
        if self.is_class:
            y = y.dot(np.arange(y.shape[1])).astype(int)
            y = to.LongTensor(y)
        else:
            y = to.Tensor(y.astype('float32'))

        X = ag.Variable(X)
        y = ag.Variable(y)

        self.x = X[idx_train]
        self.y = y[idx_train]
        self.x_val = X[idx_val]
        self.y_val = y[idx_val]

        self.controller = Controller(n_inputs, n_outputs, self.neatcfg,
                                     self.vocab, self.activations)
        self.controller.cuda()

        if args.controller_path != '':
            logger.info('Loading controller weights from: {}'.format(
                args.controller_path))
            self.controller.load_state_dict(to.load(args.controller_path))

        # print controller and save string rep to file
        print(self.controller)
        num_params = utils.num_params(self.controller)
        num_params = 'num_params: {:,}'.format(num_params)
        print(num_params)
        path = os.path.join(args.model_dir, "controller.txt")
        with open(path, 'w') as fp:
            fp.write(str(self.controller) + '\n' + num_params)

        # TODO: try higher
        # self.controller_lr = 3.5e-4  # the default
        self.controller_lr = 1e-3
        self.controller_optim = to.optim.Adam(self.controller.parameters(),
                                              lr=self.controller_lr)

        if args.nprocs > 1:
            self.pool = mp.Pool(args.nprocs)

        self.start_time = time.time()
        self.epoch = 1

        stop = self.train_controller()
        logger.info('model_dir: %s', args.model_dir)