def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
             feat_dims, compute_dims, res_out_dims, res_blocks):
    super().__init__()
    if hp.input_type == 'raw':
        self.n_classes = 2
    elif hp.input_type == 'mixture':
        # mixture requires a multiple of 3; default is a 10-component mixture, i.e. 3 x 10 = 30
        self.n_classes = 30
    elif hp.input_type == 'mulaw':
        self.n_classes = hp.mulaw_quantize_channels
    elif hp.input_type == 'bits':
        self.n_classes = 2 ** bits
    else:
        raise ValueError(f"input_type: {hp.input_type} not supported")
    self.rnn_dims = rnn_dims
    self.aux_dims = res_out_dims // 4
    self.upsample = UpsampleNetwork(feat_dims, upsample_factors, compute_dims,
                                    res_blocks, res_out_dims, pad)
    self.I = nn.Linear(feat_dims + self.aux_dims + 1, rnn_dims)
    self.rnn1 = nn.GRU(rnn_dims, rnn_dims, batch_first=True)
    self.rnn2 = nn.GRU(rnn_dims + self.aux_dims, rnn_dims, batch_first=True)
    self.fc1 = nn.Linear(rnn_dims + self.aux_dims, fc_dims)
    self.fc2 = nn.Linear(fc_dims + self.aux_dims, fc_dims)
    self.fc3 = nn.Linear(fc_dims, self.n_classes)
    num_params(self)
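# --- NOTE: the snippets in this file all call a num_params utility whose
# definition is not included in the excerpt. A minimal sketch of what the
# PyTorch call sites (num_params(self), num_params(net)) appear to assume;
# the original utils implementation may differ:
def num_params(model):
    # Count and report the trainable parameters of an nn.Module.
    n = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Trainable parameters: {:,}'.format(n))
    return n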
def __init__(self, input_dims, output_dims, model_type):
    super().__init__()
    if model_type == 'convnet':
        self.cnn = ConvNet(input_dims, output_dims)
    elif model_type == 'resnet18':
        self.cnn = ResNet18(input_dims, output_dims)
    elif model_type == 'resnet34':
        self.cnn = ResNet34(input_dims, output_dims)
    else:
        # guard against silent misconfiguration (self.cnn would be missing)
        raise ValueError("model_type: {} not supported".format(model_type))
    self.sigmoid = nn.Sigmoid()
    num_params(self)
def __init__(self, input_dims, output_dims, model_type):
    super().__init__()
    if model_type == 'convnet':
        self.cnn = ConvNet(input_dims, output_dims)
    elif model_type == 'mobilenet':
        self.cnn = MobileNet(input_dims, output_dims)
    elif model_type == 'mobilenetv2':
        self.cnn = MobileNetv2(input_dims, output_dims)
    else:
        # guard against silent misconfiguration (self.cnn would be missing)
        raise ValueError("model_type: {} not supported".format(model_type))
    self.sigmoid = nn.Sigmoid()
    num_params(self)
def _summarize_best_genome(self, genomes_all):
    logger.info('*' * 50)
    logger.info(
        'n_models: {}, best_fitness: {:.3f}, controller_step: {}'.format(
            self.n_models, self.best_genome.fitness, self.controller_step))
    logger.info('best_genome:\n{}'.format(self.best_genome.model_string))
    if self.tb is not None:
        # best genome info
        self.tb.scalar_summary('best_genome/fitness',
                               self.best_genome.fitness,
                               self.controller_step)
        self.tb.text_summary('best_genome/model_string',
                             self.best_genome.model_string,
                             self.controller_step)
        # save best genome image
        fname = (f'{self.epoch:03d}-{self.controller_step:06d}-'
                 f'{self.best_genome.fitness:6.4f}-best_genome.png')
        path = os.path.join(args.model_dir, 'networks', fname)
        graph_deep_net(self.amb, dp=self.profile, show_disabled=False,
                       prune=True, genome=self.best_genome, fname=None,
                       save_file=path.replace('.png', ''))
        self.tb.image_summary('best_genome/sample', path, self.controller_step)
        # can't do this in AMB b/c you don't have the model
        self.tb.scalar_summary('best_genome/num_params',
                               utils.num_params(self.best_model),
                               self.controller_step)
        # generation summary
        fitnesses = [x[0].fitness for x in genomes_all]
        bp_iters = [x[-1] for x in genomes_all]
        self.tb.histogram_summary('generation/fitnesses', fitnesses,
                                  self.controller_step)
        self.tb.histogram_summary('generation/bp_iters', bp_iters,
                                  self.controller_step)
        self.tb.scalar_summary('generation/mean_bp_iters', np.mean(bp_iters),
                               self.controller_step)
    # plot best genomes
def param_trace(name, module, depth, max_depth=999, threshold=0):
    if depth > max_depth:
        return
    prefix = " " * depth
    n_params = utils.num_params(module)
    if n_params > threshold:
        print("{:60s}\t{:10.2f}M".format(prefix + name, n_params / K / K))
    for n, m in module.named_children():
        if depth == 0:
            child_name = n
        else:
            child_name = "{}.{}".format(name, n)
        param_trace(child_name, m, depth + 1, max_depth, threshold)
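# --- Example invocation of param_trace (hypothetical model `net`). K = 1024
# is assumed at module level, as in the seq2seq training script further down,
# so the printed "M" figures are n_params / 1024 / 1024:
K = 1024
param_trace("model", net, 0, max_depth=2, threshold=0)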
# (the excerpt begins inside an if-branch; the enclosing condition and the
#  opening of this dense call are truncated, reconstructed from dense_out below)
    dense_out = tf.layers.dense(
        bias_initializer=tf.constant_initializer(np.copy(ib.pretrained_PZ[9])),
        inputs=x,
        units=num_classes,
        activation=None,
        # kernel_initializer=None,
        # bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_output),
    )
    print('Pretrained PZ_output loaded!')
else:
    raise Exception("this does not get executed!")

y = tf.nn.softmax(dense_out)
print('denseOut\t', y.get_shape())
print('Model consists of ', utils.num_params(), 'trainable parameters.')

# %% #########################################
###     SETTING VARIABLES TRAINABILITY     ###
##############################################

### STORING TRAINABLE VARIABLES
all_vars = tf.trainable_variables()
start_idx, end_idx = ib.what_is_trainable(all_vars)
to_train = all_vars[start_idx:end_idx]
print("and we will train: ")
for j in range(len(to_train)):
    print("## ", to_train[j])
####################################################
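# --- NOTE: unlike the PyTorch snippets, utils.num_params() is called here
# with no argument, which suggests a TensorFlow 1.x variant that sums over
# the default graph. A sketch under that assumption (the original utils
# implementation may differ):
import numpy as np
import tensorflow as tf  # TF 1.x, consistent with the tf.contrib usage above

def num_params():
    # Total trainable parameters in the default TF graph.
    return int(sum(np.prod(v.get_shape().as_list())
                   for v in tf.trainable_variables()))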
num_classes = 2
layers = [
    (10, None),
]
net = HyperNet(
    num_channels,
    num_classes,
    layers,
    h=1e-1,
    verbose=False,
    clear_grad=True,
    classifier_type='conv3',
).to(device)

print('\n### Model Statistics')
print('Model Size: %8.1f mb' % utils.model_size(net))
print('Number of Parameters: %9d' % utils.num_params(net))
print(' ')

nex = 4
images = torch.randn((nex, num_channels, 16, 16, 16)).to(device)

fwd_start = timer()
Y_N, Y_Nm1 = net(images)
fwd_time = timer() - fwd_start

dYN = torch.randn_like(Y_N)
get_optim = lambda net: torch.optim.Adam(net.parameters(), lr=1e-2)

bwd_start = timer()
Y0, Y1 = net.backward(Y_N, Y_Nm1, dYN, get_optim, False)
bwd_time = timer() - bwd_start
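# --- NOTE: utils.model_size is also undefined in this excerpt. Since it is
# printed in mb next to the parameter count, a plausible sketch is the bytes
# of parameters and buffers converted to megabytes (an assumption, not the
# original implementation):
def model_size(model):
    # Size of all parameters plus registered buffers, in MB.
    n_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    n_bytes += sum(b.numel() * b.element_size() for b in model.buffers())
    return n_bytes / (1024 ** 2)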
def main(summary):
    train_dataset, val_dataset = EEGDataset.from_config(
        validation_ratio=hp.validation_ratio,
        validation_seed=hp.validation_seed,
        dir_path='./data/prepared_eegs_mat_th5',
        data_sampling_freq=220,
        start_sampling_freq=1,
        end_sampling_freq=60,
        start_seq_len=32,
        num_channels=17,
        return_long=False)
    train_dataloader = DataLoader(train_dataset, batch_size=hp.batch_size,
                                  num_workers=0, drop_last=True)
    val_dataloader = DataLoader(val_dataset, batch_size=hp.batch_size,
                                num_workers=0, drop_last=False,
                                pin_memory=True)
    network = cudize(
        Network(train_dataset.num_channels,
                bidirectional=False,
                contextualizer_num_layers=hp.contextualizer_num_layers,
                contextualizer_dropout=hp.contextualizer_dropout,
                use_transformer=hp.use_transformer,
                prediction_k=hp.prediction_k * (hp.prediction_loss_weight != 0.0),
                have_global=(hp.global_loss_weight != 0.0),
                have_local=(hp.local_loss_weight != 0.0),
                residual_encoder=hp.residual_encoder,
                sinc_encoder=hp.sinc_encoder))
    num_parameters = num_params(network)
    print('num_parameters', num_parameters)
    if hp.use_bert_adam:
        network_optimizer = BertAdam(network.parameters(), lr=hp.lr,
                                     weight_decay=hp.weight_decay, warmup=0.2,
                                     t_total=hp.epochs * len(train_dataloader),
                                     schedule='warmup_linear')
    else:
        network_optimizer = Adam(network.parameters(), lr=hp.lr,
                                 weight_decay=hp.weight_decay)
    if hp.use_scheduler:
        scheduler = ReduceLROnPlateau(network_optimizer, patience=3,
                                      verbose=True)
    best_val_loss = float('inf')
    for epoch in trange(hp.epochs):
        for training, data_loader in zip((False, True),
                                         (val_dataloader, train_dataloader)):
            if training:
                if epoch == hp.epochs - 1:
                    break
                network.train()
            else:
                network.eval()
            total_network_loss = 0.0
            total_prediction_loss = 0.0
            total_global_loss = 0.0
            total_local_loss = 0.0
            total_global_accuracy = 0.0
            total_local_accuracy = 0.0
            total_k_pred_acc = {}
            total_pred_acc = 0.0
            total_count = 0
            with torch.set_grad_enabled(training):
                for batch in data_loader:
                    x = cudize(batch['x'])
                    network_return = network.forward(x)
                    network_loss = hp.prediction_loss_weight * network_return.losses.prediction_
                    network_loss = network_loss + hp.global_loss_weight * network_return.losses.global_
                    network_loss = network_loss + hp.local_loss_weight * network_return.losses.local_
                    bs = x.size(0)
                    total_count += bs
                    total_network_loss += network_loss.item() * bs
                    total_prediction_loss += network_return.losses.prediction_.item() * bs
                    total_global_loss += network_return.losses.global_.item() * bs
                    total_local_loss += network_return.losses.local_.item() * bs
                    total_global_accuracy += network_return.accuracies.global_ * bs
                    total_local_accuracy += network_return.accuracies.local_ * bs
                    dict_add(total_k_pred_acc,
                             network_return.accuracies.prediction_, bs)
                    len_pred = len(network_return.accuracies.prediction_)
                    if len_pred > 0:
                        total_pred_acc += sum(
                            network_return.accuracies.prediction_.values()
                        ) / len_pred * bs
                    if training:
                        network_optimizer.zero_grad()
                        network_loss.backward()
                        network_optimizer.step()
            metrics = dict(net_loss=total_network_loss)
            if network.prediction_loss_network.k > 0 and hp.prediction_loss_weight != 0:
                metrics.update(dict(avg_prediction_acc=total_pred_acc,
                                    prediction_loss=total_prediction_loss,
                                    k_prediction_acc=total_k_pred_acc))
            if hp.global_loss_weight != 0:
                metrics.update(dict(global_loss=total_global_loss,
                                    global_acc=total_global_accuracy))
            if hp.local_loss_weight != 0:
                metrics.update(dict(local_loss=total_local_loss,
                                    local_acc=total_local_accuracy))
            divide_dict(metrics, total_count)
            if not training and hp.use_scheduler:
                scheduler.step(metrics['net_loss'])
            if summary:
                print('train' if training else 'validation', epoch,
                      metrics['net_loss'])
            else:
                print('train' if training else 'validation', epoch)
                print(json.dumps(metrics, indent=4))
            if not training and (metrics['net_loss'] < best_val_loss):
                best_val_loss = metrics['net_loss']
                print('update best to', best_val_loss)
                torch.save(network.state_dict(), 'best_network.pth')
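# --- NOTE: dict_add and divide_dict are used above but not defined in the
# excerpt. Hypothetical reconstructions consistent with the accumulation loop
# (weighted per-key sums, then normalization by the sample count, recursing
# into the nested k_prediction_acc dict):
def dict_add(acc, values, weight):
    # Accumulate weight * value per key into acc, in place.
    for k, v in values.items():
        acc[k] = acc.get(k, 0.0) + v * weight


def divide_dict(metrics, count):
    # Divide every numeric entry by count, in place; recurse into sub-dicts.
    for k, v in metrics.items():
        if isinstance(v, dict):
            divide_dict(v, count)
        else:
            metrics[k] = v / count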
L = len(valid_data)
eval_batch = [valid_data[i] for i in range(L // 4 - 1, L, L // 4)]

### build model
logger.info("### Build model ###")
seq2seq = get_model(model_type, in_dim, out_dim, max_len, cfg['model'])
seq2seq.cuda()

### init params
# NOTE no bias init ...
for p in seq2seq.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

K = 1024
n_params = utils.num_params(seq2seq) / K / K
logger.nofmt(seq2seq)
logger.info("# of params = {:.1f} M".format(n_params))

# parameter size tracing
if args.param_tracing:
    # recursive tracing (loop body completed from the identical
    # param_trace definition earlier in this file)
    def param_trace(name, module, depth, max_depth=999, threshold=0):
        if depth > max_depth:
            return
        prefix = " " * depth
        n_params = utils.num_params(module)
        if n_params > threshold:
            print("{:60s}\t{:10.2f}M".format(prefix + name, n_params / K / K))
        for n, m in module.named_children():
            if depth == 0:
                child_name = n
            else:
                child_name = "{}.{}".format(name, n)
            param_trace(child_name, m, depth + 1, max_depth, threshold)
layers = [
    (n, 'down'),
    (n, 'down'),
    (n, 'up'),
    (n, 'up'),
    (n, 'up'),
    (n, 'up'),
    (n, None),
]
net = HyperNet(channels_in, nClasses, layers, h=1e-2,
               classifier_type='conv', verbose=False)
print('Model Size: %6.2f' % model_size(net))
print('Number of Parameters: %d' % num_params(net))
print('Number of Layers: %d' % (n * len(layers)))
net = net.to(device)

# Show scale captured
class_weights = torch.tensor(train_dataset.CLASS_WEIGHTS)
misfit = nn.CrossEntropyLoss(class_weights).to(device)

best_val_acc = 0
for epoch in range(num_epochs):
    if epoch % 100 == 0 and epoch != 0:
        lr = lr / 10
    get_optim = lambda net: torch.optim.Adam(net.parameters(), lr=lr)
def fit(self, X, y, use_tensorboard=True):
    n_inputs = X.shape[1]
    n_outputs = self.profile.num_target_classes()
    if n_outputs == 0:
        n_outputs = 1
    self.neatcfg = utils.Config(n_inputs)
    self.amb = utils.AMB(self.neatcfg)  # for plotting deep networks
    self.set_model_traits()
    self.build_space()
    if use_tensorboard:
        self.tb = TensorBoardCallback(args.model_dir, self.amb)
    else:
        self.tb = None
    # self.traits.batch_size = 256
    idx_train, idx_val = self.traits.get_cv_split(X.values, y.values)
    idx_train, idx_val = to.LongTensor(idx_train), to.LongTensor(idx_val)
    logger.debug('train size: {}, val size: {}'.format(
        len(idx_train), len(idx_val)))
    X = X.values
    y = y.values
    X = to.Tensor(X.astype('float32'))
    if self.is_class:
        y = y.dot(np.arange(y.shape[1])).astype(int)
        y = to.LongTensor(y)
    else:
        y = to.Tensor(y.astype('float32'))
    X = ag.Variable(X)
    y = ag.Variable(y)
    self.x = X[idx_train]
    self.y = y[idx_train]
    self.x_val = X[idx_val]
    self.y_val = y[idx_val]
    self.controller = Controller(n_inputs, n_outputs, self.neatcfg,
                                 self.vocab, self.activations)
    self.controller.cuda()
    if args.controller_path != '':
        logger.info('Loading controller weights from: {}'.format(
            args.controller_path))
        self.controller.load_state_dict(to.load(args.controller_path))
    # print controller and save string rep to file
    print(self.controller)
    num_params = utils.num_params(self.controller)
    num_params = 'num_params: {:,}'.format(num_params)
    print(num_params)
    path = os.path.join(args.model_dir, "controller.txt")
    with open(path, 'w') as fp:
        fp.write(str(self.controller) + '\n' + num_params)
    # TODO: try higher
    # self.controller_lr = 3.5e-4  # the default
    self.controller_lr = 1e-3
    self.controller_optim = to.optim.Adam(self.controller.parameters(),
                                          lr=self.controller_lr)
    if args.nprocs > 1:
        self.pool = mp.Pool(args.nprocs)
    self.start_time = time.time()
    self.epoch = 1
    stop = self.train_controller()
    logger.info('model_dir: %s', args.model_dir)