def __init__(self, flags, word_index_to_embeddings_map, train_or_others):
        """Copy hyper-parameters from ``flags`` and assemble the model graph.

        Args:
            flags: object exposing the hyper-parameter attributes mirrored
                below (``lstm_size``, ``max_grad_norm``, ``batch_size``,
                ``learning_rate``, ``max_len``, ``embedding_size``,
                ``num_layers``, ``rich_context``, ``l2_strength``).
            word_index_to_embeddings_map: stored on the instance unchanged.
            train_or_others: stored on the instance unchanged.
        """
        # Every hyper-parameter is mirrored one-to-one under the same name.
        mirrored = ('lstm_size', 'max_grad_norm', 'batch_size',
                    'learning_rate', 'max_len', 'embedding_size',
                    'num_layers', 'rich_context', 'l2_strength')
        for attr_name in mirrored:
            setattr(self, attr_name, getattr(flags, attr_name))
        self.word_index_to_embeddings_map = word_index_to_embeddings_map
        self.train_or_others = train_or_others

        # Scalar float placeholder registered under the name 'keep_prob'.
        self.keep_prob_placeholder = tf.placeholder(
            tf.float32, shape=[], name='keep_prob')
        # Non-trainable step counter starting at 0.
        self.global_step = tf.Variable(0, trainable=False)
        print_params(flags)

        # Graph construction, in dependency order.
        self.sentences_placeholder()
        self.get_embedding()
        # sentences go through the LSTM, then concat and max pooling
        self.build_sentences()
        # attention for both w0 and w1
        self.attention()
        self.pred_loss()
        # The saver is created last, after all the build steps above have run.
        self.saver = tf.train.Saver()
Esempio n. 2
0
    def __init__(self, cfg, try_load_best=False):
        """Create the VQ-VAE, its optimizer and writer, and resume if possible.

        Args:
            cfg: configuration object; this method reads ``input_dim``,
                ``dim``, ``K``, ``tbp`` (SummaryWriter path), ``ckp``
                (checkpoint path) and ``ckp_best`` (best-checkpoint path).
            try_load_best: when true and ``cfg.ckp_best`` exists as a file,
                resume from it instead of ``cfg.ckp``.
        """
        self.cfg = cfg
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model = VectorQuantizedVAE(cfg.input_dim, cfg.dim,
                                        K=cfg.K).to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        utils.print_params(self.model)

        self.writer = SummaryWriter(cfg.tbp)

        self.last_epoch = 0
        self.best_loss = None
        ckp = cfg.ckp
        if try_load_best and os.path.isfile(cfg.ckp_best):
            ckp = cfg.ckp_best

        if os.path.isfile(ckp):
            # map_location keeps the load working when the checkpoint was
            # written on a GPU host but this process only has a CPU (the
            # device above is chosen dynamically, so both cases occur).
            checkpoint = torch.load(ckp, map_location=self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.last_epoch = checkpoint['epoch']
            self.best_loss = checkpoint['best_loss']
            # best_loss starts out as None in this class, so a checkpoint may
            # legitimately carry None; '{:.4f}' would raise on it.
            if self.best_loss is None:
                best_loss_text = 'n/a'
            else:
                best_loss_text = '{:.4f}'.format(self.best_loss)
            print('Load {}! Epoch: {} Best loss: {}'.format(
                ckp, self.last_epoch, best_loss_text))
def restore_model(session, folder, train_data, validation_data, test_data, override_params=None):
    """Rebuild the computation graph and restore its latest checkpoint.

    Args:
        session: TensorFlow session the variables are restored into.
        folder: directory holding ``hyperparams.json`` and the checkpoints.
        train_data: passed to ``utils.calc_num_steps_per_epoch`` to derive
            the number of steps per epoch of the saved run.
        validation_data: passed to ``computation_graph.build_graph``.
        test_data: passed to ``computation_graph.build_graph``.
        override_params: optional mapping; entries whose key already exists
            in the saved hyper-parameters replace the saved value.  Unknown
            keys are ignored.  ``None`` means "no overrides".

    Returns:
        ``[graph, hyperparams, last_epoch]`` on success, or ``[]`` when
        ``folder`` does not exist or contains no checkpoint.
    """
    print('Restoring model from %s' % folder)

    if not os.path.exists(folder):
        print('Error: Folder does not exist', folder)
        return []

    # A None default avoids sharing one dict object across calls.
    if override_params is None:
        override_params = {}

    # load saved parameters and override if needed
    params_path = folder + '/hyperparams.json'
    hyperparams = load_hyperparams(params_path)
    # Computed BEFORE the overrides are applied: the restored global step was
    # produced with the saved hyper-parameters, not the overridden ones.
    last_num_steps_per_epoch = utils.calc_num_steps_per_epoch(train_data, hyperparams)
    for param in override_params:
        if param in hyperparams:
            hyperparams[param] = override_params[param]
    utils.print_params(hyperparams)

    # Add ops to save and restore all the variables.
    graph = computation_graph.build_graph(hyperparams, validation_data, test_data)

    # Restore variables from disk.
    model_path = tf.train.latest_checkpoint(folder)
    if model_path is None:
        # latest_checkpoint returns None when no checkpoint is found; report
        # it the same way as a missing folder instead of failing in restore().
        print('Error: No checkpoint found in', folder)
        return []
    print('Restoring model %s' % model_path)
    saver = graph.saver
    saver.restore(session, model_path)
    print("Model restored.")

    global_step = graph.global_step
    last_step = global_step.eval()
    last_epoch = last_step // last_num_steps_per_epoch
    print('Restored global_step %d last_epoch %d' % (last_step, last_epoch))

    return [graph, hyperparams, last_epoch]
Esempio n. 4
0
def run_model(hyperparams, data, save_to_folder):
    """Build the graph, train it in a fresh session and print the accuracies.

    Args:
        hyperparams: hyper-parameter dict.  When ``save_to_folder`` is not the
            empty string its ``'save_folder'`` entry is overwritten in place.
        data: six-item sequence ``(train_dataset, train_labels,
            valid_dataset, valid_labels, test_dataset, test_labels)``.
        save_to_folder: folder recorded under ``hyperparams['save_folder']``;
            ``''`` leaves the dict untouched.

    Returns:
        The value returned by ``model_training.train_model``; items 0, 1 and
        2 are printed as train, validation and test accuracy.
    """
    if save_to_folder != '':
        hyperparams['save_folder'] = save_to_folder
    utils.print_params(hyperparams)

    with tf.Graph().as_default(), tf.Session() as session:
        (train_x, train_y,
         valid_x, valid_y,
         test_x, test_y) = data

        # The graph is built from the validation and test inputs; the
        # training inputs and all labels go to the training routine.
        graph = computation_graph.build_graph(hyperparams, valid_x, test_x)
        accuracy = model_training.train_model(
            session, graph, hyperparams, train_x, train_y, valid_y, test_y)

        # report final results
        print('------Final results-------')
        summary_args = (accuracy[0], accuracy[1])
        print('Final train accuracy %1.2f, validation accuracy %1.2f %%' %
              summary_args)
        print("Test accuracy: %1.2f%%" % accuracy[2])

        return accuracy
def _two_label_performance(target_names, params):
    """Fit a Hopfield network for ``target_names`` and score noisy recall.

    Args:
        target_names: labels to fit; stored into ``params['target_names']``
            (the caller's dict is modified in place).
        params: parameter dict; must contain ``'noise_amount'``.

    Returns:
        ``(similarity, accuracy)``: the means of the two sequences returned
        by ``get_recalling_performance``.
    """
    # Read the noise level before fit_hopfield hands back its own params.
    noise_level = params['noise_amount']

    params['target_names'] = target_names
    print_params(**params)

    print('\n.. fitting hopfield\n')
    network, patterns, _labels, _names, fitted_params = fit_hopfield(params)
    print_params(**fitted_params)

    print('\n.. recalling\n')
    originals, _noisy, recalled = recall_with_noise(
        clf=network, X=patterns, noise_amount=noise_level)

    print_header('result')
    similarities, accurate = get_recalling_performance(originals, recalled)
    similarity = np.mean(similarities)
    accuracy = np.mean(accurate)
    print('similarity:', similarity)
    print('accuracy:', accuracy)

    return similarity, accuracy
Esempio n. 6
0
def main_process(dtrain, dtest, params, epsilon, stop_value=None):
    """Search XGBoost parameters by stepping outwards from the best candidate.

    Each round cross-validates every candidate in ``steps`` (5-fold, MAE
    metric, 10 boosting rounds) and remembers the candidate with the lowest
    mean test MAE.  New candidates are then generated around that best
    candidate with ``utils.get_possible_steps``.

    The loop ends when ``stop_value`` is given and the best MAE drops below
    it, or when the best MAE changed by less than ``epsilon`` in a round and
    at least 500 candidates have been evaluated.  While fewer than 500 have
    been evaluated, a round that changes the best MAE by less than
    ``epsilon`` calls ``utils.reduce_steps()`` and the search continues.

    Args:
        dtrain: training data passed to ``xgb.cv``.
        dtest: unused here; kept for interface compatibility.
        params: starting parameter dict (not modified; a copy is tracked).
        epsilon: minimum MAE change between rounds that counts as progress.
        stop_value: optional MAE below which the search stops immediately.

    Returns:
        ``(best_params, min_mae, iterations)``: the best parameter dict
        found, its MAE, and the number of candidates evaluated.
    """
    print("Starting hyperparameter tuning with start params:")
    print(utils.print_params(params))
    print("With epsilon (stop) value: {}".format(epsilon))
    gradients = utils.get_gradient_list(params, global_constraint.STEP)
    steps = utils.get_possible_steps(params, gradients, [])
    min_mae = float("Inf")
    step_mae = float("Inf")
    iterations = 0
    best_params = params.copy()
    while True:
        # Snapshot of this round's candidates, handed to the step generator.
        last_steps = steps.copy()
        for step_params in steps:
            print(utils.print_params(step_params))
            cv_results = xgb.cv(step_params,
                                dtrain,
                                num_boost_round=10,
                                seed=42,
                                nfold=5,
                                metrics={'mae'},
                                early_stopping_rounds=10)

            mean_mae = cv_results['test-mae-mean'].min()
            boost_rounds = cv_results['test-mae-mean'].argmin()
            print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
            iterations = iterations + 1
            print(iterations)
            if mean_mae < min_mae:
                min_mae = mean_mae
                best_params = step_params.copy()

        if stop_value is not None and min_mae < stop_value:
            break

        converged = abs(step_mae - min_mae) < epsilon
        if converged:
            if iterations >= 500:
                break
            # No meaningful progress yet but budget left: refine the steps.
            utils.reduce_steps()

        step_mae = min_mae
        steps = utils.get_possible_steps(best_params, gradients, last_steps)
        if not converged:
            print(len(steps))

    print(len(steps))

    print("Found best solution:")
    print(utils.print_params(best_params))
    print("MAE:")
    print(min_mae)

    # Return the parameters that were just reported as best.  The previous
    # code returned the untouched starting ``params``, discarding the result
    # of the search (random_process returns best_params as well).
    return (best_params, min_mae, iterations)
Esempio n. 7
0
def main():
    """Run the pipeline: input stage, elemental images, sub-apertures.

    Reads the module-level ``args`` (``f``, ``g``, ``is_prediction``,
    ``is_gpu``) and prints progress plus the total elapsed time.
    """
    # ---- Input stage ----
    print('\nInput Stage...')
    t0 = time.time()

    inputs = get_input_params()

    # Convert mm to pixel
    cvt_inputs = cvt_mm2pixel(inputs, pitch_of_pixel=inputs['P_D'])

    # Convert depth data
    input_stage = InputStage(inputs['name'], int(args.f), int(args.g),
                             args.is_prediction)
    d, P_I, delta_d, color, L = input_stage.convert_depth(
        inputs['color'],
        cvt_inputs['depth'],
        cvt_inputs['f'],
        cvt_inputs['g'],
        cvt_inputs['P_D'],
        cvt_inputs['P_L'])

    print('Input Stage Done.')

    # Print parameters
    utils.print_params(inputs, cvt_inputs, d, P_I, delta_d, color, L)

    # ---- Calculation stage: generate elemental images ----
    print('\nCalculation Stage...')

    calc_stage = CalculationStage(inputs['name'], int(args.f), int(args.g),
                                  args.is_prediction)
    # Both back-ends take the same arguments, so pick one and call it once.
    if args.is_gpu:
        generate = calc_stage.generate_elemental_imgs_GPU
    else:
        generate = calc_stage.generate_elemental_imgs_CPU
    elem_plane = generate(color, L, int(cvt_inputs['P_L']), P_I,
                          cvt_inputs['g'], inputs['num_of_lenses'])

    print('Elemental Image Array generated.')

    # ---- Sub-aperture generation ----
    print('\nGenerate sub aperture images...')
    aperture = SubAperture(inputs['name'], int(args.f), int(args.g),
                           args.is_prediction)
    sub_apertures = aperture.generate_sub_apertures(
        elem_plane, int(cvt_inputs['P_L']), inputs['num_of_lenses'])
    print('Sub-Aperture Images generated.')

    print('\nElapsed time : {}s'.format(time.time() - t0))
    print('Done.')
 def train(self):
     """Fit the model one epoch at a time and evaluate on dev after each.

     Vocabulary sizes and the window size are copied from the training data,
     saved to ``other_vars.npz`` and merged into ``self.params``.  The graph
     architecture is written as YAML.  After every epoch the dev predictions
     and the model weights are written under ``self.model_folder``.
     """
     print('Training model ...')
     # Copy the data-dependent sizes and the id->act mapping onto the model.
     train_data = self.train_data
     self.window_size = train_data.window_size
     self.userTagIntent_vocab_size = train_data.userTagIntent_vocab_size
     self.agentAct_vocab_size = train_data.agentAct_vocab_size
     self.id2agentAct = train_data.id2agentAct

     folder = self.model_folder
     other_npz = '{}/other_vars.npz'.format(folder)
     train_vars = {
         'window_size': self.window_size,
         'userTagIntent_vocab_size': self.userTagIntent_vocab_size,
         'agentAct_vocab_size': self.agentAct_vocab_size,
         'id2agentAct': self.id2agentAct,
     }
     np.savez_compressed(other_npz, **train_vars)
     # Only the three size entries are merged into params (not id2agentAct).
     for key in ('window_size', 'userTagIntent_vocab_size',
                 'agentAct_vocab_size'):
         self.params[key] = train_vars[key]
     print_params(self.params)

     # Build the model graph, plot it and store its architecture as YAML.
     self._build()
     self._plot_graph()
     graph_yaml = '{}/graph-arch.yaml'.format(self.model_folder)
     with open(graph_yaml, 'w') as yaml_file:
         yaml_file.write(self.model.to_yaml())

     # Training inputs/targets, plus a text dump of the reference targets.
     X_train = self.train_data.userTagIntent_vecBin
     y_train = self.train_data.agentAct_vecBin
     writeUtterActTxt(self.train_data.userUtter_txt,
                      self.train_data.agentAct_txt,
                      '{}/train.target'.format(self.model_folder))

     # Dev inputs/targets, plus a text dump of the reference targets.
     X_dev = self.dev_data.userTagIntent_vecBin
     y_dev = self.dev_data.agentAct_vecBin
     dev_utter_txt = self.dev_data.userUtter_txt
     writeUtterActTxt(dev_utter_txt,
                      self.dev_data.agentAct_txt,
                      '{}/dev.target'.format(self.model_folder))

     report_fmt = ('ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, '
                   'accuracy_frame={:.4f}, threshold={:.4f}')
     weights_fmt = ('{}/weights/ep={}_f1={:.4f}_frameAcc={:.4f}'
                    '_th={:.4f}.h5')
     for ep in xrange(self.epoch_nb):
         print('<Epoch {}>'.format(ep))
         # One epoch per fit() call so dev metrics are available every epoch.
         self.model.fit(x=X_train, y=y_train, batch_size=self.batch_size,
                        nb_epoch=1, verbose=2)
         act_probs = self.model.predict(X_dev)
         scores = eval_intentPredict(act_probs, y_dev)
         precision, recall, fscore, accuracy_frame, threshold = scores
         print(report_fmt.format(ep, precision, recall, fscore,
                                 accuracy_frame, threshold))

         dev_pred_txt = getActPred(act_probs, threshold, self.id2agentAct)
         dev_results_fname = '{}/dev_results/dev_ep={}.pred'.format(
             self.model_folder, ep)
         writeUtterActTxt(dev_utter_txt, dev_pred_txt, dev_results_fname)
         print('Write dev results: {}'.format(dev_results_fname))

         weights_fname = weights_fmt.format(
             self.model_folder, ep, fscore, accuracy_frame, threshold)
         print('Saving Model: {}'.format(weights_fname))
         self.model.save_weights(weights_fname, overwrite=True)
Esempio n. 9
0
    def __init__(self):
        """Create the VAE, optimizer and writer, resuming from a checkpoint.

        The checkpoint path is fixed to ``results/vae.pt`` and the
        SummaryWriter path to ``runs/vae``.  When the checkpoint file exists,
        model and optimizer state and the last epoch are restored from it.
        """
        self.ckp = 'results/vae.pt'
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model = VAE().to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
        utils.print_params(self.model)

        self.writer = SummaryWriter('runs/vae')

        self.last_epoch = 0
        if os.path.isfile(self.ckp):
            # map_location keeps the load working when the checkpoint was
            # written on a GPU host but this process only has a CPU (the
            # device above is chosen dynamically, so both cases occur).
            checkpoint = torch.load(self.ckp, map_location=self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.last_epoch = checkpoint['epoch']
            loss = checkpoint['loss']
            print(
                'Load checkpoint! Last Epoch: {} Average loss: {:.4f}'.format(
                    self.last_epoch, loss))
def hopfield_single_performance(
        n_sample,
        n_label,
        noise_amount,
        fit_mode,
        save_fig,
        ):
    """Fit one Hopfield network and score its recall of noisy inputs.

    Args:
        n_sample: forwarded to ``fit_hopfield`` via the parameter dict.
        n_label: forwarded to ``fit_hopfield`` via the parameter dict.
        noise_amount: forwarded to ``fit_hopfield`` and used as the noise
            level for ``recall_with_noise``.
        fit_mode: forwarded to ``fit_hopfield`` via the parameter dict.
        save_fig: when truthy, ``view_recalling_result`` is called with the
            original, noisy and recalled data.

    Returns:
        ``(similarity, accuracy)``: the means of the two sequences returned
        by ``get_recalling_performance``.
    """
    request = dict(
        n_sample=n_sample,
        n_label=n_label,
        noise_amount=noise_amount,
        fit_mode=fit_mode,
    )
    print_params(**request)

    print('\n.. fitting hopfield\n')
    # fit_hopfield hands back its own parameter dict; that one is used below.
    network, patterns, _labels, _names, fitted_params = fit_hopfield(request)
    print_params(**fitted_params)

    print('\n.. recalling\n')
    originals, noisy, recalled = recall_with_noise(
        clf=network, X=patterns, noise_amount=noise_amount)

    print_header('result')
    similarities, accurate = get_recalling_performance(originals, recalled)
    print('similarity:', np.mean(similarities))
    print('accuracy:', np.mean(accurate))

    # Optionally show original / noisy / recalled side by side.
    if save_fig:
        print('\n.. view recalling result\n')
        view_recalling_result(originals, noisy, recalled,
                              accurate=accurate, **fitted_params)

    return np.mean(similarities), np.mean(accurate)
def hopfield_two_label_performance(
        n_sample,
        noise_amount,
        fit_mode,
        save_fig,
        ):
    """Score every 2-label combination of 'chiltx' and plot the results.

    For each pair, ``_two_label_performance`` is run with a shared parameter
    dict.  Similarities and accuracies are printed and drawn as a grouped bar
    chart saved to ``two_label_performance.png``.

    Args:
        n_sample: forwarded via the parameter dict.
        noise_amount: forwarded via the parameter dict.
        fit_mode: forwarded via the parameter dict.
        save_fig: not consulted by this function; the figure is always saved.
    """
    params = dict(
        n_sample=n_sample,
        noise_amount=noise_amount,
        fit_mode=fit_mode,
    )
    print_params(**params)

    # One (label, (similarity, accuracy)) entry per pair, in combination order.
    outcomes = [
        (','.join(pair), _two_label_performance(pair, params))
        for pair in itertools.combinations('chiltx', 2)
    ]
    labels = [label for label, _ in outcomes]
    similarities = [scores[0] for _, scores in outcomes]
    accuracies = [scores[1] for _, scores in outcomes]

    print('labels:', labels)
    print('similarities:', similarities)
    print('accuracies:', accuracies)

    bar_width = 0.35
    positions = np.arange(len(labels))
    fig, ax = plt.subplots()
    # Two bars per pair: similarity at the tick position minus one width,
    # accuracy at the tick position.
    ax.bar(positions, similarities, bar_width,
           label='similarities', color='r')
    ax.bar(positions + bar_width, accuracies, bar_width,
           label='accuracies', color='b')
    ax.set_xlabel('two labels')
    ax.set_ylabel('performance')
    ax.set_xticks(positions + bar_width)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 1)
    plt.legend(loc='lower right')
    plt.savefig('two_label_performance.png')
Esempio n. 12
0
def random_process(dtrain, dtest, iterations):
    """Random-search XGBoost regression parameters by cross-validated MAE.

    Each iteration draws a parameter set from a generator seeded with 42,
    cross-validates it (5-fold, MAE metric, 10 boosting rounds) and keeps
    the set with the lowest mean test MAE.

    Args:
        dtrain: training data passed to ``xgb.cv``.
        dtest: unused here; kept for interface compatibility.
        iterations: number of random parameter sets to evaluate.

    Returns:
        ``(best_params, min_mae)``.  When no candidate was evaluated or none
        produced a MAE below infinity, ``best_params`` is ``None`` and
        ``min_mae`` is ``float('inf')``.
    """
    print("Starting hyperparameter tuning with start params:")
    random.seed(a=42)
    min_mae = float("Inf")
    # Initialised so the summary below cannot hit an unbound name when the
    # loop never records a candidate (e.g. iterations == 0).
    best_params = None
    improvements = 0
    for _ in range(iterations):
        # The draw order is part of the seeded sequence; do not reorder.
        step_params = {
            'max_depth': random.randint(0, 10),
            'min_child_weight': random.randint(0, 10),
            'eta': random.uniform(LOWER_BOUND, 1),
            'subsample': random.uniform(LOWER_BOUND, 1),
            'colsample_bytree': random.uniform(LOWER_BOUND, 1),
            'objective': 'reg:linear',
        }

        print(utils.print_params(step_params))
        cv_results = xgb.cv(step_params,
                            dtrain,
                            num_boost_round=10,
                            seed=42,
                            nfold=5,
                            metrics={'mae'},
                            early_stopping_rounds=10)
        mean_mae = cv_results['test-mae-mean'].min()
        boost_rounds = cv_results['test-mae-mean'].argmin()
        print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))

        if mean_mae < min_mae:
            min_mae = mean_mae
            best_params = step_params.copy()
            improvements += 1
            print(improvements)

    print("\t")
    print(improvements)
    if best_params is None:
        print("No parameter set was selected.")
        return (None, min_mae)

    print("Found best solution:")
    print(utils.print_params(best_params))
    print("MAE:")
    print(min_mae)

    return (best_params, min_mae)
Esempio n. 13
0
    def __init__(self, cfg, try_load_best=False):
        """Create the GatedPixelCNN on CUDA and resume from a checkpoint.

        Args:
            cfg: configuration object; this method reads ``input_dim``,
                ``dim``, ``n_layers``, ``n_classes``, ``tbp`` (SummaryWriter
                path), ``ckp`` and ``ckp_best`` (checkpoint paths).
            try_load_best: when true and ``cfg.ckp_best`` exists as a file,
                resume from it instead of ``cfg.ckp``.
        """
        self.cfg = cfg
        network = GatedPixelCNN(input_dim=cfg.input_dim,
                                dim=cfg.dim,
                                n_layers=cfg.n_layers,
                                n_classes=cfg.n_classes)
        self.net = network.cuda()

        utils.print_params(self.net)
        self.optimizer = optim.Adam(self.net.parameters())
        # Defaults used when there is no checkpoint to resume from.
        self.last_epoch = 0
        self.best_loss = None

        self.writer = SummaryWriter(cfg.tbp)

        ckp = cfg.ckp
        if try_load_best and os.path.isfile(cfg.ckp_best):
            ckp = cfg.ckp_best
        if not os.path.isfile(ckp):
            return

        state = torch.load(ckp)
        self.net.load_state_dict(state['net'])
        self.optimizer.load_state_dict(state['optimizer'])
        self.last_epoch = state['epoch']
        self.best_loss = state['best_loss']
        message = 'Load {}! Epoch: {} Best loss: {:.4f}'.format(
            ckp, self.last_epoch, self.best_loss)
        print(message)
Esempio n. 14
0
def random_process_class(dtrain, dtest, iterations, y_test):
    """Random-search XGBoost binary-classifier parameters by test hits.

    Each iteration draws a parameter set from a generator seeded with 42,
    trains a booster for 10 rounds, thresholds its predictions on ``dtest``
    at 0.5 and counts how many agree with ``y_test``.  The parameter set with
    the most correct predictions is kept.

    Args:
        dtrain: training data passed to ``xgb.train``.
        dtest: data passed to the trained booster's ``predict``.
        iterations: number of random parameter sets to evaluate.
        y_test: expected labels, compared element-wise with the predictions.

    Returns:
        ``(best_params, maxacc)`` where ``maxacc`` is the highest number of
        correct predictions.  ``best_params`` is ``None`` only when no
        iteration ran.
    """
    print("Starting hyperparameter tuning with start params:")
    random.seed(a=42)
    maxacc = 0
    # Initialised so the summary below cannot hit an unbound name.
    best_params = None
    # Number of times a new best was recorded (mirrors random_process; the
    # counter was previously printed but never incremented).
    improvements = 0
    for _ in range(iterations):
        # The draw order is part of the seeded sequence; do not reorder.
        step_params = {
            'max_depth': random.randint(0, 10),
            'min_child_weight': random.randint(0, 10),
            'eta': random.uniform(LOWER_BOUND, 1),
            'subsample': random.uniform(LOWER_BOUND, 1),
            'colsample_bytree': random.uniform(LOWER_BOUND, 1),
            'objective': 'binary:logistic',
        }
        booster = xgb.train(
            step_params,
            dtrain,
            num_boost_round=10,
        )
        preds = [1 if score > 0.5 else 0 for score in booster.predict(dtest)]

        n_correct = sum(1 for pred, truth in zip(preds, y_test)
                        if pred == truth)
        print(n_correct)

        print(100 * n_correct / len(preds))

        # The first candidate is always recorded, so a run in which nothing
        # is ever predicted correctly still returns a parameter set.
        if best_params is None or n_correct > maxacc:
            maxacc = n_correct
            best_params = step_params.copy()
            improvements += 1

    print("\t")
    print(improvements)
    if best_params is None:
        print("No parameter set was evaluated.")
        return (None, maxacc)

    print("Found best solution:")
    print(utils.print_params(best_params))
    print("Random result:")
    print(maxacc)
    print(maxacc / len(y_test))

    return (best_params, maxacc)
Esempio n. 15
0
def train(opt):
    """Train the model selected by ``opt.use_model`` and checkpoint it.

    Builds the train and dev datasets/loaders, optionally resumes from
    ``opt.pretrain_model``, then runs epochs ``start_epoch .. opt.epoch``.
    Every ``opt.test_epoch`` epochs the model is evaluated with ``test`` on
    the dev loader; a new best ``ign_f1`` is saved as
    ``<model_name>_best.pt`` and its precision-recall curve is added to the
    figure.  Every ``opt.save_model_freq`` epochs a
    ``<model_name>_<epoch>.pt`` snapshot is written.

    Relies on the module-level ``word2id``, ``ner2id``, ``rel2id`` and
    ``id2rel`` mappings.

    Args:
        opt: options object.  Attributes read here include ``use_model``,
            the dataset paths, ``batch_size``, ``test_batch_size``,
            ``negativa_alpha``, ``pretrain_model``, ``lr``, ``model_name``,
            ``weight_decay``, ``coslr``, ``epoch``, ``checkpoint_dir``,
            ``fig_result_dir``, ``relation_nums``, ``clip``, ``log_step``,
            ``test_epoch`` and ``save_model_freq``.

    Raises:
        ValueError: if ``opt.use_model`` is neither ``'bert'`` nor
            ``'bilstm'``.
    """
    if opt.use_model == 'bert':
        # datasets
        train_set = BERTDGLREDataset(opt.train_set, opt.train_set_save, word2id, ner2id, rel2id, dataset_type='train',
                                     opt=opt)
        # The dev set/loader must exist: the evaluation step below passes
        # dev_loader to test(); with them commented out it raised NameError.
        dev_set = BERTDGLREDataset(opt.dev_set, opt.dev_set_save, word2id, ner2id, rel2id, dataset_type='dev',
                                   instance_in_train=train_set.instance_in_train, opt=opt)

        # dataloaders
        train_loader = DGLREDataloader(train_set, batch_size=opt.batch_size, shuffle=True,
                                       negativa_alpha=opt.negativa_alpha)
        dev_loader = DGLREDataloader(dev_set, batch_size=opt.test_batch_size, dataset_type='dev')

        model = GAIN_BERT(opt)

    elif opt.use_model == 'bilstm':
        # datasets
        train_set = DGLREDataset(opt.train_set, opt.train_set_save, word2id, ner2id, rel2id, dataset_type='train',
                                 opt=opt)
        dev_set = DGLREDataset(opt.dev_set, opt.dev_set_save, word2id, ner2id, rel2id, dataset_type='dev',
                               instance_in_train=train_set.instance_in_train, opt=opt)

        # dataloaders
        train_loader = DGLREDataloader(train_set, batch_size=opt.batch_size, shuffle=True,
                                       negativa_alpha=opt.negativa_alpha)
        dev_loader = DGLREDataloader(dev_set, batch_size=opt.test_batch_size, dataset_type='dev')

        model = GAIN_GloVe(opt)
    else:
        # An explicit raise survives ``python -O``, unlike an assert.
        raise ValueError('please choose a model from [bert, bilstm].')

    print(model.parameters)
    print_params(model)

    start_epoch = 1
    pretrain_model = opt.pretrain_model
    lr = opt.lr
    model_name = opt.model_name

    if pretrain_model != '':
        # Load onto CPU first; the model is moved to its device afterwards.
        chkpt = torch.load(pretrain_model, map_location=torch.device('cpu'))
        model.load_state_dict(chkpt['checkpoint'])
        logging('load model from {}'.format(pretrain_model))
        start_epoch = chkpt['epoch'] + 1
        lr = chkpt['lr']
        logging('resume from epoch {} with lr {}'.format(start_epoch, lr))
    else:
        logging('training from scratch with lr {}'.format(lr))

    model = get_cuda(model)

    if opt.use_model == 'bert':
        # BERT parameters get a 100x smaller learning rate than the rest.
        # A set makes the membership test O(1) per parameter.
        bert_param_ids = set(map(id, model.bert.parameters()))
        base_params = filter(lambda p: p.requires_grad and id(p) not in bert_param_ids, model.parameters())

        optimizer = optim.AdamW([
            {'params': model.bert.parameters(), 'lr': lr * 0.01},
            {'params': base_params, 'weight_decay': opt.weight_decay}
        ], lr=lr)
    else:
        optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=lr,
                                weight_decay=opt.weight_decay)

    # Unreduced loss so it can be masked per relation slot below.
    BCE = nn.BCEWithLogitsLoss(reduction='none')

    if opt.coslr:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(opt.epoch // 4) + 1)

    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    checkpoint_dir = opt.checkpoint_dir
    os.makedirs(checkpoint_dir, exist_ok=True)
    fig_result_dir = opt.fig_result_dir
    os.makedirs(fig_result_dir, exist_ok=True)

    best_ign_auc = 0.0
    best_ign_f1 = 0.0
    best_epoch = 0

    model.train()

    global_step = 0
    total_loss = 0
    log_step = opt.log_step

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim(0.0, 1.0)
    plt.xlim(0.0, 1.0)
    plt.title('Precision-Recall')
    plt.grid(True)

    acc_NA, acc_not_NA, acc_total = Accuracy(), Accuracy(), Accuracy()
    logging('begin..')

    for epoch in range(start_epoch, opt.epoch + 1):
        start_time = time.time()
        for acc in [acc_NA, acc_not_NA, acc_total]:
            acc.clear()

        for ii, d in enumerate(train_loader):
            relation_multi_label = d['relation_multi_label']
            relation_mask = d['relation_mask']
            relation_label = d['relation_label']

            predictions = model(words=d['context_idxs'],
                                src_lengths=d['context_word_length'],
                                mask=d['context_word_mask'],
                                entity_type=d['context_ner'],
                                entity_id=d['context_pos'],
                                mention_id=d['context_mention'],
                                distance=None,
                                entity2mention_table=d['entity2mention_table'],
                                graphs=d['graphs'],
                                h_t_pairs=d['h_t_pairs'],
                                relation_mask=relation_mask,
                                path_table=d['path_table'],
                                entity_graphs=d['entity_graphs'],
                                ht_pair_distance=d['ht_pair_distance']
                                )
            # Masked mean: sum the masked losses and divide by the number of
            # unmasked slots times the number of relations.
            loss = torch.sum(BCE(predictions, relation_multi_label) * relation_mask.unsqueeze(2)) / (
                    opt.relation_nums * torch.sum(relation_mask))

            optimizer.zero_grad()
            loss.backward()

            # opt.clip == -1 disables gradient clipping.
            if opt.clip != -1:
                nn.utils.clip_grad_value_(model.parameters(), opt.clip)
            optimizer.step()
            if opt.coslr:
                scheduler.step(epoch)

            output = torch.argmax(predictions, dim=-1)
            output = output.data.cpu().numpy()
            relation_label = relation_label.data.cpu().numpy()

            for i in range(output.shape[0]):
                for j in range(output.shape[1]):
                    label = relation_label[i][j]
                    # A negative label ends the row; later entries are skipped.
                    if label < 0:
                        break

                    is_correct = (output[i][j] == label)
                    # Label 0 is tracked separately as the "NA" accuracy.
                    if label == 0:
                        acc_NA.add(is_correct)
                    else:
                        acc_not_NA.add(is_correct)

                    acc_total.add(is_correct)

            global_step += 1
            total_loss += loss.item()

            if global_step % log_step == 0:
                cur_loss = total_loss / log_step
                elapsed = time.time() - start_time
                logging(
                    '| epoch {:2d} | step {:4d} |  ms/b {:5.2f} | train loss {:5.3f} | NA acc: {:4.2f} | not NA acc: {:4.2f}  | tot acc: {:4.2f} '.format(
                        epoch, global_step, elapsed * 1000 / log_step, cur_loss * 1000, acc_NA.get(), acc_not_NA.get(),
                        acc_total.get()))
                total_loss = 0
                start_time = time.time()

        if epoch % opt.test_epoch == 0:
            logging('-' * 89)
            eval_start_time = time.time()
            model.eval()
            ign_f1, ign_auc, pr_x, pr_y = test(model, dev_loader, model_name, id2rel=id2rel)
            model.train()
            logging('| epoch {:3d} | time: {:5.2f}s'.format(epoch, time.time() - eval_start_time))
            logging('-' * 89)

            if ign_f1 > best_ign_f1:
                best_ign_f1 = ign_f1
                best_ign_auc = ign_auc
                best_epoch = epoch
                path = os.path.join(checkpoint_dir, model_name + '_best.pt')
                torch.save({
                    'epoch': epoch,
                    'checkpoint': model.state_dict(),
                    'lr': lr,
                    'best_ign_f1': ign_f1,
                    'best_ign_auc': ign_auc,
                    'best_epoch': epoch
                }, path)

                plt.plot(pr_x, pr_y, lw=2, label=str(epoch))
                plt.legend(loc="upper right")
                plt.savefig(os.path.join(fig_result_dir, model_name))

        if epoch % opt.save_model_freq == 0:
            path = os.path.join(checkpoint_dir, model_name + '_{}.pt'.format(epoch))
            torch.save({
                'epoch': epoch,
                'lr': lr,
                'checkpoint': model.state_dict()
            }, path)

    print("Finish training")
    print("Best epoch = %d | Best Ign F1 = %f" % (best_epoch, best_ign_f1))
    print("Storing best result...")
    print("Finish storing")
Esempio n. 16
0
def train(opt, isbody=False):
    """Train a medical-extraction model with early stopping, then test it.

    Args:
        opt: options object.  Attributes read here include the three dataset
            paths, ``batch_size``, ``dev_batch_size``, ``num_worker``,
            ``lr``, ``epochs``, ``pretrain_model``, ``model_name``,
            ``num_warmup_steps``, ``threshold``, ``checkpoint_dir``,
            ``patience`` and ``save_model_freq``.
        isbody: when true, train ``MedicalExtractionModelForBody`` on the
            body inputs/targets; otherwise train ``MedicalExtractionModel``
            on the subject, decorate and frequency targets.  Resuming from
            ``opt.pretrain_model`` only happens when this is false.
    """
    train_ds = MedicalExtractionDataset(opt.train_data)
    dev_ds = MedicalExtractionDataset(opt.dev_data)
    test_ds = MedicalExtractionDataset(opt.test_data)

    # Dev and test loaders use the same evaluation settings.
    dev_dl = DataLoader(dev_ds, batch_size=opt.dev_batch_size,
                        shuffle=False, num_workers=opt.num_worker)
    test_dl = DataLoader(test_ds, batch_size=opt.dev_batch_size,
                         shuffle=False, num_workers=opt.num_worker)

    if isbody:
        logging('training for body')
        model = MedicalExtractionModelForBody(opt)
    else:
        logging('training for subject, decorate and body')
        model = MedicalExtractionModel(opt)
    print_params(model)

    start_epoch = 1
    learning_rate = opt.lr
    total_epochs = opt.epochs
    pretrain_model = opt.pretrain_model
    model_name = opt.model_name  # name under which checkpoints are saved

    # Resume from a pretrained checkpoint (never for the body model).
    should_resume = pretrain_model != '' and not isbody
    if should_resume:
        chkpt = torch.load(pretrain_model, map_location=torch.device('cpu'))
        model.load_state_dict(chkpt['checkpoints'])
        logging('load model from {}'.format(pretrain_model))
        start_epoch = chkpt['epoch'] + 1
        learning_rate = chkpt['learning_rate']
        logging('resume from epoch {} with learning_rate {}'.format(
            start_epoch, learning_rate))
    else:
        logging('training from scratch with learning_rate {}'.format(
            learning_rate))

    model = get_cuda(model)

    num_train_steps = int(len(train_ds) / opt.batch_size * opt.epochs)

    # Parameters whose name contains one of these get no weight decay.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    decayed, undecayed = [], []
    for param_name, param in list(model.named_parameters()):
        if any(marker in param_name for marker in no_decay):
            undecayed.append(param)
        else:
            decayed.append(param)
    optimizer_parameters = [
        {'params': decayed, 'weight_decay': 0.001},
        {'params': undecayed, 'weight_decay': 0.0},
    ]

    optimizer = optim.AdamW(optimizer_parameters, lr=learning_rate)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=opt.num_warmup_steps,
        num_training_steps=num_train_steps)
    threshold = opt.threshold
    # Unreduced loss so it can be masked per position below.
    criterion = nn.BCEWithLogitsLoss(reduction='none')

    checkpoint_dir = opt.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.mkdir(checkpoint_dir)

    # Checkpoint file names carry a '_body' marker for the body model.
    variant = '_body' if isbody else ''
    # Best model path, kept to make the final evaluation easy.
    best_model_path = os.path.join(checkpoint_dir,
                                   model_name + variant + '_best.pt')

    es = EarlyStopping(patience=opt.patience, mode="min", criterion='val loss')
    for epoch in range(start_epoch, total_epochs + 1):
        running_loss = 0.0
        model.train()
        train_dl = DataLoader(train_ds,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_worker)
        progress = tqdm(train_dl, total=len(train_dl))
        for batch in progress:
            optimizer.zero_grad()
            subject_target_ids = batch['subject_target_ids']
            decorate_target_ids = batch['decorate_target_ids']
            freq_target_ids = batch['freq_target_ids']
            body_target_ids = batch['body_target_ids']
            mask = batch['mask'].float().unsqueeze(-1)
            body_mask = batch['body_mask'].unsqueeze(-1)

            # Masked mean of the element-wise loss.
            if isbody:
                body_logits = model(
                    input_ids=batch['body_input_ids'],
                    attention_mask=batch['body_mask'],
                    token_type_ids=batch['body_token_type_ids'])
                elementwise = criterion(body_logits, body_target_ids)
                loss = torch.sum(elementwise * body_mask) / torch.sum(body_mask)
            else:
                subject_logits, decorate_logits, freq_logits = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['mask'],
                    token_type_ids=batch['token_type_ids'])
                elementwise = (criterion(subject_logits, subject_target_ids) +
                               criterion(decorate_logits, decorate_target_ids) +
                               criterion(freq_logits, freq_target_ids))
                loss = torch.sum(elementwise * mask) / torch.sum(mask)

            loss.backward()
            optimizer.step()
            scheduler.step()

            batch_loss = loss.item()
            progress.set_postfix(
                train_loss='{:5.3f} / 1000'.format(1000 * batch_loss),
                epoch='{:2d}'.format(epoch))
            # Weight by batch size so the epoch average is per example.
            running_loss += batch_loss * subject_target_ids.shape[0]

        avg_train_loss = running_loss * 1000 / len(train_ds)
        print('train loss per example: {:5.3f} / 1000'.format(avg_train_loss))

        avg_val_loss = test(model, dev_ds, dev_dl, criterion, threshold,
                            'val', isbody=isbody)

        # NOTE(review): EarlyStopping presumably writes the model to
        # model_path when the validation loss improves — confirm.
        es(avg_val_loss,
           model,
           model_path=best_model_path,
           epoch=epoch,
           learning_rate=learning_rate)
        if es.early_stop:
            print("Early stopping")
            break

        # Periodic per-epoch snapshot so training can be resumed later.
        if epoch % opt.save_model_freq == 0:
            snapshot_path = os.path.join(
                checkpoint_dir,
                model_name + '{}_{}.pt'.format(variant, epoch))
            snapshot = {
                'epoch': epoch,
                'learning_rate': learning_rate,
                'checkpoints': model.state_dict()
            }
            torch.save(snapshot, snapshot_path)

    # load best model and test
    chkpt = torch.load(best_model_path, map_location=torch.device('cpu'))
    model.load_state_dict(chkpt['checkpoints'])
    if isbody:
        logging('load best body model from {} and test ...'.format(
            best_model_path))
    else:
        logging('load best model from {} and test ...'.format(best_model_path))
    test(model, test_ds, test_dl, criterion, threshold, 'test', isbody)
    model.cpu()
Esempio n. 17
0
    optimizer = torch.optim.SGD(params=net.parameters(),
                                lr=lr, 
                                momentum=momentum, 
                                weight_decay=weight_decay)
    
    criterion = nn.CrossEntropyLoss().cuda()
    net = torch.nn.DataParallel(net).to(device)
    
    best_model, best_acc = engine.train_reg(args, net, criterion, optimizer, trainloader, testloader, n_epochs)
    
    torch.save({'state_dict':best_model.state_dict()}, os.path.join(model_path, "{}_{}_{:.2f}.tar").format(model_name, mode, best_acc))

# Script entry point: load the dataset config, set up logging, build the
# train/test loaders and start training.  Relies on the module-level names
# dataset_name, model_name, mode and log_path defined outside this block.
if __name__ == '__main__':
    # Per-dataset JSON configuration under ./config/.
    file = "./config/" + dataset_name + ".json"
    args = utils.load_json(json_file=file)
    
    # Log file is named after the model and the run mode.
    log_file = "{}_{}.txt".format(model_name, mode)
    # NOTE(review): utils.Tee presumably mirrors stdout into the log file —
    # confirm against utils.
    utils.Tee(os.path.join(log_path, log_file), 'w')

    print(log_file)
    print("---------------------Training [%s]---------------------" % model_name)
    # Print the dataset section and the section keyed by the model name.
    utils.print_params(args["dataset"], args[model_name], dataset=args['dataset']['name'])
    
    # Train/test file locations come from the dataset section of the config.
    train_file = args['dataset']['train_file']
    test_file = args['dataset']['test_file']
    trainloader = utils.init_dataloader(args, train_file, mode="train")
    testloader = utils.init_dataloader(args, test_file, mode="test")
    
    main(args, model_name, trainloader, testloader)
Esempio n. 18
0
def train(opt):
    """Train a ``MedicalExtractionModel`` and checkpoint it periodically.

    The function is still a skeleton: feeding a batch to the model, computing
    the loss and evaluating on the dev set are marked TODO below, so the
    training step cannot run until they are filled in.

    Args:
        opt: Options object. The attributes read here are ``train_data``,
            ``dev_data``, ``dev_batch_size``, ``batch_size``, ``lr``,
            ``epochs``, ``log_step``, ``pretrain_model``, ``model_name``,
            ``checkpoint_dir``, ``test_epoch`` and ``save_model_freq``.
    """
    train_ds = MedicalExtractionDataset(opt.train_data)
    dev_ds = MedicalExtractionDataset(opt.dev_data)

    dev_dl = DataLoader(dev_ds,
                        batch_size=opt.dev_batch_size,
                        shuffle=False,
                        num_workers=1
                        )

    model = MedicalExtractionModel(opt)
    print(model.parameters)
    print_params(model)

    start_epoch = 1
    learning_rate = opt.lr
    total_epochs = opt.epochs
    log_step = opt.log_step
    pretrain_model = opt.pretrain_model
    model_name = opt.model_name  # name under which checkpoints are saved

    # load pretrained model
    if pretrain_model != '':
        chkpt = torch.load(pretrain_model, map_location=torch.device('cpu'))
        # Checkpoints written at the end of this function store the weights
        # under 'checkpoint'; the previous loader expected 'checkpoints'.
        # Accept both so that old and new files can be resumed.
        weights_key = 'checkpoint' if 'checkpoint' in chkpt else 'checkpoints'
        model.load_state_dict(chkpt[weights_key])
        logging('load model from {}'.format(pretrain_model))
        start_epoch = chkpt['epoch'] + 1
        learning_rate = chkpt['learning_rate']
        logging('resume from epoch {} with learning_rate {}'.format(start_epoch, learning_rate))
    else:
        logging('training from scratch with learning_rate {}'.format(learning_rate))

    model = get_cuda(model)

    # TODO: switch to AdamW if a BERT encoder is used
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # TODO loss function
    # criterion =

    checkpoint_dir = opt.checkpoint_dir
    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    os.makedirs(checkpoint_dir, exist_ok=True)

    # training process
    global_step = 0
    total_loss = 0
    # Start at start_epoch so that a resumed run continues where the
    # checkpoint left off instead of repeating (and overwriting) epoch 1..N.
    for epoch in range(start_epoch, total_epochs + 1):
        start_time = time.time()
        train_dl = DataLoader(train_ds,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=8
                              )
        model.train()
        for batch in train_dl:
            optimizer.zero_grad()
            # TODO: feed the batch to the model

            # TODO: compute the loss (until then the next line fails on None)
            loss = None

            loss.backward()
            optimizer.step()

            global_step += 1
            total_loss += loss.item()
            if global_step % log_step == 0:
                # Average loss and time per batch over the last log_step steps.
                cur_loss = total_loss / log_step
                elapsed = time.time() - start_time
                logging(
                    '| epoch {:2d} | step {:4d} |  ms/b {:5.2f} | train loss {:5.3f} '.format(
                        epoch, global_step, elapsed * 1000 / log_step, cur_loss * 1000))
                total_loss = 0
                start_time = time.time()

        if epoch % opt.test_epoch == 0:
            model.eval()
            with torch.no_grad():
                for batch in dev_dl:
                    # TODO: evaluate on the dev set
                    pass

        # save model
        # TODO: could be changed to keep only the model that is best on dev
        if epoch % opt.save_model_freq == 0:
            path = os.path.join(checkpoint_dir, model_name + '_{}.pt'.format(epoch))
            torch.save({
                'epoch': epoch,
                'learning_rate': learning_rate,
                'checkpoint': model.state_dict()
            }, path)
Esempio n. 19
0
    print("The best test l2 is {:.3f}".format(test_l2))
    print(
        "=============================================================================="
    )


# Script entry point for the attack run: load the dataset config, set up the
# log file, build dataloaders and the evaluation model, then call main().
# NOTE(review): dataset_name, w, target_name, target_mode, save_log_path,
# eval_path, device and the initial save_img_path are not defined in this
# block -- presumably module-level settings; confirm.
if __name__ == '__main__':
    file = dataset_name + ".json"
    args = utils.load_params(file)
    # The log file name carries the value of w only when w is positive.
    if w > 0:
        log_file = "attack" + '_' + target_name + '_{}_{}.txt'.format(
            target_mode, w)
    else:
        log_file = "attack" + '_' + target_name + '_{}.txt'.format(target_mode)
    logger = utils.Tee(os.path.join(save_log_path, log_file), 'w')
    utils.print_params(args)

    # NOTE(review): the keys are swapped here -- the "train" loader reads
    # 'test_file' and the "test" loader reads 'train_file'. Confirm that this
    # is intentional for the attack setting and not a typo.
    train_file = args['dataset']['test_file']
    test_file = args['dataset']['train_file']
    trainloader = utils.init_dataloader(args, train_file, mode="train")
    testloader = utils.init_dataloader(args, test_file, mode="test")

    # Evaluation model: a "VGG16" / "reg" model wrapped in DataParallel, with
    # weights loaded from eval_path.
    eval_model = utils.get_model(args, "VGG16", "reg")
    eval_model = torch.nn.DataParallel(eval_model).to(device)
    utils.load_state_dict(eval_model, eval_path)

    # Rebinds save_img_path to a per-target sub-directory and creates it.
    save_img_path = os.path.join(
        save_img_path, "attack_{}_{}".format(target_name, target_mode))
    os.makedirs(save_img_path, exist_ok=True)
    main(args, trainloader, testloader, eval_model)
    def train(self):
        """Jointly train the slot-tagging and intent-prediction model.

        Steps, in order:
          1. Copy vocabulary and size attributes from ``self.train_data`` onto
             ``self`` and save them to ``<model_folder>/other_vars.npz``.
          2. Build and plot the model graph and dump its architecture to
             ``<model_folder>/graph-arch.yaml``.
          3. Write the train and dev target files.
          4. For each of ``self.epoch_nb`` epochs: fit for one epoch, predict
             on the dev set, print tag / intent / frame scores, write the dev
             predictions and save the model weights.

        The print calls use the parenthesised single-argument form and the
        loop uses ``range`` so the method parses and runs on both Python 2
        and Python 3.
        """
        print('Training model ...')
        # load params
        self.maxlen_userUtter = self.train_data.maxlen_userUtter
        self.word_vocab_size = self.train_data.word_vocab_size
        self.userIntent_vocab_size = self.train_data.userIntent_vocab_size
        self.userTag_vocab_size = self.train_data.userTag_vocab_size
        self.id2word = self.train_data.id2word
        self.id2userTag = self.train_data.id2userTag
        self.id2userIntent = self.train_data.id2userIntent
        self.userTag2id = self.train_data.userTag2id
        other_npz = '{}/other_vars.npz'.format(self.model_folder)
        train_vars = {
            'id2userTag': self.id2userTag,
            'id2word': self.id2word,
            'id2userIntent': self.id2userIntent,
            'userTag2id': self.userTag2id,
            'userTag_vocab_size': self.userTag_vocab_size,
            'userIntent_vocab_size': self.userIntent_vocab_size,
            'word_vocab_size': self.word_vocab_size,
            'maxlen_userUtter': self.maxlen_userUtter
        }
        np.savez_compressed(other_npz, **train_vars)
        # The sizes are also stored in self.params before the graph is built.
        self.params['maxlen_userUtter'] = self.maxlen_userUtter
        self.params['word_vocab_size'] = self.word_vocab_size
        self.params['userTag_vocab_size'] = self.userTag_vocab_size
        self.params['userIntent_vocab_size'] = self.userIntent_vocab_size
        print_params(self.params)
        # build model graph, save graph and plot graph
        self._build()
        self._plot_graph()
        graph_yaml = '{}/graph-arch.yaml'.format(self.model_folder)
        with open(graph_yaml, 'w') as fyaml:
            fyaml.write(self.model.to_yaml())
        # load train data
        X_train = self.train_data.userUtter_encodePad
        tag_train = self.train_data.userTag_1hotPad
        intent_train = self.train_data.userIntent_vecBin
        train_utter_txt = self.train_data.userUtter_txt
        train_intent_txt = self.train_data.userIntent_txt
        train_tag_txt = self.train_data.userTag_txt
        train_target_fname = '{}/train.target'.format(self.model_folder)
        writeUtterTagIntentTxt(train_utter_txt, train_tag_txt,
                               train_intent_txt, train_target_fname)
        # load dev data
        X_dev = self.dev_data.userUtter_encodePad
        tag_dev = self.dev_data.userTag_1hotPad
        intent_dev = self.dev_data.userIntent_vecBin
        dev_utter_txt = self.dev_data.userUtter_txt
        dev_intent_txt = self.dev_data.userIntent_txt
        dev_tag_txt = self.dev_data.userTag_txt
        dev_target_fname = '{}/dev.target'.format(self.model_folder)
        writeUtterTagIntentTxt(dev_utter_txt, dev_tag_txt, dev_intent_txt,
                               dev_target_fname)
        # get mask matrix for train and dev set: 1 where the encoded input is
        # non-zero, 0 elsewhere (presumably 0 is the padding id -- confirm)
        mask_array_train = np.zeros_like(X_train)
        mask_array_train[X_train != 0] = 1
        mask_array_dev = np.zeros_like(X_dev)
        mask_array_dev[X_dev != 0] = 1
        # jointly training
        for ep in range(self.epoch_nb):
            print('<Epoch {}>'.format(ep))
            print('------------------------------------------------------------')
            print(X_train)
            # One epoch per fit() call so the dev set can be scored and the
            # weights saved after every epoch; the mask is used as per-token
            # sample weight for the slot output only.
            self.model.fit(x=X_train,
                           y={
                               'slot_output': tag_train,
                               'intent_output': intent_train
                           },
                           sample_weight={
                               'slot_output': mask_array_train,
                               'intent_output': None
                           },
                           batch_size=self.batch_size,
                           nb_epoch=1,
                           verbose=2)

            tag_probs, intent_probs = self.model.predict(X_dev)
            # calculate token-level scores
            precision_tag, recall_tag, fscore_tag, accuracy_frame_tag = eval_slotTagging(
                tag_probs, mask_array_dev, tag_dev, self.userTag2id['tag-O'])
            print(
                'SlotTagging: ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}'
                .format(ep, precision_tag, recall_tag, fscore_tag,
                        accuracy_frame_tag))
            # eval_intentPredict also returns the threshold that is reused
            # below for the frame accuracy and for the written predictions.
            precision_intent, recall_intent, fscore_intent, accuracy_frame_intent, threshold = eval_intentPredict(
                intent_probs, intent_dev)
            print(
                'Intent Prediction: ep={}, precision={:.4f}, recall={:.4f}, fscore={:.4f}, accuracy_frame={:.4f}, threshold={:.4f}'
                .format(ep, precision_intent, recall_intent, fscore_intent,
                        accuracy_frame_intent, threshold))
            accuracy_frame_both = getNLUframeAccuracy(tag_probs,
                                                      mask_array_dev, tag_dev,
                                                      intent_probs, intent_dev,
                                                      threshold)
            print('NLU Frame: ep={}, accuracy={:.4f}'.format(
                ep, accuracy_frame_both))
            dev_tag_pred_txt, dev_intent_pred_txt = getNLUpred(
                tag_probs, mask_array_dev, self.id2userTag, intent_probs,
                threshold, self.id2userIntent)
            dev_results_fname = '{}/dev_results/dev_ep={}.pred'.format(
                self.model_folder, ep)
            writeUtterTagIntentTxt(dev_utter_txt, dev_tag_pred_txt,
                                   dev_intent_pred_txt, dev_results_fname)
            print('Write dev results: {}'.format(dev_results_fname))
            # The weights file name records the epoch and its dev scores.
            weights_fname = '{}/weights/ep={}_tagF1={:.4f}frameAcc={:.4f}_intentF1={:.4f}frameAcc={:.4f}th={:.4f}.h5'.format(
                self.model_folder, ep, fscore_tag, accuracy_frame_tag,
                fscore_intent, accuracy_frame_intent, threshold)
            print('Saving Model: {}'.format(weights_fname))
            self.model.save_weights(weights_fname, overwrite=True)
Esempio n. 21
0
def main_process_class(dtrain,
                       dtest,
                       params,
                       epsilon,
                       y_test,
                       stop_value=None):
    """Search xgboost parameters for a binary classifier by stepping.

    Every round trains one booster (10 boosting rounds) per candidate
    parameter set, thresholds its predictions on ``dtest`` at 0.5 and counts
    how many match ``y_test``. The candidate with the highest count so far is
    kept and the next candidates are generated around it. When the best count
    changes by less than ``epsilon`` between rounds the step size is reduced;
    the search ends once that happens after at least 500 rounds.

    Args:
        dtrain: Training data passed to ``xgb.train``.
        dtest: Data passed to the booster's ``predict``.
        params: Starting parameter dict (not modified; copied).
        epsilon: Convergence tolerance on the best match count.
        y_test: True labels, compared element-wise with the 0/1 predictions.
        stop_value: Unused.

    Returns:
        Tuple ``(best_params, maxacc, iterations)`` where ``maxacc`` is the
        highest number of correct predictions and ``best_params`` the
        parameter set that achieved it (the starting parameters if no
        candidate scored above zero).
    """
    print("Starting hyperparameter tuning with start params:")
    print(utils.print_params(params))
    print("With epsilon (stop) value: {}".format(epsilon))
    gradients = utils.get_gradient_list(params, global_constraint.STEP)
    steps = utils.get_possible_steps(params, gradients, [])
    maxacc = 0
    step_mae = 0  # best match count at the end of the previous round
    iterations = 0
    best_params = params.copy()
    last_steps = []
    while True:
        last_steps = steps.copy()
        for step_params in steps:
            print(utils.print_params(step_params))

            cv_results = xgb.train(
                step_params,
                dtrain,
                num_boost_round=10,
            )
            print(step_mae)
            preds = cv_results.predict(dtest)
            # Threshold the raw predictions into 0/1 labels.
            preds = [1 if z > 0.5 else 0 for z in preds]

            # Predictions that agree with the true label.
            res = [i for i, j in zip(preds, y_test) if i == j]
            print(len(res))

            print(100 * len(res) / len(preds))

            if len(res) > maxacc:
                maxacc = len(res)
                best_params = step_params.copy()

        iterations = iterations + 1
        print(iterations)
        if (abs(step_mae - maxacc) < epsilon):
            if (iterations < 500):
                # Converged at the current step size: refine and continue.
                utils.reduce_steps()
                step_mae = maxacc
                steps = utils.get_possible_steps(best_params, gradients,
                                                 last_steps)
            else:
                break
        else:
            step_mae = maxacc
            print("aaaa")
            steps = utils.get_possible_steps(best_params, gradients,
                                             last_steps)

    print("Found best solution:")
    print(utils.print_params(best_params))
    print("MAE:")
    print(maxacc)

    # Return the parameters that produced maxacc; the initial ``params`` were
    # returned before, which discarded the result of the search.
    return (best_params, maxacc, iterations)
Esempio n. 22
0
def main():
    """Train ``EmotionX_Model`` and periodically score it on the test set.

    Reads every setting from ``EMOTIONX_MODEL_HPARAMS``. During training the
    model is evaluated whenever ``i_step % print_per == 0``; per-class
    precision / recall / F1 are printed and micro-F1 plus the current training
    loss are written to TensorBoard.

    Raises:
        NotImplementedError: if CUDA is not available.
    """
    if not torch.cuda.is_available():
        raise NotImplementedError()
    hparams = type('', (object, ), EMOTIONX_MODEL_HPARAMS)()  # dict to class

    # data
    fr_train_dialogs, fr_train_labels = load_data(hparams,
                                                  hparams.fr_train_path)
    train_dialogs = fr_train_dialogs
    train_labels = fr_train_labels
    test_dialogs, test_labels = load_data(hparams, hparams.fr_test_path)
    assert len(train_dialogs) == len(train_labels)
    assert len(test_dialogs) == len(test_labels)

    # hyper-parameter
    # Occurrences of label ids 0..4 over the flattened training labels.
    hparams.n_appear = [sum(train_labels, []).count(i) for i in range(5)]
    max_i = len(train_dialogs) // hparams.batch_size
    total_step = 0  # incremented once per evaluation, used as the log x-axis
    print_per = len(train_dialogs) // 4
    highest_micro_f1 = 0.
    # Initialised up front: it was previously bound only when micro-F1
    # improved, so a first evaluation with micro-F1 == 0 raised NameError in
    # the summary print below.
    friend_high_step = 0

    # model
    model = EmotionX_Model(hparams)
    model.cuda()
    model.train()
    print_params(model)
    optimizer = torch.optim.Adam(model.parameters(), hparams.learning_rate)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=max_i)
    writer = SummaryWriter(log_dir=hparams.log_dir)

    # train
    for i_epoch in range(hparams.n_epoch):
        train_dialogs, train_labels = shuffle_trainset(train_dialogs,
                                                       train_labels)
        scheduler.step()

        for i_step in tqdm(range(max_i)):
            batch_dialogs = get_batch(train_dialogs, hparams.batch_size,
                                      i_step)
            batch_labels = get_batch(train_labels, hparams.batch_size, i_step)
            optimizer.zero_grad()
            pred_labels = model(batch_dialogs)
            loss = model.cal_loss(batch_labels, pred_labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), hparams.clip)
            optimizer.step()

            # print
            if i_step % print_per == 0:
                model.eval()
                # Per-class counters accumulated over all full test batches.
                n_appear = [0] * (hparams.n_class - 1)
                n_correct = [0] * (hparams.n_class - 1)
                n_positive = [0] * (hparams.n_class - 1)
                for i_test in range(len(test_dialogs) // hparams.batch_size):
                    batch_dialogs = get_batch(test_dialogs, hparams.batch_size,
                                              i_test)
                    batch_labels = get_batch(test_labels, hparams.batch_size,
                                             i_test)
                    pred_labels = model(batch_dialogs)
                    counts = model.count_for_eval(batch_labels, pred_labels)
                    n_appear = [x + y for x, y in zip(n_appear, counts[0])]
                    n_correct = [x + y for x, y in zip(n_correct, counts[1])]
                    n_positive = [x + y for x, y in zip(n_positive, counts[2])]
                uwa, wa = model.get_uwa_and_wa(n_appear, n_correct)
                precision, recall, f1, micro_f1, macro_f1 = model.get_f1_scores(
                    n_appear, n_correct, n_positive)

                print('i_epoch: ', i_epoch)
                print('i_total_step: ', total_step)
                print('n_true:\t\t\t', n_appear)
                print('n_positive:\t\t', n_positive)
                print('n_true_positive:\t', n_correct)
                print('precision:\t[%.4f, %.4f, %.4f, %.4f]' %
                      (precision[0], precision[1], precision[2], precision[3]))
                print('recall:\t\t[%.4f, %.4f, %.4f, %.4f]' %
                      (recall[0], recall[1], recall[2], recall[3]))
                print('f1:\t\t[%.4f, %.4f, %.4f, %.4f]' %
                      (f1[0], f1[1], f1[2], f1[3]))
                if micro_f1 > highest_micro_f1:
                    highest_micro_f1 = micro_f1
                    friend_high_step = total_step
                print('Micro F1: %.4f (<=%.4f at %d-th total_step)' %
                      (micro_f1, highest_micro_f1, friend_high_step))
                print()

                # write
                writer.add_scalar(hparams.log_micro_f1 + 'fr', micro_f1,
                                  total_step)
                writer.add_scalar(hparams.log_wce_loss + 'fr', loss,
                                  total_step)
                total_step += 1

                # Back to training mode for the next optimisation step.
                model.train()
# Horovod: pin GPU to be used to process local rank (one GPU per process)
tfconfig =  tf.compat.v1.ConfigProto() #tf.ConfigProto()
# Let the session grow its GPU memory use on demand.
tfconfig.gpu_options.allow_growth = True
tfconfig.gpu_options.visible_device_list = str(hvd.local_rank())
tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=tfconfig))

################################################################################
# Argument handling
################################################################################

params = p.parse_args()

# Only rank 0 echoes the command line and the parsed parameters.
if hvd.rank()==0:
    print_cl(sys.argv)
    print_params(params)

if params.yaml_dump_then_exit:
    sys.exit(0)

#-------------------------------- optimizer -----------------------------------#
# The optimizer name in params.optimizer is replaced by an optimizer instance;
# the learning rate is multiplied by the number of Horovod processes.
# NOTE(review): any name other than 'adam' or 'sgd' leaves params.optimizer
# unchanged (presumably a string) before it is wrapped below -- confirm that
# the argument parser restricts the choices.
if params.optimizer=='adam':
    params.optimizer = Adam(lr=params.learning_rate * hvd.size())
elif params.optimizer=='sgd':
    params.optimizer = SGD(lr=params.learning_rate * hvd.size())

# Wrap the optimizer in Horovod's distributed optimizer.
params.optimizer = hvd.DistributedOptimizer(params.optimizer)

#------------------------------- model reloading ------------------------------#

reloading_model = False
Esempio n. 24
0
def main():
    """Train (or only evaluate) a quantised model described by ``args``.

    Reads the module-level ``args``. The function seeds the RNGs, derives a
    time-stamped ``args.job_dir``, builds the DALI data loaders and the model,
    optionally loads weights from ``args.resume`` and then either runs a
    single evaluation (``args.test_only``) or trains for
    ``args.train_epochs`` epochs, saving a checkpoint after every epoch.

    NOTE(review): ``train_loader`` / ``val_loader`` are only created for
    'imagenet', 'tinyimagenet' and 'cifar10', and the data-length variables
    only for 'cifar10' and 'imagenet'; other datasets fail with a NameError
    further down -- confirm the supported set.
    """
    start_epoch = 0
    best_prec1 = 0.0

    # A fresh random seed is drawn on every run and applied to numpy and torch.
    seed = np.random.randint(10000)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        cudnn.benchmark = False
        # cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    # Output directory: <job_dir>/<dataset>/<arch...>[/<mission>]/<timestamp>
    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'

    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'

    _make_dir(args.job_dir)
    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # The same bit-width is repeated for (num_layers - 2) * 2 entries.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits for i in range(n_layers)]
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')

    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    elif args.dataset == 'tinyimagenet':
        IMAGE_SIZE = 64
    else:
        IMAGE_SIZE = 224

    if args.dataset == 'imagenet':
        train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'tinyimagenet':
        train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'cifar10':
        train_loader = get_cifar_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers)
        val_loader = get_cifar_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers)

    # Create model
    print('=> Building model...')
    if args.dataset == 'cifar10':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        train_data_length = 50000
        eval_data_length = 10000

    # arch = args.arch
    # model = models.__dict__[arch]

    model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio}
    if args.arch == 'mobilenetv2':
        model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio, 'width_mult': args.width_mult}
    # The constructor is looked up by name in
    # models.<dataset>.<archtype>.<arch>; it returns the model together with
    # its list of bit-widths.
    if 'vgg' == args.arch and args.batchnorm:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}_bn'](model_config)
    elif 'resnet20' == args.arch:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}'](model_config)
    else:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}'](model_config)

    model = model.to(device)
    print_logger.info(f'model_k_bits_list {model_k_bits}')
    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    # Learning rate is multiplied by 0.1 at 50% and 75% of the training epochs.
    scheduler = MultiStepLR(optimizer, milestones=[0.5 * args.train_epochs, 0.75 * args.train_epochs], gamma=0.1)

    # Optionally resume from a checkpoint
    resume = args.resume
    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        # (A leftover pdb.set_trace() used to stop every resumed run here.)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)

    if args.test_only:
        # Same argument list as the test() call in the training loop below;
        # eval_data_length was missing from this call before.
        test_prec1 = test(args, device, val_loader, eval_data_length, model, criterion, writer_test, print_logger, start_epoch)
        print('=> Test Prec@1: {:.2f}'.format(test_prec1))
        print(f'sample k_bits {kbits_list}')
        return

    # Training always starts at epoch 0, also after loading a checkpoint; only
    # the weights are taken from it.
    for epoch in range(0, args.train_epochs):
        scheduler.step(epoch)
        train_loss, train_prec1 = train(args, device, train_loader, train_data_length, model, criterion, optimizer, writer_train, print_logger, epoch)
        test_prec1 = test(args, device, val_loader, eval_data_length, model, criterion, writer_test, print_logger, epoch)

        is_best = best_prec1 < test_prec1
        best_prec1 = max(test_prec1, best_prec1)

        state = {
                'state_dict': model.state_dict(),
                'test_prec1': test_prec1,
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'epoch': epoch + 1
            }
        ckpt.save_model(state, epoch + 1, is_best, mode='train')
        print_logger.info('==> BEST ACC {:.3f}'.format(best_prec1.item()))
Esempio n. 25
0
            nn.Sigmoid())

    def forward(self, input):
        """Run ``self.main`` on ``input`` and return the result as a 1-D tensor.

        The output of ``self.main`` is reshaped to a single column and that
        column dimension is then squeezed away, leaving one value per row.
        """
        return self.main(input).view(-1, 1).squeeze(1)


# Module-level DCGAN training setup: TensorBoard writer, device selection,
# generator / discriminator construction, loss, fixed noise and optimizers.
writer = SummaryWriter('runs/dcgan')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
netG = Generator().to(device)
# Module.apply calls weights_init on every sub-module of the network.
netG.apply(weights_init)

netD = Discriminator().to(device)
netD.apply(weights_init)

utils.print_params(netG)
utils.print_params(netD)

criterion = nn.BCELoss()

# A fixed batch of 64 noise vectors of shape (nz, 1, 1); nz is defined
# elsewhere in the file.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)
# Label values for real and generated samples.
real_label = 1
fake_label = 0

# setup optimizer
lr = 0.0002
beta1 = 0.5
# Both networks use Adam with the same learning rate and betas.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

last_epoch = 0
Esempio n. 26
0
if __name__ == "__main__":

    file = "./MNIST.json"
    args = load_json(json_file=file)

    file_path = 'train_gan_file'
    model_name = 'GAN'
    lr = args[model_name]['lr']
    batch_size = args[model_name]['batch_size']
    z_dim = args[model_name]['z_dim']
    epochs = args[model_name]['epochs']
    n_critic = args[model_name]['n_critic']

    print("---------------------Training [%s]------------------------------" %
          model_name)
    utils.print_params(args["dataset"], args[model_name])

    dataloader = init_dataloader(args, file_path, batch_size, mode="gan")

    G = GeneratorMNIST(z_dim)
    DG = DGWGAN32()

    G = torch.nn.DataParallel(G).cuda()
    DG = torch.nn.DataParallel(DG).cuda()

    dg_optimizer = torch.optim.Adam(DG.parameters(), lr=lr, betas=(0.5, 0.999))
    g_optimizer = torch.optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.999))

    step = 0

    for epoch in range(epochs):
Esempio n. 27
0
    def loop(sess: tf.Session):
        """Run the training loop in ``sess``.

        Initialises all variables, then either restores ``restore_checkpoint``
        (on the root rank) or performs the data-dependent initialisation,
        broadcasts the parameters from rank 0 and finalises the graph.
        Afterwards it loops over epochs: every ``epochs_per_val`` epochs it
        runs validation and, where a saver exists, saves a checkpoint; every
        ``steps_per_log`` steps it gathers the mean loss and gradient norm
        from all ranks and logs them on the root rank.

        Args:
            sess: TensorFlow session holding the training graph.
        """
        i_step = 0

        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        if restore_checkpoint is not None:
            # Restore from checkpoint.
            # The step counter is parsed from the checkpoint name on every
            # rank. It used to be set on the root only, which left the other
            # ranks at step 0 and therefore with a different lr_schedule value
            # and a different logging/gather cadence than the root.
            i_step = int(restore_checkpoint.split('-')[-1])
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                print('Restoring from step:', i_step)
                saver.restore(sess, restore_checkpoint)
            else:
                # Non-root ranks receive the weights through the broadcast
                # below and never save.
                saver = None
        else:
            # No checkpoint: perform data dependent initialization
            if is_root: print('Data dependent init')
            init_loss = sess.run(
                init_loss_sym, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            if is_root: print('Init loss:', init_loss * bpd_scale_factor)
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None
        if is_root: print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if is_root:
            print('Training')
            print(f'Total GFLOPS: {flops}')
            print_params()

        # Rolling windows over the last steps_per_log steps.
        loss_hist = deque(maxlen=steps_per_log)
        gnorm_hist = deque(maxlen=steps_per_log)
        for i_epoch in range(99999999999):
            if i_epoch % epochs_per_val == 0:
                run_validation(sess, i_step=i_step)
                if saver is not None:
                    saver.save(sess,
                               os.path.join(checkpointdir, 'model'),
                               global_step=i_step)

            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(  # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):

                lr = lr_schedule(i_step)
                loss, gnorm, _ = sess.run([loss_sym, grad_norm_sym, opt_sym], {
                    x_sym: batch,
                    lr_sym: lr
                })
                loss_hist.append(loss)
                gnorm_hist.append(gnorm)

                # Skip timing the very first step, which will be unusually slow due to TF initialization
                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    # Every rank contributes its window mean; the gathered
                    # lists are only used on the root.
                    loss_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(loss_hist)),
                                                            root=0)
                    gnorm_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(gnorm_hist)),
                                                             root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)

                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) * bpd_scale_factor)),
                            ('gnorm', float(np.mean(gnorm_hist_means))),
                            ('lr', float(lr)),
                            # ('fps', steps_per_sec * total_bs),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)

                i_step += 1
Esempio n. 28
0
        test_loader = DGLREDataloader(test_set,
                                      batch_size=opt.test_batch_size,
                                      dataset_type='test')

        model = GAIN_GloVe(opt)
    else:
        assert 1 == 2, 'please choose a model from [bert, bilstm].'

    import gc

    del train_set
    gc.collect()

    # print(model.parameters)
    print_params(model)

    start_epoch = 1
    pretrain_model = opt.pretrain_model
    lr = opt.lr
    model_name = opt.model_name

    if pretrain_model != '':
        chkpt = torch.load(pretrain_model, map_location=torch.device('cpu'))
        model.load_state_dict(chkpt['checkpoint'])
        logging('load checkpoint from {}'.format(pretrain_model))
    else:
        assert 1 == 2, 'please provide checkpoint to evaluate.'

    model = get_cuda(model)
    model.eval()
Esempio n. 29
0
def evaluate(
    *,
    flow_constructor,
    seed,
    restore_checkpoint,
    total_bs,
    iw_samples=4096,
    dtype=tf.float32,
    dataset='cifar10',
    samples_filename='samples.png',
):
    """Restore a checkpoint and evaluate the flow model under Horovod/MPI.

    After restoring, the function (in this order) optionally writes a tiled
    image of model samples, checks that every MPI process sees the validation
    data in the same order, runs a single-sample validation pass, and finally
    runs an importance-weighted (IWAE-style) evaluation one example at a time.
    Results are printed by the root process only; nothing is returned.

    Args:
        flow_constructor: zero-argument callable returning the tuple
            ``(dequant_flow, flow, posterior_flow)``.
        seed: base seed; combined with the Horovod rank and size so that each
            process is seeded differently.
        restore_checkpoint: checkpoint path handed to ``tf.train.Saver``
            (``~`` is expanded).
        total_bs: batch size summed over all processes; must be divisible by
            ``hvd.size()``.
        iw_samples: number of importance samples per validation example; must
            be a multiple of ``total_bs``.
        dtype: TensorFlow dtype of the input placeholder; its numpy
            counterpart is passed to ``load_data``.
        dataset: dataset name forwarded to ``load_data``.
        samples_filename: where the root process saves the sample grid; a
            falsy value skips sampling.
    """
    hvd, MPI, is_root, mpi_average = setup_horovod()

    restore_checkpoint = os.path.expanduser(restore_checkpoint)

    # Seeding and logging setup
    # Each rank gets a distinct seed derived from the user-supplied one.
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    # The IW loop below gathers total_bs log-ratios per session run, so the
    # requested sample count must be reachable in whole runs.
    assert iw_samples % total_bs == 0

    if is_root:
        print('===== EVALUATING {} ({} IW samples) ====='.format(
            restore_checkpoint, iw_samples))

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)
    # Other ranks wait here until root has finished its first load.
    MPI.COMM_WORLD.Barrier()
    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    # H, W, Cx are not used below; the unpacking fails unless each example has
    # exactly three dimensions.
    H, W, Cx = img_shp
    # Converts a per-example loss in nats into bits per dimension.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    # This is a fake training graph. Just used to mimic flow_training, so we can load from the saver
    # (its outputs are discarded; only the variables it creates matter).
    build_forward(x=x_sym,
                  dequant_flow=dequant_flow,
                  flow=flow,
                  posterior_flow=posterior_flow,
                  flow_kwargs=dict(vcfg=VarConfig(init=False,
                                                  ema=None,
                                                  dtype=dtype),
                                   dropout_p=0,
                                   verbose=is_root)
                  # note dropout is 0: it doesn't matter
                  )

    # EMA
    params = tf.trainable_variables()
    if is_root: print_params()
    ema = tf.train.ExponentialMovingAverage(
        decay=0.9999999999999)  # ema turned off
    # The op itself is never run here; calling ema.apply() creates the shadow
    # variables that the validation graph and the Saver refer to.
    maintain_averages_op = tf.group(ema.apply(params))

    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, val_logratio_sym = build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=val_flow_kwargs)

    # Concatenates the per-process log-ratios so every rank sees all of them.
    allgathered_val_logratios_sym = hvd.allgather(val_logratio_sym)
    # for debugging invertibility
    # val_dequant_x_sym_rep = tf.reshape(tf.tile(tf.expand_dims(val_dequant_x_sym, 0), [sampling_times, 1, 1, 1, 1]), [-1] + val_dequant_x_sym.shape.as_list()[1:])
    # val_inverr_sym = tf.reduce_max(tf.abs(val_dequant_x_sym_rep - flow.inverse(val_y_sym, **val_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root: print('===== Sampling graph =====')
    # Each process draws a fixed 64 samples; they are gathered across ranks.
    samples_sym, _ = flow.sample(64, val_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    # Building the validation/sampling graphs must not have created any new
    # trainable variables beyond those of the fake training graph.
    assert len(tf.trainable_variables()) == len(params)

    def run_iw_eval(sess):
        """Print the importance-weighted bits/dim over the validation set."""
        if is_root:
            print('Running IW eval with {} samples...'.format(iw_samples))
        # Go through one example at a time
        all_val_losses = []
        # Only root shows a progress bar (trange); other ranks use plain range.
        for i_example in (trange if is_root else range)(len(data_val)):
            # take this single example and tile it
            batch_x = np.tile(data_val[i_example, None, ...],
                              (local_bs, 1, 1, 1))
            # repeatedly evaluate logd for the IWAE bound
            # float64 keeps the log-mean-exp below numerically well behaved.
            batch_logratios = np.concatenate([
                sess.run(allgathered_val_logratios_sym, {x_sym: batch_x})
                for _ in range(iw_samples // total_bs)
            ]).astype(np.float64)
            assert batch_logratios.shape == (iw_samples, )
            # log [1/n \sum_i exp(r_i)] = log [exp(-b) 1/n \sum_i exp(r_i + b)] = -b + log [1/n \sum_i exp(r_i + b)]
            # Here the shift is the maximum log-ratio, subtracted before exp
            # and added back afterwards.
            shift = batch_logratios.max()
            all_val_losses.append(
                -bpd_scale_factor *
                (shift + np.log(np.mean(np.exp(batch_logratios - shift)))))
            if i_example % 100 == 0 and is_root:
                print(i_example, np.mean(all_val_losses))
        if is_root:
            print(f'Final ({len(data_val)}):', np.mean(all_val_losses))

    def run_standard_eval(sess):
        """Print single-sample validation bits/dim aggregated over ranks."""
        if is_root:
            print('Running standard eval...')
        # Standard validation (single sample)
        # Each rank evaluates its own contiguous shard of the validation set.
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        # The final partial batch is dropped because x_sym has a fixed batch
        # dimension of local_bs.
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        # NOTE(review): mpi_average presumably returns the cross-rank mean and
        # the total number of per-batch losses -- confirm against its definition.
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            for k, v in [
                ('val_bpd', bpd_scale_factor * val_loss),
                ('num_val_examples', total_count * local_bs),
            ]:
                print(k, v)

    def run_sampling_only(sess):
        """Draw samples on every rank; root tiles and saves them as an image."""
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            # Imported lazily so PIL is only needed on root when sampling.
            from PIL import Image
            Image.fromarray(
                tile_imgs(np.clip(samples, 0, 255).astype(
                    np.uint8))).save(samples_filename)
            print('Saved {} samples to {}'.format(len(samples),
                                                  samples_filename))
            # print('Sampled in {} seconds'.format(sample_time))

    # Run
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        # Restore from checkpoint
        # Only root reads the checkpoint; the broadcast below copies the
        # restored values from rank 0 to every other rank.
        if is_root:
            print('Restoring checkpoint:', restore_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint)
            print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        # No ops may be added to the graph past this point.
        sess.graph.finalize()

        if samples_filename:
            run_sampling_only(sess)

        # Make sure data is the same on all MPI processes
        # A handful of fixed validation rows is gathered on root and compared.
        tmp_inds = [0, 183, 3, 6, 20, 88]
        check_batch = np.ascontiguousarray(data_val[tmp_inds])
        gathered_batches = np.zeros(
            (hvd.size(),
             *check_batch.shape), check_batch.dtype) if is_root else None
        MPI.COMM_WORLD.Gather(check_batch, gathered_batches, root=0)
        if is_root:
            assert all(
                np.allclose(check_batch, b)
                for b in gathered_batches), 'data must be in the same order!'
            print('data ordering ok')

        # Run validation
        run_standard_eval(sess)
        run_iw_eval(sess)
Esempio n. 30
0
def main():
    """Prepare a run directory, data and model, then launch the bit-width search.

    All configuration is read from the module-level ``args`` namespace. The
    function seeds the RNGs, picks the device, creates a timestamped job
    directory with loggers and TensorBoard writers, loads the training data,
    builds the requested model, optionally resumes from ``args.resume`` and
    finally calls ``architecture_search``.

    Raises:
        ValueError: if ``args.dataset`` is neither ``'imagenet'`` nor
            ``'cifar10'`` (no training data can be loaded for anything else).
    """
    start_epoch = 0

    # A fresh random seed is drawn on every run and applied to all RNGs.
    seed = np.random.randint(10000)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        cudnn.benchmark = False
        cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    # Job directory layout: <job_dir>/<dataset>/<arch tag>[/<mission>]/<timestamp>
    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'
    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'

    _make_dir(args.job_dir)
    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    log_file = os.path.join(args.job_dir, 'search_log.csv')
    # The writers are not used further in this function; they are still
    # constructed as in the original setup.
    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # Every layer starts with the same bit-width.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits] * n_layers
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')

    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    else:
        IMAGE_SIZE = 224

    if args.dataset == 'imagenet':
        train_data = get_imagenet_iter_torch(type='train',
                                             image_dir=args.base_data_dir,
                                             batch_size=args.train_batch_size,
                                             num_threads=args.workers,
                                             crop=IMAGE_SIZE,
                                             device_id=0,
                                             num_gpus=1)
    elif args.dataset == 'cifar10':
        train_transform, test_transform = utils._data_transforms_cifar10(
            cutout=args.cutout)
        train_data = torchvision.datasets.CIFAR10(args.data_dir,
                                                  train=True,
                                                  transform=train_transform,
                                                  download=True)
    else:
        # Without this guard an unsupported dataset would only fail much
        # later, with an unbound ``train_data`` at the search call.
        raise ValueError(
            "unsupported dataset {!r}: expected 'imagenet' or 'cifar10'".format(
                args.dataset))

    # Create model
    print('=> Building model...')
    # The 'mnist' test is retained from the original chain but can no longer
    # be reached, since only 'imagenet' and 'cifar10' pass the guard above.
    if args.dataset == 'cifar10' or args.dataset == 'mnist':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        train_data_length = 50000
        eval_data_length = 10000

    model_config = {
        'k_bits': kbits_list,
        'num_layers': args.num_layers,
        'pre_k_bits': args.pre_k_bits,
        'ratio': args.ratio,
    }
    if args.arch == 'mobilenetv2':
        model_config['width_mult'] = args.width_mult

    # The factory lives in models.<dataset>.<archtype>.<arch>; only its name
    # inside that module depends on the architecture variant.
    if 'vgg' == args.arch and args.batchnorm:
        factory_name = f'{args.arch}{args.num_layers}_bn'
    elif 'resnet20' == args.arch:
        factory_name = f'{args.arch}'
    else:
        factory_name = f'{args.arch}{args.num_layers}'
    model_module = import_module(
        f"models.{args.dataset}.{args.archtype}.{args.arch}")
    model, model_k_bits = model_module.__dict__[factory_name](model_config)

    model = model.to(device)

    print_logger.info(f'model_k_bits_list {model_k_bits}')

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()

    # Optionally resume from a checkpoint
    resume = args.resume

    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)
    else:
        # With nothing to resume from, the search receives the freshly
        # initialised weights as its "checkpoint".
        checkpoint = model.state_dict()

    choose_model, k_bits = architecture_search(
        args=args,
        nn_model=model,
        device=device,
        checkpoint=checkpoint,
        step=args.step,
        criterion=criterion,
        train_data=train_data,
        train_batch_size=args.train_batch_size,
        eval_batch_size=args.eval_batch_size,
        train_data_length=train_data_length,
        eval_data_length=eval_data_length,
        clip_value=args.grad_clip,
        lam=args.lam,
        gpu_id=0,
        print_logger=print_logger,
        ckpt=ckpt,
        log_file=log_file)
Esempio n. 31
0
def train(
    *,
    flow_constructor,
    logdir,
    lr_schedule,
    dropout_p,
    seed,
    init_bs,
    total_bs,
    val_total_bs,
    ema_decay,
    steps_per_log,
    epochs_per_val,
    max_grad_norm,
    dtype=tf.float32,
    scale_loss=None,
    restore_checkpoint=None,
    scale_grad=None,
    dataset='cifar10',
    steps_per_samples=2000,
):
    """Train a flow model with Horovod data parallelism (TensorFlow 1.x).

    Builds the (optional) data-dependent-init graph, the training graph, an
    EMA-backed validation/sampling graph and the optimizer, then runs the
    training loop. Every ``epochs_per_val`` epochs the model is validated and,
    on the root process, checkpointed under ``<logdir>/checkpoints``. The
    epoch loop is effectively unbounded, so the function does not return in
    normal operation.

    Args:
        flow_constructor: zero-argument callable returning the tuple
            ``(dequant_flow, flow, posterior_flow)``.
        logdir: base log directory; the Horovod size and a timestamp are
            appended to it.
        lr_schedule: callable mapping the global step to a learning rate.
        dropout_p: dropout probability for the init and training graphs.
        seed: base seed, combined with Horovod rank and size per process.
        init_bs: batch size used for data-dependent initialization.
        total_bs: training batch size summed over all processes; must be
            divisible by ``hvd.size()``.
        val_total_bs: validation batch size summed over all processes; must be
            divisible by ``hvd.size()``.
        ema_decay: decay of the exponential moving average of the parameters.
        steps_per_log: logging interval in steps; also the length of the
            loss / gradient-norm history that is averaged for each log entry.
        epochs_per_val: validation and checkpoint interval in epochs.
        max_grad_norm: global-norm clipping threshold, or ``None`` to disable
            clipping (the logged gradient norm is then a constant 0).
        dtype: TensorFlow dtype of placeholders; its numpy counterpart is
            passed to ``load_data``.
        scale_loss: if given, the loss is multiplied by this factor before
            differentiation and the gradients are divided by it afterwards.
        restore_checkpoint: checkpoint path to resume from, expected to end in
            ``-<step>``; ``None`` starts from data-dependent initialization.
        scale_grad: if given, gradients are divided by this factor.
        dataset: dataset name forwarded to ``load_data``.
        steps_per_samples: accepted for interface compatibility; not used.

    Raises:
        ValueError: if ``restore_checkpoint`` is given but does not end in an
            integer step after its last ``-``.
    """
    hvd, MPI, is_root, mpi_average = setup_horovod()

    # Seeding and logging setup
    # Each rank gets a distinct seed derived from the user-supplied one.
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    assert val_total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    val_local_bs = val_total_bs // hvd.size()

    # Setting up the logger
    logger = None
    logdir = '{}_mpi{}_{}'.format(os.path.expanduser(logdir), hvd.size(),
                                  time.time())
    checkpointdir = os.path.join(logdir, 'checkpoints')
    if is_root:
        print('Floating point format:', dtype)
        pprint(locals())
        os.makedirs(logdir)
        os.makedirs(checkpointdir)
        logger = TensorBoardOutput(logdir)

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)

    # Other ranks wait here until root has finished its first load.
    MPI.COMM_WORLD.Barrier()

    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    # H, W, Cx are not used below; the unpacking fails unless each example has
    # exactly three dimensions.
    H, W, Cx = img_shp
    # Converts a per-example loss in nats into bits per dimension.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()

    # FLOPs are only measured on the init graph, which does not exist when
    # resuming. Defaulting to None keeps the later report from failing on an
    # unbound name in that case.
    flops = None

    # Data-dependent init
    if restore_checkpoint is None:
        if is_root: print('===== Init graph =====')
        x_init_sym = tf.placeholder(dtype, [init_bs] + img_shp)
        init_loss_sym, _ = build_forward(x=x_init_sym,
                                         dequant_flow=dequant_flow,
                                         flow=flow,
                                         posterior_flow=posterior_flow,
                                         flow_kwargs=dict(vcfg=VarConfig(
                                             init=True, ema=None, dtype=dtype),
                                                          dropout_p=dropout_p,
                                                          verbose=is_root))
        flops = int(get_flops()) / (10**9)
    # Training
    if is_root: print('===== Training graph =====')
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    loss_sym, _ = build_forward(x=x_sym,
                                dequant_flow=dequant_flow,
                                flow=flow,
                                posterior_flow=posterior_flow,
                                flow_kwargs=dict(vcfg=VarConfig(init=False,
                                                                ema=None,
                                                                dtype=dtype),
                                                 dropout_p=dropout_p,
                                                 verbose=is_root))

    # EMA
    params = tf.trainable_variables()
    if is_root: print_params()
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    maintain_averages_op = tf.group(ema.apply(params))
    # Op for setting the ema params to the current non-ema params (for use after data-dependent init)
    name2var = {v.name: v for v in tf.global_variables()}
    copy_params_to_ema = tf.group([
        name2var[p.name.replace(':0', '') +
                 '/ExponentialMovingAverage:0'].assign(p) for p in params
    ])

    val_x_sym = tf.placeholder(dtype, [val_local_bs] + img_shp)
    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, _ = build_forward(x=val_x_sym,
                                    dequant_flow=dequant_flow,
                                    flow=flow,
                                    posterior_flow=posterior_flow,
                                    flow_kwargs=val_flow_kwargs)
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(dequant_x - flow.inverse(y, train_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root: print('===== Sampling graph =====')
    sample_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                              dropout_p=0,
                              verbose=is_root)
    samples_sym, _ = flow.sample(val_local_bs, sample_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    # Building the validation/sampling graphs must not have created any new
    # trainable variables beyond those of the training graph.
    assert len(tf.trainable_variables()) == len(params)

    def run_validation(sess, i_step):
        """Validate on this rank's shard, draw samples, and log on root."""
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        # The final partial batch is dropped because val_x_sym has a fixed
        # batch dimension of val_local_bs.
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {val_x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=val_local_bs,
                                          include_final_partial_batch=False)
        ])
        # NOTE(review): mpi_average presumably returns the cross-rank mean and
        # the total number of per-batch losses -- confirm against its definition.
        val_loss, total_count = mpi_average(shard_losses)
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            logger.writekvs(
                [('val_bpd', bpd_scale_factor * val_loss),
                 ('num_val_examples', total_count * val_local_bs),
                 ('samples',
                  tile_imgs(np.clip(samples, 0, 255).astype(np.uint8)))],
                i_step)

    if is_root: print('===== Optimization graph =====')
    # Optimization
    lr_sym = tf.placeholder(dtype, [], 'lr')
    optimizer = hvd.DistributedOptimizer(tf.train.AdamOptimizer(lr_sym))

    if scale_loss is None:
        grads_and_vars = optimizer.compute_gradients(loss_sym, var_list=params)
    else:
        # Differentiate the scaled loss, then undo the scaling on the grads.
        grads_and_vars = [(g / scale_loss, v)
                          for (g, v) in optimizer.compute_gradients(
                              loss_sym * scale_loss, var_list=params)]

    if scale_grad is not None:
        grads_and_vars = [(g / scale_grad, v) for (g, v) in grads_and_vars]
    if max_grad_norm is not None:
        clipped_grads, grad_norm_sym = tf.clip_by_global_norm(
            [g for (g, _) in grads_and_vars], max_grad_norm)
        grads_and_vars = [
            (cg, v) for (cg, (_, v)) in zip(clipped_grads, grads_and_vars)
        ]
    else:
        grad_norm_sym = tf.constant(0.)
    # One training step applies the gradients and updates the EMA shadows.
    opt_sym = tf.group(optimizer.apply_gradients(grads_and_vars),
                       maintain_averages_op)

    def loop(sess: tf.Session):
        """Initialize or restore the variables, then train indefinitely."""
        i_step = 0

        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        if restore_checkpoint is not None:
            # Restore from checkpoint
            # The resume step is derived from the checkpoint name on EVERY
            # rank. If only root advanced i_step, ranks would evaluate
            # lr_schedule at different steps and reach the periodic MPI
            # gather below on different iterations.
            restore_step = int(restore_checkpoint.split('-')[-1])
            i_step = restore_step
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                print('Restoring from step:', restore_step)
                saver.restore(sess, restore_checkpoint)
            else:
                saver = None
        else:
            # No checkpoint: perform data dependent initialization
            if is_root: print('Data dependent init')
            init_loss = sess.run(
                init_loss_sym, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            if is_root: print('Init loss:', init_loss * bpd_scale_factor)
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None
        # Rank 0's variables (restored or freshly initialized) are copied to
        # every other rank.
        if is_root: print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        # No ops may be added to the graph past this point.
        sess.graph.finalize()

        if is_root:
            print('Training')
            if flops is not None:
                print(f'Total GFLOPS: {flops}')
            print_params()

        # Rolling windows over the most recent steps_per_log steps.
        loss_hist = deque(maxlen=steps_per_log)
        gnorm_hist = deque(maxlen=steps_per_log)
        for i_epoch in range(99999999999):
            if i_epoch % epochs_per_val == 0:
                run_validation(sess, i_step=i_step)
                # Only root owns a saver, so only root writes checkpoints.
                if saver is not None:
                    saver.save(sess,
                               os.path.join(checkpointdir, 'model'),
                               global_step=i_step)

            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(  # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):

                lr = lr_schedule(i_step)
                loss, gnorm, _ = sess.run([loss_sym, grad_norm_sym, opt_sym], {
                    x_sym: batch,
                    lr_sym: lr
                })
                loss_hist.append(loss)
                gnorm_hist.append(gnorm)

                # Skip timing the very first step, which will be unusually slow due to TF initialization
                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    # Collective calls: every rank must reach them on the
                    # same iteration.
                    loss_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(loss_hist)),
                                                            root=0)
                    gnorm_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(gnorm_hist)),
                                                             root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)

                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) * bpd_scale_factor)),
                            ('gnorm', float(np.mean(gnorm_hist_means))),
                            ('lr', float(lr)),
                            # ('fps', steps_per_sec * total_bs),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)

                i_step += 1
            # End of epoch

    # Train
    config = tf.ConfigProto()
    # config.log_device_placement = True
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    if is_root: print('===== Creating session =====')
    with tf.Session(config=config) as sess:
        loop(sess)
Esempio n. 32
0
def main():
    """Train an RNN/LSTM regressor from command-line options and report losses.

    Parses the CLI options, builds the model selected by ``--model``, trains
    it for ``--epochs`` epochs with Adam on a summed MSE loss, and validates
    after every epoch. Once more than a third of the epochs have elapsed, the
    weights with the lowest validation loss so far are saved to a path derived
    from the hyperparameters. After training, the test loss of the final
    (last-epoch) model is printed.

    Raises:
        ValueError: if ``--model`` is neither ``rnn`` nor ``lstm``.
    """

    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode",
                        default="train",
                        help="available modes: train, test, eval")
    parser.add_argument("--model",
                        default="rnn",
                        help="available models: rnn, lstm")
    parser.add_argument("--dataset",
                        default="all",
                        help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers",
                        default=3,
                        type=int,
                        help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim",
                        default=16,
                        type=int,
                        help="number of hidden dimensions")
    parser.add_argument("--lin_layers",
                        default=1,
                        type=int,
                        help="number of linear layers before output")
    parser.add_argument("--epochs",
                        default=100,
                        type=int,
                        help="number of max training epochs")
    parser.add_argument("--dropout",
                        default=0.0,
                        type=float,
                        help="dropout probability")
    parser.add_argument("--learning_rate",
                        default=0.01,
                        type=float,
                        help="learning rate")
    parser.add_argument("--verbose",
                        default=2,
                        type=int,
                        help="how much training output?")

    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }

    # The checkpoint file name encodes the hyperparameters of the run.
    params["PATH"] = "models/" + params["MODEL"] + "_" + params[
        "DATASET"] + "_" + str(params["RNN_LAYERS"]) + "_" + str(
            params["HIDDEN_DIM"]) + "_" + str(
                params["LIN_LAYERS"]) + "_" + str(
                    params["LEARNING_RATE"]) + "_" + str(
                        params["DROPOUT_PROB"]) + "_" + str(
                            params["EPOCHS"]) + "_model.pt"

    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")

    # The input width is taken from the second dimension of the first
    # training sequence.
    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    else:
        # Fail here with a clear message instead of an unbound ``model`` below.
        raise ValueError(
            "unknown model {!r}: expected 'rnn' or 'lstm'".format(
                params["MODEL"]))
    model.to(params["DEVICE"])
    # Losses are summed per batch and divided by the dataset size per epoch.
    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()
    since = time.time()
    # Start from +inf so the first eligible epoch always becomes the best one,
    # regardless of the scale of the validation loss.
    best_val_loss = float('inf')
    # Keeps the final report well defined even when zero epochs are requested.
    mean_loss = float('nan')

    # The loader does not change between epochs; with shuffle=True it still
    # reshuffles every time it is iterated.
    train_loader = DataLoader(train_data,
                              batch_size=32,
                              shuffle=True,
                              num_workers=4)

    for e in range(params["EPOCHS"]):

        running_loss = 0.0
        model.train()

        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)

            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)

            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd history alive for the whole epoch.
            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data,
                            model,
                            params,
                            criterion,
                            validation=True)

        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) +
                  '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()

        # Checkpoints are only considered after the first third of training.
        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:4f}'.format(mean_loss))
    print('Best Validation Loss: {:4f}'.format(best_val_loss))

    # NOTE(review): the test loss is computed with the last-epoch weights, not
    # the best checkpoint saved above -- confirm this is intended.
    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
def _split_custom_regime(regime, prefix):
    """Split a regime name like ``kerr_bistableA33.25`` into variant and drive.

    Args:
        regime: Full regime string; the caller has already checked that it
            starts with ``prefix``.
        prefix: Leading family name, e.g. ``"kerr_bistable"``.

    Returns:
        Tuple ``(which_kerr, custom_drive)``: the single character that
        follows ``prefix`` and the remainder of the string parsed as a float.

    Raises:
        ValueError: If nothing follows ``prefix`` or the remainder is not a
            valid float.
    """
    suffix = regime[len(prefix):]
    if not suffix:
        # Indexing past the prefix would otherwise surface as a bare IndexError.
        raise ValueError(
            "Regime %r must be followed by a variant letter and a drive "
            "value, e.g. %sA33.25" % (regime, prefix))
    which_kerr = suffix[0]
    try:
        custom_drive = float(suffix[1:])
    except ValueError as err:
        raise ValueError(
            "Could not parse a custom drive from regime %r" %
            (regime, )) from err
    return which_kerr, custom_drive


def main():
    """Command-line entry point: build a system and pass it to the generator.

    Reads the options from ``get_parser()``, collects them into a ``params``
    dict (printed unless ``--quiet`` is set), derives the output file name from
    them, constructs the operators for the requested regime, and calls
    ``gen_num_system`` (one system) or ``gen_num_system_two_systems`` (two
    systems) with the output path.

    Raises:
        ValueError: If the regime or ``num_systems`` is not supported, or a
            custom-drive regime name cannot be parsed.
        SystemExit: If the command-line arguments cannot be parsed.
    """
    parser = get_parser()
    try:
        args = parser.parse_args()
    except SystemExit as exc:
        # argparse signals bad arguments (and handles --help) via SystemExit.
        # Re-raise so the process keeps argparse's exit status instead of
        # reporting success for a usage error.
        if exc.code not in (0, None):
            print("Unable to get parser, exiting now...")
        raise
    except Exception:
        print("Unable to get parser, exiting now...")
        sys.exit(1)

    ############################################################################
    #### Sample call from command line
    # python /scratch/users/tabakg/qsd_dev/generate_num_model.py --output_dir '/scratch/users/tabakg/qsd_output/json_spec/' --Nfock_a 30 \
    # --seed 1 --regime 'kerr_bistableA21.75' --num_systems 2 --delta_t 1e-05 --duration 0.2 --downsample 100 \
    # --sdeint_method_name 'itoImplicitEuler' --R 1.0 --eps 1.0 --noise_amp 1.0 --lambda 0.999
    ############################################################################

    sdeint_method_name = args.sdeint_method_name
    if sdeint_method_name == "":
        logging.info(
            "sdeint_method_name not set. Using itoEuler as a default.")
        sdeint_method_name = "itoEuler"

    # Key order is kept identical to the historical insertion order so that
    # print_params output does not change.
    params = {
        'Ntraj': args.ntraj,
        'seed': args.seed,
        'duration': args.duration,
        'delta_t': args.delta_t,
        'Nfock_a': args.Nfock_a,
        'Nfock_j': args.Nfock_j,
        'downsample': args.downsample,
        'regime': args.regime,
        'num_systems': args.num_systems,
        'drive_second_system': args.drive_second_system,
        'sdeint_method_name': sdeint_method_name,
        'R': args.R,
        'eps': args.eps,
        'noise_amp': args.noise_amp,
        'lambd': args.lambd,
        'trans_phase': args.trans_phase,
    }

    ntraj = params['Ntraj']
    seed = params['seed']
    duration = params['duration']
    delta_t = params['delta_t']
    Nfock_a = params['Nfock_a']
    Nfock_j = params['Nfock_j']
    downsample = params['downsample']
    regime = params['regime']
    num_systems = params['num_systems']
    drive_second_system = params['drive_second_system']
    R = params['R']
    eps = params['eps']
    noise_amp = params['noise_amp']
    lambd = params['lambd']
    trans_phase = params['trans_phase']

    # Does the user want to print verbose output?
    if not args.quiet:
        print_params(params=params)

    #### output directory and file name, generated from inputs
    params_args = (regime, seed, ntraj, delta_t, Nfock_a, Nfock_j, duration,
                   downsample, sdeint_method_name, num_systems, R, eps,
                   noise_amp, lambd, trans_phase, drive_second_system)
    param_str = make_params_string(params_args)
    json_file_name = "json_spec_" + param_str + ".json"
    json_file_dir = os.path.join(args.output_dir, json_file_name)
    print("output file location is ", json_file_dir)

    if num_systems == 1:

        if regime == "absorptive_bistable":
            logging.info("Regime is set to %s", regime)
            H, psi0, Ls, obsq_data, _ = make_system_JC(Nfock_a, Nfock_j)
        elif regime == "kerr_bistable":
            logging.info("Regime is set to %s", regime)
            H, psi0, Ls, obsq_data, _ = make_system_kerr_bistable(Nfock_a)
        elif regime.startswith("kerr_bistable"):
            # Inputs in this case are e.g. kerr_bistableA33.25
            which_kerr, custom_drive = _split_custom_regime(
                regime, "kerr_bistable")
            logging.info("Regime is set to %s, with custom drive %s", regime,
                         custom_drive)
            H, psi0, Ls, obsq_data, _ = (
                make_system_kerr_bistable_regime_chose_drive(
                    Nfock_a, which_kerr, custom_drive))
        elif regime == "kerr_qubit":
            logging.info("Regime is set to %s", regime)
            H, psi0, Ls, obsq_data, _ = make_system_kerr_qubit(Nfock_a)
        else:
            logging.error("Unknown regime, %s, or not implemented yet.",
                          regime)
            raise ValueError("Unknown regime, or not implemented yet.")

        gen_num_system(json_file_dir,
                       H,
                       psi0,
                       duration,
                       delta_t,
                       Ls,
                       sdeint_method_name,
                       obsq=obsq_data,
                       downsample=downsample,
                       ntraj=ntraj,
                       seed=seed)

    elif num_systems == 2:

        if regime == "absorptive_bistable":
            logging.info("Regime is set to %s", regime)
            H1, H2, psi0, L1s, L2s, obsq_data, _ = make_system_JC_two_systems(
                Nfock_a, Nfock_j, drive_second_system)
        elif regime == "kerr_bistable":
            logging.info("Regime is set to %s", regime)
            H1, H2, psi0, L1s, L2s, obsq_data, _ = (
                make_system_kerr_bistable_two_systems(Nfock_a,
                                                      drive_second_system))
        elif regime == "kerr_qubit":
            logging.info("Regime is set to %s", regime)
            H1, H2, psi0, L1s, L2s, obsq_data, _ = (
                make_system_kerr_qubit_two_systems(Nfock_a,
                                                   drive_second_system))
        elif regime.startswith("empty_then_kerr"):
            # e.g. empty_then_kerrA33.25
            which_kerr, custom_drive = _split_custom_regime(
                regime, "empty_then_kerr")
            logging.info("Regime is set to %s, with custom drive %s", regime,
                         custom_drive)
            H1, H2, psi0, L1s, L2s, obsq_data, _ = make_system_empty_then_kerr(
                Nfock_a, which_kerr, custom_drive)
        elif regime.startswith("kerr_bistable"):
            # Inputs in this case are e.g. kerr_bistableA33.25
            which_kerr, custom_drive = _split_custom_regime(
                regime, "kerr_bistable")
            logging.info("Regime is set to %s, with custom drive %s", regime,
                         custom_drive)
            H1, H2, psi0, L1s, L2s, obsq_data, _ = (
                make_system_kerr_bistable_regime_chose_drive_two_systems(
                    Nfock_a, which_kerr, custom_drive))
        else:
            logging.error("Unknown regime, %s, or not implemented yet.",
                          regime)
            raise ValueError("Unknown regime, or not implemented yet.")

        # NOTE(review): trans_phase is parsed and encoded in the file name but
        # None is passed here; confirm whether args.trans_phase should be
        # forwarded instead.
        gen_num_system_two_systems(json_file_dir,
                                   H1,
                                   H2,
                                   psi0,
                                   duration,
                                   delta_t,
                                   L1s,
                                   L2s,
                                   R,
                                   eps,
                                   noise_amp,
                                   lambd,
                                   sdeint_method_name,
                                   trans_phase=None,
                                   obsq=obsq_data,
                                   downsample=downsample,
                                   ops_on_whole_space=False,
                                   ntraj=ntraj,
                                   seed=seed)

    else:  ## num_systems not equal to 1 or 2
        logging.error("Unknown num_systems, %s, or not implemented yet.",
                      num_systems)
        raise ValueError("Unknown num_systems, or not implemented yet.")