def evaluate_inner(eval_text: List[str],
                   eval_labels: List[int],
                   model_checkpoint: str,
                   model_log_directory: str,
                   batch_size: int,
                   output_prefix: str,
                   gpu_device: Optional[torch.device] = None,
                   disable_tqdm: bool = False) -> None:
    """Evaluate a saved regex-proxy classifier checkpoint on raw text.

    Loads the checkpoint, rebuilds the ``RegexProxyClassifier`` (regex
    patterns plus a linear head), predicts labels for ``eval_text`` in
    batches, and writes an sklearn classification report as JSON into
    ``model_log_directory``.

    Args:
        eval_text: Raw evaluation documents.
        eval_labels: Gold integer labels aligned with ``eval_text``.
        model_checkpoint: Path to the ``.pt`` checkpoint file.
        model_log_directory: Directory receiving the JSON report.
        batch_size: Number of documents per forward pass.
        output_prefix: Infix used when naming the report file.
        gpu_device: Optional device for inference; stays on CPU if None.
        disable_tqdm: Suppress the progress bar when True.
    """
    # load model checkpoint onto CPU first; moved to GPU below if requested
    model_checkpoint_loaded = torch.load(model_checkpoint,
                                         map_location=torch.device("cpu"))

    # log current stage
    LOGGER.info("Loading and pre-compiling regex model")

    # load linear submodule; in_features is the number of activating regexes
    linear = Linear(
        len(model_checkpoint_loaded["activating_regex"]),
        model_checkpoint_loaded["linear_state_dict"]["weight"].size(0))
    linear.load_state_dict(model_checkpoint_loaded["linear_state_dict"])

    # create model and load respective parameters
    model = RegexProxyClassifier(model_checkpoint_loaded["pattern_specs"],
                                 model_checkpoint_loaded["activating_regex"],
                                 linear)

    # log model information (lazy %-args avoid formatting when INFO is off)
    LOGGER.info("Model: %s", model)

    # send model to correct device
    if gpu_device is not None:
        LOGGER.info("Transferring model to GPU device: %s", gpu_device)
        model.to(gpu_device)

    # set model on eval mode
    model.eval()

    # loop over data in batches; torch.no_grad() scopes the autograd switch
    # to this loop instead of disabling gradients globally for the caller
    # (the original set_grad_enabled(False) was never restored)
    predicted = []
    with torch.no_grad():
        for batch in tqdm(chunked(eval_text, batch_size),
                          disable=disable_tqdm):
            predicted.extend(torch.argmax(model.forward(batch), 1).tolist())

    # process classification report
    clf_report = classification_report(eval_labels,
                                       predicted,
                                       output_dict=True)

    # designate filename: <checkpoint-stem>_<prefix>_classification_report.json
    filename = os.path.join(
        model_log_directory,
        os.path.basename(model_checkpoint).replace(".pt", "_") +
        output_prefix + "_classification_report.json")

    # dump json report in model_log_directory
    LOGGER.info("Writing classification report: %s", filename)
    with open(filename, "w") as output_file_stream:
        json.dump(clf_report, output_file_stream)
# NOTE(review): top-level script fragment; it references names that must be
# defined earlier in the original file (mission_feat_dim, maint_feat_dim,
# device, continue_fname, lr, Linear, LBFGS, BCEWithLogitsLoss, chain) —
# confirm against the full script.
# Two single-output linear scorers, zero-initialised so training starts
# from a neutral (zero-logit) model.
mission_model = Linear(in_features=mission_feat_dim, out_features=1)
mission_model.weight.data *= 0.0
mission_model.bias.data *= 0.0
maint_model = Linear(in_features=maint_feat_dim, out_features=1)
maint_model.weight.data *= 0.0
maint_model.bias.data *= 0.0
mission_model.to(device)
maint_model.to(device)

# Optionally resume both models and the training history from a saved session.
if continue_fname:
    load_path = os.path.join('saved-sessions', continue_fname)
    print(f'Loading saved session from {load_path}...')

    ckpt = torch.load(load_path)
    mission_model.load_state_dict(ckpt['mission_state_dict'])
    maint_model.load_state_dict(ckpt['maint_state_dict'])
    train_losses = ckpt['train_losses']
    val_accs = ckpt['val_accs']
    time_elapsed = ckpt['time_elapsed']

    print('Done!\n')

print('Training regression model...\n')

# Binary classification on summed logits; LBFGS jointly optimises the
# parameters of both linear models.
loss_fun = BCEWithLogitsLoss()
optimizer = LBFGS(chain(mission_model.parameters(), maint_model.parameters()),
                  lr=lr)
# optimizer = Adam(chain(mission_model.parameters(), maint_model.parameters()), lr=lr)

# running count of training examples processed so far
n_seen = 0
# ---- Beispiel #3 (scrape separator from the example aggregator) ----
def rank_bunos(bunos, pred_date, model_fp):
    """Rank aircraft (BUNOs) by predicted breakage probability, descending.

    For every BUNO the mission/maintenance feature CSVs are loaded,
    restricted to records strictly before ``pred_date``, converted to float
    tensors, and scored with two saved single-output linear models.  The
    per-record logits are summed, squashed through a sigmoid, and the BUNOs
    are returned from most to least likely to break.

    Args:
        bunos: Iterable of BUNO identifiers.  Must be non-empty, since the
            linear layers' input sizes are inferred from the feature data.
        pred_date: Prediction cutoff; only earlier history is used.
        model_fp: Path to a torch checkpoint containing the
            ``mission_state_dict`` and ``maint_state_dict`` entries.

    Returns:
        List of ``(buno, probability)`` pairs, highest probability first.

    Raises:
        ValueError: If ``bunos`` is empty.
    """
    if not bunos:
        # without any data the feature dimensions (and thus the models)
        # cannot be constructed — previously this surfaced as a NameError
        # on mission_dim below
        raise ValueError("rank_bunos requires at least one BUNO")

    # Load the mission and maintenance histories for each BUNO
    features = dict()

    for buno in bunos:
        mission_fp = os.path.join(mission_dir, f'{buno}-mission-feat.csv')
        maint_fp = os.path.join(maint_dir, f'{buno}-maint-feat.csv')
        cum_fh_fp = os.path.join(cum_fh_dir, f'{buno}-cum-fh.csv')

        mission_df = pd.read_csv(mission_fp)
        maint_df = pd.read_csv(maint_fp)
        cum_fh_df = pd.read_csv(cum_fh_fp)
        mission_df['LaunchDate'] = pd.to_datetime(mission_df['LaunchDate'])
        maint_df['Date'] = pd.to_datetime(maint_df['Date'])
        cum_fh_df['Date'] = pd.to_datetime(cum_fh_df['Date'])

        # Keep only mission and maintenance records that happened strictly
        # before the prediction date
        mission_hist = mission_df[mission_df['LaunchDate'] < pred_date]
        maint_hist = maint_df[maint_df['Date'] < pred_date]

        # Missing feature values are treated as zero
        mission_hist = mission_hist.fillna(0)
        maint_hist = maint_hist.fillna(0)

        # Make a list of the features we want to keep
        mission_features = [
            col for col in mission_hist.columns
            if col not in mission_exclude_cols
        ]
        maint_features = [
            col for col in maint_hist.columns
            if col not in maint_exclude_cols
        ]

        x_mission = torch.tensor(
            np.asarray(mission_hist[mission_features])).float()
        x_maint = torch.tensor(np.asarray(maint_hist[maint_features])).float()

        features[buno] = (x_mission, x_maint)

        # feature dimensionality is assumed identical across BUNOs; the
        # value from the last iteration sizes the models below
        mission_dim = x_mission.shape[1]
        maint_dim = x_maint.shape[1]

    # Load the model
    mission_model = Linear(mission_dim, 1)
    maint_model = Linear(maint_dim, 1)

    ckpt = torch.load(model_fp, map_location='cpu')
    mission_model.load_state_dict(ckpt['mission_state_dict'])
    maint_model.load_state_dict(ckpt['maint_state_dict'])

    # Predict breakage probabilities for each BUNO
    mission_model.eval()
    maint_model.eval()
    breakage_probs = dict()

    with torch.no_grad():
        for buno in bunos:
            x_mission, x_maint = features[buno]

            mission_logits = mission_model(x_mission)
            maint_logits = maint_model(x_maint)

            # aggregate per-record logits into a single score per BUNO
            logit = torch.sum(mission_logits) + torch.sum(maint_logits)
            pred_prob = sigmoid(logit)

            breakage_probs[buno] = pred_prob.detach().numpy()

    # Rank ascending by (1 - p), i.e. descending by breakage probability
    ranked_bunos = sorted(breakage_probs.items(), key=lambda item: 1 - item[1])

    return ranked_bunos
# ---- Beispiel #4 (scrape separator from the example aggregator) ----
def main():
    """Train a ReformerLM-based molecular similarity classifier.

    Reads hyper-parameters from ``add_argument()`` CLI args and the
    DeepSpeed JSON config, builds train/test datasets (MRPC-style or
    molecule files), then trains either through DeepSpeed engines or a
    plain PyTorch loop, periodically validating and checkpointing under
    ``cmd_args.output_folder``.
    """
    cmd_args = add_argument()

    path_to_file_tr = cmd_args.path_to_file_tr
    path_to_file_ts = cmd_args.path_to_file_ts

    min_len_mol = cmd_args.min_len_mol
    max_len_mol = cmd_args.max_len_mol

    num_examples_tr = cmd_args.num_examples_tr
    num_examples_ts = cmd_args.num_examples_ts

    # batch settings come from the DeepSpeed JSON config
    # (NOTE(review): the config file is re-opened and re-parsed three times)
    train_batch_size = json.load(open(cmd_args.ds_conf))['train_batch_size']
    gradient_accumulation_steps = json.load(open(
        cmd_args.ds_conf))['gradient_accumulation_steps']

    # True when the DeepSpeed config supplies its own optimizer section
    deepspeed_optimizer = True if json.load(open(cmd_args.ds_conf)).get(
        'optimizer', None) is not None else False

    epochs = cmd_args.epochs
    emb_dim = cmd_args.emb_dim
    dim = cmd_args.dim
    bucket_size = cmd_args.bucket_size
    depth = cmd_args.depth
    heads = cmd_args.heads
    n_hashes = cmd_args.n_hashes
    ff_chunks = cmd_args.ff_chunks
    attn_chunks = cmd_args.attn_chunks
    validate_every = cmd_args.validate_every
    save_every = cmd_args.save_every
    output_folder = cmd_args.output_folder

    use_full_attn = cmd_args.use_full_attn
    mrpc_test = cmd_args.mrpc_test
    use_deepspeed = cmd_args.use_deepspeed

    os.makedirs(output_folder, exist_ok=True)

    # persist the full CLI configuration alongside the run outputs
    pickle.dump(cmd_args,
                open(os.sep.join([output_folder, 'training_conf.pkl']), 'wb'))

    MIN_LENGTH_MOL = min_len_mol
    MAX_LENGTH_MOL = max_len_mol  # 2048
    NUM_EXAMPLES_TR = num_examples_tr  # 1024
    NUM_EXAMPLES_TS = num_examples_ts  # 1024
    N_EPOCHS = epochs  # 10
    VALIDATE_EVERY = validate_every
    SAVE_EVERY = save_every

    MOL_SEQ_LEN = MAX_LENGTH_MOL  # output_lang.max_len if (output_lang.max_len % 2) == 0  else output_lang.max_len + 1 # ??

    # both names point at the same language pickle path
    saved_mol_lang = os.sep.join([output_folder, 'mol_lang.pkl'])

    MAX_LENGTH_MOL = cmd_args.max_len_mol

    saved_target_lang = os.sep.join([output_folder, 'mol_lang.pkl'])

    # choose the reader: MRPC-format pairs vs. raw molecule files
    if mrpc_test:
        mol_lang, tr_samples, ts_samples = readMRPC(
            molecule_file_tr=path_to_file_tr,
            molecule_file_ts=path_to_file_ts,
            saved_molecule_lang=saved_target_lang,
            num_examples_tr=NUM_EXAMPLES_TR,
            num_examples_ts=NUM_EXAMPLES_TS,
            min_len_molecule=MIN_LENGTH_MOL,
            max_len_molecule=MAX_LENGTH_MOL,
            shuffle=True)
    else:
        mol_lang, tr_samples, ts_samples = readMolecules(
            molecule_file_tr=path_to_file_tr,
            molecule_file_ts=path_to_file_ts,
            saved_molecule_lang=saved_target_lang,
            num_examples_tr=NUM_EXAMPLES_TR,
            num_examples_ts=NUM_EXAMPLES_TS,
            min_len_molecule=MIN_LENGTH_MOL,
            max_len_molecule=MAX_LENGTH_MOL,
            shuffle=True)

    pickle.dump(mol_lang, open(saved_mol_lang, 'wb'))

    # per-process batch size of 1 when not on CUDA
    train_dataset = MolecularSimilarityDataset(
        tr_samples, mol_lang, train_batch_size if device == 'cuda' else 1)
    test_dataset = MolecularSimilarityDataset(
        ts_samples, mol_lang, train_batch_size if device == 'cuda' else 1)

    # sequences are molecule pairs, hence twice the single-molecule length
    MAX_SEQ_LEN = MOL_SEQ_LEN * 2
    print('Axial Embedding shape:', compute_axial_position_shape(MAX_SEQ_LEN))
    model = ReformerLM(
        num_tokens=mol_lang.n_words,
        dim=dim,
        bucket_size=bucket_size,
        depth=depth,
        heads=heads,
        n_hashes=n_hashes,
        max_seq_len=MAX_SEQ_LEN,
        ff_chunks=ff_chunks,
        attn_chunks=attn_chunks,
        weight_tie=True,
        weight_tie_embedding=True,
        axial_position_emb=True,
        axial_position_shape=compute_axial_position_shape(MAX_SEQ_LEN),
        axial_position_dims=(dim // 2, dim // 2),
        return_embeddings=True,
        use_full_attn=use_full_attn).to(device)

    # two-class head over the encoder output
    # (NOTE(review): hard-coded 512 input size — presumably must equal
    # `dim`; confirm)
    linear_regressor = Linear(512, 2).to(device)

    model = TrainingWrapper(model, ignore_index=PAD_IDX,
                            pad_value=PAD_IDX).to(device)

    model_params = filter(lambda p: p.requires_grad, model.parameters())
    linear_params = filter(lambda p: p.requires_grad,
                           linear_regressor.parameters())

    SAVE_DIR = os.sep.join([output_folder, 'saved_model'])
    os.makedirs(SAVE_DIR, exist_ok=True)

    # resume from the highest-numbered checkpoint directory, if any
    # (NOTE(review): the bare except silently maps *any* failure —
    # missing dir, stray non-numeric entry — to checkpoint id 0)
    try:
        model_ckp_max = np.max(
            [int(ckp) for ckp in os.listdir(os.sep.join([SAVE_DIR, 'model']))])
    except:
        model_ckp_max = 0

    gpus_mini_batch = (train_batch_size // gradient_accumulation_steps
                       ) // torch.cuda.device_count()
    print('gpus_mini_batch:', gpus_mini_batch,
          'with gradient_accumulation_steps:', gradient_accumulation_steps)
    log_file = open(os.sep.join([output_folder, 'training_log.log']), 'a')
    # NOTE(review): 'chekpoint' typo is in the runtime log string; left
    # untouched here since this edit changes documentation only
    log_file.write(
        "\n\n\n{}\tStarting new training from chekpoint: EncoderDecoder-{}\n".
        format(datetime.datetime.now(), model_ckp_max))
    log_file.flush()

    # ---- DeepSpeed training path ----
    if use_deepspeed:
        if deepspeed_optimizer == False:
            print('No DeepSpeed optimizer found. Using RangerLars.')
            model_optimizer = RangerLars(model.parameters())
            linear_optimizer = RangerLars(linear_regressor.parameters())

            model_engine, model_optimizer, trainloader, _ = deepspeed.initialize(
                args=cmd_args,
                model=model,
                optimizer=model_optimizer,
                model_parameters=model_params,
                training_data=train_dataset)

            linear_engine, linear_optimizer, _, _ = deepspeed.initialize(
                args=cmd_args,
                model=linear_regressor,
                optimizer=linear_optimizer,
                model_parameters=linear_params)

        else:
            print('Found optimizer in the DeepSpeed configurations. Using it.')
            model_engine, model_optimizer, trainloader, _ = deepspeed.initialize(
                args=cmd_args,
                model=model,
                model_parameters=model_params,
                training_data=train_dataset)
            linear_engine, linear_optimizer, _, _ = deepspeed.initialize(
                args=cmd_args,
                model=linear_regressor,
                model_parameters=linear_params)

        _, model_client_sd = model_engine.load_checkpoint(
            os.sep.join([SAVE_DIR, 'model']), model_ckp_max)

        testloader = model_engine.deepspeed_io(test_dataset)

        ######TO DO
        for eph in range(epochs):
            print('Starting Epoch: {}'.format(eph))
            for i, pair in enumerate(tqdm(trainloader)):
                # global 1-based step counter across epochs
                tr_step = ((eph * len(trainloader)) + i) + 1

                src = pair[0]
                trg = pair[1]

                # debug dumps of the current batch, overwritten every step
                pickle.dump(src, open('src.pkl', 'wb'))
                pickle.dump(trg, open('trg.pkl', 'wb'))

                model_engine.train()
                linear_engine.train()
                #enc_dec.train()

                src = src.to(model_engine.local_rank)
                trg = trg.to(linear_engine.local_rank)

                print("Sample:", src)
                print("Target:", trg)
                print("Target Shape:", trg.shape)
                print("len Samples:", len(src))

                ## Need to learn how to use masks correctly
                enc_input_mask = torch.tensor(
                    [[1 if idx != PAD_IDX else 0 for idx in smpl]
                     for smpl in src]).bool().to(model_engine.local_rank)

                # context_mask = torch.tensor([[1 for idx in smpl if idx != PAD_IDX] for smpl in trg]).bool().to(device)
                #################

                enc_keys = model_engine(
                    src, return_loss=False, input_mask=enc_input_mask
                )  #enc_input_mask)#, context_mask=context_mask)
                #loss = enc_dec(src, trg, return_loss = True, enc_input_mask = None)#enc_input_mask)#, context_mask=context_mask)

                print('enc_keys shape', enc_keys.shape)
                #enc_keys_cls = enc_keys[:,0:1,:].to(linear_engine.local_rank)#torch.tensor([s[0] for s in enc_keys]).to(linear_engine.local_rank)
                #print('enc_keys_cls shape', enc_keys_cls.shape)
                # NOTE(review): softmax followed by F.cross_entropy applies
                # softmax twice (cross_entropy expects raw logits) — confirm
                # whether this is intentional
                preds = torch.softmax(linear_engine(enc_keys),
                                      dim=1).to(linear_engine.local_rank)

                print('preds shape', preds.shape)
                #preds = np.array([r[0] for r in results])
                #print('Pred:', preds.shape)
                loss = F.cross_entropy(preds, trg).to(linear_engine.local_rank)
                loss.backward()

                model_engine.step()
                linear_engine.step()

                print('Training Loss:', loss.item())
                if tr_step % validate_every == 0:
                    val_loss = []
                    for pair in tqdm(
                            testloader
                    ):  #Can't use the testloader or I will mess up with the model assignment and it won't learn during training, need to use normal validation instead of parallel one
                        model_engine.eval()
                        linear_engine.eval()
                        with torch.no_grad():
                            ts_src = pair[0]
                            ts_trg = pair[1]

                            pickle.dump(ts_src, open('ts_src.pkl', 'wb'))
                            pickle.dump(ts_trg, open('ts_trg.pkl', 'wb'))

                            ts_src = ts_src.to(model_engine.local_rank)
                            ts_trg = ts_trg.to(linear_engine.local_rank)

                            #ts_src = torch.tensor(np.array([pair[0].numpy()])).to(device)
                            #ts_trg = torch.tensor(np.array([pair[1].numpy()])).to(device)

                            ## Need to learn how to use masks correctly
                            ts_enc_input_mask = torch.tensor([
                                [1 if idx != PAD_IDX else 0 for idx in smpl]
                                for smpl in ts_src
                            ]).bool().to(model_engine.local_rank)
                            #ts_context_mask = torch.tensor([[1 for idx in smpl if idx != PAD_IDX] for smpl in ts_trg]).bool().to(device)

                            # loss = model_engine(
                            #     ts_src,
                            #     ts_trg,
                            #     return_loss=True,
                            #     enc_input_mask=ts_enc_input_mask
                            # )  #ts_enc_input_mask)#, context_mask=ts_context_mask)
                            # #loss = enc_dec(ts_src, ts_trg, return_loss = True, enc_input_mask = None)

                            ts_enc_keys = model_engine(
                                ts_src,
                                return_loss=False,
                                input_mask=ts_enc_input_mask)
                            ts_pred = torch.softmax(
                                linear_engine(ts_enc_keys),
                                dim=1).to(linear_engine.local_rank)
                            loss = F.cross_entropy(ts_pred, ts_trg).to(
                                linear_engine.local_rank)
                            val_loss.append(loss.item())

                    print(
                        f'\tValidation Loss: AVG: {np.mean(val_loss)}, MEDIAN: {np.median(val_loss)}, STD: {np.std(val_loss)} '
                    )
                    log_file.write(
                        'Step: {}\tTraining Loss:{}\t Validation LOSS: AVG: {}| MEDIAN: {}| STD: {}\n'
                        .format(i, loss.item(), np.mean(val_loss),
                                np.median(val_loss), np.std(val_loss)))
                else:
                    log_file.write('Step: {}\tTraining Loss:{}\n'.format(
                        i, loss.item()))

                log_file.flush()

                if tr_step % save_every == 0:
                    print('\tSaving Checkpoint')
                    model_ckpt_id = str(model_ckp_max + tr_step + 1)
                    model_engine.save_checkpoint(
                        os.sep.join([SAVE_DIR, 'model']), model_ckpt_id)

        log_file.close()
        print('\tSaving Final Checkpoint')
        model_ckpt_id = str(model_ckp_max + tr_step + 1)
        model_engine.save_checkpoint(os.sep.join([SAVE_DIR, 'model']),
                                     model_ckpt_id)
    # ---- plain PyTorch training path ----
    else:
        #model_optimizer = torch.optim.Adam(model.parameters()) # RangerLars(model.parameters())
        #linear_optimizer = torch.optim.Adam(linear_regressor.parameters())  # RangerLars(linear_regressor.parameters())

        # single Adam over both the encoder and the classifier head
        model_optimizer = torch.optim.Adam(
            list(model.parameters()) + list(linear_regressor.parameters())
        )  #RangerLars(list(model.parameters())+list(linear_regressor.parameters())) #

        PATH = os.sep.join(
            [SAVE_DIR, 'model',
             str(model_ckp_max), 'sts_model.pt'])
        if os.path.exists(PATH):
            print('********** Found Checkpoint. Loading:', PATH)
            checkpoint = torch.load(PATH)

            model.load_state_dict(checkpoint['model_state_dict'])
            linear_regressor.load_state_dict(checkpoint['linear_state_dict'])
            model_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        trainloader = DataLoader(train_dataset,
                                 batch_size=train_batch_size,
                                 shuffle=False)
        testloader = DataLoader(test_dataset,
                                batch_size=train_batch_size,
                                shuffle=False)
        ######TO DO
        train_loss_list = []
        for eph in range(epochs):
            print('Starting Epoch: {}'.format(eph))
            for i, pair in enumerate(tqdm(trainloader)):
                # global 1-based step counter across epochs
                tr_step = ((eph * len(trainloader)) + i) + 1

                src = pair[0]
                trg = pair[1]

                # debug dumps of the current batch, overwritten every step
                pickle.dump(src, open('src.pkl', 'wb'))
                pickle.dump(trg, open('trg.pkl', 'wb'))

                model.train()
                linear_regressor.train()
                #enc_dec.train()

                src = src.to(device)
                trg = trg.to(device)

                #print("Sample:", src)
                #print("Target:", trg)
                #print("Target Shape:", trg.shape)
                #print("len Samples:", len(src))

                ## Need to learn how to use masks correctly
                enc_input_mask = torch.tensor(
                    [[1 if idx != PAD_IDX else 0 for idx in smpl]
                     for smpl in src]).bool().to(device)

                # context_mask = torch.tensor([[1 for idx in smpl if idx != PAD_IDX] for smpl in trg]).bool().to(device)
                #################

                enc_keys = model(
                    src, return_loss=False, input_mask=enc_input_mask
                )  #enc_input_mask)#, context_mask=context_mask)
                #loss = enc_dec(src, trg, return_loss = True, enc_input_mask = None)#enc_input_mask)#, context_mask=context_mask)

                #print('enc_keys shape', enc_keys.shape)
                # classify from the first-token ([CLS]-style) embedding only
                # (unlike the DeepSpeed branch, which feeds all positions)
                enc_keys_cls = enc_keys[:, 0, :].to(
                    device
                )  #torch.tensor([s[0] for s in enc_keys]).to(linear_engine.local_rank)
                #print('enc_keys_cls shape', enc_keys_cls.shape)
                preds = torch.softmax(linear_regressor(enc_keys_cls),
                                      dim=1).to(device)

                #print('preds shape', preds.shape)
                #preds = np.array([r[0] for r in results])
                #print('Pred:', preds.shape)
                loss = F.cross_entropy(preds, trg).to(device)
                loss.backward()

                # NOTE(review): no optimizer.zero_grad() anywhere in this
                # loop — gradients accumulate across steps; confirm intended
                model_optimizer.step()
                #linear_optimizer.step()

                train_loss_list.append(loss.item())
                #print('Training Loss:', loss.item())
                if tr_step % validate_every == 0:
                    val_loss = []
                    ACC_list = []
                    MCC_list = []
                    for pair in tqdm(
                            testloader
                    ):  #Can't use the testloader or I will mess up with the model assignment and it won't learn during training, need to use normal validation instead of parallel one
                        model.eval()
                        linear_regressor.eval()
                        with torch.no_grad():
                            ts_src = pair[0]
                            ts_trg = pair[1]

                            pickle.dump(ts_src, open('ts_src.pkl', 'wb'))
                            pickle.dump(ts_trg, open('ts_trg.pkl', 'wb'))

                            ts_src = ts_src.to(device)
                            ts_trg = ts_trg.to(device)

                            #ts_src = torch.tensor(np.array([pair[0].numpy()])).to(device)
                            #ts_trg = torch.tensor(np.array([pair[1].numpy()])).to(device)

                            ## Need to learn how to use masks correctly
                            ts_enc_input_mask = torch.tensor(
                                [[1 if idx != PAD_IDX else 0 for idx in smpl]
                                 for smpl in ts_src]).bool().to(device)
                            #ts_context_mask = torch.tensor([[1 for idx in smpl if idx != PAD_IDX] for smpl in ts_trg]).bool().to(device)

                            # loss = model_engine(
                            #     ts_src,
                            #     ts_trg,
                            #     return_loss=True,
                            #     enc_input_mask=ts_enc_input_mask
                            # )  #ts_enc_input_mask)#, context_mask=ts_context_mask)
                            # #loss = enc_dec(ts_src, ts_trg, return_loss = True, enc_input_mask = None)

                            ts_enc_keys = model(ts_src,
                                                return_loss=False,
                                                input_mask=ts_enc_input_mask)
                            ts_enc_keys_cls = ts_enc_keys[:, 0, :].to(device)

                            ts_pred = torch.softmax(
                                linear_regressor(ts_enc_keys_cls),
                                dim=1).to(device)

                            loss = F.cross_entropy(ts_pred, ts_trg).to(device)

                            ACC, MCC = compute_simple_metrics(ts_pred, ts_trg)
                            ACC_list.append(ACC)
                            MCC_list.append(MCC)

                            val_loss.append(loss.item())

                    # NOTE(review): '\Train' below is a literal backslash-T
                    # typo (not '\t') in a runtime string; left untouched here
                    print(
                        f'\Train Loss: LAST: {train_loss_list[-1]}, AVG: {np.mean(train_loss_list)}, MEDIAN: {np.median(train_loss_list)}, STD: {np.std(train_loss_list)} '
                    )
                    print(
                        f'\tValidation Loss: AVG: {np.mean(val_loss)}, MEDIAN: {np.median(val_loss)}, STD: {np.std(val_loss)} '
                    )
                    print(
                        f'\tValidation ACC: AVG: {np.mean(ACC_list)}, MEDIAN: {np.median(ACC_list)}, STD: {np.std(ACC_list)} '
                    )
                    print(
                        f'\tValidation MCC: AVG: {np.mean(MCC_list)}, MEDIAN: {np.median(MCC_list)}, STD: {np.std(MCC_list)} '
                    )
                    log_file.write(
                        'Step: {}\tTraining Loss:{}\t Validation LOSS: AVG: {}| MEDIAN: {}| STD: {}\n'
                        .format(i, loss.item(), np.mean(val_loss),
                                np.median(val_loss), np.std(val_loss)))
                else:
                    log_file.write('Step: {}\tTraining Loss:{}\n'.format(
                        i, loss.item()))

                log_file.flush()

                if tr_step % save_every == 0:
                    print('\tSaving Checkpoint')
                    model_ckpt_id = str(model_ckp_max + tr_step + 1)
                    #model_engine.save_checkpoint(os.sep.join([SAVE_DIR, 'model']),
                    #                            model_ckpt_id)
                    PATH = os.sep.join([
                        SAVE_DIR, 'model',
                        str(model_ckpt_id), 'sts_model.pt'
                    ])
                    os.makedirs(os.sep.join(PATH.split(os.sep)[:-1]),
                                exist_ok=True)
                    torch.save(
                        {
                            'step': tr_step,
                            'model_state_dict': model.state_dict(),
                            'linear_state_dict': linear_regressor.state_dict(),
                            'optimizer_state_dict':
                            model_optimizer.state_dict(),
                        }, PATH)

        log_file.close()
        print('\tSaving Final Checkpoint')
        model_ckpt_id = str(model_ckp_max + tr_step + 1)
        #model_engine.save_checkpoint(os.sep.join([SAVE_DIR, 'model']),
        #                            model_ckpt_id)
        PATH = os.sep.join(
            [SAVE_DIR, 'model',
             str(model_ckpt_id), 'sts_model.pt'])
        os.makedirs(os.sep.join(PATH.split(os.sep)[:-1]), exist_ok=True)
        torch.save(
            {
                'step': tr_step,
                'model_state_dict': model.state_dict(),
                'linear_state_dict': linear_regressor.state_dict(),
                'optimizer_state_dict': model_optimizer.state_dict(),
            }, PATH)
# ---- Beispiel #5 (scrape separator from the example aggregator) ----
def compare_inner(eval_data: List[Tuple[List[int], int]],
                  eval_text: List[str],
                  neural_model: Module,
                  neural_model_checkpoint: str,
                  regex_model_checkpoint: str,
                  model_log_directory: str,
                  batch_size: int,
                  atol: float,
                  output_prefix: str,
                  gpu_device: Optional[torch.device] = None,
                  max_doc_len: Optional[int] = None,
                  disable_tqdm: bool = False) -> None:
    """Compare a neural model against its regex proxy on the same data.

    Loads both checkpoints, runs both models over the evaluation set in
    batches via their ``forward_with_trace`` methods, and records
    per-sample diagnostics (activating text, binary activations, softmax
    distributions, predicted labels) plus inter-model distance metrics.
    The accumulated dictionary is dumped as a JSON file in
    ``model_log_directory``.

    Args:
        eval_data: Tokenised documents paired with gold integer labels.
        eval_text: Raw documents aligned with ``eval_data``.
        neural_model: Un-initialised neural model to load the checkpoint into.
        neural_model_checkpoint: Path to the neural model's ``.pt`` file.
        regex_model_checkpoint: Path to the regex proxy's ``.pt`` file.
        model_log_directory: Directory receiving the JSON comparison file.
        batch_size: Number of samples per batch.
        atol: Absolute tolerance forwarded to the neural trace.
        output_prefix: Infix used when naming the output file.
        gpu_device: Optional device for both models; CPU if None.
        max_doc_len: Optional cap on document length for the neural Batch.
        disable_tqdm: Suppress the progress bar when True.
    """
    # load neural model checkpoint (CPU first; moved to GPU below if set)
    neural_model_checkpoint_loaded = torch.load(
        neural_model_checkpoint, map_location=torch.device("cpu"))
    neural_model.load_state_dict(
        neural_model_checkpoint_loaded["model_state_dict"])  # type: ignore

    # load regex model checkpoint
    LOGGER.info("Loading and pre-compiling regex model")
    regex_model_checkpoint_loaded = torch.load(
        regex_model_checkpoint, map_location=torch.device("cpu"))

    # load linear submodule; in_features is the number of activating regexes
    linear = Linear(
        len(regex_model_checkpoint_loaded["activating_regex"]),
        regex_model_checkpoint_loaded["linear_state_dict"]["weight"].size(0))
    linear.load_state_dict(regex_model_checkpoint_loaded["linear_state_dict"])

    # create model and load respective parameters
    regex_model = RegexProxyClassifier(
        regex_model_checkpoint_loaded["pattern_specs"],
        regex_model_checkpoint_loaded["activating_regex"], linear)

    # log model information
    LOGGER.info("Regex model: %s" % regex_model)

    # send models to correct device
    if gpu_device is not None:
        LOGGER.info("Transferring models to GPU device: %s" % gpu_device)
        neural_model.to(gpu_device)
        regex_model.to(gpu_device)

    # set model on eval mode and disable autograd
    # NOTE(review): this disables autograd globally and never restores it;
    # callers that continue training afterwards must re-enable it
    neural_model.eval()
    regex_model.eval()
    torch.autograd.set_grad_enabled(False)

    # create results storage
    results_store = {
        "neural_model": neural_model_checkpoint,
        "regex_model": regex_model_checkpoint,
        "comparisons": {}
    }

    # log current state
    LOGGER.info("Looping over data and text using neural and regex models")

    # loop over evaluation data and text
    for eval_batch in tqdm(chunked(list(zip(eval_data, eval_text)),
                                   batch_size),
                           disable=disable_tqdm):
        # separate data and text for processing
        eval_batch_data, eval_batch_text = map(list, zip(*eval_batch))
        eval_batch_labels = [
            label  # type: ignore
            for _, label in eval_batch_data
        ]

        # proceed with neural model processsing
        neural_forward_trace_output = neural_model.forward_with_trace(  # type: ignore
            Batch(
                [doc for doc, _ in eval_batch_data],  # type: ignore
                neural_model.embeddings,  # type: ignore
                to_cuda(gpu_device),
                0.,
                max_doc_len),
            atol)

        # proceed with regex model processing
        regex_forward_trace_output = regex_model.forward_with_trace(
            eval_batch_text)  # type: ignore

        # zip the text/labels with both models' per-sample trace outputs so
        # each iteration yields all diagnostics for a single sample
        for (eval_sample_text, eval_sample_label, back_pointers,
             neural_linear_output, regex_lookup, regex_linear_output) in zip(
                 *((eval_batch_text, ) + (eval_batch_labels, ) +
                   neural_forward_trace_output + regex_forward_trace_output)):
            # assign current key: next integer after the largest existing one
            if results_store["comparisons"] == {}:
                current_key = 0
            else:
                current_key = max(map(
                    int,
                    results_store["comparisons"].keys())) + 1  # type: ignore

            # create local storage which will be updated in loop
            local_store = results_store["comparisons"][  # type: ignore
                current_key] = {}

            # add text related data
            local_store["text"] = eval_sample_text
            local_store["gold_label"] = eval_sample_label

            # add neural model diagnostics
            neural_local_store = local_store["neural_model"] = {}
            neural_local_store["activating_text"] = [
                back_pointer.get_text(eval_sample_text.split())
                if back_pointer.binarized_score else None
                for back_pointer in back_pointers
            ]
            neural_local_store["binaries"] = [
                int(back_pointer.binarized_score)
                for back_pointer in back_pointers
            ]
            neural_local_store["softmax"] = torch.softmax(
                neural_linear_output, 0).tolist()
            neural_local_store["predicted_label"] = torch.argmax(
                neural_linear_output, 0).item()

            # add regex model diagnostics; group(2) is assumed to capture
            # the activating span — confirm against the regex construction
            regex_local_store = local_store["regex_model"] = {}
            regex_local_store["activating_text"] = [
                regex_match.group(2) if regex_match is not None else None
                for regex_match in regex_lookup
            ]
            regex_local_store["binaries"] = [
                1 if regex_match else 0 for regex_match in regex_lookup
            ]
            regex_local_store["softmax"] = torch.softmax(
                regex_linear_output, 0).tolist()
            regex_local_store["predicted_label"] = torch.argmax(
                regex_linear_output, 0).item()

            # add inter-model diagnostics: L2 distance between softmax
            # vectors and the fraction of disagreeing binary activations
            inter_model_store = local_store[
                "inter_model_distance_metrics"] = {}
            inter_model_store["softmax_difference_norm"] = torch.dist(
                torch.FloatTensor(neural_local_store["softmax"]),
                torch.FloatTensor(regex_local_store["softmax"])).item()
            inter_model_store["binary_misalignment_rate"] = sum([
                neural_binary != regex_binary
                for neural_binary, regex_binary in zip(
                    neural_local_store["binaries"],
                    regex_local_store["binaries"])
            ]) / len(neural_local_store["binaries"])

    # designate filename: compare_<prefix>_<neural-stem>_<regex-stem>.json
    filename = os.path.join(
        model_log_directory, "_".join([
            "compare", output_prefix,
            os.path.basename(neural_model_checkpoint).replace(".pt", ""),
            os.path.basename(regex_model_checkpoint).replace(".pt", "")
        ]) + ".json")

    # dump final dictionary in model_log_directory
    LOGGER.info("Writing output file: %s" % filename)
    with open(filename, "w") as output_file_stream:
        json.dump(results_store, output_file_stream)