Example #1
def main(address):
    directories = glob.glob(address)
    for directory in directories:
        if files_exits(directory):
            print(directory)
            args = utils.load_model_config(directory)
            lc_path, log_path, experiment = parse_args(directory)
            # the learning curves are saved as a pickled dict in a 0-d object array,
            # so newer NumPy needs allow_pickle=True
            x = np.load(lc_path, allow_pickle=True)[()]
            experiment = generate_experiment_string(args)
            epoch_times = extract_epoch_time(log_path)
            train_ppls, val_ppls, train_losses, val_losses = (
                x['train_ppls'], x['val_ppls'], x['train_losses'], x['val_losses'])
            plots(train_losses, val_losses, train_ppls, val_ppls, epoch_times, experiment, directory)
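Example #1 relies on a project-local utils.load_model_config(directory) helper that the excerpt does not show. A minimal sketch of what such a helper could look like, assuming the run configuration is stored as key=value lines in an exp_config.txt file inside each model directory (the file name comes from the --saved_models_dir help text further down; the exact format is an assumption):

import os

def load_model_config(directory):
    # Hypothetical sketch: parse exp_config.txt in `directory` into a plain dict.
    config = {}
    with open(os.path.join(directory, 'exp_config.txt')) as fh:
        for line in fh:
            line = line.strip()
            if line and '=' in line:
                key, value = line.split('=', 1)
                config[key.strip()] = value.strip()
    return config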
Example #2
def main():
    """
    Main predict function for the wikikg90m
    """
    args = ArgParser().parse_args()
    config = load_model_config(
        os.path.join(args.model_path, 'model_config.json'))
    args = use_config_replace_args(args, config)
    dataset = get_dataset(args, args.data_path, args.dataset, args.format,
                          args.delimiter, args.data_files,
                          args.has_edge_importance)
    print("Load the dataset done.")
    eval_dataset = EvalDataset(dataset, args)

    model = BaseKEModel(
        args=args,
        n_entities=dataset.n_entities,
        n_relations=dataset.n_relations,
        model_name=args.model_name,
        hidden_size=args.hidden_dim,
        entity_feat_dim=dataset.entity_feat.shape[1],
        relation_feat_dim=dataset.relation_feat.shape[1],
        gamma=args.gamma,
        double_entity_emb=args.double_ent,
        cpu_emb=args.cpu_emb,
        relation_times=args.ote_size,
        scale_type=args.scale_type)

    print("Create the model done.")
    model.entity_feat = dataset.entity_feat
    model.relation_feat = dataset.relation_feat
    load_model_from_checkpoint(model, args.model_path)
    print("The model load the checkpoint done.")

    if args.infer_valid:
        valid_sampler_tail = eval_dataset.create_sampler(
            'valid',
            args.batch_size_eval,
            mode='tail',
            num_workers=args.num_workers,
            rank=0,
            ranks=1)
        infer(args, model, config, 0, [valid_sampler_tail], "valid")

    if args.infer_test:
        test_sampler_tail = eval_dataset.create_sampler(
            'test',
            args.batch_size_eval,
            mode='tail',
            num_workers=args.num_workers,
            rank=0,  # single-process inference, mirroring the valid branch above
            ranks=1)
        infer(args, model, config, 0, [test_sampler_tail], "test")
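Example #2 reads its configuration from model_config.json instead. A minimal sketch of a load_model_config compatible with that call, assuming the file is plain JSON (the project's real helper may do additional validation):

import json

def load_model_config(config_path):
    # Hypothetical sketch: return the contents of model_config.json as a dict.
    with open(config_path) as fh:
        return json.load(fh)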
Example #3
def summarize_models(address):
    directories = glob.glob(address)
    train_ppls_list, val_ppls_list = [], []
    train_losses_list, val_losses_list = [], []
    experiment_list, directory_list, epoch_times_list = [], [], []
    for directory in directories:
        if files_exits(directory):
            print(directory)
            args = utils.load_model_config(directory)
            lc_path, log_path, experiment = parse_args(directory)
            # same pickled-dict layout as in Example #1; allow_pickle=True for newer NumPy
            x = np.load(lc_path, allow_pickle=True)[()]
            experiment = generate_experiment_string(args)
            epoch_times = extract_epoch_time(log_path)
            train_ppls, val_ppls, train_losses, val_losses = (
                x['train_ppls'], x['val_ppls'], x['train_losses'], x['val_losses'])
            train_ppls_list.append(train_ppls)
            val_ppls_list.append(val_ppls)
            train_losses_list.append(train_losses)
            val_losses_list.append(val_losses)
            experiment_list.append(experiment)
            directory_list.append(directory)
            epoch_times_list.append(epoch_times)

    
    summarize_plots(train_losses_list, val_losses_list, train_ppls_list, val_ppls_list, epoch_times_list, experiment_list, directory_list)
parser = argparse.ArgumentParser(
    description='PyTorch Penn Treebank Language Modeling')

parser.add_argument('--saved_models_dir',
                    type=str,
                    help='Directory with saved models \
                         (best_params.pt and exp_config.txt must be present there). \
                         All of its individual subdirectories will be iterated')

saved_model_dir = parser.parse_args().saved_models_dir
plt.figure()  #(figsize=(12, 12))
dirs = [
    x[0] for x in os.walk(saved_model_dir)
    if x[0] != saved_model_dir and 'TRANSFORMER' not in x[0]
]
for dir_name in dirs:
    args = utils.load_model_config(dir_name)
    x_raw = np.load(os.path.join(dir_name, 'timestep_grads.npy'))
    x = minmax_scale(x_raw)
    plt.plot(x, marker='o', label=args['model'])
    plt.title('Final time-step loss gradient wrt hidden states')

plt.xlabel("Hidden state (concatenated)")
plt.ylabel("Rescaled gradient norm")
plt.grid()
plt.legend()
for dir_name in dirs:
    plt.savefig('{}/timestamp_grads_corrected.png'.format(dir_name))
plt.show()
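The module-level plotting code above omits its imports; a plausible header for it, assuming minmax_scale comes from sklearn.preprocessing and utils is the same project-local module used in the earlier examples:

import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import minmax_scale

import utils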
Example #4
                training_data.iloc[::downsample_full_train].loc[:,
                                                                feature_cols],
                training_data.iloc[::downsample_full_train][target])
            save_model(model, model_name)
        gc.collect()

    model_config["feature_cols"] = feature_cols
    model_config["targets"] = targets
    model_config["best_pred_col"] = best_pred_col
    model_config["riskiest_features"] = riskiest_features
    print(f"saving model config for {model_config_name}")
    save_model_config(model_config, model_config_name)
else:
    # load model config from previous model selection loop
    print(f"loading model config for {model_config_name}")
    model_config = load_model_config(model_config_name)
    feature_cols = model_config["feature_cols"]
    targets = model_config["targets"]
    best_pred_col = model_config["best_pred_col"]
    riskiest_features = model_config["riskiest_features"]
""" Things that we always do even if we've already trained """
gc.collect()

print("reading tournament_data")
live_data = pd.read_parquet('v4/live.parquet')
print("reading validation_data")
validation_data = pd.read_parquet('v4/validation.parquet')
print("reading example_predictions")
example_preds = pd.read_parquet('v4/live_example_preds.parquet')
print("reading example_validaton_predictions")
validation_example_preds = pd.read_parquet(
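The snippet above persists and restores model_config through save_model_config / load_model_config helpers that the excerpt does not include. A plausible JSON-backed sketch of the pair, with the file naming purely an assumption:

import json

def save_model_config(model_config, model_config_name):
    # Hypothetical sketch: write the config dict as JSON under the given name.
    with open(f"{model_config_name}.json", "w") as fh:
        json.dump(model_config, fh, indent=2)

def load_model_config(model_config_name):
    # Hypothetical counterpart: read the config dict back from disk.
    with open(f"{model_config_name}.json") as fh:
        return json.load(fh)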
Example #5
parser.add_argument('--saved_models_dir',
                    type=str,
                    help='Directory with saved models \
                         (best_params.pt and exp_config.txt must be present there). \
                         All of its individual subdirectories will be iterated')

parser.add_argument('--generated_seq_len',
                    type=int,
                    default=35,
                    help='length of generated sequences')

args = parser.parse_args()
output_dir = args.saved_models_dir
seq_len = args.generated_seq_len

# load model configuration
args = load_model_config(output_dir)

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)

# LOAD DATA
print('Loading data from ' + args.data)
raw_data = ptb_raw_data(data_path=args.data)
train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
vocab_size = len(word_to_id)
print('  vocabulary size: {}'.format(vocab_size))

if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,