import time


def compute_generation(model, tokenizer):
    global tf_valid_ds
    global tf_test_ds
    global validation
    global test
    global exp_dir
    global base_dir
    global param_sets

    print("Starting Generate Variants:")
    param_count = 1
    for param_set in param_sets:

        print(f"Generate {param_count}/{len(param_sets)}")
        print(str(param_set))

        # Returns a dictionary of model outputs for the test set
        model_output = get_model_output(model, tokenizer, param_set, None, None,
                                        tf_test_ds)

        #Test Out
        test_out = model_output['test']['output']
        ts_val = time.strftime("%Y%m%d_%H%M")
        print(ts_val)
        write_model_output(test, "test", ts_val, test_out, write_path=exp_dir)

        # Let's Use E2E Evaluation Metrics
        scores = compute_metrics(exp_dir, base_dir, ts_val, 'test', param_set)

        print(scores)
        print()
        save_metrics(exp_dir, ts_val, scores)

        param_count += 1
def compute_generation(model, tokenizer):
    global tf_valid_ds
    global validation
    global exp_dir
    global base_dir
    global param_sets

    print("Starting Generate Variants:")
    param_count = 1
    for param_set in param_sets:

        ### IF SOMETHING BREAKS, PICK UP WHERE WE LEFT OFF
        # if param_count < PICK_THE_NUM:
        #     print(f'Skipping: Model#: {model_count} Param#: {param_count}')
        #     param_count += 1
        #     continue
        ###

        print(f"Generate {param_count}/{len(param_sets)}")
        print(str(param_set))

        # Returns a dictionary of model outputs for the validation set
        model_output = get_model_output(model, tokenizer, param_set, None,
                                        tf_valid_ds, None)

        v_out = model_output['validation']['output']
        ts_val = time.strftime("%Y%m%d_%H%M")
        print(ts_val)
        write_model_output(validation,
                           "validation",
                           ts_val,
                           v_out,
                           write_path=exp_dir)

        # Let's Use E2E Evaluation Metrics
        scores = compute_metrics(exp_dir, base_dir, ts_val, 'validation',
                                 param_set)

        print(scores)
        print()
        save_metrics(exp_dir, ts_val, scores)
        param_count += 1
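
# A minimal usage sketch: both variants of compute_generation above read their
# inputs from module-level globals rather than arguments, so a driver cell
# first binds the datasets, output directories, and the list of
# generation-parameter dicts, then calls the function per fine-tuned checkpoint.
# The values below are hypothetical; model, tokenizer, tf_valid_ds, validation,
# exp_dir and base_dir are assumed to be prepared elsewhere in the notebook.
param_sets = [
    {'num_beams': 1, 'max_length': 60, 'min_length': 20,
     'early_stopping': True, 'do_sample': False, 'no_repeat_ngram_size': 2},
    {'num_beams': 4, 'max_length': 60, 'min_length': 20,
     'early_stopping': True, 'do_sample': False, 'no_repeat_ngram_size': 3},
]
compute_generation(model, tokenizer)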
# ## Example 3
# <hr>

# ### Generate Results + Metrics

# +
gen_params = {
    'num_beams': 1,
    'max_length': 60,
    'min_length': 20,
    'early_stopping': True,
    'do_sample': False,
    'no_repeat_ngram_size': 2
}

# Returns a dictionary of model outputs for the validation set
model_output = get_model_output(model, tokenizer, gen_params, None, tf_valid_ds,
                                None)
# -
#Write model outputs
v_out = model_output['validation']['output']
ts_val = time.strftime("%Y%m%d_%H%M")
print(ts_val)
write_model_output(valid_ds, "validation", ts_val, v_out, write_path=exp_dir)

# Let's Use E2E Evaluation Metrics
scores = compute_metrics(exp_dir, base_dir, ts_val, 'validation', gen_params)
print(scores)
save_metrics(exp_dir, ts_val, scores)
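
# For reference, a rough sketch of the shape a compute_metrics helper like the
# one used above could take. It assumes the E2E NLG Challenge scorer
# (measure_scores.py from the tuetschek/e2e-metrics repository) is checked out
# under base_dir, and that write_model_output saved a reference file and a
# system-output file named by the timestamp; the file names here are
# assumptions, not the author's actual layout.
import subprocess


def compute_metrics_sketch(exp_dir, base_dir, ts_val, split, params):
    ref_file = f"{exp_dir}/{split}_{ts_val}_refs.txt"      # hypothetical names
    sys_file = f"{exp_dir}/{split}_{ts_val}_outputs.txt"   # hypothetical names
    result = subprocess.run(
        ["python", f"{base_dir}/e2e-metrics/measure_scores.py", ref_file, sys_file],
        capture_output=True, text=True, check=True)
    # measure_scores.py prints BLEU, NIST, METEOR, ROUGE_L and CIDEr; keep the
    # raw report together with the generation parameters that produced it.
    return {'params': params, 'report': result.stdout}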

# #### If we like the scores and want to save them to our model track
# ## Example 4
# length_penalty (float, optional, defaults to 1.0) – Exponential penalty to the length. 1.0 means no penalty.
# Set to values < 1.0 in order to encourage the model to generate shorter sequences, to a value > 1.0 in order to encourage the model to produce longer sequences.

# no_repeat_ngram_size (int, optional, defaults to 0) – If set to int > 0, all ngrams of that size can only occur once.

# num_return_sequences (int, optional, defaults to 1) – The number of independently computed returned sequences for each element in the batch.

# use_cache – (bool, optional, defaults to True): Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
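
# The options documented above are ordinary generate() keyword arguments in
# Hugging Face Transformers. A small self-contained sketch, using t5-small as a
# stand-in checkpoint (the notebook's own model and tokenizer would be used
# instead):
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

demo_tok = AutoTokenizer.from_pretrained("t5-small")
demo_mdl = TFAutoModelForSeq2SeqLM.from_pretrained("t5-small")
demo_inputs = demo_tok("summarize: The coffee shop in the city centre serves "
                       "cheap food and has a low customer rating.",
                       return_tensors="tf")
demo_ids = demo_mdl.generate(demo_inputs["input_ids"],
                             num_beams=4,
                             length_penalty=0.8,      # < 1.0 nudges toward shorter outputs
                             no_repeat_ngram_size=2,  # no bigram may appear twice
                             num_return_sequences=2,  # must be <= num_beams for beam search
                             use_cache=True,
                             max_length=60)
for seq in demo_ids:
    print(demo_tok.decode(seq, skip_special_tokens=True))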

# +
#Write model outputs

for param_set in params_array:

    # Returns a dictionary of model outputs for the validation set
    model_output = get_model_output(model, tokenizer, param_set, None, tf_valid_ds, None)

    v_out = model_output['validation']['output']
    ts_val = time.strftime("%Y%m%d_%H%M")
    print(ts_val)
    write_model_output(valid_ds, "validation", ts_val, v_out, write_path=exp_dir)

    # Let's Use E2E Evaluation Metrics
    scores = compute_metrics(exp_dir, base_dir, ts_val, 'validation', param_set)

    print(scores)

    save_metrics(exp_dir, ts_val, scores)
# -

# ## Example 5
# top_p (float, optional, defaults to 1.0) – If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.

# repetition_penalty (float, optional, defaults to 1.0) – The parameter for repetition penalty. 1.0 means no penalty. See this paper for more details.
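
# top_p and repetition_penalty are likewise plain generate() arguments; nucleus
# sampling only takes effect when do_sample=True. A hedged sketch with t5-small
# as a placeholder model:
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

sample_tok = AutoTokenizer.from_pretrained("t5-small")
sample_mdl = TFAutoModelForSeq2SeqLM.from_pretrained("t5-small")
sample_inputs = sample_tok("summarize: The riverside pub is family friendly "
                           "and serves English food.",
                           return_tensors="tf")
sample_ids = sample_mdl.generate(sample_inputs["input_ids"],
                                 do_sample=True,          # enable sampling
                                 top_p=0.9,               # keep the smallest token set with mass >= 0.9
                                 repetition_penalty=1.2,  # mildly discourage repeated tokens
                                 max_length=60)
print(sample_tok.decode(sample_ids[0], skip_special_tokens=True))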

# Returns a dictionary of model outputs for the validation set
model_output = get_model_output(model, tokenizer, gen_params, None, tf_valid_ds,
                                None)
# -
#Write model outputs
v_out = model_output['validation']['output']
ts_val = time.strftime("%Y%m%d_%H%M")
print(ts_val)
write_model_output(valid_ds, "validation", ts_val, v_out, write_path=exp_dir)

# +
# Let's Use E2E Evaluation Metrics
scores = compute_metrics(exp_dir, base_dir, ts_val, 'validation', gen_params)

print(scores)
# -