# The imports below were not visible in the original (truncated) snippet and
# are added so the example is self-contained; project-specific helpers
# (coref_output_folder, get_config, ResultsProcessor, get_cr_tags, logger)
# are assumed to be defined elsewhere in the module.
import datetime
import random
import dill
from typing import Any, List, Tuple

# Assumed train filename, mirroring the test filename below (the original
# line was cut off at the snippet boundary).
train_fname = coref_output_folder + "train_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
    pred_tagged_essays_train = dill.load(f)

test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
    pred_tagged_essays_test = dill.load(f)

# Test data is needed in order to get the test CRELs: the test essays loaded
# above ensure we compute metrics over the test CR labels.
config = get_config(coref_output_folder)
results_processor = ResultsProcessor(dbname="metrics_causal_model")
########################################################

logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays_train))  # should be 902

cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
cv_folds = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any, Any]]
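# Unlike Example #2 below, which builds k cross-validation folds, this example
# evaluates on a single (train, test) fold assembled from the two pickles above.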

def evaluate_model(
        collection_prefix: str,
        folds: List[Tuple[Any, Any]],
        extractor_fn_names_lst: List[str],
        cost_function_name: str,
        beta: float,
        ngrams: int,
        stemmed: bool,
        max_epochs: int,
        down_sample_rate=1.0) -> float:

    if down_sample_rate < 1.0:
        new_folds = []  # type: List[Tuple[Any, Any]]
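        # -- truncated in the original snippet; what follows is a minimal
        #    sketch of the likely intent, assuming down-sampling keeps a
        #    random fraction of each fold's training essays --
        for train_essays, test_essays in folds:
            sample_size = int(down_sample_rate * len(train_essays))
            new_folds.append((random.sample(list(train_essays), sample_size), test_essays))
        folds = new_folds

    # ... (feature extraction, training, and the returned float metric are
    #      not shown in this snippet)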
Example #2
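# NOTE: as in the first example, this snippet assumes `dill`, `datetime`, and
# the typing imports are in scope, along with the project-specific helpers
# (get_config, load_process_essays, get_cr_tags, cross_validation, logger)
# and the CV_FOLDS constant and folder paths defined elsewhere in the module.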
# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################

fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" %
            len(pred_tagged_essays))  # should be 902

cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays,
                      tag_essays_test=tagged_essays_test)
cv_folds = cross_validation(pred_tagged_essays,
                            CV_FOLDS)  # type: List[Tuple[Any, Any]]


def evaluate_model(collection_prefix: str,
                   folds: List[Tuple[Any, Any]],
                   extractor_fn_names_lst: List[str],
                   cost_function_name: str,
                   beta: float,
                   ngrams: int,
                   stemmed: bool,
                   max_epochs: int,
                   down_sample_rate=1.0) -> float:

    if down_sample_rate < 1.0:
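        # (the original snippet ends here; presumably the body continues as in
        #  the first example: build new_folds by randomly down-sampling each
        #  fold's training essays, then replace folds before training)
        pass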