Example #1
def load_data(data_size, features):
    data = {}
    if data_size != 263088:  # 263088 is apparently the full training-set size; anything else is subsampled
        data['train_x'], data['train_y'] = load_postprocessed_feature_data(features, TRAIN_SET_NAME, sample=data_size)
    else:
        data['train_x'], data['train_y'] = load_postprocessed_feature_data(features, TRAIN_SET_NAME)
    assert data['train_x'].shape[0] == data_size

    data['heldout_x'], data['heldout_y'] = load_postprocessed_feature_data(features, HELDOUT_SET_NAME)
    data['test_x'], data['test_y'] = load_postprocessed_feature_data(features, TEST_SET_NAME)
    assert data['train_x'].shape[1] == data['test_x'].shape[1]
    return data
Example #2
def load_data(cutoff_value=5):
    data_path = None  # cutoff_value == 5 is the default; the path will be resolved automatically
    if cutoff_value in (0, 3):
        data_path = SETTINGS.get(
            'paths', 'dataFeaturesPennPostprocessed{}'.format(cutoff_value))
    features = get_features_for_set_names(('orig', ))
    data = {}
    data['train_x'], data['train_y'] = load_postprocessed_feature_data(
        features, TRAIN_SET_NAME, data_path=data_path)
    assert data['train_x'].shape[0] == DATA_SIZE

    data['test_x'], data['test_y'] = load_postprocessed_feature_data(
        features, TEST_SET_NAME, data_path=data_path)
    assert data['train_x'].shape[1] == data['test_x'].shape[1]
    return data
Example #3
def load_data(data_size, cutoff_value):
    data_path = None  # cutoff_value == 5 is the default; the path will be resolved automatically
    if cutoff_value in (0, 3):
        data_path = SETTINGS.get('paths', 'dataFeaturesPennPostprocessed{}'.format(cutoff_value))
    data = {}
    features = get_features_for_set_names(('orig',))
    if data_size != 263088:
        data['train_x'], data['train_y'] = load_postprocessed_feature_data(features, TRAIN_SET_NAME, sample=data_size, data_path=data_path)
    else:
        data['train_x'], data['train_y'] = load_postprocessed_feature_data(features, TRAIN_SET_NAME, data_path=data_path)
    assert data['train_x'].shape[0] == data_size

    data['heldout_x'], data['heldout_y'] = load_postprocessed_feature_data(features, HELDOUT_SET_NAME, data_path=data_path)
    data['test_x'], data['test_y'] = load_postprocessed_feature_data(features, TEST_SET_NAME, data_path=data_path)
    assert data['train_x'].shape[1] == data['test_x'].shape[1] == data['heldout_x'].shape[1]
    return data
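For orientation, a minimal, hypothetical usage sketch of this variant (not part of the original source). It assumes SETTINGS and the feature/loading helpers are importable from the surrounding module; 263088 is apparently the full training-set size, so passing it loads every row while smaller values subsample, and cutoff_value selects a dedicated preprocessed data path only for cutoffs 0 and 3.

data = load_data(data_size=20000, cutoff_value=5)    # subsampled train set, default data path
full = load_data(data_size=263088, cutoff_value=3)   # full train set, cutoff-3 data path
print(data['train_x'].shape, data['heldout_x'].shape, data['test_x'].shape)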
Example #4
import numpy as np  # needed by softmax() and cost() below

FEATURES = [  # feature names used below; the list may be truncated at the top of this snippet
    'a_words_after_np', 'a_words_before_head', 'a_words_before_np',
    'b_head_proper', 'b_head_pos_simple', 'b_object_form',
    'b_pos_after_head_as_list', 'b_pos_before_head_as_list',
    'b_pp_object_form', 'b_postmodification_type', 'b_referent',
    'b_words_after_head_as_list', 'b_words_after_np_as_list',
    'b_words_before_head_as_list', 'b_words_before_np_as_list',
    'c_countability_bnc', 'd_head_form_embeddings', 'e_kenlm_ggl_5_lc_nbs'
]
TRAIN_SET_NAME = 'train'
TEST_SET_NAME = 'heldout'  # the heldout split is used as the test set in this example
TRAINING_DATA_SIZE = 20000


def softmax(z):
    return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)


def cost(Y, T):
    # Y - predicted matrix
    # T - target matrix
    return -np.multiply(T, np.log(Y)).sum()
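
# Hypothetical sanity check, not part of the original example: softmax() turns
# each row of logits into a probability distribution, and cost() is the summed
# cross-entropy of the predictions Y against one-hot targets T.
#   p = softmax(np.array([[1.0, 2.0, 3.0]]))   # -> [[0.090, 0.245, 0.665]], row sums to 1
#   cost(p, np.array([[0.0, 0.0, 1.0]]))       # -> -log(0.665) ~= 0.41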


if __name__ == '__main__':
    data = {}
    data['train_x'], data['train_y'] = load_postprocessed_feature_data(
        FEATURES, TRAIN_SET_NAME, sample=TRAINING_DATA_SIZE)
    data['test_x'], data['test_y'] = load_postprocessed_feature_data(
        FEATURES, TEST_SET_NAME)
    assert data['train_x'].shape[1] == data['test_x'].shape[1]