]
# One (n_samples, n_intents) multi-label target matrix per CV split.
test_classes = []
for split in range(n_splits):
    test_classes.append(test_data[split].loc[:, intents].values)

# Sentence embeddings per split: encode all training splits first, then all
# test splits, preserving the original call order on the InferSent model.
sent_emb_train = []
for split in range(n_splits):
    sent_emb_train.append(
        infersent.encode(train_requests[split], tokenize=True))

sent_emb_test = []
for split in range(n_splits):
    sent_emb_test.append(
        infersent.encode(test_requests[split], tokenize=True))

# Random hyper-parameter search: draw network parameters, track the best
# mean F1 seen so far.  NOTE(review): this fragment is truncated mid-call in
# the visible source — the gener_network_parameters(...) call never closes.
if FIND_BEST_PARAMS:
    print("___TO FIND APPROPRIATE PARAMETERS____")

    # Recognizer instance used only for the parameter search below.
    FindBestRecognizer = IntentRecognizer(
        intents, fasttext_embedding_model=fasttext_model, n_splits=n_splits)

    best_mean_f1 = 0.
    best_network_params = dict()
    best_learning_params = dict()
    params_f1 = []

    # 20 random-search iterations; regularization coefficients are sampled
    # from [1e-4, 1e-2] on a log scale.
    for p in range(20):
        FindBestRecognizer.gener_network_parameters(
            coef_reg_cnn={
                'range': [0.0001, 0.01],
                'scale': 'log'
            },
            coef_reg_den={
                'range': [0.0001, 0.01],
                'scale': 'log'
Ejemplo n.º 2
0
    # Build per-split training subsamples.  NOTE(review): fragment is cut at
    # the start — train_requests_part is appended to below but its
    # initialization is not visible here.
    train_classes_part = []
    for model_ind in range(n_splits):
        # Concatenate the sampled original requests with their paraphrases
        # (1-D string arrays), doubling the training inputs for this split.
        requests = np.hstack(
            (train_requests[model_ind][train_index_parts[model_ind]],
             train_paraphrases[model_ind][train_index_parts[model_ind]]))
        train_requests_part.append(requests)
        # Duplicate the label rows so they stay aligned with the doubled
        # (request, paraphrase) inputs.
        classes = np.vstack(
            (train_classes[model_ind][train_index_parts[model_ind]],
             train_classes[model_ind][train_index_parts[model_ind]]))
        train_classes_part.append(classes)

    # Random hyper-parameter search (same pattern as the other fragments).
    # NOTE(review): truncated mid-call — the coef_reg_den dict never closes
    # in the visible source.
    if FIND_BEST_PARAMS:
        print("___TO FIND APPROPRIATE PARAMETERS____")

        # Recognizer used only to sample and evaluate parameter candidates.
        FindBestRecognizer = IntentRecognizer(
            intents,
            fasttext_embedding_model=fasttext_model,
            n_splits=n_splits)

        best_mean_f1 = 0.
        best_network_params = dict()
        best_learning_params = dict()
        params_f1 = []

        # 20 iterations; regularization drawn log-uniformly from [1e-4, 1e-2].
        for p in range(20):
            FindBestRecognizer.gener_network_parameters(
                coef_reg_cnn={
                    'range': [0.0001, 0.01],
                    'scale': 'log'
                },
                coef_reg_den={
                    'range': [0.0001, 0.01],
        # NOTE(review): fragment cut at the start — this append belongs to a
        # loop whose header is not visible.
        train_index_parts.append(train_part)

    # Assemble the per-split training subsample: requests doubled with their
    # paraphrases, labels duplicated to stay aligned.
    train_requests_part = []
    train_classes_part = []
    for model_ind in range(n_splits):
        requests = np.hstack((train_requests[model_ind][train_index_parts[model_ind]],
                              train_paraphrases[model_ind][train_index_parts[model_ind]]))
        train_requests_part.append(requests)
        classes = np.vstack((train_classes[model_ind][train_index_parts[model_ind]],
                             train_classes[model_ind][train_index_parts[model_ind]]))
        train_classes_part.append(classes)

    f1_mean_scores = []

    # Train 16 single-split recognizers with the chosen parameter pair and
    # report train/test F1 for each run.
    for p in range(16):
        AverageRecognizer = IntentRecognizer(intents, fasttext_embedding_model=fasttext_model, n_splits=1)
        # params[n_size] presumably holds a (network_params, learning_params)
        # pair — confirm against where `params` is built.
        AverageRecognizer.init_network_parameters([params[n_size][0]])
        AverageRecognizer.init_learning_parameters([params[n_size][1]])
        AverageRecognizer.init_model(cnn_word_model, text_size, embedding_size, kernel_sizes, add_network_params=None)

        AverageRecognizer.fit_model(train_requests_part, train_classes_part, to_use_kfold=False, verbose=True)

        # range(1) matches n_splits=1 here: each vstack wraps a single array.
        train_predictions = AverageRecognizer.predict(train_requests_part)
        AverageRecognizer.report(np.vstack([train_classes_part[i] for i in range(1)]),
                                 np.vstack([train_predictions[i] for i in range(1)]),
                                 mode='TRAIN')

        test_predictions = AverageRecognizer.predict(test_requests)

        # NOTE(review): truncated mid-call — the report(...) call never
        # closes in the visible source.
        f1_scores = AverageRecognizer.report(np.vstack([test_classes[i] for i in range(1)]),
                                             np.vstack([test_predictions[i] for i in range(1)]),
Ejemplo n.º 4
0
# Target labels for the fact/opinion classification task.
intents = ['is_fact', 'is_opinion', 'ignore']

# Extract per-split inputs (1-D arrays of sentences) and multi-label targets
# ((n_samples, n_intents) arrays) from the prepared DataFrames.  The four
# lists are filled in the same order the original built them.
train_requests = []
for split in range(n_splits):
    train_requests.append(train_data[split].loc[:, 'sentence'].values)

train_classes = []
for split in range(n_splits):
    train_classes.append(train_data[split].loc[:, intents].values)

test_requests = []
for split in range(n_splits):
    test_requests.append(test_data[split].loc[:, 'sentence'].values)

test_classes = []
for split in range(n_splits):
    test_classes.append(test_data[split].loc[:, intents].values)

# Random hyper-parameter search for the fact/opinion task.
# NOTE(review): truncated mid-call — gener_network_parameters(...) never
# closes in the visible source.
if FIND_BEST_PARAMS:
    print("___TO FIND APPROPRIATE PARAMETERS____")

    FindBestRecognizer = IntentRecognizer(
        intents, fasttext_embedding_model=fasttext_model, n_splits=n_splits)

    best_mean_f1 = 0.
    best_network_params = dict()
    best_learning_params = dict()
    params_f1 = []

    # 100 iterations here (vs. 20 elsewhere) and a wider range [1e-3, 1e-1].
    for p in range(100):
        FindBestRecognizer.gener_network_parameters(
            coef_reg_cnn={
                'range': [0.001, 0.1],
                'scale': 'log'
            },
            coef_reg_den={
                'range': [0.001, 0.1],
                'scale': 'log'
    'SearchCreativeWork', 'SearchScreeningEvent'
]

# Extract per-split request texts and (n_samples, n_intents) label matrices
# from the prepared DataFrames, in the same order the original built them.
train_requests = []
for split in range(n_splits):
    train_requests.append(train_data[split].loc[:, 'request'].values)

train_classes = []
for split in range(n_splits):
    train_classes.append(train_data[split].loc[:, intents].values)

test_requests = []
for split in range(n_splits):
    test_requests.append(test_data[split].loc[:, 'request'].values)

test_classes = []
for split in range(n_splits):
    test_classes.append(test_data[split].loc[:, intents].values)

# Random hyper-parameter search (SNIPS-style intents).
# NOTE(review): truncated mid-call — gener_network_parameters(...) never
# closes in the visible source.
if FIND_BEST_PARAMS:
    print("___TO FIND APPROPRIATE PARAMETERS____")

    FindBestRecognizer = IntentRecognizer(
        intents, fasttext_embedding_model=fasttext_model, n_splits=n_splits)

    best_mean_f1 = 0.
    best_network_params = dict()
    best_learning_params = dict()
    params_f1 = []

    # 100 iterations; regularization sampled log-uniformly from [1e-4, 1e-2].
    for p in range(100):
        FindBestRecognizer.gener_network_parameters(
            coef_reg_cnn={
                'range': [0.0001, 0.01],
                'scale': 'log'
            },
            coef_reg_den={
                'range': [0.0001, 0.01],
                'scale': 'log'
Ejemplo n.º 6
0
# SNIPS intent labels for this classifier.
intents = [
    'AddToPlaylist', 'BookRestaurant', 'GetWeather',
    'PlayMusic', 'RateBook', 'SearchCreativeWork',
    'SearchScreeningEvent',
]

# Per-split request texts, multi-label intent targets, and NER tag columns,
# filled in the same order the original comprehensions evaluated them.
train_requests = []
for split in range(n_splits):
    train_requests.append(train_data[split].loc[:, 'request'].values)

train_classes = []
for split in range(n_splits):
    train_classes.append(train_data[split].loc[:, intents].values)

test_requests = []
for split in range(n_splits):
    test_requests.append(test_data[split].loc[:, 'request'].values)

test_classes = []
for split in range(n_splits):
    test_classes.append(test_data[split].loc[:, intents].values)

train_ner = []
for split in range(n_splits):
    train_ner.append(train_data[split].loc[:, 'ner_tag'].values)

test_ner = []
for split in range(n_splits):
    test_ner.append(test_data[split].loc[:, 'ner_tag'].values)

# Random hyper-parameter search for the dual-input (word embedding + NER tag)
# CNN.  NOTE(review): truncated mid-call — gener_learning_parameters(...)
# never closes in the visible source.
if FIND_BEST_PARAMS:
    print("___TO FIND APPROPRIATE PARAMETERS____")

    FindBestRecognizer = IntentRecognizer(intents, fasttext_embedding_model=fasttext_model, n_splits=n_splits)

    best_mean_f1 = 0.
    best_network_params = dict()
    best_learning_params = dict()
    params_f1 = []

    # 20 iterations; separate regularization for the tag-CNN and the
    # embedding-CNN branches, discrete ranges for filter counts / dense size.
    for p in range(20):
        FindBestRecognizer.gener_network_parameters(coef_reg_cnn_tag={'range': [0.0001,0.01], 'scale': 'log'},
                                                    coef_reg_cnn_emb={'range': [0.0001, 0.01],  'scale': 'log'},
                                                    coef_reg_den={'range': [0.0001,0.01], 'scale': 'log'},
                                                    filters_cnn_emb={'range': [200,300], 'discrete': True},
                                                    filters_cnn_tag={'range': [100,200], 'discrete': True},
                                                    dense_size={'range': [50,150], 'discrete': True},
                                                    dropout_rate={'range': [0.4,0.6]})
        FindBestRecognizer.gener_learning_parameters(batch_size={'range': [16,64], 'discrete': True},