def compare_classes_amount(reviews_data, rates_data):
    """Print the ``train_and_test`` result for a reduced and the full label set.

    The inputs are first passed through ``split_by_rates``; the pair it
    returns is evaluated under the heading ``two classes:``. The untouched
    inputs are then evaluated under the heading ``four classes:``. Both runs
    use the module-level default settings and ``classifier_name``.

    Args:
        reviews_data: Review samples, forwarded to ``split_by_rates`` and
            ``train_and_test``.
        rates_data: Labels matching ``reviews_data``.

    Returns:
        None. Results are written to stdout only.
    """

    def _report(heading, reviews, rates):
        # Defaults are looked up at call time, exactly once per evaluation.
        print(heading)
        score = train_and_test(
            reviews,
            rates,
            min_df=default_min_df,
            max_df=default_max_df,
            ngram_range=default_n_gram,
            alpha=default_alpha,
            clf=classifier_name,
            max_features=default_max_features,
        )
        print('f1 score:', score)

    # NOTE(review): presumably split_by_rates collapses the ratings into two
    # classes (per the printed heading) -- confirm against its definition.
    reduced_reviews, reduced_rates = split_by_rates(reviews_data, rates_data)

    _report('two classes:', reduced_reviews, reduced_rates)
    _report('four classes:', reviews_data, rates_data)
def learn_model():
    """Load the training and prediction data and hand them to ``train_and_test``.

    Both data sets are read with ``get_representation_data`` from
    ``AVAILABLE_DATA_DIRECTORIES[3]``; reviews and ratings are then extracted
    from each and passed positionally to ``train_and_test``.

    Returns:
        None. The value returned by ``train_and_test`` is discarded.
    """
    # NOTE(review): the training and prediction sets are loaded from the same
    # directory entry (index 3) -- confirm this is intentional.
    training_set = get_representation_data(AVAILABLE_DATA_DIRECTORIES[3])
    prediction_set = get_representation_data(AVAILABLE_DATA_DIRECTORIES[3])

    # Tuples are evaluated left to right, so the helper call order is:
    # training reviews, training ratings, prediction reviews, prediction ratings.
    train_reviews, train_ratings = (
        get_reviews_from_data_set(training_set),
        get_ratings_from_data_set(training_set),
    )
    prediction_reviews, prediction_ratings = (
        get_reviews_from_data_set(prediction_set),
        get_ratings_from_data_set(prediction_set),
    )

    # Parameter search is currently switched off:
    # optimize_parameters(train_reviews, train_ratings)
    train_and_test(
        train_reviews,
        train_ratings,
        prediction_reviews,
        prediction_ratings,
    )
def test_classifiers(reviews_data, rates_data):
    """Print the ``train_and_test`` result for three classifiers in turn.

    The module-level ``classifier_name`` is evaluated first, followed by
    ``'SVC_linear'`` and ``'SVC_rbf'``. Every run uses the module-level
    default settings, which are echoed before each score.

    Args:
        reviews_data: Review samples forwarded to ``train_and_test``.
        rates_data: Labels matching ``reviews_data``.

    Returns:
        None. Results are written to stdout only.
    """

    def _settings_line():
        # Rebuilt on every use so the current module-level values are shown.
        return (
            f'min_df: {default_min_df} | max_df: {default_max_df} | ngram: {default_n_gram} | alpha: {default_alpha} '
            f'| max_features: {default_max_features}'
        )

    def _score(clf_name):
        return train_and_test(
            reviews_data,
            rates_data,
            min_df=default_min_df,
            max_df=default_max_df,
            ngram_range=default_n_gram,
            alpha=default_alpha,
            clf=clf_name,
            max_features=default_max_features,
        )

    # Default classifier (presumably MultinomialNB -- confirm against the
    # definition of classifier_name). Its header layout differs from the SVC
    # runs: the blank line precedes the settings line, not the name.
    print(classifier_name)
    print('\n' + _settings_line())
    print('f1 score:', _score(classifier_name))

    for svc_variant in ('SVC_linear', 'SVC_rbf'):
        print('\n', svc_variant)
        print(_settings_line())
        print('f1 score:', _score(svc_variant))
def test_tuning(reviews_data, rates_data):
    """Print the ``train_and_test`` result before and after parameter tuning.

    The ``Before:`` run uses the module-level default settings. The ``After:``
    run overrides ``min_df``, ``max_df``, ``ngram_range`` and ``alpha`` with
    fixed values while keeping ``classifier_name`` and
    ``default_max_features``.

    Args:
        reviews_data: Review samples forwarded to ``train_and_test``.
        rates_data: Labels matching ``reviews_data``.

    Returns:
        None. Results are written to stdout only.
    """
    # Hand-picked values used for the "After" run.
    tuned_settings = {
        'min_df': 2,
        'max_df': 0.6,
        'ngram_range': (1, 2),
        'alpha': 0.01,
    }

    print('Before:')
    baseline_score = train_and_test(
        reviews_data,
        rates_data,
        min_df=default_min_df,
        max_df=default_max_df,
        ngram_range=default_n_gram,
        alpha=default_alpha,
        clf=classifier_name,
        max_features=default_max_features,
    )
    print('f1 score:', baseline_score)

    print('After:')
    tuned_score = train_and_test(
        reviews_data,
        rates_data,
        **tuned_settings,
        clf=classifier_name,
        max_features=default_max_features,
    )
    print('f1 score:', tuned_score)
def run_parameters_test(reviews, rates, classifier, min_df, max_df, n_gram,
                        alpha, max_features, test_type):
    """Run one ``train_and_test`` evaluation and print its settings and result.

    Args:
        reviews: Review samples forwarded to ``train_and_test``.
        rates: Labels matching ``reviews``.
        classifier: Value passed to ``train_and_test`` as ``clf`` and echoed
            in the header line.
        min_df: Forwarded as ``min_df``.
        max_df: Forwarded as ``max_df``.
        n_gram: Forwarded as ``ngram_range``.
        alpha: Forwarded as ``alpha``.
        max_features: Forwarded as ``max_features``.
        test_type: Label interpolated into the header as ``"<test_type> test"``.

    Returns:
        None. Results are written to stdout only.
    """
    header = f'{test_type} test'
    settings_line = (
        f'min_df: {min_df} | max_df: {max_df} | ngram: {n_gram} | alpha: {alpha} '
        f'| max_features: {max_features} '
    )

    print('\n', classifier, header)
    print(settings_line)

    score = train_and_test(
        reviews,
        rates,
        min_df=min_df,
        max_df=max_df,
        ngram_range=n_gram,
        alpha=alpha,
        clf=classifier,
        max_features=max_features,
    )
    print('f1 score:', score)
def test_size_and_random_state(reviews_data, rates_data):
    """Print ``train_and_test`` results while varying one split option at a time.

    Starting from a baseline of ``test_size=0.2``, ``train_size=0.25`` and
    ``random_state=40``, each option in turn is swept over three values while
    the other two stay at the baseline. Before every score the name of the
    option being varied is printed.

    Args:
        reviews_data: Review samples forwarded to ``train_and_test``.
        rates_data: Labels matching ``reviews_data``.

    Returns:
        None. Results are written to stdout only.
    """
    baseline = {'test_size': 0.2, 'train_size': 0.25, 'random_state': 40}

    # (option being varied, values to try) -- order matches the printed output.
    sweeps = (
        ('test_size', (0.1, 0.2, 0.3)),
        ('train_size', (0.1, 0.2, 0.3)),
        ('random_state', (5, 15, 25)),
    )

    for option, candidates in sweeps:
        for candidate in candidates:
            # Override only the swept option; the rest stay at the baseline.
            split_options = {**baseline, option: candidate}
            print(option)
            score = train_and_test(reviews_data, rates_data, **split_options)
            print('f1 score:', score)