        'raw_data/models/reduced_search/best_model_{0}.pkl'.format(lang))
    if not my_file.exists():
        continue
    else:
        # Load the best fitted model
        with open(
                'raw_data/models/reduced_search/best_model_{0}.pkl'.format(
                    lang), 'rb') as file:
            best_model = joblib.load(file)
        # Load best model parameters
        with open(
                'raw_data/experiments/reduced_search/best_model_parameters_{0}.pkl'
                .format(lang), 'rb') as file:
            best_params = joblib.load(file)
        # Build the data set and evaluate on a held-out test split
        dataset = mlconjug3.DataSet(mlconjug3.Verbiste(language=lang).verbs)
        dataset.split_data(proportion=0.95)
        predicted = best_model.predict(dataset.test_input)
        predicted2 = best_model.predict(dataset.verbs_list)
        score = len([(a, b) for a, b in zip(predicted, dataset.test_labels)
                     if a == b]) / len(predicted)
        misses = len([(a, b) for a, b in zip(predicted, dataset.test_labels)
                      if a != b])
        entries = len(predicted)
        print('The score of the {0} best model is {1} on the test set.'.format(
            lang, score))
        score2 = len([(a, b)
langs = ('ro', 'it', 'en', 'es', 'pt', 'fr')

if __name__ == "__main__":
    # multiprocessing requires the fork to happen in a __main__ protected
    # block
    for lang in langs:
        my_file = Path(
            'raw_data/models/reduced_search/best_model_{0}.pkl'.format(lang))
        if my_file.is_file():
            continue
        else:
            # ###################################################################
            # Initialize Data Set
            dataset = mlconjug3.DataSet(
                mlconjug3.Verbiste(language=lang).verbs)

            # ###################################################################
            # Define a pipeline.
            # Transforms the dataset with CountVectorizer. We pass the function
            # extract_verb_features to the CountVectorizer.
            ngrange = (2, 7)
            vectorizer = mlconjug3.CountVectorizer(
                analyzer=partial(mlconjug3.extract_verb_features,
                                 lang=lang,
                                 ngram_range=ngrange),
                binary=True)

            # Feature reduction
            feature_reductor = mlconjug3.SelectFromModel(
                mlconjug3.LinearSVC(random_state=42,