def full_cycle(metric):
    """Evaluate the topic model for ``metric`` and write the resulting row.

    The row is whatever ``evaluate_topic_model(metric)`` returns, updated in
    place with the mapping obtained from ``Constants.get_properties_copy()``.
    The same row is handed to ``ETLUtils.write_row_to_csv`` and
    ``ETLUtils.write_row_to_json``; both file names come from
    ``Constants.generate_file_name`` called with ``Constants.RESULTS_FOLDER``.

    :param metric: value forwarded to the file-name generator and to
        ``evaluate_topic_model``
    """
    # The CSV name is generated before the JSON name.
    output_files = {}
    for extension in ('csv', 'json'):
        output_files[extension] = Constants.generate_file_name(
            metric, extension, Constants.RESULTS_FOLDER, None, None, False)

    # The JSON name is printed first, then the CSV name.
    print(output_files['json'])
    print(output_files['csv'])

    run_properties = Constants.get_properties_copy()
    row = evaluate_topic_model(metric)
    print(row)

    # Keys coming from the properties overwrite equally named result keys.
    row.update(run_properties)

    ETLUtils.write_row_to_csv(output_files['csv'], row)
    ETLUtils.write_row_to_json(output_files['json'], row)
def full_cycle(metric):
    """Run one topic-model evaluation and store its results as CSV and JSON.

    ``evaluate_topic_model(metric)`` supplies the results, which are then
    updated with the mapping returned by ``Constants.get_properties_copy()``
    and passed to ``ETLUtils.write_row_to_csv`` and
    ``ETLUtils.write_row_to_json``.

    :param metric: value used both to name the output files and as the
        argument of ``evaluate_topic_model``
    """
    def results_file(extension):
        # Only the extension differs between the two output file names.
        return Constants.generate_file_name(
            metric, extension, Constants.RESULTS_FOLDER, None, None, False)

    csv_path = results_file('csv')
    json_path = results_file('json')
    for path in (json_path, csv_path):
        print(path)

    settings = Constants.get_properties_copy()
    outcome = evaluate_topic_model(metric)
    print(outcome)

    # Printed before merging, so the output above shows the raw results only.
    outcome.update(settings)

    ETLUtils.write_row_to_csv(csv_path, outcome)
    ETLUtils.write_row_to_json(json_path, outcome)
def full_cycle():
    """Estimate classifier error, tune the best classifier and save results.

    Steps, in order:

    1. Plant the random seeds, load and preprocess the records, and turn
       them into ``x_matrix`` / ``y_vector`` with ``transform``.
    2. Error estimation: for every ``(classifier, params)`` entry of
       ``PARAM_GRID_MAP`` compute the mean of ``error_estimation(...)`` and
       remember the classifier with the highest mean score.
    3. Model selection: run ``model_selection`` with the parameter grid of
       that classifier and print its ``best_score_`` and ``best_params_``.
    4. Write each error-estimation result as a row to
       ``classifier_results.csv`` / ``classifier_results.json`` (names built
       by prefixing ``Constants.RESULTS_FOLDER``) and save the best
       parameters with ``save_parameters``.

    :raises KeyError: if no classifier could be selected, i.e.
        ``PARAM_GRID_MAP`` is empty or no score compared greater than
        negative infinity (for instance because every score was NaN)
    """
    plant_random_seeds()
    my_records = load_records()
    preprocess_records(my_records)
    x_matrix, y_vector = transform(my_records)
    count_specific_generic(my_records)

    # Error estimation
    error_estimation_results = []
    best_classifier = None
    # Start below every real score. A 0.0 starting value would leave
    # best_classifier as None whenever no score is strictly positive
    # (possible with e.g. negated-loss metrics), making the grid lookup
    # further down fail.
    best_score = float('-inf')

    for classifier, params in PARAM_GRID_MAP.items():
        cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
        score = error_estimation(
            x_matrix, y_vector, params, cv, SCORE_METRIC).mean()
        error_estimation_results.append({
            'classifier': classifier,
            'accuracy': score,
            Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        })
        print('%s score: %f' % (classifier, score))

        if score > best_score:
            best_score = score
            best_classifier = classifier

    if best_classifier is None:
        # Same exception type the failed PARAM_GRID_MAP lookup used to
        # raise, but with a message that explains what went wrong.
        raise KeyError(
            'No classifier could be selected: PARAM_GRID_MAP is empty or '
            'no classifier produced a comparable score')

    # Model selection
    cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
    grid_search_cv = model_selection(
        x_matrix, y_vector, PARAM_GRID_MAP[best_classifier], cv,
        SCORE_METRIC)
    print('%s: %f' % (SCORE_METRIC, grid_search_cv.best_score_))
    print('best params', grid_search_cv.best_params_)

    csv_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.csv'
    json_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.json'

    # Results are written one row at a time through ETLUtils.write_row_to_*.
    for result in error_estimation_results:
        ETLUtils.write_row_to_csv(csv_file_name2, result)
        ETLUtils.write_row_to_json(json_file_name2, result)

    best_hyperparams_file_name = Constants.generate_file_name(
        'best_hyperparameters', 'json', Constants.CACHE_FOLDER, None, None,
        False)
    save_parameters(best_hyperparams_file_name, grid_search_cv.best_params_)
def full_cycle():
    """Run the classifier pipeline: error estimation, tuning, persistence.

    The records are loaded, preprocessed and transformed into ``x_matrix``
    and ``y_vector``. Every entry of ``PARAM_GRID_MAP`` is then scored with
    ``error_estimation`` (mean of the returned scores), and the classifier
    with the highest mean score is tuned through ``model_selection``.

    Each per-classifier score is written as one row to
    ``classifier_results.csv`` and ``classifier_results.json`` (file names
    are ``Constants.RESULTS_FOLDER`` plus that suffix). The ``best_params_``
    of the model-selection result are stored with ``save_parameters``.

    :raises KeyError: if no classifier could be selected, i.e.
        ``PARAM_GRID_MAP`` is empty or no score compared greater than
        negative infinity (for instance because every score was NaN)
    """
    plant_random_seeds()
    my_records = load_records()
    preprocess_records(my_records)
    x_matrix, y_vector = transform(my_records)
    count_specific_generic(my_records)

    # Error estimation
    error_estimation_results = []
    best_classifier = None
    # Negative infinity instead of 0.0: with a zero starting value a run in
    # which no score is strictly positive would never pick a classifier and
    # the PARAM_GRID_MAP lookup below would be attempted with None.
    best_score = float('-inf')

    for classifier, params in PARAM_GRID_MAP.items():
        cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
        score = error_estimation(
            x_matrix, y_vector, params, cv, SCORE_METRIC).mean()
        error_estimation_results.append(
            {
                'classifier': classifier,
                'accuracy': score,
                Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
            }
        )
        print('%s score: %f' % (classifier, score))

        if score > best_score:
            best_score = score
            best_classifier = classifier

    if best_classifier is None:
        # Keep the exception type of the failed mapping lookup this guard
        # replaces, but say why nothing was selected.
        raise KeyError(
            'No classifier could be selected: PARAM_GRID_MAP is empty or '
            'no classifier produced a comparable score')

    # Model selection
    cv = StratifiedKFold(Constants.CROSS_VALIDATION_NUM_FOLDS)
    grid_search_cv = model_selection(
        x_matrix, y_vector, PARAM_GRID_MAP[best_classifier], cv,
        SCORE_METRIC)
    print('%s: %f' % (SCORE_METRIC, grid_search_cv.best_score_))
    print('best params', grid_search_cv.best_params_)

    csv_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.csv'
    json_file_name2 = Constants.RESULTS_FOLDER + 'classifier_results.json'

    # One write per error-estimation result, to both output formats.
    for result in error_estimation_results:
        ETLUtils.write_row_to_csv(csv_file_name2, result)
        ETLUtils.write_row_to_json(json_file_name2, result)

    best_hyperparams_file_name = Constants.generate_file_name(
        'best_hyperparameters', 'json', Constants.CACHE_FOLDER, None, None,
        False)
    save_parameters(best_hyperparams_file_name, grid_search_cv.best_params_)