def main(on_switch=False):
    """Run the data-preparation stage when explicitly switched on.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    preparer = DataPreparationNameEthnicityProject()
    preparer.data_preparation_steps()

    # The completion alert is governed by the instance's own control panel,
    # read only after the preparation steps have finished.
    done_requested = preparer.control_panel['done_switch']
    if done_requested:
        hf.done_alert()
def main(on_switch=False):
    """Run the feature-creation stage when explicitly switched on.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    feature_builder = FeatureCreationNameEthnicityProject()
    feature_builder.feature_generation_steps()

    # `local_control_panel` is not defined in this function; it is read from
    # the enclosing scope (presumably module level -- confirm).
    done_requested = local_control_panel['done_switch']
    if done_requested:
        hf.done_alert()
def main(on_switch=False):
    """Run the binary-class pipeline with fixed (pre-tuned) estimators.

    Every run is configured with ``ml_process_on_test_data_switch`` enabled
    and feature selection / cross validation disabled.  Which combinations
    are executed is decided by the four ``run_on_*`` flags set below.

    Compared with the previous version, the "unfeatured, full dataset"
    branch no longer iterates over ``feature_set_list``: the loop variable
    was never used, so each extra feature set only repeated an identical
    run.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    # Function-scope import keeps this block self-contained.
    from itertools import product

    save_switch = False  # WARNING: Will overwrite existing if True
    run_on_subsampled_data = True
    run_on_full_data = False
    run_on_unfeatured_data = False
    run_on_featured_data = True

    # Eg, full list >> [1, 5, 10, 50, 100, 500]
    nk_list = [5]

    # Eg, Full list >> ['dummy', 'sex_only', 'name_basic_only',
    # 'name_substring_only', 'name_numeric_only', 'name_metaphone_only',
    # 'name_all', 'loc_basic_only', 'loc_sep_entity_only',
    # 'loc_substring_only', 'loc_all', 'name_all_loc_all']
    feature_set_list = ['name_all_loc_all_reduced']

    ml_algo_dict = {
        'LR': {
            'clf': LogisticRegression(
                tol=3.359818286283781e-05,
                solver='liblinear',
                penalty='l1',
                max_iter=4000,
                class_weight=None,
                C=4.281332398719396),
        },
        'SVC_LINEAR': {
            'clf': CalibratedClassifierCV(LinearSVC(penalty='l2', C=0.01)),
        },
        'NB': {
            'clf': BernoulliNB(),
        },
    }

    # Eg, Full list >> ['ab', 'fn', 'metis', 'inuit', 'ch', 'ja', 'en', 'fr',
    # 'ir', 'it', 'rus', 'sc', 'others']
    target_set_list = ['ch', 'ab']

    def run_pipeline(algo_key, algo_val, target_label, nk, use_featured,
                     feature_set):
        """Print a timestamp, then configure and execute one pipeline run."""
        print('>> Current time:', datetime.datetime.now())
        pipeline = ml_pipeline.MachineLearningNameEthnicityProjectBinaryClass(
            control_panel={
                'save_result_switch': save_switch,  # WARNING: Will overwrite existing
                # NOTE(review): stays False even for the subsampled branches,
                # exactly as before -- confirm this is intended.
                'use_subsampled_df_switch': False,  # WARNING: Switch to False in production
                'use_subsampled_df_nk': nk,
                'use_featured_df_switch': use_featured,
                'use_feature_set': feature_set,
                'feature_selection_switch': False,
                'cross_validation_switch': False,
                'ml_process_on_test_data_switch': True,
                'ml_process_on_training_data_switch': False,
                'ml_process_on_ext_data_switch': False,
                'ml_algo': [algo_key, algo_val],
                'ml_algo_param_grid': None,
                'binary_target_label': target_label,
                'eval_score': None,
                'random_state': 888,
            })
        pipeline.machine_learning_steps()

    if run_on_subsampled_data:
        # Loop through subsampling n set with unfeatured data
        if run_on_unfeatured_data:
            for nk, (algo_key, algo_val), target_label in product(
                    nk_list, ml_algo_dict.items(), target_set_list):
                run_pipeline(algo_key, algo_val, target_label,
                             nk=nk, use_featured=False, feature_set=[])

        # Loop through feature set and subsampling n set
        if run_on_featured_data:
            for feature_set, nk, (algo_key, algo_val), target_label in product(
                    feature_set_list, nk_list, ml_algo_dict.items(),
                    target_set_list):
                run_pipeline(algo_key, algo_val, target_label,
                             nk=nk, use_featured=True,
                             feature_set=feature_set)

    if run_on_full_data:
        # Run once using unfeatured, full dataset.  No feature-set loop here:
        # the configuration does not depend on the feature set.
        if run_on_unfeatured_data:
            for (algo_key, algo_val), target_label in product(
                    ml_algo_dict.items(), target_set_list):
                run_pipeline(algo_key, algo_val, target_label,
                             nk='none', use_featured=False, feature_set=[])

        # Run once using featured, full dataset
        if run_on_featured_data:
            for feature_set, (algo_key, algo_val), target_label in product(
                    feature_set_list, ml_algo_dict.items(), target_set_list):
                # NOTE(review): nk is [] here but 'none' in the unfeatured
                # branch above; both values are kept as they were.
                run_pipeline(algo_key, algo_val, target_label,
                             nk=[], use_featured=True,
                             feature_set=feature_set)

    # `local_control_panel` is read from the enclosing scope.
    if local_control_panel['done_switch']:
        hf.done_alert()
def main(on_switch=False):
    """Run the binary-class pipeline in feature-selection mode.

    Every run is configured with ``feature_selection_switch`` enabled and
    all ``ml_process_on_*`` switches and cross validation disabled.  Which
    combinations are executed is decided by the four ``run_on_*`` flags set
    below.

    Compared with the previous version, the "unfeatured, full dataset"
    branch no longer iterates over ``feature_set_list``: the loop variable
    was never used, so each extra feature set only repeated an identical
    run.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    # Function-scope import keeps this block self-contained.
    from itertools import product

    save_switch = False
    run_on_subsampled_data = False
    run_on_full_data = True
    run_on_unfeatured_data = False
    run_on_featured_data = True

    # Eg, full list >> [1, 5, 10, 50, 100, 500]
    nk_list = [500]

    # Eg, Full list >> ['dummy', 'sex_only', 'name_basic_only',
    # 'name_substring_only', 'name_numeric_only', 'name_metaphone_only',
    # 'name_all', 'loc_basic_only', 'loc_sep_entity_only',
    # 'loc_substring_only', 'loc_all', 'name_all_loc_all']
    feature_set_list = ['dummy', 'name_all', 'name_all_loc_all_reduced']
    eval_score_list = ['macro f1 score']
    target_label_list = ['ab', 'fn', 'metis', 'inuit', 'ch', 'ja', 'fr']

    ml_algo_param_dict = {
        'LR': {
            'clf': LogisticRegression(),
            'param': {
                # empty since we're not doing grid-search here
            },
        },
    }

    def run_pipeline(algo_key, algo_val, target_label, eval_score, nk,
                     use_featured, feature_set):
        """Configure and execute one feature-selection pipeline run."""
        pipeline = ml_pipeline.MachineLearningNameEthnicityProjectBinaryClass(
            control_panel={
                'save_result_switch': save_switch,  # WARNING: Will overwrite existing
                'use_subsampled_df_switch': False,  # WARNING: Switch to False in production
                'use_subsampled_df_nk': nk,
                'use_featured_df_switch': use_featured,
                'use_feature_set': feature_set,
                'feature_selection_switch': True,
                'cross_validation_switch': False,
                'ml_process_on_test_data_switch': False,
                'ml_process_on_training_data_switch': False,
                'ml_process_on_ext_data_switch': False,
                'ml_algo': None,
                'ml_algo_param_grid': [algo_key, algo_val],
                'binary_target_label': target_label,
                'eval_score': eval_score,
                'random_state': 888,
            })
        pipeline.machine_learning_steps()

    if run_on_subsampled_data:
        # Loop through subsampling n set with unfeatured data
        if run_on_unfeatured_data:
            for nk, (algo_key, algo_val), target_label, eval_score in product(
                    nk_list, ml_algo_param_dict.items(), target_label_list,
                    eval_score_list):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             nk=nk, use_featured=False, feature_set='none')

        # Loop through feature set and subsampling n set
        if run_on_featured_data:
            for (feature_set, nk, (algo_key, algo_val), target_label,
                 eval_score) in product(
                    feature_set_list, nk_list, ml_algo_param_dict.items(),
                    target_label_list, eval_score_list):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             nk=nk, use_featured=True,
                             feature_set=feature_set)

    if run_on_full_data:
        # Run once using unfeatured, full dataset.  No feature-set loop here:
        # the configuration does not depend on the feature set.
        if run_on_unfeatured_data:
            for (algo_key, algo_val), target_label, eval_score in product(
                    ml_algo_param_dict.items(), target_label_list,
                    eval_score_list):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             nk='none', use_featured=False,
                             feature_set='none')

        # Run once using featured, full dataset
        if run_on_featured_data:
            for (feature_set, (algo_key, algo_val), target_label,
                 eval_score) in product(
                    feature_set_list, ml_algo_param_dict.items(),
                    target_label_list, eval_score_list):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             nk='none', use_featured=True,
                             feature_set=feature_set)

    # `local_control_panel` is read from the enclosing scope.
    if local_control_panel['done_switch']:
        hf.done_alert()
def main(on_switch=False):
    """Run the binary-class pipeline in cross-validation mode.

    Every run is configured with ``cross_validation_switch`` enabled, a
    parameter grid in ``ml_algo_param_grid`` and all ``ml_process_on_*``
    switches disabled.  Each combination is repeated ``cv_repeat`` times,
    with the 1-based repeat index passed as ``cross_validation_repeat``.
    Which combinations are executed is decided by the four ``run_on_*``
    flags set below.

    Compared with the previous version, the "unfeatured, full dataset"
    branch no longer iterates over ``feature_set_list``: the loop variable
    was never used, so each extra feature set only repeated an identical
    run.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    # Function-scope import keeps this block self-contained.
    from itertools import product

    save_switch = False
    run_on_subsampled_data = True
    run_on_full_data = False
    run_on_unfeatured_data = False
    run_on_featured_data = True

    # Eg, full list >> [1, 5, 10, 50, 100, 500]
    nk_list = [5]
    cv_repeat = 1

    # Eg, Full list >> ['dummy', 'sex_only', 'name_basic_only',
    # 'name_substring_only', 'name_numeric_only', 'name_metaphone_only',
    # 'name_all', 'loc_basic_only', 'loc_sep_entity_only',
    # 'loc_substring_only', 'loc_all', 'name_all_loc_all',
    # 'name_all_loc_all_reduced']
    feature_set_list = ['name_all']
    eval_score_list = ['macro f1 score']

    # Eg, Full list >> ['ab', 'fn', 'metis', 'inuit', 'ch', 'ja', 'en', 'fr',
    # 'ir', 'it', 'rus', 'sc', 'others']
    target_label_list = ['fn']

    # Grid keys use the "<step name>__<parameter>" form; presumably the
    # pipeline wraps each classifier in a scikit-learn Pipeline -- confirm.
    ml_algo_param_dict = {
        'LR_V1': {
            'clf': LogisticRegression(),
            'param': {
                'logisticregression__solver': ['liblinear'],
                'logisticregression__penalty': ['l1', 'l2'],
                'logisticregression__C': np.logspace(-4, 4, 20),
                'logisticregression__tol': np.logspace(-5, 5, 20),
                'logisticregression__class_weight': [None, 'balanced'],
                'logisticregression__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'LR_V2': {
            'clf': LogisticRegression(),
            'param': {
                'logisticregression__solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
                'logisticregression__penalty': ['none', 'l2'],
                'logisticregression__C': np.logspace(-4, 4, 20),
                'logisticregression__tol': np.logspace(-5, 5, 20),
                'logisticregression__class_weight': [None, 'balanced'],
                'logisticregression__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'SVC_LINEAR': {
            'clf': LinearSVC(),
            'param': {
                'linearsvc__penalty': ['l2'],
                'linearsvc__loss': ['hinge', 'squared_hinge'],
                'linearsvc__C': np.logspace(-4, 4, 20),
                'linearsvc__tol': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                'linearsvc__class_weight': [None, 'balanced'],
                'linearsvc__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'SVC_NONLINEAR': {
            'clf': SVC(),
            'param': {
                'svc__kernel': ['poly', 'rbf', 'sigmoid'],
                'svc__C': np.logspace(-4, 4, 20),
                'svc__tol': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                'svc__class_weight': [None, 'balanced'],
                'svc__decision_function_shape': ['ovo', 'ovr'],
                'svc__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'NB': {
            'clf': BernoulliNB(),
            'param': {
                'bernoullinb__alpha': np.logspace(-4, 4, 20),
                'bernoullinb__binarize': [None, 0, .2, .4, .6, .8, 1],
                'bernoullinb__fit_prior': [True, False],
            },
        },
    }

    def run_pipeline(algo_key, algo_val, target_label, eval_score, repeat,
                     nk, use_featured, feature_set):
        """Print a timestamp, then configure and execute one CV run."""
        print('>> Current time:', datetime.datetime.now())
        pipeline = ml_pipeline.MachineLearningNameEthnicityProjectBinaryClass(
            control_panel={
                'save_result_switch': save_switch,  # WARNING: Will overwrite existing
                # NOTE(review): stays False even for the subsampled branches,
                # exactly as before -- confirm this is intended.
                'use_subsampled_df_switch': False,  # WARNING: Switch to False in production
                'use_subsampled_df_nk': nk,
                'use_featured_df_switch': use_featured,
                'use_feature_set': feature_set,
                'feature_selection_switch': False,
                'cross_validation_switch': True,
                'cross_validation_repeat': repeat,
                'ml_process_on_test_data_switch': False,
                'ml_process_on_training_data_switch': False,
                'ml_process_on_ext_data_switch': False,
                'ml_algo': None,
                'ml_algo_param_grid': [algo_key, algo_val],
                'binary_target_label': target_label,
                'eval_score': eval_score,
                'random_state': 888,
            })
        pipeline.machine_learning_steps()

    # Repeat indices are 1-based: 1 .. cv_repeat inclusive.
    repeats = range(1, cv_repeat + 1)

    if run_on_subsampled_data:
        # Loop through subsampling n set with unfeatured data
        if run_on_unfeatured_data:
            for (nk, target_label, (algo_key, algo_val), eval_score,
                 repeat) in product(
                    nk_list, target_label_list, ml_algo_param_dict.items(),
                    eval_score_list, repeats):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             repeat, nk=nk, use_featured=False,
                             feature_set=[])

        # Loop through feature set and subsampling n set
        if run_on_featured_data:
            for (feature_set, nk, target_label, (algo_key, algo_val),
                 eval_score, repeat) in product(
                    feature_set_list, nk_list, target_label_list,
                    ml_algo_param_dict.items(), eval_score_list, repeats):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             repeat, nk=nk, use_featured=True,
                             feature_set=feature_set)

    if run_on_full_data:
        # Run once using unfeatured, full dataset.  No feature-set loop here:
        # the configuration does not depend on the feature set.
        if run_on_unfeatured_data:
            for (target_label, (algo_key, algo_val), eval_score,
                 repeat) in product(
                    target_label_list, ml_algo_param_dict.items(),
                    eval_score_list, repeats):
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             repeat, nk='none', use_featured=False,
                             feature_set=[])

        # Run once using featured, full dataset
        if run_on_featured_data:
            for (feature_set, target_label, (algo_key, algo_val), eval_score,
                 repeat) in product(
                    feature_set_list, target_label_list,
                    ml_algo_param_dict.items(), eval_score_list, repeats):
                # NOTE(review): nk is [] here but 'none' in the unfeatured
                # branch above; both values are kept as they were.
                run_pipeline(algo_key, algo_val, target_label, eval_score,
                             repeat, nk=[], use_featured=True,
                             feature_set=feature_set)

    # `local_control_panel` is read from the enclosing scope.
    if local_control_panel['done_switch']:
        hf.done_alert()
def main(on_switch=False):
    """Run the multi-class pipeline with fixed (pre-tuned) estimators.

    Every run is configured with ``ml_process_on_test_data_switch`` enabled,
    feature selection / cross validation disabled, and
    ``label_varname='ETHNICITY_RECAT'``.  Which combinations are executed is
    decided by the four ``run_on_*`` flags set below.

    Compared with the previous version, the "unfeatured, full dataset"
    branch now passes ``use_featured_df_switch=False``.  It used to pass
    ``True`` together with an empty feature set, unlike the unfeatured
    subsampled branch of this same function, which passes ``False``.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    # Function-scope import keeps this block self-contained.
    from itertools import product

    save_switch = False
    run_on_subsampled_data = False
    run_on_full_data = True
    run_on_unfeatured_data = False
    run_on_featured_data = True

    # Eg, full list >> [5, 50, 500]
    nk_list = [5]

    # Eg, ['dummy', 'sex_only', 'name_all', 'name_all_loc_all',
    # 'name_all_loc_all_reduced']
    feature_set_list = ['name_all_loc_all_reduced']

    ml_algo_dict = {
        'LR': {
            'clf': LogisticRegression(
                tol=0.01438449888287663,
                solver='liblinear',
                penalty='l2',
                multi_class='ovr',
                max_iter=50,
                class_weight=None,
                C=10000.0),
        },
        'SVC': {
            'clf': OneVsRestClassifier(LinearSVC(
                tol=0.0001,
                penalty='l2',
                multi_class='ovr',
                max_iter=50,
                loss='squared_hinge',
                class_weight='balanced',
                C=0.08858667904100823)),
        },
        'NB': {
            'clf': BernoulliNB(
                fit_prior=True,
                binarize=0.6,
                alpha=0.00026366508987303583),
        },
    }

    def run_pipeline(algo_key, algo_val, use_subsampled, nk, use_featured,
                     feature_set):
        """Configure and execute one multi-class pipeline run."""
        pipeline = ml_pipeline.MachineLearningNameEthnicityProjectMultiClass(
            control_panel={
                'save_result_switch': save_switch,  # WARNING: Will overwrite existing
                'use_subsampled_df_switch': use_subsampled,
                'use_subsampled_df_nk': nk,
                'use_featured_df_switch': use_featured,
                'use_feature_set': feature_set,
                'feature_selection_switch': False,
                'cross_validation_switch': False,
                'cross_validation_repeat': None,
                'ml_process_on_test_data_switch': True,
                'ml_process_on_training_data_switch': False,
                'ml_process_on_ext_data_switch': False,
                'ml_algo': [algo_key, algo_val],
                'ml_algo_param_grid': None,
                'eval_score': None,
                'label_varname': 'ETHNICITY_RECAT',
                'random_state': 888,
            })
        pipeline.machine_learning_steps()

    if run_on_subsampled_data:
        # Loop through subsampling n set with unfeatured data
        if run_on_unfeatured_data:
            for nk, (algo_key, algo_val) in product(
                    nk_list, ml_algo_dict.items()):
                run_pipeline(algo_key, algo_val, use_subsampled=True, nk=nk,
                             use_featured=False, feature_set=[])

        # Loop through feature set and subsampling n set
        if run_on_featured_data:
            for feature_set, nk, (algo_key, algo_val) in product(
                    feature_set_list, nk_list, ml_algo_dict.items()):
                run_pipeline(algo_key, algo_val, use_subsampled=True, nk=nk,
                             use_featured=True, feature_set=feature_set)

    if run_on_full_data:
        # Run once using unfeatured, full dataset
        if run_on_unfeatured_data:
            for algo_key, algo_val in ml_algo_dict.items():
                # use_featured is False here (was True): this is the
                # unfeatured branch and no feature set is supplied.
                run_pipeline(algo_key, algo_val, use_subsampled=False, nk=[],
                             use_featured=False, feature_set=[])

        # Run once using featured, full dataset
        if run_on_featured_data:
            for feature_set, (algo_key, algo_val) in product(
                    feature_set_list, ml_algo_dict.items()):
                run_pipeline(algo_key, algo_val, use_subsampled=False, nk=[],
                             use_featured=True, feature_set=feature_set)

    # `local_control_panel` is read from the enclosing scope.
    if local_control_panel['done_switch']:
        hf.done_alert()
def main(on_switch=False):
    """Run the multi-class pipeline in cross-validation mode.

    Every run is configured with ``cross_validation_switch`` enabled, a
    parameter grid in ``ml_algo_param_grid``, all ``ml_process_on_*``
    switches disabled, and ``label_varname='ETHNICITY_RECAT'``.  Each
    combination is repeated ``cv_repeat`` times, with the 1-based repeat
    index passed as ``cross_validation_repeat``.  Which combinations are
    executed is decided by the four ``run_on_*`` flags set below.

    Compared with the previous version, the "unfeatured, full dataset"
    branch now passes ``use_featured_df_switch=False``.  It used to pass
    ``True`` together with an empty feature set, unlike the unfeatured
    subsampled branch of this same function, which passes ``False``.

    Args:
        on_switch: When falsy (the default) the function returns
            immediately and nothing is instantiated.

    Returns:
        None.
    """
    if not on_switch:
        return

    # Function-scope import keeps this block self-contained.
    from itertools import product

    save_switch = False
    run_on_subsampled_data = False
    run_on_full_data = True
    run_on_unfeatured_data = False
    run_on_featured_data = True

    # Eg, full list >> [5, 50, 500]
    nk_list = [5]
    cv_repeat = 1

    # Eg = ['name_all', 'name_all_loc_all', 'name_all_loc_all_reduced']
    feature_set_list = ['name_all_loc_all_reduced']

    # Eg = ['accuracy', 'macro f1 score', 'macro precision', 'macro recall']
    eval_score_list = ['macro f1 score']

    # Grid keys use the "<step name>__<parameter>" form; presumably the
    # pipeline wraps each classifier in a scikit-learn Pipeline -- confirm.
    ml_algo_param_dict = {
        'LR_V1': {
            'clf': LogisticRegression(),
            'param': {
                'logisticregression__solver': ['liblinear'],
                'logisticregression__penalty': ['l1', 'l2'],
                'logisticregression__C': np.logspace(-4, 4, 20),
                'logisticregression__tol': np.logspace(-5, 5, 20),
                'logisticregression__class_weight': [None, 'balanced'],
                'logisticregression__multi_class': ['ovr', 'auto'],
                'logisticregression__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'LR_V2': {
            'clf': LogisticRegression(),
            'param': {
                'logisticregression__solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
                'logisticregression__penalty': ['none', 'l2'],
                'logisticregression__C': np.logspace(-4, 4, 20),
                'logisticregression__tol': np.logspace(-5, 5, 20),
                'logisticregression__class_weight': [None, 'balanced'],
                'logisticregression__multi_class': ['ovr', 'multinomial', 'auto'],
                'logisticregression__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'SVC_LINEAR': {
            'clf': OneVsRestClassifier(LinearSVC()),
            'param': {
                'onevsrestclassifier__estimator__penalty': ['l2'],
                'onevsrestclassifier__estimator__loss': ['hinge', 'squared_hinge'],
                'onevsrestclassifier__estimator__C': np.logspace(-4, 4, 20),
                'onevsrestclassifier__estimator__tol': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                'onevsrestclassifier__estimator__class_weight': [None, 'balanced'],
                'onevsrestclassifier__estimator__multi_class': ['ovr', 'crammer_singer'],
                'onevsrestclassifier__estimator__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'SVC_NONLINEAR': {
            'clf': OneVsRestClassifier(SVC()),
            'param': {
                'onevsrestclassifier__estimator__kernel': ['poly', 'rbf', 'sigmoid'],
                'onevsrestclassifier__estimator__C': np.logspace(-4, 4, 20),
                'onevsrestclassifier__estimator__tol': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                'onevsrestclassifier__estimator__class_weight': [None, 'balanced'],
                'onevsrestclassifier__estimator__decision_function_shape': ['ovo', 'ovr'],
                'onevsrestclassifier__estimator__max_iter': [50, 1000, 4000, 20000],
            },
        },
        'NB': {
            'clf': BernoulliNB(),
            'param': {
                'bernoullinb__alpha': np.logspace(-4, 4, 20),
                'bernoullinb__binarize': [None, 0, .2, .4, .6, .8, 1],
                'bernoullinb__fit_prior': [True, False],
            },
        },
    }

    def run_pipeline(algo_key, algo_val, eval_score, repeat, use_subsampled,
                     nk, use_featured, feature_set):
        """Configure and execute one multi-class cross-validation run."""
        pipeline = ml_pipeline.MachineLearningNameEthnicityProjectMultiClass(
            control_panel={
                'save_result_switch': save_switch,  # WARNING: Will overwrite existing
                'use_subsampled_df_switch': use_subsampled,
                'use_subsampled_df_nk': nk,
                'use_featured_df_switch': use_featured,
                'use_feature_set': feature_set,
                'feature_selection_switch': False,
                'cross_validation_switch': True,
                'cross_validation_repeat': repeat,
                'ml_process_on_test_data_switch': False,
                'ml_process_on_ext_data_switch': False,
                'ml_process_on_training_data_switch': False,
                'ml_algo': None,
                'ml_algo_param_grid': [algo_key, algo_val],
                'eval_score': eval_score,
                'label_varname': 'ETHNICITY_RECAT',
                'random_state': 888,
            })
        pipeline.machine_learning_steps()

    # Repeat indices are 1-based: 1 .. cv_repeat inclusive.
    repeats = range(1, cv_repeat + 1)

    if run_on_subsampled_data:
        # Loop through subsampling n set with unfeatured data
        if run_on_unfeatured_data:
            for nk, (algo_key, algo_val), eval_score, repeat in product(
                    nk_list, ml_algo_param_dict.items(), eval_score_list,
                    repeats):
                run_pipeline(algo_key, algo_val, eval_score, repeat,
                             use_subsampled=True, nk=nk,
                             use_featured=False, feature_set=[])

        # Loop through feature set and subsampling n set
        if run_on_featured_data:
            for (feature_set, nk, (algo_key, algo_val), eval_score,
                 repeat) in product(
                    feature_set_list, nk_list, ml_algo_param_dict.items(),
                    eval_score_list, repeats):
                run_pipeline(algo_key, algo_val, eval_score, repeat,
                             use_subsampled=True, nk=nk,
                             use_featured=True, feature_set=feature_set)

    if run_on_full_data:
        # Run once using unfeatured, full dataset
        if run_on_unfeatured_data:
            for (algo_key, algo_val), eval_score, repeat in product(
                    ml_algo_param_dict.items(), eval_score_list, repeats):
                # use_featured is False here (was True): this is the
                # unfeatured branch and no feature set is supplied.
                run_pipeline(algo_key, algo_val, eval_score, repeat,
                             use_subsampled=False, nk=[],
                             use_featured=False, feature_set=[])

        # Run once using featured, full dataset
        if run_on_featured_data:
            for (feature_set, (algo_key, algo_val), eval_score,
                 repeat) in product(
                    feature_set_list, ml_algo_param_dict.items(),
                    eval_score_list, repeats):
                run_pipeline(algo_key, algo_val, eval_score, repeat,
                             use_subsampled=False, nk=[],
                             use_featured=True, feature_set=feature_set)

    # `local_control_panel` is read from the enclosing scope.
    if local_control_panel['done_switch']:
        hf.done_alert()