#                             cluster_Nos=cluster_Nos, district_Nos=district_Nos, \
#                             balance=True, rand_seed=r_seed, load_city=False,\
#                             save_format='h5',crime_type=target_crime,target='Label')
# sample_mask = loading_info['sample_mask']
# X_train, y_train = loading_info['train_data_subset']
# X_test, _ = loading_info['test_data_subset']
#
# clf = get_model_params(clf_name,rand_seed=r_seed,X=X_train)['model']
# tuning_params = get_model_params(clf_name,rand_seed=r_seed,X=X_train)['tuning_params']
# tuning_param_dicts = dict([(clf_name+'__'+key, val) for key,val in tuning_params.items()])

sample_mask = {}

# load the city-wide training set
if train_region == 'city':
    filename_dict['train'] = train_data
    X_train, y_train = ld.load_struct_data_h5(filename_dict['train'], target_crime,
                                              'Label', split='train')

# fetch the estimator and its tuning grid; grid keys are prefixed with the
# classifier name so they can address that estimator's hyper-parameters
clf = get_model_params(clf_name, rand_seed=r_seed, X=X_train)['model']
tuning_params = get_model_params(clf_name, rand_seed=r_seed, X=X_train)['tuning_params']
tuning_param_dicts = {clf_name + '__' + key: val for key, val in tuning_params.items()}

# train models
fitting = model_fit(X_train, y_train, clf, clf_name, tuning_param_dicts, CV_skf, scaling)
model, cv_results, best_params = fitting['model'], fitting['CV_result'], fitting['best_param']

print('CV results (' + train_region + train_region_num_str + '):')
print(cv_results)
print('Best_parameters (' + train_region + train_region_num_str + '):')
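
# -----------------------------------------------------------------------------
# Illustrative sketch only (assumption, not the project's actual model_fit):
# the clf_name + '__' + key prefix used for tuning_param_dicts matches the
# parameter-naming convention of scikit-learn Pipelines, so model_fit likely
# wraps Pipeline + GridSearchCV roughly as below. The scaling toggle, the
# default scoring, and the helper name _model_fit_sketch are guesses made for
# illustration; the sketch is defined but never called.
def _model_fit_sketch(X, y, clf, clf_name, tuning_param_dicts, cv, scaling):
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import GridSearchCV

    # optional standardisation step, then the estimator registered under
    # clf_name so grid keys such as '<clf_name>__<param>' reach its parameters
    steps = ([('scaler', StandardScaler())] if scaling else []) + [(clf_name, clf)]
    search = GridSearchCV(Pipeline(steps), tuning_param_dicts, cv=cv, n_jobs=-1)
    search.fit(X, y)
    return {'model': search.best_estimator_,
            'CV_result': search.cv_results_,
            'best_param': search.best_params_}
# Example (sketch): _model_fit_sketch(X_train, y_train, clf, clf_name,
#                                     tuning_param_dicts, CV_skf, scaling)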
# register optional cluster / district pickle files
if cluster_pkl != 'NA':
    filename_dict['cluster'] = cluster_pkl
if district_pkl != 'NA':
    filename_dict['district'] = district_pkl

#----------------------------------------#
# build paired train/test sample masks for the requested regions
mask_sample_region = {'train': train_region, 'test': test_region}
start = time.time()
sample_mask = pair_train_test_sample(filename_dict, mask_sample_region,
                                     cluster_Nos, district_Nos, chunk_size)

if train_region == 'city':
    # city-wide training: load the single file and keep every sample
    X_train, y_train = ld.load_struct_data_h5(train_data_list[0], target_crime,
                                              'Label', split='train_city')
    sample_mask['train'] = np.ones(len(X_train)).astype(bool)
else:
    # chunked training: load each chunk and keep only its masked samples
    X_train_stacked = []
    y_train_stacked = []
    for i, fn in enumerate(train_data_list):
        X_train, y_train = ld.load_struct_data_h5(fn, target_crime, 'Label',
                                                  split='train_chunk')
        X_train_stacked.append(X_train[sample_mask['train'][i], :])
        y_train_stacked.append(y_train[sample_mask['train'][i]])
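
# -----------------------------------------------------------------------------
# Illustrative sketch only (assumption, not part of the original script): the
# masked per-chunk arrays built above would typically be concatenated into
# single training matrices before being handed to model fitting. The helper
# name _stack_masked_chunks is hypothetical; the sketch is never called.
def _stack_masked_chunks(X_chunks, y_chunks):
    # vertically stack feature chunks and concatenate their label vectors
    return np.vstack(X_chunks), np.concatenate(y_chunks)
# Example (sketch): X_train, y_train = _stack_masked_chunks(X_train_stacked,
#                                                           y_train_stacked)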