コード例 #1
0
    #                                                      cluster_Nos=cluster_Nos, district_Nos=district_Nos, \
    #                                                      balance=True, rand_seed=r_seed, load_city=False,\
    #                                                      save_format='h5',crime_type=target_crime,target='Label')
    #        sample_mask = loading_info['sample_mask']
    #        X_train, y_train = loading_info['train_data_subset']
    #        X_test, _ = loading_info['test_data_subset']
    #
    #        clf = get_model_params(clf_name,rand_seed=r_seed,X=X_train)['model']
    #        tuning_params = get_model_params(clf_name,rand_seed=r_seed,X=X_train)['tuning_params']
    #        tuning_param_dicts = dict([(clf_name+'__'+key, val) for key,val in tuning_params.items()])

    # Starts out empty; nothing in the visible part of this branch fills it.
    sample_mask = {}
    if train_region == 'city':
        # City-wide training: register the training file under the 'train'
        # key and load it with split='train'.
        # NOTE(review): 'Label' is passed positionally right after
        # target_crime -- presumably the name of the target column; confirm
        # against ld.load_struct_data_h5.
        filename_dict['train'] = train_data
        X_train, y_train = ld.load_struct_data_h5(filename_dict['train'],
                                                  target_crime,
                                                  'Label',
                                                  split='train')

        # get_model_params is called twice with identical arguments: once for
        # the 'model' entry and once for the 'tuning_params' entry.
        # NOTE(review): a single call could serve both lookups if the helper
        # has no side effects -- confirm before merging them.
        clf = get_model_params(clf_name, rand_seed=r_seed, X=X_train)['model']
        tuning_params = get_model_params(clf_name, rand_seed=r_seed,
                                         X=X_train)['tuning_params']
        # Prefix every tuning-parameter name with '<clf_name>__'.
        # NOTE(review): this looks like scikit-learn's '<step>__<param>'
        # naming for pipeline steps -- confirm against model_fit.
        tuning_param_dicts = dict([(clf_name + '__' + key, val)
                                   for key, val in tuning_params.items()])
        # train models
        # model_fit's result is read through the keys 'model', 'CV_result'
        # and 'best_param'.
        fitting = model_fit(X_train, y_train, clf, clf_name,
                            tuning_param_dicts, CV_skf, scaling)
        model, cv_results, best_params = fitting['model'], fitting[
            'CV_result'], fitting['best_param']
        # Report the cross-validation results, labelled with the training
        # region and its number string.
        print('CV results (' + train_region + train_region_num_str + '):')
        print(cv_results)
        print('Best_parameters (' + train_region + train_region_num_str + '):')
コード例 #2
0
    # The string 'NA' acts as a "not provided" marker: the cluster/district
    # pickle paths are registered in filename_dict only when they differ
    # from it.
    if cluster_pkl != 'NA':
        filename_dict['cluster'] = cluster_pkl
    if district_pkl != 'NA':
        filename_dict['district'] = district_pkl

    #----------------------------------------#
    # Region selector for each split, passed on to pair_train_test_sample.
    mask_sample_region = {'train': train_region, 'test': test_region}

    # Wall-clock start time; the code that reads it is outside this excerpt.
    start = time.time()

    # The returned object is indexed by 'train' below.
    sample_mask = pair_train_test_sample(filename_dict, mask_sample_region,
                                         cluster_Nos, district_Nos, chunk_size)

    if train_region == 'city':
        # City-wide training: only the first entry of train_data_list is
        # loaded, using split='train_city'.
        X_train, y_train = ld.load_struct_data_h5(train_data_list[0],
                                                  target_crime,
                                                  'Label',
                                                  split='train_city')
        # Replace the 'train' mask with an all-True boolean array of length
        # len(X_train), i.e. every loaded sample is kept.
        sample_mask['train'] = np.ones(len(X_train)).astype(bool)

    else:
        # Otherwise every file in train_data_list is loaded in turn and
        # filtered; the selected pieces are collected in these two lists.
        X_train_stacked = []
        y_train_stacked = []

        for i, fn in enumerate(train_data_list):
            X_train, y_train = ld.load_struct_data_h5(fn,
                                                      target_crime,
                                                      'Label',
                                                      split='train_chunk')

            # sample_mask['train'][i] is the selector for the i-th file: it
            # picks rows of X_train (all columns) and the matching entries
            # of y_train.
            X_train_stacked.append(X_train[sample_mask['train'][i], :])
            y_train_stacked.append(y_train[sample_mask['train'][i]])