evalIdx_names = []
areaPct = {}
for eval_tup in eval_tuplist:
    # convert the interval string "start stop num" into a list of numbers
    interval_params = list(map(float, eval_tup[1].split()))
    intervals = np.linspace(interval_params[0], interval_params[1],
                            int(interval_params[2]))
    ev_idx = eval_tup[0].rstrip()
    evalIdx_names.append(ev_idx)
    areaPct[ev_idx] = intervals

# ------------------ Load data ----------------------------------------
_, grd_x, grd_y, _, mask_grdInCity, _ = ld.load_grid(grid_pkl)
grid_2d = (grd_x, grd_y)
groups_test = ld.load_train_test_group(group_pkl)['test_groups']
CrimeData = ld.load_crime_data(crime_pkl)
baseline_test = ld.load_baseline_data(baseline_pkl, target_crime)

score_list = []
for p_file, m_file in zip(predscore_filelist, samplemask_filelist):
    with open(m_file, 'rb') as input_file:
        samplemask = pickle.load(input_file)
    predscores = np.loadtxt(p_file, delimiter=',')
    # assign prediction scores to the entire city; cells outside the sampled
    # test region keep a score of zero
    predscores_city = np.tile(np.zeros(np.sum(mask_grdInCity)),
                              len(groups_test))
    predscores_city[samplemask['test']] = predscores
    score_list.append(predscores_city)
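# Illustrative sketch (an assumption, not part of the original pipeline):
# eval_tuplist appears to hold (index_name, "start stop num") pairs; the
# 'PAI' name and interval string below are hypothetical values used only to
# show what areaPct ends up containing. Relies on the module's numpy import.
def _example_areaPct():
    eval_tuplist_example = [('PAI ', '0.01 0.10 10')]
    areaPct_example = {}
    for name, interval_str in eval_tuplist_example:
        start, stop, num = map(float, interval_str.split())
        # ten evenly spaced area-percentage thresholds from 1% to 10%
        areaPct_example[name.rstrip()] = np.linspace(start, stop, int(num))
    return areaPct_example  # {'PAI': array([0.01, 0.02, ..., 0.1])}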
def pair_train_test_sample(filenames,
                           mask_sample_region={'train': None, 'test': None},
                           cluster_Nos={'train': None, 'test': None},
                           district_Nos={'train': None, 'test': None},
                           chunk_size={'train': None, 'test': None}):
    """
    filenames: a dict mapping 'cluster' and/or 'district', plus 'group',
               to the corresponding file names.
    """
    # load cluster info
    if 'cluster' in filenames:
        cluster_label, _ = ld.load_clusters(filenames['cluster'])
        cluster_mask = {
            'train': np.in1d(cluster_label, cluster_Nos['train'])
            if cluster_Nos['train'] is not None else None,
            'test': np.in1d(cluster_label, cluster_Nos['test'])
            if cluster_Nos['test'] is not None else None
        }

    # load district info
    if 'district' in filenames:
        district_label = ld.load_districts(filenames['district'])
        district_mask = {
            'train': np.in1d(district_label, district_Nos['train'])
            if district_Nos['train'] is not None else None,
            'test': np.in1d(district_label, district_Nos['test'])
            if district_Nos['test'] is not None else None
        }

    # load time interval (group) info
    group_info = ld.load_train_test_group(filenames['group'])
    groups_train, groups_test = (group_info['train_groups'],
                                 group_info['test_groups'])

    sample_mask = {'train': [], 'test': []}

    # build training masks: tile the chosen region mask over each chunk of
    # consecutive time groups
    if mask_sample_region['train'] == 'cluster':
        M = cluster_mask['train']
    elif mask_sample_region['train'] == 'district':
        M = district_mask['train']
    for i in range(groups_train[0], groups_train[-1], chunk_size['train']):
        if i < groups_train[-1] and i + chunk_size['train'] > groups_train[-1]:
            # the last chunk may have a smaller size
            sample_mask['train'].append(np.tile(M, groups_train[-1] + 1 - i))
        else:
            sample_mask['train'].append(np.tile(M, chunk_size['train']))

    # build test masks the same way
    if mask_sample_region['test'] == 'cluster':
        M = cluster_mask['test']
    elif mask_sample_region['test'] == 'district':
        M = district_mask['test']
    for i in range(groups_test[0], groups_test[-1], chunk_size['test']):
        if i < groups_test[-1] and i + chunk_size['test'] > groups_test[-1]:
            # the last chunk may have a smaller size
            sample_mask['test'].append(np.tile(M, groups_test[-1] + 1 - i))
        else:
            sample_mask['test'].append(np.tile(M, chunk_size['test']))

    return sample_mask
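# Minimal usage sketch (an assumption, not part of the original module): the
# pickle file names, cluster numbers, and chunk sizes below are hypothetical
# placeholders chosen only to show the expected call pattern and return shape.
def _example_pair_train_test_sample():
    filenames = {
        'cluster': 'cluster_result.pkl',    # hypothetical path
        'group': 'train_test_groups.pkl',   # hypothetical path
    }
    sample_mask = pair_train_test_sample(
        filenames,
        mask_sample_region={'train': 'cluster', 'test': 'cluster'},
        cluster_Nos={'train': [0, 1], 'test': [0, 1]},
        district_Nos={'train': None, 'test': None},
        chunk_size={'train': 4, 'test': 4})
    # sample_mask['train'] / sample_mask['test'] are lists of boolean arrays,
    # one per chunk of consecutive time groups, each tiling the per-cell
    # region mask over the groups in that chunk.
    return sample_mask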