# Code example #1 (score: 0)
    evalIdx_names = []
    areaPct = {}
    for eval_tup in eval_tuplist:
        interval_params = list(map(
            float, eval_tup[1].split()))  # convert string to list of numbers
        intervals = np.linspace(interval_params[0], interval_params[1],
                                interval_params[2])
        ev_idx = eval_tup[0].rstrip()
        evalIdx_names.append(ev_idx)
        areaPct[ev_idx] = intervals

    #------------------ Load data ----------------------------------------
    _, grd_x, grd_y, _, mask_grdInCity, _ = ld.load_grid(grid_pkl)
    grid_2d = (grd_x, grd_y)

    groups_test = ld.load_train_test_group(group_pkl)['test_groups']

    CrimeData = ld.load_crime_data(crime_pkl)
    baseline_test = ld.load_baseline_data(baseline_pkl, target_crime)

    score_list = []
    for p_file, m_file in zip(predscore_filelist, samplemask_filelist):
        with open(m_file, 'rb') as input_file:
            samplemask = pickle.load(input_file)
        predscores = np.loadtxt(p_file, delimiter=',')
        predscores_city = np.tile(np.zeros(np.sum(mask_grdInCity)),
                                  len(groups_test))
        #assign prediction scores to the entire city
        predscores_city[samplemask['test']] = predscores
        score_list.append(predscores_city)
# Code example #2 (score: 0)
def pair_train_test_sample(filenames,
                           mask_sample_region=None,
                           cluster_Nos=None,
                           district_Nos=None,
                           chunk_size=None):
    """
    Build per-chunk boolean sample masks for the train and test periods.

    filenames: a dict with keys 'district'/'cluster' and 'group' with their
        filenames being the corresponding values.
    mask_sample_region: dict {'train': ..., 'test': ...}; each value is
        'cluster', 'district', or None, selecting which label mask to tile
        for that split. Defaults to {'train': None, 'test': None}.
    cluster_Nos / district_Nos: dicts {'train': ..., 'test': ...} of label
        selections; a None entry means no mask is built for that split.
    chunk_size: dict {'train': int, 'test': int} -- number of time groups
        tiled into each chunk's mask.

    Returns a dict {'train': [...], 'test': [...]} where each list holds one
    tiled boolean mask per chunk of time groups.
    """
    # The original signature used mutable dict literals as default arguments
    # (a Python anti-pattern: one shared object across all calls). Replaced
    # with None sentinels; effective defaults are unchanged.
    if mask_sample_region is None:
        mask_sample_region = {'train': None, 'test': None}
    if cluster_Nos is None:
        cluster_Nos = {'train': None, 'test': None}
    if district_Nos is None:
        district_Nos = {'train': None, 'test': None}
    if chunk_size is None:
        chunk_size = {'train': None, 'test': None}

    # load cluster info
    if 'cluster' in filenames.keys():
        cluster_label, _ = ld.load_clusters(filenames['cluster'])
        cluster_mask = {
            'train':
            np.in1d(cluster_label, cluster_Nos['train'])
            if cluster_Nos['train'] is not None else None,
            'test':
            np.in1d(cluster_label, cluster_Nos['test'])
            if cluster_Nos['test'] is not None else None
        }
    # load district info
    if 'district' in filenames.keys():
        district_label = ld.load_districts(filenames['district'])
        district_mask = {
            'train':
            np.in1d(district_label, district_Nos['train'])
            if district_Nos['train'] is not None else None,
            'test':
            np.in1d(district_label, district_Nos['test'])
            if district_Nos['test'] is not None else None
        }

    # load time interval info
    group_info = ld.load_train_test_group(filenames['group'])
    groups_train, groups_test = group_info['train_groups'], group_info[
        'test_groups']

    sample_mask = {'train': [], 'test': []}
    # NOTE(review): if mask_sample_region['train'] is neither 'cluster' nor
    # 'district', M is never bound and the tile below raises NameError --
    # callers are expected to select a region for any split they chunk.
    if mask_sample_region['train'] == 'cluster':
        M = cluster_mask['train']
    elif mask_sample_region['train'] == 'district':
        M = district_mask['train']
    # NOTE(review): when the span divides evenly by chunk_size, the final
    # group index (groups_train[-1]) is never covered by any chunk -- confirm
    # whether groups_*[-1] is meant as an exclusive bound.
    for i in range(groups_train[0], groups_train[-1], chunk_size['train']):
        if i < groups_train[-1] and i + chunk_size['train'] > groups_train[-1]:
            # the end chunk may have a smaller size
            sample_mask['train'].append(np.tile(M, groups_train[-1] + 1 - i))
        else:
            sample_mask['train'].append(np.tile(M, chunk_size['train']))

    # NOTE(review): if mask_sample_region['test'] matches neither branch, M
    # silently carries over from the train split above -- confirm intended.
    if mask_sample_region['test'] == 'cluster':
        M = cluster_mask['test']
    elif mask_sample_region['test'] == 'district':
        M = district_mask['test']
    for i in range(groups_test[0], groups_test[-1], chunk_size['test']):
        if i < groups_test[-1] and i + chunk_size['test'] > groups_test[-1]:
            # the end chunk may have a smaller size
            sample_mask['test'].append(np.tile(M, groups_test[-1] + 1 - i))
        else:
            sample_mask['test'].append(np.tile(M, chunk_size['test']))

    return sample_mask