Beispiel #1
0
    loc2 = np.array([lon2,lat2])
    
    X = np.append(X,[loc1,loc2], axis=0)
    y = np.append(y,[y[i],y[i]])
   

# =============================================================================
# First pass through model
# =============================================================================

# Name of the optimizer handed to k.train() below; either 'ga' or 'pso'.
# NOTE(review): presumably genetic algorithm vs. particle swarm optimisation --
# confirm against the kriging library's documentation.

optimizer = 'ga'

print('Setting up the Kriging Model')
# Build the model from the locations X and the values y assembled above.
k = kriging(X, y)

# Train with the optimizer selected above, then call the model's plot().
k.train(optimizer = optimizer)
k.plot()

# New locations (these appear to be written as latitude, longitude):
# 51.494148, -0.008185
# 51.494416, -0.005447
# 51.490444, -0.008367
# 51.490532, -0.003583
#
# corner 1 - 51.509133, -0.006260
# corner 2 - 51.511024, -0.000543
# corner 3 - 51.511431, -0.006752

# Corner 1 from the list above with its two numbers swapped, i.e. in the same
# [lon, lat] order used when the rows of X are built.
X1 = [ -0.006260, 51.509133]
Beispiel #2
0
def random_test(Number_test, test_city):
    """Hold out one city, classify its samples and krige the probabilities.

    The rows of the module-level ``europe_sample`` frame whose ``city`` equals
    ``test_city`` form the test set; a one-vs-rest logistic regression is
    fitted on all remaining rows. For every test sample, the predicted
    per-city probabilities are used as the values of two kriging models --
    one over the ``x``/``y`` columns of ``europe`` and one over the
    ``bio_x``/``bio_y`` columns computed here via ``affine_transform`` -- and
    both models are evaluated at the held-out city's own coordinates.

    Side effects:
        * assigns the ``bio_x`` and ``bio_y`` columns of the module-level
          ``europe`` frame;
        * prints ``Number_test``;
        * appends one line to each of
          ``../random_test/<tag>/bio_true_<test_city>_result.txt`` and
          ``../random_test/<tag>/geo_true_<test_city>_result.txt``
          (the directory is created when missing).

    Args:
        Number_test: Identifier of this run; it is only printed.
        test_city: Label of the held-out city as used in
            ``europe_sample['city']``; must be a key of ``city_name`` below.

    Returns:
        None.

    Raises:
        KeyError: If ``test_city`` or a trained class label is not a key of
            ``city_name``.
        IndexError: If a required city has no row in ``europe``.
    """
    # Maps the labels used in europe_sample['city'] to europe.city_ascii names.
    city_name = {
        'BER': 'Berlin',
        'LON': 'London',
        'MAR': 'Marseille',
        'PXO': 'Porto',
        'SOF': 'Sofia',
        'STO': 'Stockholm',
        'Kiev': 'Kiev',
        'Oslo': 'Oslo',
        'Paris': 'Paris',
        'Vienna': 'Vienna',
    }

    def _first_match(city, columns):
        """Return ``columns`` of the first ``europe`` row named ``city``."""
        return list(europe[europe.city_ascii == city][columns].values[0])

    train_data = europe_sample[europe_sample['city'] != test_city]
    test_data = europe_sample[europe_sample['city'] == test_city]

    # Feature columns come from a pre-computed, tag-specific list on disk.
    key = list(
        pd.read_table(
            '../feature_extration_result/feature_list_{}.txt'.format(tag),
            header=None)[0])

    clf = LogisticRegression(penalty="l2",
                             C=0.5,
                             multi_class="ovr",
                             solver='liblinear',
                             class_weight="balanced")
    clf.fit(train_data[key].values, train_data["city"].values.copy())

    # One row per test sample, one column per trained class label.
    test_pre_proba = pd.DataFrame(index=test_data.index,
                                  columns=clf.classes_,
                                  data=clf.predict_proba(
                                      test_data[key].values))

    print(Number_test)

    # Affine transform from the x/y plane onto the PCA centroid plane.
    pca_data = pca_scatter(train_data, key, 'city')
    city_label, city_centroids = calculate_centroids(pca_data)
    to_pts = city_centroids[[list(city_label).index(i) for i in city_label]]
    # Build the source points label by label so that row k of from_pts and
    # row k of to_pts describe the same city. A boolean isin() mask would
    # return rows in europe's own row order instead, which is not guaranteed
    # to match the order of city_label.
    from_pts = np.array(
        [_first_match(city_name[label], ['x', 'y']) for label in city_label])

    src_pts = europe[['x', 'y']].values
    bio_pts = affine_transform(from_pts, to_pts, src_pts)

    europe['bio_x'] = [i[0] for i in bio_pts]
    europe['bio_y'] = [i[1] for i in bio_pts]

    # The kriging sample locations depend only on the trained classes, and the
    # evaluation location only on the held-out city, so look them up once
    # instead of once per test sample.
    class_cities = [city_name[c] for c in test_pre_proba.columns]
    bio_train = np.array(
        [_first_match(city, ['bio_x', 'bio_y']) for city in class_cities])
    geo_train = np.array(
        [_first_match(city, ['x', 'y']) for city in class_cities])
    target_city = city_name[test_city]
    bio_target = _first_match(target_city, ['bio_x', 'bio_y'])
    geo_target = _first_match(target_city, ['x', 'y'])

    # Kriging interpolation: one pair of models per test sample.
    bio_result = []
    geo_result = []
    for probabilities in test_pre_proba.values:
        train_y = probabilities.tolist()

        # Fresh copies are passed so a model can never alter the shared arrays.
        bio_kriging = kriging(bio_train.copy(), train_y)
        bio_kriging.train()

        geo_kriging = kriging(geo_train.copy(), train_y)
        geo_kriging.train()

        bio_result.append(bio_kriging.predict(list(bio_target)))
        geo_result.append(geo_kriging.predict(list(geo_target)))

    out_dir = '../random_test/{}'.format(tag)
    # exist_ok avoids the check-then-create race when runs execute in parallel.
    os.makedirs(out_dir, exist_ok=True)
    with open('{}/bio_true_{}_result.txt'.format(out_dir, test_city),
              'a') as f:
        f.write('{}\n'.format(bio_result))
    with open('{}/geo_true_{}_result.txt'.format(out_dir, test_city),
              'a') as f:
        f.write('{}\n'.format(geo_result))
Beispiel #3
0
        #                             train_y,
        #                             variogram_model='gaussian',
        #                             verbose=False,
        #                             enable_plotting=False)

        # bio_z, _ = bio_kriging.execute('points',
        #                                 tmp_map['bio_x'].values,
        #                                 tmp_map['bio_y'].values)
        # tmp_map['BIOprob'] = (bio_z.data-bio_z.data.min())/(bio_z.data.max()-bio_z.data.min())

        # geo_z, _ = geo_kriging.execute('points',
        #                                 tmp_map['x'].values,
        #                                 tmp_map['y'].values)
        # tmp_map['GEOprob'] = (geo_z.data-geo_z.data.min())/(geo_z.data.max()-geo_z.data.min())

        bio_kriging = kriging(BIOtrain_point, train_y)
        bio_kriging.train()

        geo_kriging = kriging(GEOtrain_point, train_y)
        geo_kriging.train()

        tmp_map['BIOprob'] = [
            bio_kriging.predict([b_x, b_y])
            for b_x, b_y in tmp_map[['bio_x', 'bio_y']].values
        ]
        tmp_map['GEOprob'] = [
            geo_kriging.predict([g_x, g_y])
            for g_x, g_y in tmp_map[['x', 'y']].values
        ]

        if not os.path.isdir('../kriging_result/{}/'.format(tag)):
Beispiel #4
0
 def traincheap(self):
     """Build a kriging model from ``self.Xc`` / ``self.yc`` and train it.

     The new model is stored on the instance as ``self.kc`` before its
     ``train()`` method is called with default arguments.

     NOTE(review): the "c" suffix presumably marks the cheap (low-fidelity)
     data set of the enclosing class -- confirm against the class definition.
     """
     self.kc = kriging(self.Xc, self.yc)
     self.kc.train()
     # NOTE(review): a bare ``print`` writes an empty line under Python 2 but
     # is a no-op expression under Python 3 -- confirm which was intended.
     print
Beispiel #5
0
# Sample locations, one two-component row per observation.
# NOTE(review): the values look like [longitude, latitude] pairs -- confirm.
X = np.array([[ -0.008356,  51.515017],
       [ -0.042199,  51.522501],
       [ -0.019934,  51.510117],
       [ -0.033470,  51.540364],
       [ -0.013044,  51.523325]])


# Observed value at each row of X.
# TODO: decide whether the data should be normalised (e.g. to a 1-5 scale).
y = np.array([4,8,6,2,7])

# Optimizer used for every training pass; either 'ga' or 'pso'.
optimizer = 'ga'

print('Setting up the Kriging Model')

k = kriging(X, y)
# Pass the selected optimizer on the first training pass as well; calling
# train() without it would silently fall back to the library default and make
# this pass inconsistent with the retraining inside the infill loop below.
k.train(optimizer=optimizer)
k.plot()
# Method name is spelled "calcuatemeanMSE" here; kept as-is.
# NOTE(review): verify the spelling against the kriging class.
print(k.calcuatemeanMSE())

# Ten rounds of infill: request one new point per round, add it to the model
# and retrain.
for i in range(10):
    newpoints = k.infill(1, method='error')
    for point in newpoints:
        print('Adding point {}'.format(point))
        # NOTE(review): every infill point is added with the value y[0]
        # rather than a value measured at `point`; this looks like a
        # placeholder -- confirm before relying on the refined model.
        k.addPoint(point, y[0])
    k.train(optimizer=optimizer)

print('Now plotting final results...')
k.plot()

print(k.calcuatemeanMSE())