loc2 = np.array([lon2,lat2]) X = np.append(X,[loc1,loc2], axis=0) y = np.append(y,[y[i],y[i]]) # ============================================================================= # First pass through model # ============================================================================= # We can choose between a ga and a pso here optimizer = 'ga' print('Setting up the Kriging Model') k = kriging(X, y) k.train(optimizer = optimizer) k.plot() # new locations # 51.494148, -0.008185 # 51.494416, -0.005447 # 51.490444, -0.008367 # 51.490532, -0.003583 # #corner 1 - 51.509133, -0.006260 #corner 2 - 51.511024, -0.000543 #corner 3 - 51.511431, -0.006752 X1 = [ -0.006260, 51.509133]
def random_test(Number_test, test_city):
    """Hold out one city, classify its samples and krige the class probabilities.

    A one-vs-rest logistic regression is fitted on every row of the
    module-level ``europe_sample`` whose ``city`` differs from ``test_city``
    and is used to predict class probabilities for the held-out rows.  For
    each held-out row the probabilities are attached to the locations of the
    training cities, once in geographic ``x``/``y`` coordinates and once in
    the ``bio_x``/``bio_y`` coordinates produced by ``affine_transform``; a
    kriging model is trained on each and evaluated at the location of the
    held-out city.  Both lists of predictions are appended, one line per
    call, to text files under ``../random_test/<tag>/``.

    Relies on names defined elsewhere in the file: ``europe_sample``,
    ``europe``, ``tag``, ``LogisticRegression``, ``pca_scatter``,
    ``calculate_centroids``, ``affine_transform`` and ``kriging``.

    Side effects: overwrites the ``bio_x``/``bio_y`` columns of the
    module-level ``europe`` frame and appends to the two result files.

    Args:
        Number_test: Identifier of the current run; it is only printed.
        test_city: Label in ``europe_sample['city']`` to hold out.  It must be
            a key of the label-to-city-name table below.

    Returns:
        None.

    Raises:
        KeyError: If a class label or ``test_city`` has no city-name entry.
        IndexError: If a mapped city name is missing from
            ``europe.city_ascii``.
    """
    # Sample labels used in ``europe_sample['city']`` -> ``europe.city_ascii``.
    city_name = {
        'BER': 'Berlin',
        'LON': 'London',
        'MAR': 'Marseille',
        'PXO': 'Porto',
        'SOF': 'Sofia',
        'STO': 'Stockholm',
        'Kiev': 'Kiev',
        'Oslo': 'Oslo',
        'Paris': 'Paris',
        'Vienna': 'Vienna'
    }

    train_data = europe_sample[europe_sample['city'] != test_city]
    test_data = europe_sample[europe_sample['city'] == test_city]

    # Feature columns selected beforehand for this ``tag``.
    key = list(
        pd.read_table(
            '../feature_extration_result/feature_list_{}.txt'.format(tag),
            header=None)[0])

    # model
    clf = LogisticRegression(penalty="l2",
                             C=0.5,
                             multi_class="ovr",
                             solver='liblinear',
                             class_weight="balanced")
    clf.fit(train_data[key].values, train_data["city"].values.copy())

    # One row per held-out sample, one column per training city label.
    test_pre_proba = pd.DataFrame(index=test_data.index,
                                  columns=clf.classes_,
                                  data=clf.predict_proba(
                                      test_data[key].values))

    print(Number_test)

    # affine transform: geographic city locations -> biological centroids
    pca_data = pca_scatter(train_data, key, 'city')
    city_label, city_centroids = calculate_centroids(pca_data)

    to_pts = city_centroids[[list(city_label).index(i) for i in city_label]]
    # Look each label up individually so that row k of ``from_pts`` is the
    # same city as row k of ``to_pts``.  Filtering with ``isin`` would return
    # the rows in the order they appear in ``europe`` instead, and would
    # silently change the row count on duplicate or missing city names.
    from_pts = np.array([
        _city_point(europe, city_name[label], ['x', 'y'])
        for label in city_label
    ])
    src_pts = europe[['x', 'y']].values
    bio_pts = affine_transform(from_pts, to_pts, src_pts)
    europe['bio_x'] = [i[0] for i in bio_pts]
    europe['bio_y'] = [i[1] for i in bio_pts]

    # The kriging sample locations depend only on the classifier's classes
    # and the evaluation location only on ``test_city``, so resolve them once
    # instead of once per held-out sample.
    class_cities = [city_name[label] for label in test_pre_proba.columns]
    bio_train_point = np.array([
        _city_point(europe, city, ['bio_x', 'bio_y']) for city in class_cities
    ])
    geo_train_point = np.array(
        [_city_point(europe, city, ['x', 'y']) for city in class_cities])
    target_city = city_name[test_city]
    bio_target = _city_point(europe, target_city, ['bio_x', 'bio_y'])
    geo_target = _city_point(europe, target_city, ['x', 'y'])

    # kriging interpolation
    bio_result = []
    geo_result = []
    for _, proba_row in test_pre_proba.iterrows():
        train_y = proba_row.tolist()

        # Fresh copies keep the samples independent in case the model
        # modifies the coordinate array it is given.
        bio_kriging = kriging(bio_train_point.copy(), train_y)
        bio_kriging.train()
        geo_kriging = kriging(geo_train_point.copy(), train_y)
        geo_kriging.train()

        bio_result.append(bio_kriging.predict(list(bio_target)))
        geo_result.append(geo_kriging.predict(list(geo_target)))

    # NOTE(review): results are written once, after all held-out samples
    # have been processed.
    # ``exist_ok`` avoids the check-then-create race of isdir() + makedirs().
    os.makedirs('../random_test/{}'.format(tag), exist_ok=True)
    with open(
            '../random_test/{}/bio_true_{}_result.txt'.format(tag, test_city),
            'a') as f:
        f.write('{}\n'.format(bio_result))
    with open(
            '../random_test/{}/geo_true_{}_result.txt'.format(tag, test_city),
            'a') as f:
        f.write('{}\n'.format(geo_result))
    return None


def _city_point(frame, city, columns):
    """Return ``columns`` of the first ``frame`` row whose ``city_ascii`` is ``city``."""
    return list(frame[frame.city_ascii == city][columns].values[0])
# train_y, # variogram_model='gaussian', # verbose=False, # enable_plotting=False) # bio_z, _ = bio_kriging.execute('points', # tmp_map['bio_x'].values, # tmp_map['bio_y'].values) # tmp_map['BIOprob'] = (bio_z.data-bio_z.data.min())/(bio_z.data.max()-bio_z.data.min()) # geo_z, _ = geo_kriging.execute('points', # tmp_map['x'].values, # tmp_map['y'].values) # tmp_map['GEOprob'] = (geo_z.data-geo_z.data.min())/(geo_z.data.max()-geo_z.data.min()) bio_kriging = kriging(BIOtrain_point, train_y) bio_kriging.train() geo_kriging = kriging(GEOtrain_point, train_y) geo_kriging.train() tmp_map['BIOprob'] = [ bio_kriging.predict([b_x, b_y]) for b_x, b_y in tmp_map[['bio_x', 'bio_y']].values ] tmp_map['GEOprob'] = [ geo_kriging.predict([g_x, g_y]) for g_x, g_y in tmp_map[['x', 'y']].values ] if not os.path.isdir('../kriging_result/{}/'.format(tag)):
def traincheap(self):
    """Create a kriging model from ``self.Xc`` and ``self.yc`` and train it.

    The model is stored on ``self.kc`` before training starts, so the
    attribute is set even if training raises.
    """
    cheap_model = kriging(self.Xc, self.yc)
    self.kc = cheap_model
    cheap_model.train()
    # Bare ``print`` reproduced exactly as in the original: a Python 2
    # interpreter emits an empty line here, Python 3 evaluates the name and
    # does nothing.
    print
# Demo: fit a kriging model to five hand-entered samples, then refine it with
# ten rounds of error-based infill.

# Sample locations, one row per sample.
# NOTE(review): the values look like [longitude, latitude] pairs - confirm.
X = np.array([
    [-0.008356, 51.515017],
    [-0.042199, 51.522501],
    [-0.019934, 51.510117],
    [-0.033470, 51.540364],
    [-0.013044, 51.523325],
])

## Need to normalize the data - maybe on a 1-5 scale or not
y = np.array([4, 8, 6, 2, 7])

# We can choose between a ga and a pso here
optimizer = 'ga'

print('Setting up the Kriging Model')
k = kriging(X, y)
# NOTE(review): this first fit does not pass ``optimizer``, so it uses
# whatever default ``train`` has; only the refits in the loop below use the
# value chosen above.
k.train()
k.plot()
print(k.calcuatemeanMSE())

for i in range(10):
    # Ask the model for one new location, chosen by its 'error' criterion.
    newpoints = k.infill(1, method='error')
    for point in newpoints:
        print('Adding point {}'.format(point))
        # No measurement exists for the new location; the first observed
        # value is supplied as its response.
        k.addPoint(point, y[0])
    # Refit once per round, after the new point has been added.
    k.train(optimizer=optimizer)

print('Now plotting final results...')
k.plot()
print(k.calcuatemeanMSE())