def osm_nominatim(DataLocation):
    # DataLocation = r'C:\Users\Wuga\Documents\DATASETS\SFREHPDATA\HousingSales2012.csv'
    geolocator = Nominatim(user_agent="sf-house-price")  # user_agent is required by recent geopy
    df = DO.readfile(DataLocation)
    print(df)
    # Strip apartment numbers ("#...") from the addresses
    df['Location'] = [x[:x.find('#')] if x.find('#') != -1 else x for x in df['Location']]
    print(df)
    complete = df['Location'][3] + " San Francisco"
    print(complete)
    longlat = geolocator.geocode(complete)
    print((longlat.latitude, longlat.longitude))
    transed = []
    # transed = [geolocator.geocode(x + " San Francisco", timeout=None) for x in df['Location'][:5]]
    # Nominatim's usage policy requires throttling, so sleep between requests
    for x in df['Location'][:5]:
        transed.append(geolocator.geocode(x + " San Francisco", timeout=10))
        time.sleep(2)
    transed_la = [x.latitude if x is not None else 0 for x in transed]
    transed_lo = [x.longitude if x is not None else 0 for x in transed]
    cood_price = list(zip(df['Price'], transed_la, transed_lo))
    df_cood_price = pd.DataFrame(data=cood_price, columns=['Price', 'Latitude', 'Longitude'])
    df_cood_price.to_csv(Constants.filelocations.OSM_NOMINATIM_HOUSEPRICE, header=True, index=False)
    print(df_cood_price)
    return
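# A tidier way to throttle than the manual time.sleep above: geopy ships a
# RateLimiter wrapper. A minimal sketch, assuming geopy >= 1.16; the 2-second
# delay mirrors the sleep in osm_nominatim, and geocode_throttled is a
# hypothetical helper name.
from geopy.extra.rate_limiter import RateLimiter

def geocode_throttled(locations, geolocator):
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
    return [geocode(x + " San Francisco", timeout=10) for x in locations]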
def main():
    print("Begin")
    print("-----------------------------------------------------")
    # print("Fetching raw data")
    DataOperation.get_init_data()
    print("-----------------------------------------------------")
    # print("Checking data format")
    if not DataOperation.data_check():
        print("Data format check failed; please check the document")
        return
    print("Data format check passed")
    print("-----------------------------------------------------")
    # print("Computing data")
    OvertimePayCalculation.calculate_data()
    print("-----------------------------------------------------")
    # print("Writing data")
    DataOperation.set_data()
    print("-----------------------------------------------------")
    print("End")
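# Standard entry-point guard so the pipeline runs when the file is executed
# directly but not on import; a minimal sketch, nothing project-specific.
if __name__ == "__main__":
    main()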
def train(maxlen=100, embedding_dim=128):
    # Main training / testing code
    start = time.time()
    l_trainX, r_trainX, ret_labels, l_topredictX, r_topredictX = do.load_data_bi_word2vec(
        maxlen=maxlen, words_keep=50000, validation_portion=0.,
        embedding_dim=embedding_dim, ma="A")
    trainY = to_categorical(ret_labels, nb_classes=3)
    del ret_labels

    lnet = tflearn.input_data([None, maxlen, embedding_dim])
    rnet = tflearn.input_data([None, maxlen, embedding_dim])
    lnet = tflearn.gru(lnet, embedding_dim, dropout=0.8, return_seq=False, dynamic=True)
    rnet = tflearn.gru(rnet, embedding_dim, dropout=0.8, return_seq=False, dynamic=True)
    net = tflearn.layers.merge_outputs([lnet, rnet])
    net = tflearn.fully_connected(net, 3, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit([l_trainX, r_trainX], trainY, validation_set=0.1,
              show_metric=True, batch_size=32)
    model.save('MODELS/E_W2V_GRU_TC{}_{}.dy'.format(embedding_dim, maxlen))
    # model.load('MODELS/E_W2V_GRU_TC{}_{}.dy'.format(embedding_dim, maxlen))
    del l_trainX
    del r_trainX
    del trainY

    idx2cla = {0: 'neu', 1: 'pos', 2: 'neg'}
    filename = "Result/result_{}.csv".format(datetime.datetime.now().strftime("%Y%m%d%H%M"))
    prefix = list(open('Result/A_AFTER_NRP_200', 'r').readlines())
    f = open(filename, 'w')
    f.write('SentenceId,View,Opinion\n')
    # Predict in chunks of 5000 to keep memory bounded
    a = [0, 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000]
    b = [5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000, 65000]
    ANS = []
    for i in range(12):
        ans = model.predict([l_topredictX[a[i]:b[i]], r_topredictX[a[i]:b[i]]])
        ANS.extend([s for s in ans])
        print("ANS.LENGTH: {}".format(len(ans)))
    for i, r in enumerate(ANS):
        f.write(prefix[i].strip())
        idx = int(np.argmax(r))
        f.write(idx2cla[idx])
        k = ""
        for l in r:
            k += ',{:.4f}'.format(l)
        f.write(k)
        f.write('\n')
    f.close()
    end = time.time()
    print("TIME COST: {}".format(end - start))
    outf = vote_by_score(filename)
    add(outf)
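# The hardcoded a/b index lists above are brittle; a generic chunked-predict
# helper covers any dataset size. A minimal sketch -- predict_in_chunks is a
# hypothetical name, not part of the original project.
def predict_in_chunks(model, l_X, r_X, chunk=5000):
    out = []
    for s in range(0, len(l_X), chunk):
        out.extend(model.predict([l_X[s:s + chunk], r_X[s:s + chunk]]))
    return out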
def analyze_forest(data, rate):
    file = open("_final_bigger_forest_{0}.csv".format(rate), "w")
    start_trees = 5
    num_of_trees = start_trees
    percentage = []
    data_learn, data_test = DataOperation.separate_data(DataOperation.bin_data(data, 5), 2)
    true_values = list(data_test.iloc[:, len(data_test.columns) - 1])
    classifier = RandomForest.RandomForest(start_trees, data_learn, rate)
    for _ in range(30):
        results = classifier.estimate_class(data_test)
        good, bad = 0, 0
        for i in range(len(true_values)):
            if results[i] == true_values[i]:
                good += 1
            else:
                bad += 1
        percentage.append(good / (good + bad) * 100)
        file.write("{0};{1}%\n".format(num_of_trees, good / (good + bad) * 100))
        # Grow the forest by 5 trees per round and re-evaluate
        for _ in range(5):
            classifier.add_new_tree()
            num_of_trees += 1
    file.close()
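# A one-pass alternative to the manual good/bad tally in analyze_forest;
# accuracy_percent is a hypothetical helper, not part of the original project.
def accuracy_percent(results, true_values):
    good = sum(1 for p, t in zip(results, true_values) if p == t)
    return 100.0 * good / len(true_values)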
def google_nominatim(DataLocation):
    df = DO.readfile(DataLocation)
    print(df)
    # Sanity check: geocode one known address first
    result = geocoder.google('110 OTTER COVE TERRACE San Francisco')
    print(result.latlng)
    transed = []
    # Throttle requests to stay within Google's rate limits
    for x in df['Location']:
        transed.append(geocoder.google(x + " San Francisco", timeout=10).latlng)
        time.sleep(2)
    print(transed)
    nptransed = np.array(transed)
    transed_la = nptransed[:, 0]
    transed_lo = nptransed[:, 1]
    cood_price = list(zip(df['Price'], transed_la, transed_lo))
    df_cood_price = pd.DataFrame(data=cood_price, columns=['Price', 'Latitude', 'Longitude'])
    df_cood_price.to_csv(Constants.filelocations.GOOGLE_NOMINATIM_HOUSEPRICE, header=True, index=False)
    return
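# geocoder returns an empty latlng for failed lookups, which would break the
# np.array column slicing in google_nominatim. A minimal guard sketch to run
# before building nptransed; fill_missing_latlng is a hypothetical helper, and
# the (0, 0) placeholder is an assumption borrowed from the OSM variant above.
def fill_missing_latlng(latlngs, placeholder=(0.0, 0.0)):
    return [ll if ll else list(placeholder) for ll in latlngs]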
def imgClassify(inputImg):
    # Load the trained model
    model = torch.load('resnet.pkl')
    # print(model)
    # Switch to evaluation mode (fixes dropout/batch-norm behaviour)
    model.eval()
    # Adapt the input format
    BATCH_SIZE = 1
    my_data = DataOperation.MyDataset(inputImg, transform=transforms.ToTensor())
    my_loader = DataOperation.Data.DataLoader(dataset=my_data, batch_size=BATCH_SIZE)
    for batch_index, (test_x, test_y) in enumerate(my_loader):
        test_output = model(test_x)
        pred_y = torch.max(test_output, 1)[1].data.numpy()
        classify = pred_y
    return classify
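# Hypothetical usage sketch: the path is an assumption, and imgClassify is
# expected to return a numpy array of predicted class indices.
pred = imgClassify('./StampDB/sample.png')
print('predicted class index:', pred)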
def voronoiplot(df):
    Datalist = []
    for i in df.index:
        # print(i, [df['Latitude'][i], df['Longitude'][i]])
        Datalist.append([float(df['Latitude'][i]), float(df['Longitude'][i])])
    points = np.array(Datalist)
    # print(points)
    vor = Voronoi(points)
    # voronoi_plot_2d(vor)
    region, vertices = Vorplots.voronoi_finite_polygons_2d(vor)
    print(len(region))
    geo_json, data_csv = DO.geojsonwrite(vor, region, vertices, df)
    print(data_csv)
    target = open(Constants.filelocations.VORONOI_GEOJSON, 'w')
    target.write(geo_json)
    target.close()
    data_csv.to_csv(Constants.filelocations.GEOJSON_CSV_DATA, header=True, index=False)
    return

# LOCATION = '/Users/Wuga/Documents/DATA/SFREHPDATA/HousingSales2012PL_GOOGLE.csv'
# df = DO.readgeofile(LOCATION)
# train, test, train_index, test_index = DO.dataseperator(df)
# print([float(train['Latitude'][1]), float(train['Longitude'][1])])
# voronoiplot(train)
for i in range(400):
    position = [random.random() * 10, random.random() * 10]
    mu1 = [3, 3]
    mu2 = [7, 7]
    radius1 = 5
    radius2 = 3.5
    # Points inside either outer disk but outside the inner disk get the higher price band
    if (InCirclue(position, mu1, radius1) and not InCirclue(position, mu1, radius2)) or \
       (InCirclue(position, mu2, radius1) and not InCirclue(position, mu2, radius2)):
        data_with_noise.append([7 + random.random() / 2, position[0], position[1]])
    else:
        data_with_noise.append([6 + random.random() / 2, position[0], position[1]])

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
colors = np.abs(np.array(data_with_noise)[:, 0])
data = np.array(data_with_noise)
print(colors)
# for idx, data in enumerate(data_with_noise):
#     ax.scatter(data[1], data[2], data[0], c=colors[idx] * 20)
ax.plot_trisurf(data[:, 1], data[:, 2], data[:, 0], cmap=cm.jet, linewidth=0.2)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()

for i in range(len(data_with_noise)):
    data_with_noise[i][0] = '$' + str(data_with_noise[i][0])
df = pd.DataFrame(data_with_noise, columns=['Price', 'Latitude', 'Longitude'])
print(df)
DO.write(df, '../../sysnthetic.csv')
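# InCirclue is called above but not defined in this snippet; a minimal sketch
# assuming a plain Euclidean disk-membership test (the spelling follows the
# call sites).
import math

def InCirclue(position, mu, radius):
    # True if `position` lies within `radius` of the center `mu`
    return math.hypot(position[0] - mu[0], position[1] - mu[1]) <= radius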
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
# project helper module
import DataOperation

use_cuda = torch.cuda.is_available()

# Hyper Parameters
EPOCH = 50        # number of passes over the training data
BATCH_SIZE = 50
LR = 0.001        # learning rate

# Build the datasets from the custom MyDataset class.
# Note: these are the datasets themselves, not the loader iterators.
train_data = DataOperation.MyDataset('./StampDB/', 'train.txt', transform=transforms.ToTensor())
test_data = DataOperation.MyDataset('./StampDB/', 'test.txt', transform=transforms.ToTensor())
# valid_data = DataOperation.MyDataset('./StampDB/', 'valid.txt', transform=transforms.ToTensor())
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
# valid_loader = Data.DataLoader(dataset=valid_data, batch_size=BATCH_SIZE)

model = models.resnet50(pretrained=False)
# Adaptive pooling maps arbitrary input sizes to a 1x1 feature map
# (needed on torch 0.4.x, where resnet50 used a fixed-size AvgPool2d)
adp = torch.nn.AdaptiveAvgPool2d((1, 1))
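# The snippet stops after building the adaptive pool; a minimal sketch of how
# it would typically be wired in. NUM_CLASSES, the Adam optimizer, and the
# cross-entropy loss are assumptions, not the original project's code.
NUM_CLASSES = 10  # assumption: the actual number of stamp classes
model.avgpool = adp
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
if use_cuda:
    model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()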
import DataOperation
import Setting
from Model import autoencoder_A, autoencoder_B


def save_model(modelA, modelB):
    modelA.save("./modelA.h5")
    modelB.save("./modelB.h5")


if __name__ == '__main__':
    for epoch in range(Setting.epochs):
        print('Epoch {} ......'.format(epoch))
        warped_A, target_A = DataOperation.get_training_data_A()
        warped_B, target_B = DataOperation.get_training_data_B()
        loss_A = autoencoder_A.train_on_batch(warped_A, target_A)
        loss_B = autoencoder_B.train_on_batch(warped_B, target_B)
        print("lossA:{},lossB:{}".format(loss_A, loss_B))
        # Save a checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            save_model(autoencoder_A, autoencoder_B)
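# Hypothetical warm-start sketch: reload the last checkpoint before training.
# The load calls and the PixelShuffler custom layer mirror the prediction
# script below; the os.path.exists guard is an assumption.
import os
import keras
from PixelShuffler import PixelShuffler

if os.path.exists('./modelA.h5') and os.path.exists('./modelB.h5'):
    autoencoder_A = keras.models.load_model('./modelA.h5', custom_objects={'PixelShuffler': PixelShuffler})
    autoencoder_B = keras.models.load_model('./modelB.h5', custom_objects={'PixelShuffler': PixelShuffler})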
import numpy as np
import cv2
import DataOperation
import Setting
import matplotlib.pyplot as plt

images_A_paths = DataOperation.get_image_paths(Setting.IMAGE_PATH_A)
images_B_paths = DataOperation.get_image_paths(Setting.IMAGE_PATH_B)
print('Number of images_A is {}, number of images_B is {}'.format(
    len(images_A_paths), len(images_B_paths)))

A_Images = DataOperation.load_images(images_A_paths[:3])
B_Images = DataOperation.load_images(images_B_paths[:3])
figure = np.concatenate([A_Images, B_Images], axis=0)
print(figure.shape)
figure = figure.reshape((2, 3) + figure.shape[1:])
print(figure.shape)
figure = DataOperation.stack_image(figure)
print(figure.shape)

# OpenCV loads images as BGR; convert to RGB for matplotlib
plt.imshow(cv2.cvtColor(figure, cv2.COLOR_BGR2RGB))
plt.show()
import numpy as np
import cv2
import matplotlib.pyplot as plt
import keras
from PixelShuffler import PixelShuffler
import DataOperation

if __name__ == '__main__':
    autoencoder_A = keras.models.load_model(
        './modelA.h5', custom_objects={'PixelShuffler': PixelShuffler})
    autoencoder_B = keras.models.load_model(
        './modelB.h5', custom_objects={'PixelShuffler': PixelShuffler})
    warped_A, target_A = DataOperation.get_training_data_A()
    warped_B, target_B = DataOperation.get_training_data_B()
    test_A = target_A[0:3]
    test_B = target_B[0:3]
    # Stack: original A - image generated by decoder A - image generated by decoder B
    figure_A = np.stack([
        test_A,
        autoencoder_A.predict(test_A),
        autoencoder_B.predict(test_A),
    ], axis=1)
    # Stack: original B - image generated by decoder B - image generated by decoder A
    figure_B = np.stack([
        test_B,
        autoencoder_B.predict(test_B),
        autoencoder_A.predict(test_B),
    ], axis=1)
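    # The script is cut off at this point; a hypothetical display step
    # mirroring the preview script above -- the DataOperation.stack_image
    # tiling and the BGR-to-RGB conversion are assumptions carried over from
    # that script.
    figure = np.concatenate([figure_A, figure_B], axis=0)
    figure = DataOperation.stack_image(figure)
    plt.imshow(cv2.cvtColor(figure, cv2.COLOR_BGR2RGB))
    plt.show()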
'''
Created on 09-23-2015

@author: Wuga
'''
import folium
import geocoder
import DataOperation as DO
import DataPreprocess as DP
import Vorplots as V
import Constants
import pandas as pd

g = geocoder.osm('dublin,ireland')
loca = g.latlng
print(loca)

LOCATION = Constants.filelocations.DUBLIN_2010
df = DO.readgeofile(LOCATION)
train, test, train_index, test_index = DO.dataseperator(df)
map_osm = folium.Map(location=loca, zoom_start=9, max_zoom=18)
train = DP.elim(train)
train = train.reset_index(drop=True)
V.voronoiplot(DP.elim(train))
map_osm.geo_json(
    geo_path=r'autovoronoi.json',
    data_out='/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.json',
    data=pd.read_csv('/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.csv'),
    columns=['Id', 'Price'],
    key_on='feature.id',
    threshold_scale=[200000, 250000, 300000, 350000, 400000, 500000],
    fill_color='YlOrRd',
    fill_opacity=0.5,
    line_opacity=0.5,
    legend_name='SF house price')
map_osm.create_map(path=Constants.filelocations.MAP_HTML)
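# Map.geo_json and Map.create_map are pre-0.2 folium APIs; on current folium
# the equivalent is roughly the sketch below. folium.Choropleth and Map.save
# are real APIs, but the exact parameter mapping here is an assumption.
folium.Choropleth(
    geo_data='autovoronoi.json',
    data=pd.read_csv('/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.csv'),
    columns=['Id', 'Price'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    line_opacity=0.5,
    legend_name='SF house price',
).add_to(map_osm)
map_osm.save(Constants.filelocations.MAP_HTML)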