def countStatistics():
    """Summarise the number and share of stores per timezone as a table.

    Loads ``directory.csv`` through ``ReadCsv``, replaces missing cells with
    the string ``"None"``, counts the rows for each ``Timezone`` value and
    writes the summary to ``timeZone.csv`` with the columns ``Timezone``,
    ``Amount`` and ``Rate``.  That file is then read back and rendered with
    ``FF.create_table`` into ``countStatistics.html``.
    """
    stores = ReadCsv("directory.csv").readCsv()
    # Fill missing cells so rows without a timezone still form a group.
    stores = stores.fillna("None")
    timezones = stores["Timezone"]

    # Distinct timezone names in ascending order.
    sorted_zones = sorted(set(timezones))

    # Occurrences of each timezone.
    # NOTE(review): the counts are paired with sorted_zones purely by
    # position, which assumes groupby yields its groups in the same ascending
    # order -- confirm against the pandas version in use.
    zone_counts = dict(stores.groupby(timezones)["Timezone"].value_counts())
    amounts = list(zone_counts.values())

    # Total number of stores and the share held by each timezone.
    store_sum = sum(amounts)
    rates = [amount / store_sum for amount in amounts]

    # The csv module requires newline=""; without it the writer's "\r\n" row
    # terminators are translated a second time on Windows, which leaves an
    # empty line after every record.
    with open("timeZone.csv", "w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["Timezone", "Amount", "Rate"])
        writer.writerows(zip(sorted_zones, amounts, rates))

    summary = ReadCsv("timeZone.csv").readCsv()
    table = FF.create_table(summary)
    py.offline.plot(table, filename='countStatistics.html')
def timeZoneDistribution():
    """Plot all stores on a map, grouped into one trace per timezone.

    Reads ``directory.csv`` through ``ReadCsv``, attaches a ``Text`` column
    holding the hover label of every store, splits the rows by their
    ``Timezone`` value and hands each group to ``setLoopData`` together with
    a colour from ``randomColor``.  The resulting figure is written to
    ``timeZoneDistribution.html``.
    """
    reader = ReadCsv("directory.csv")
    frame = reader.readCsv()
    (_lat, _lon, store_number, store_name,
     address, postcode, phone_number) = reader.getCsvData()
    zones = frame["Timezone"]

    # Hover label shown for each store.
    hover_text = ("StoreNum: " + store_number + '</br>'
                  + "store_name: " + store_name + '</br>'
                  + "StreetAddress: " + address + '</br>'
                  + "Postcode: " + postcode + '</br>'
                  + "phone_number: " + phone_number + '</br>')
    # Store the label alongside the source data.
    frame['Text'] = hover_text

    # One entry per distinct timezone; each becomes its own trace
    # (presumably a Scattermapbox built by setLoopData -- confirm).
    by_zone = frame.groupby(zones)
    traces = [
        setLoopData(by_zone.get_group(zone), zone, randomColor())
        for zone in set(zones)
    ]

    figure = dict(
        data=Data(traces),
        layout=setLoopDataLayout(title="Timezone Distribution Map"),
    )
    py.offline.plot(figure, validate=False,
                    filename='timeZoneDistribution.html')
def init():
    """Load the store directory and count the stores of every country.

    Returns:
        A ``(country, grouped)`` pair: ``country`` is a new empty list meant
        to hold country names, and ``grouped`` is the result of
        ``groupby('Country').size()`` on the data read from
        ``directory.csv``.
    """
    stores = ReadCsv("directory.csv").readCsv()
    # Series holding the number of stores per country.
    per_country = stores.groupby('Country').size()
    # Container for country names; handed back empty.
    names = []
    return names, per_country
def readFile():
    """Return the store directory as a list of nine-element rows.

    Each row has the form ``[lon, lat, store_number, store_name, address,
    postcode, phone_number, 0, " "]``; the trailing ``0`` and ``" "`` are
    constant placeholder columns.
    """
    reader = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name,
     address, postcode, phone_number) = reader.getCsvData()

    columns = zip(lon, lat, store_number, store_name,
                  address, postcode, phone_number)
    # Append the two placeholder cells to every record.
    return [[*record, 0, " "] for record in columns]
def solveKMapData():
    """Build an HTML hover label for every store in ``directory.csv``.

    Returns:
        ``(lat, lon, datas)`` where ``lat`` and ``lon`` are returned exactly
        as ``getCsvData`` supplied them and ``datas`` is a list with one
        label string per index of ``lat``.
    """
    reader = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name,
     store_address, postcode, phone_number) = reader.getCsvData()

    labels = []
    for idx in range(len(lat)):
        number, name, street, code, phone = (
            store_number[idx],
            store_name[idx],
            store_address[idx],
            postcode[idx],
            phone_number[idx],
        )
        labels.append('store_number:' + number + '</br>'
                      + 'store_name:' + name + '</br>'
                      + 'Postcode:' + code + '</br>'
                      + 'phone_number:' + phone + '</br>'
                      + 'store_address:' + street)
    return lat, lon, labels
def radiusTimeChange(la, lo):
    """Plot how the duration of the radius query changes as the radius grows.

    The query is executed once for each radius in a fixed list and the
    elapsed time of every run is drawn as a dotted line chart through
    ``py.plot``.

    Args:
        la: Forwarded unchanged as the first argument of ``filter``
            (presumably the latitude of the query centre -- confirm).
        lo: Forwarded unchanged as the second argument of ``filter``
            (presumably the longitude of the query centre -- confirm).
    """
    # Load the table and turn it into one row per store.
    reader = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name,
     store_address, postcode, phone_number) = reader.getCsvData()
    rows = [
        list(fields)
        for fields in zip(lat, lon, store_number, store_name,
                          store_address, postcode, phone_number)
    ]

    radii = [1, 20, 100, 500, 2000, 10000, 40076]
    run_times = []
    for radius in radii:
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring short intervals.
        started = time.perf_counter()
        # NOTE(review): called with four arguments, so this has to be a
        # project-level `filter`, not the builtin -- confirm it is in scope.
        filter(la, lo, radius, rows)
        run_times.append(time.perf_counter() - started)

    trace = go.Scatter(
        x=radii,
        y=run_times,
        name='RunTime Trace',
        line=dict(color=('rgb(22, 96, 167)'), width=4, dash='dot'),
    )

    # Chart title and axis captions.
    layout = dict(
        title='随着r的增长查询时延的变化',
        xaxis=dict(title='r'),
        yaxis=dict(title='查询时延'),
    )
    fig = dict(data=[trace], layout=layout)
    py.plot(fig, filename='styled-line')
def districtAmount():
    """Draw a choropleth map of the number of stores per country.

    Country codes found in the ``Country`` column of ``directory.csv`` are
    translated to three-letter codes through the mapping stored in
    ``CountryTwoLettersToThree.pickle``; the figure is written to
    ``distrinctAmount.html``.
    """
    # Mapping from two-letter to three-letter country codes.
    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only ever point this at the trusted pickle shipped with the project.
    with open("CountryTwoLettersToThree.pickle", "rb") as file:
        three_country_code = pickle.load(file)

    from Configuration.ReadCsv import ReadCsv
    my_file = ReadCsv('directory.csv').readCsv()

    # Number of rows per country.
    country_count = dict(my_file["Country"].value_counts())
    # Store counts, in the same order as the codes built below.
    country_num = list(country_count.values())
    # Three-letter code of every country that occurs in the data.
    country_of_three_lettters_list = [
        three_country_code[key] for key in country_count
    ]

    data = [
        dict(
            type='choropleth',
            # Attribute names are case-sensitive: the former `Colorscale`
            # key was not a recognised attribute, so this scale was never
            # applied.  The colour strings are normalised to plain
            # "rgb(r,g,b)" form so the scale parses cleanly.
            colorscale=[[0, "rgb(255,255,255)"],
                        [0.3, "rgb(255,80,80)"],
                        [0.35, "rgb(255,160,160)"],
                        [1, "rgb(255,0,0)"]],
            z=country_num,
            reversescale=False,
            autocolorscale=False,
            locations=country_of_three_lettters_list,
            locationmode="ISO-3",
            colorbar=dict(autotick=False, title='星巴克<br>商店数量'),
        )
    ]
    layout = dict(
        title="National Distribution Map",
        autosize=True,
        # Was misspelled `hovemode`, which is not a layout attribute.
        hovermode='closest',
        # NOTE(review): hard-coded access token committed to source --
        # consider loading it from configuration or the environment.
        mapbox=dict(
            accesstoken=
            "pk.eyJ1IjoibW9oYWlsYW5nIiwiYSI6ImNqZm93cGs5bDF3OXMyeG1zdGhuejBoNTIifQ.fouiU5hKtls0ohPA7LHJEA",
            bearing=0,
            pitch=0,
            zoom=1,
        ))
    fig = dict(data=data, layout=layout)
    py.offline.plot(fig, validate=False, filename='distrinctAmount.html')
def topKSearch(latitude, longitude, k):
    """Return the ``k`` stores closest to a query point.

    Candidates are gathered with a square search box, measured in degrees,
    that starts at 5 and grows by 5 per round until at least ``k`` rows fall
    inside it.  The candidates are then ordered by haversine distance to the
    query point and cut down to the first ``k``.

    Args:
        latitude: First query coordinate.
            NOTE(review): despite its name it is compared with element 0 of
            each row, which is filled from the ``lon`` column, and is fed to
            the haversine formula as a longitude -- confirm what callers
            actually pass.
        longitude: Second query coordinate; compared with element 1 of each
            row (the ``lat`` column).  Same caveat as above.
        k: Number of stores wanted.

    Returns:
        ``(lon_result, lat_result, final_result, elapsed)`` -- element 0 and
        element 1 of every selected row as two lists, the selected rows
        themselves (``[lon, lat, store_number, store_name, address,
        postcode, phone_number, radius_x, radius_y]``), and the seconds
        spent searching and sorting.  Fewer than ``k`` rows are returned
        when the data does not contain ``k`` matching rows.
    """
    reader = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name,
     address, postcode, phone_number) = reader.getCsvData()

    # Starting half-width of the search box, in degrees, and its growth step.
    radius_x = 5
    radius_y = 5
    step = 5
    # Past this half-width even the wrap-around test admits every longitude
    # in [-180, 180], so further growth cannot add candidates.
    max_radius = 720

    # The last two cells of each row hold the current search radii; they are
    # overwritten before every filtering pass.
    result_list = [
        [*fields, 10, 5]
        for fields in zip(lon, lat, store_number, store_name,
                          address, postcode, phone_number)
    ]

    # Query input.
    input_x = latitude
    input_y = longitude
    input_k = k

    def haveRSine(x):
        """Haversine distance in metres between row ``x`` and the query."""
        # lon1, lat1, lon2, lat2 in decimal degrees, converted to radians.
        lon1, lat1, lon2, lat2 = map(radians, [x[0], x[1], input_x, input_y])
        d_lon = lon2 - lon1
        d_lat = lat2 - lat1
        a = sin(d_lat / 2)**2 + cos(lat1) * cos(lat2) * sin(d_lon / 2)**2
        c = 2 * asin(sqrt(a))
        r = 6371  # mean Earth radius in kilometres
        return c * r * 1000

    def sovleFilter(x):
        """Tell whether row ``x`` lies inside the current search box."""
        reach_x, reach_y = x[-2], x[-1]
        if not (input_y - reach_y <= x[1] <= input_y + reach_y):
            return False
        if input_x + reach_x > 180:
            # The box crosses +180 and continues from -180.  The two ranges
            # are now grouped so the test above applies to both; previously
            # `A and B or C` let the wrapped range skip it.
            return (input_x - reach_x < x[0] < 180
                    or -180 < x[0] < -(180 - (input_x + reach_x - 180)))
        if input_x - reach_x < -180:
            # The box crosses -180 and continues below +180.  The upper
            # bound used to be -180, which no value could satisfy.
            return (-180 < x[0] < reach_x + input_x
                    or 180 - abs(input_x - reach_x + 180) < x[0] < 180)
        return input_x - reach_x <= x[0] <= input_x + reach_x

    temp_list = result_list

    def new_filter(x1, y1):
        """Stamp the radii onto every row and return the rows in the box."""
        for row in temp_list:
            row[-2] = x1
            row[-1] = y1
        return list(filter(sovleFilter, temp_list))

    result = []  # rows that satisfy the current search box
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    # Widen the box until enough candidates have been found.
    while len(result) < input_k:
        result = new_filter(radius_x, radius_y)
        # Stop when nothing more can be found; otherwise a k larger than the
        # number of reachable rows would loop forever.
        if len(result) == len(temp_list) or radius_x > max_radius:
            break
        radius_x += step
        radius_y += step

    # Keep the k nearest candidates.
    final_result = sorted(result, key=haveRSine)[:input_k]
    lon_result = [row[0] for row in final_result]
    lat_result = [row[1] for row in final_result]
    end = time.perf_counter()
    return lon_result, lat_result, final_result, end - start
def timeZoneMap():
    """Read ``directory.csv`` and pass it to ``drawTimezoneMap`` in "draw" mode."""
    frame = ReadCsv("directory.csv").readCsv()
    drawTimezoneMap(frame, "draw")