コード例 #1
0
def countStatistics():
    """Summarise store counts per timezone into a CSV file and an HTML table.

    Reads ``directory.csv`` through ``ReadCsv``, replaces missing values with
    the string ``"None"``, counts the rows for every distinct ``Timezone``
    value and writes one ``Timezone, Amount, Rate`` row per timezone (ordered
    by timezone) to ``timeZone.csv``.  The summary file is then read back and
    rendered with ``FF.create_table`` into ``countStatistics.html``.
    """
    # Load the data file and replace missing values with a placeholder.
    my_file = ReadCsv("directory.csv").readCsv()
    my_file = my_file.fillna("None")

    # Number of stores per timezone. groupby sorts its keys by default, so the
    # mapping is ordered by timezone and each name stays paired with its own
    # count instead of being matched up by position later on.
    time_zone_count = dict(my_file.groupby("Timezone").size())

    # Total number of stores, used to turn each count into a share.
    store_sum = sum(time_zone_count.values())

    # Write the per-timezone summary. newline="" is what the csv module
    # requires so that no extra blank rows appear on platforms that translate
    # line endings.
    with open("timeZone.csv", "w", newline="") as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(["Timezone", "Amount", "Rate"])
        for name, amount in time_zone_count.items():
            writer.writerow([name, amount, amount / store_sum])

    # Read the summary back and render it as a table.
    summary = ReadCsv("timeZone.csv").readCsv()
    table = FF.create_table(summary)
    py.offline.plot(table, filename='countStatistics.html')
コード例 #2
0
def timeZoneDistribution():
    """Plot the stores grouped by timezone into ``timeZoneDistribution.html``.

    Loads ``directory.csv``, attaches a ``Text`` column holding a per-store
    description, groups the rows by their ``Timezone`` value and builds one
    ``setLoopData`` entry (with a fresh ``randomColor()``) for every distinct
    timezone before handing the figure to plotly's offline plotter.
    """
    reader = ReadCsv("directory.csv")
    frame = reader.readCsv()
    (_lat, _lon, store_number, store_name, address, postcode,
     phone_number) = reader.getCsvData()
    zones = frame["Timezone"]

    # Description text attached to every store.
    description = (
        "StoreNum: " + store_number + '</br>'
        + "store_name: " + store_name + '</br>'
        + "StreetAddress: " + address + '</br>'
        + "Postcode: " + postcode + '</br>'
        + "phone_number: " + phone_number + '</br>'
    )
    # Store the text alongside the source data.
    frame['Text'] = description

    # Group the rows by timezone and build one entry per distinct timezone.
    rows_by_zone = frame.groupby(zones)
    traces = [
        setLoopData(rows_by_zone.get_group(zone), zone, randomColor())
        for zone in set(zones)
    ]

    data = Data(traces)
    layout = setLoopDataLayout(title="Timezone Distribution Map")

    py.offline.plot(dict(data=data, layout=layout),
                    validate=False,
                    filename='timeZoneDistribution.html')
コード例 #3
0
def init():
    """Return an empty country list and the number of stores per country.

    Returns:
        A ``(country, grouped)`` tuple: ``country`` is a new empty list and
        ``grouped`` is the result of ``groupby('Country').size()`` on the
        data read from ``directory.csv``.
    """
    frame = ReadCsv("directory.csv").readCsv()
    # Series of store counts keyed by country.
    stores_per_country = frame.groupby('Country').size()

    # The list of country names is returned empty.
    return [], stores_per_country
コード例 #4
0
 def readFile():
     """Return the rows of ``directory.csv`` as a list of 9-element lists.

     Each row is ``[lon, lat, store_number, store_name, address, postcode,
     phone_number, 0, " "]``; the last two entries are placeholder values
     initialised to ``0`` and a single space.
     """
     reader = ReadCsv("directory.csv")
     (lat, lon, store_number, store_name, address, postcode,
      phone_number) = reader.getCsvData()

     # Placeholder columns, one entry per latitude value.
     row_count = len(lat)
     semblance_column = [0] * row_count
     data_column = [" "] * row_count

     columns = (lon, lat, store_number, store_name, address, postcode,
                phone_number, semblance_column, data_column)
     return [list(row) for row in zip(*columns)]
コード例 #5
0
def solveKMapData():
    """Return the coordinates and a description string for every store.

    Returns:
        A ``(lat, lon, datas)`` tuple where ``lat`` and ``lon`` come straight
        from ``ReadCsv.getCsvData`` and ``datas`` is a list with one
        ``'</br>'``-separated description string per latitude entry.
    """
    reader = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name, store_address, postcode,
     phone_number) = reader.getCsvData()

    datas = []
    for idx in range(len(lat)):
        # Fetch every field first, then assemble the description.
        number, name, street, code, phone = (
            store_number[idx],
            store_name[idx],
            store_address[idx],
            postcode[idx],
            phone_number[idx],
        )
        datas.append(
            'store_number:' + number + '</br>'
            + 'store_name:' + name + '</br>'
            + 'Postcode:' + code + '</br>'
            + 'phone_number:' + phone + '</br>'
            + 'store_address:' + street
        )
    return lat, lon, datas
コード例 #6
0
def radiusTimeChange(la, lo):
    """Plot how long the distance filter takes as the search radius grows.

    Runs the project's ``filter(la, lo, r, rows)`` once for each radius in a
    fixed list, records the elapsed time of every call and plots radius
    against elapsed time as a dotted line.

    Args:
        la: First coordinate passed through to ``filter``.
        lo: Second coordinate passed through to ``filter``.
    """
    # Read the table data.
    file = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name, store_address, postcode,
     phone_number) = file.getCsvData()

    result_list = [
        list(row)
        for row in zip(lat, lon, store_number, store_name, store_address,
                       postcode, phone_number)
    ]

    rlist = [1, 20, 100, 500, 2000, 10000, 40076]
    runTime = []
    for radius in rlist:
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # high-resolution timer intended for measuring short durations.
        start = time.perf_counter()
        # NOTE(review): called with four arguments, so this must be a
        # project-level `filter` that shadows the builtin - confirm.
        filter(la, lo, radius, result_list)
        end = time.perf_counter()
        runTime.append(end - start)

    trace = go.Scatter(x=rlist,
                       y=runTime,
                       name='RunTime Trace',
                       line=dict(color=('rgb(22, 96, 167)'),
                                 width=4,
                                 dash='dot'))
    data = [trace]

    # Edit the layout
    layout = dict(
        title='随着r的增长查询时延的变化',
        xaxis=dict(title='r'),
        yaxis=dict(title='查询时延'),
    )

    fig = dict(data=data, layout=layout)
    py.plot(fig, filename='styled-line')
コード例 #7
0
def districtAmount():
    """Plot the number of stores per country as a choropleth map.

    Loads the two-letter to three-letter country-code mapping from
    ``CountryTwoLettersToThree.pickle``, counts the rows of ``directory.csv``
    per ``Country`` value and writes the resulting figure to
    ``distrinctAmount.html``.

    Raises:
        KeyError: If a country code in the data has no entry in the mapping.
    """
    # Mapping used to convert two-letter country codes to three-letter codes.
    with open("CountryTwoLettersToThree.pickle", "rb") as file:
        three_country_code = pickle.load(file)
    from Configuration.ReadCsv import ReadCsv
    my_file = ReadCsv('directory.csv').readCsv()

    # Number of rows per country.
    country_count = dict(my_file["Country"].value_counts())
    # Store count for each country, in the same order as the codes below.
    country_num = list(country_count.values())
    # Three-letter code for each country.
    country_of_three_lettters_list = [
        three_country_code[key] for key in country_count
    ]

    data = [
        dict(
            type='choropleth',
            # The key must be lower-case `colorscale`: with validate=False a
            # misspelt key is not reported, so the scale was never applied.
            # The colour strings are also normalised to plain "rgb(r, g, b)".
            colorscale=[[0, "rgb(255, 255, 255)"], [0.3, "rgb(255, 80, 80)"],
                        [0.35, "rgb(255, 160, 160)"], [1, "rgb(255, 0, 0)"]],
            z=country_num,
            reversescale=False,
            autocolorscale=False,
            locations=country_of_three_lettters_list,
            locationmode="ISO-3",
            colorbar=dict(autotick=False, title='星巴克<br>商店数量'),
        )
    ]

    layout = dict(
        title="National Distribution Map",
        autosize=True,
        # Was misspelt `hovemode`, which is likewise not reported when
        # validation is off.
        hovermode='closest',
        # NOTE(review): the access token is hard-coded in source; consider
        # loading it from configuration instead.
        mapbox=dict(
            accesstoken=
            "pk.eyJ1IjoibW9oYWlsYW5nIiwiYSI6ImNqZm93cGs5bDF3OXMyeG1zdGhuejBoNTIifQ.fouiU5hKtls0ohPA7LHJEA",
            bearing=0,
            pitch=0,
            zoom=1,
        ))

    fig = dict(data=data, layout=layout)
    py.offline.plot(fig, validate=False, filename='distrinctAmount.html')
コード例 #8
0
def topKSearch(latitude, longitude, k):
    """Find the ``k`` stores closest to a point by growing a search box.

    A box around the query point is widened in 5-degree steps until it
    contains at least ``k`` stores (or already spans the whole globe); the
    candidates are then ordered by haversine distance and the first ``k``
    are returned.

    NOTE(review): ``latitude`` is compared against each row's longitude
    column (``row[0]``) and ``longitude`` against its latitude column
    (``row[1]``), so callers appear to have to pass ``(longitude, latitude)``.
    The behaviour is kept as-is - confirm against the callers.

    Args:
        latitude: First query coordinate (see the note above).
        longitude: Second query coordinate (see the note above).
        k: Number of stores wanted.

    Returns:
        A ``(lon_result, lat_result, final_result, elapsed)`` tuple:
        the longitudes and latitudes of the selected rows, the rows themselves
        (``[lon, lat, store_number, store_name, address, postcode,
        phone_number, radius_x, radius_y]`` where the last two hold the box
        half-widths of the final search pass) and the elapsed search time in
        seconds.  Fewer than ``k`` rows are returned when fewer can match.
    """
    file = ReadCsv("directory.csv")
    (lat, lon, store_number, store_name, address, postcode,
     phone_number) = file.getCsvData()

    # One row per store; the two trailing slots receive the search radii.
    result_list = [
        [a, b, c, d, e, f, g, 10, 5]
        for a, b, c, d, e, f, g in zip(lon, lat, store_number, store_name,
                                       address, postcode, phone_number)
    ]

    # Query input.
    input_x = latitude
    input_y = longitude
    input_k = k

    def haveRSine(row):
        """Haversine distance in metres between ``row`` and the query point."""
        # Convert decimal degrees to radians.
        lon1, lat1, lon2, lat2 = map(radians,
                                     [row[0], row[1], input_x, input_y])
        d_lon = lon2 - lon1
        d_lat = lat2 - lat1
        a = sin(d_lat / 2)**2 + cos(lat1) * cos(lat2) * sin(d_lon / 2)**2
        c = 2 * asin(sqrt(a))
        r = 6371  # mean Earth radius in kilometres
        return c * r * 1000

    def in_search_box(row, radius_x, radius_y):
        """Tell whether ``row`` lies inside the box, wrapping at +/-180."""
        # The second-coordinate band applies to every case. The original
        # east-wrap branch lost this check for the wrapped range because
        # `and` binds tighter than `or`.
        if not input_y - radius_y <= row[1] <= input_y + radius_y:
            return False
        point = row[0]
        if input_x + radius_x > 180:
            # Box crosses +180: the overflow continues from -180.
            return (input_x - radius_x < point < 180
                    or -180 < point < input_x + radius_x - 360)
        if input_x - radius_x < -180:
            # Box crosses -180: the overflow continues down from +180.  The
            # original upper bound here was -180, which can never match.
            return (-180 < point < input_x + radius_x
                    or input_x - radius_x + 360 < point < 180)
        return input_x - radius_x <= point <= input_x + radius_x

    step = 5        # growth of the box per pass, in degrees
    radius_x = 5    # initial half-widths of the search box
    radius_y = 5
    used_x = radius_x
    used_y = radius_y
    result = []     # every row inside the current box

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()
    while len(result) < input_k:
        result = [
            row for row in result_list
            if in_search_box(row, radius_x, radius_y)
        ]
        used_x, used_y = radius_x, radius_y
        # Once the box spans the whole globe a wider one cannot match more
        # rows; stop instead of looping forever when fewer than k can match.
        if radius_x >= 360 and radius_y >= 180:
            break
        radius_x += step
        radius_y += step

    # Keep the k nearest candidates.
    final_result = sorted(result, key=haveRSine)[:input_k]
    for row in final_result:
        # Record the radii of the pass that produced the candidates.
        row[-2] = used_x
        row[-1] = used_y

    lon_result = [row[0] for row in final_result]
    lat_result = [row[1] for row in final_result]
    end = time.perf_counter()
    return lon_result, lat_result, final_result, end - start
コード例 #9
0
def timeZoneMap():
    """Read ``directory.csv`` and pass the data to ``drawTimezoneMap``.

    The second argument given to ``drawTimezoneMap`` is the string ``"draw"``.
    """
    frame = ReadCsv("directory.csv").readCsv()
    drawTimezoneMap(frame, "draw")