예제 #1
0
def draw_point_map(spark):
    # file 0_5M_nyc_taxi_and_building.csv could be obtained from arctern-turoial warehouse under zilliztech account. The link on github is https://github.com/zilliztech/arctern-tutorial
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load("file:///tmp/0_5M_nyc_taxi_and_building.csv").cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")
    # df.createOrReplaceGlobalTempView("nyc_taxi")

    res = spark.sql(
        "select pickup_latitude as x, pickup_longitude as y  from nyc_taxi")
    res.printSchema()
    res.createOrReplaceTempView("pickup")

    register_funcs(spark)
    res = spark.sql(
        "select ST_Transform(ST_Point(x, y), 'EPSG:4326','EPSG:3857' ) as pickup_point from pickup"
    )
    res.show(20, False)
    res.createOrReplaceTempView("project")

    res = spark.sql(
        "select Projection(pickup_point, 'POINT (4534000 -12510000)', 'POINT (4538000 -12513000)', 1024, 896) as point from project"
    )
    res.show(20, False)

    vega_point_map = VegaCircle2d(1900, 1410, 3, "#2DEF4A", 0.5)
    vega = vega_point_map.build()
    res = pointmap(res, vega)
    save_png(res, '/tmp/pointmap.png')

    spark.sql("show tables").show()
    spark.catalog.dropGlobalTempView("nyc_taxi")
예제 #2
0
def draw_point_map(spark):
    # file 0_5M_nyc_build.csv is generated from New York taxi data and taxi zone shapefile. Data is available at the following URL: https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load(data_path).cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")
    # df.createOrReplaceGlobalTempView("nyc_taxi")

    res = spark.sql(
        "select pickup_latitude as x, pickup_longitude as y  from nyc_taxi")
    res.printSchema()
    res.createOrReplaceTempView("pickup")

    register_funcs(spark)
    res = spark.sql(
        "select ST_Transform(ST_Point(x, y), 'EPSG:4326','EPSG:3857' ) as pickup_point from pickup"
    )
    res.show(20, False)
    res.createOrReplaceTempView("project")

    res = spark.sql(
        "select Projection(pickup_point, 'POINT (4534000 -12510000)', 'POINT (4538000 -12513000)', 1024, 896) as point from project"
    )
    res.show(20, False)

    vega_point_map = VegaCircle2d(1900, 1410, 3, "#2DEF4A", 0.5)
    vega = vega_point_map.build()
    res = pointmap(res, vega)
    save_png(res, '/tmp/pointmap.png')

    spark.sql("show tables").show()
    spark.catalog.dropGlobalTempView("nyc_taxi")
예제 #3
0
def draw_point_map(spark):
    start_time = time.time()
    # file 0_5M_nyc_taxi_and_building.csv could be obtained from arctern-turoial warehouse under zilliztech account. The link on github is https://github.com/zilliztech/arctern-tutorial
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load("file:///tmp/0_5M_nyc_taxi_and_building.csv").cache()
    df.createOrReplaceTempView("nyc_taxi")

    register_funcs(spark)
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), ST_GeomFromText('POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))'))"
    )

    vega = vega_pointmap(
        1024,
        896,
        bounding_box=[-73.998427, 40.730309, -73.954348, 40.780816],
        point_size=3,
        point_color="#2DEF4A",
        opacity=0.5,
        coordinate_system="EPSG:4326")
    res = pointmap(vega, res)
    save_png(res, '/tmp/pointmap.png')

    spark.sql("show tables").show()
    spark.catalog.dropGlobalTempView("nyc_taxi")
    print("--- %s seconds ---" % (time.time() - start_time))
예제 #4
0
def draw_point_map(spark):
    # file 0_5M_nyc_build.csv is generated from New York taxi data and taxi zone shapefile. Data is available at the following URL: https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load(data_path).cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")

    register_funcs(spark)
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))')"
    )

    vega = vega_pointmap(1024, 896,
                         [-73.998427, 40.730309, -73.954348, 40.780816], 3,
                         "#2DEF4A", 0.5, "EPSG:4326")
    res = pointmap(res, vega)
    save_png(res, '/tmp/pointmap.png')

    spark.sql("show tables").show()
    spark.catalog.dropGlobalTempView("nyc_taxi")
예제 #5
0
파일: service.py 프로젝트: quziyan/arctern
def db_query():
    """
    /db/query handler
    """
    log.INSTANCE.info('POST /db/query: {}'.format(request.json))

    if not utils.check_json(request.json, 'id') \
            or not utils.check_json(request.json, 'query') \
            or not utils.check_json(request.json['query'], 'type') \
            or not utils.check_json(request.json['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query_sql = request.json['query']['sql']
    query_type = request.json['query']['type']

    content = {}
    content['sql'] = query_sql
    content['err'] = False

    db_instance = db.CENTER.get(str(request.json['id']), None)
    if db_instance is None:
        return jsonify(status="error",
                       code=-1,
                       message='there is no database whose id equal to ' +
                       str(request.json['id']))

    if query_type == 'sql':
        res = db_instance.run_for_json(query_sql)
        data = []
        for row in res:
            obj = json.loads(row)
            data.append(obj)
        content['result'] = data
    else:
        if not utils.check_json(request.json['query'], 'params'):
            return jsonify(status='error',
                           code=-1,
                           message='query format error')
        query_params = request.json['query']['params']

        res = db_instance.run(query_sql)

        if query_type == 'point':
            vega = vega_pointmap(int(query_params['width']),
                                 int(query_params['height']),
                                 query_params['point']['bounding_box'],
                                 int(query_params['point']['point_size']),
                                 query_params['point']['point_color'],
                                 float(query_params['point']['opacity']),
                                 query_params['point']['coordinate_system'])
            data = pointmap(vega, res)
            content['result'] = data
        elif query_type == 'heat':
            vega = vega_heatmap(int(query_params['width']),
                                int(query_params['height']),
                                query_params['heat']['bounding_box'],
                                float(query_params['heat']['map_zoom_level']),
                                query_params['heat']['coordinate_system'],
                                query_params['heat']['aggregation_type'])
            data = heatmap(vega, res)
            content['result'] = data
        elif query_type == 'choropleth':
            vega = vega_choroplethmap(
                int(query_params['width']), int(query_params['height']),
                query_params['choropleth']['bounding_box'],
                query_params['choropleth']['color_gradient'],
                query_params['choropleth']['color_bound'],
                float(query_params['choropleth']['opacity']),
                query_params['choropleth']['coordinate_system'],
                query_params['choropleth']['aggregation_type'])
            data = choroplethmap(vega, res)
            content['result'] = data
        elif query_type == 'weighted':
            vega = vega_weighted_pointmap(
                int(query_params['width']), int(query_params['height']),
                query_params['weighted']['bounding_box'],
                query_params['weighted']['color_gradient'],
                query_params['weighted']['color_bound'],
                query_params['weighted']['size_bound'],
                float(query_params['weighted']['opacity']),
                query_params['weighted']['coordinate_system'])
            data = weighted_pointmap(vega, res)
            content['result'] = data
        elif query_type == 'icon':
            vega = vega_icon(int(query_params['width']),
                             int(query_params['height']),
                             query_params['icon']['bounding_box'],
                             query_params['icon']['icon_path'],
                             query_params['icon']['coordinate_system'])
            data = icon_viz(vega, res)
            content['result'] = data
        else:
            return jsonify(status="error",
                           code=-1,
                           message='{} not support'.format(query_type))

    return jsonify(status="success", code=200, data=content)
예제 #6
0
def db_query():
    """
    /db/query handler
    """
    if not utils.check_json(request.json, 'id') \
            or not utils.check_json(request.json, 'query') \
            or not utils.check_json(request.json['query'], 'type') \
            or not utils.check_json(request.json['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query_sql = request.json['query']['sql']
    query_type = request.json['query']['type']

    content = {}
    content['sql'] = query_sql
    content['err'] = False

    if query_type == 'sql':
        res = spark.Spark.run_for_json(query_sql)
        data = []
        for row in res:
            obj = json.loads(row)
            data.append(obj)
        content['result'] = data
    else:
        if not utils.check_json(request.json['query'], 'params'):
            return jsonify(status='error',
                           code=-1,
                           message='query format error')
        query_params = request.json['query']['params']

        res = spark.Spark.run(query_sql)

        if query_type == 'point':
            vega = vega_pointmap(int(query_params['width']),
                                 int(query_params['height']),
                                 query_params['point']['bounding_box'],
                                 int(query_params['point']['stroke_width']),
                                 query_params['point']['stroke'],
                                 float(query_params['point']['opacity']),
                                 query_params['point']['coordinate'])
            data = pointmap(res, vega)
            content['result'] = data
        elif query_type == 'heat':
            vega = vega_heatmap(int(query_params['width']),
                                int(query_params['height']),
                                float(query_params['heat']['map_scale']),
                                query_params['heat']['bounding_box'],
                                query_params['heat']['coordinate'])
            data = heatmap(res, vega)
            content['result'] = data
        elif query_type == 'choropleth':
            vega = vega_choroplethmap(
                int(query_params['width']), int(query_params['height']),
                query_params['choropleth']['bounding_box'],
                query_params['choropleth']['color_style'],
                query_params['choropleth']['rule'],
                float(query_params['choropleth']['opacity']),
                query_params['choropleth']['coordinate'])
            data = choroplethmap(res, vega)
            content['result'] = data
        else:
            return jsonify(status="error",
                           code=-1,
                           message='{} not support'.format(query_type))
    return jsonify(status="success", code=200, data=content)
예제 #7
0
pos2 = (-73.945155, 40.783434)
limit_num = 200

# 绘制图层

pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}"
pickup_df = spark.sql(pickup_sql)
# 根据查询结果绘制点图图层。点大小为 10,点颜色为 #2DEF4A,点不透明度为 1.0。
vega = vega_pointmap(1024,
                     384,
                     bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]],
                     point_size=10,
                     point_color="#2DEF4A",
                     opacity=1,
                     coordinate_system="EPSG:4326")
res = pointmap(vega, pickup_df)
save_png(res, '/tmp/arctern_pointmap.png')

# 在指定地理区域(经度范围:-73.991504 至 -73.945155;纬度范围:40.770759 至 40.783434)中随机选取 200 个坐标点,并将 fare_amount 作为颜色权重、total_amount 作为大小权重。
pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as color_weight, total_amount as size_weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}"
pickup_df = spark.sql(pickup_sql)
# 根据查询结果绘制带权点图图层。点的颜色根据 color_weight 在 "#115f9a" ~ "#d0f400" 之间变化,点的大小根据 size_weight 在 3 ~ 15 之间变化。
vega = vega_weighted_pointmap(
    1024,
    384,
    bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]],
    color_gradient=["#115f9a", "#d0f400"],
    color_bound=[1, 50],
    size_bound=[3, 15],
    opacity=1.0,
    coordinate_system="EPSG:4326")
예제 #8
0
def run_test_point_map(spark):
    # file 0_5M_nyc_taxi_and_building.csv could be obtained from arctern-turoial warehouse under zilliztech account. The link on github is https://github.com/zilliztech/arctern-tutorial
    # file 0_10000_nyc_taxi_and_building.csv is from file 0_5M_nyc_taxi_and_building.csv first 10000 lines
    df = spark.read.format("csv").option("header", True).option("delimiter", ",").schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, "
        "trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, "
        "dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, "
        "buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string").load(
        file_path).cache()
    df.createOrReplaceTempView("nyc_taxi")

    register_funcs(spark)
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))')")

    # 1 size:1024*896, point_size: 3, opacity: 0.5, color: #2DEF4A(green)
    vega_1 = vega_pointmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 3, "#2DEF4A", 0.5, "EPSG:4326")
    baseline1 = pointmap(res, vega_1)
    point_map1_1 = pointmap(res, vega_1)
    point_map1_2 = pointmap(res, vega_1)

    baseline_png1 = png_path + "point_map_nyc_1.png"
    save_png(baseline1, baseline_png1)
    save_png(point_map1_1, png_path + "test_point_map_nyc_1-1.png")
    save_png(point_map1_2, png_path + "test_point_map_nyc_1-2.png")

    # 2 #F50404(red)
    vega_2 = vega_pointmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 5, "#F50404", 0.5, "EPSG:4326")
    baseline2 = pointmap(res, vega_2)
    point_map2_1 = pointmap(res, vega_2)
    point_map2_2 = pointmap(res, vega_2)

    baseline_png2 = png_path + "point_map_nyc_2.png"
    save_png(baseline2, baseline_png2)
    save_png(point_map2_1, png_path + "test_point_map_nyc_2-1.png")
    save_png(point_map2_2, png_path + "test_point_map_nyc_2-2.png")

    # 3 color: #1455EE(blue)
    vega_3 = vega_pointmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 5, "#1455EE", 0.5, "EPSG:4326")
    baseline3 = pointmap(res, vega_3)
    point_map3_1 = pointmap(res, vega_3)
    point_map3_2 = pointmap(res, vega_3)

    baseline_png3 = png_path + "point_map_nyc_3.png"
    save_png(baseline3, baseline_png3)
    save_png(point_map3_1, png_path + "test_point_map_nyc_3-1.png")
    save_png(point_map3_2, png_path + "test_point_map_nyc_3-2.png")

    # 4 size:1024*896, point_size: 3, opacity: 1, color: #2DEF4A
    vega_4 = vega_pointmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 3, "#2DEF4A", 1.0, "EPSG:4326")
    baseline4 = pointmap(res, vega_4)
    point_map4_1 = pointmap(res, vega_4)
    point_map4_2 = pointmap(res, vega_4)

    baseline_png4 = png_path + "point_map_nyc_4.png"
    save_png(baseline4, baseline_png4)
    save_png(point_map4_1, png_path + "test_point_map_nyc_4-1.png")
    save_png(point_map4_2, png_path + "test_point_map_nyc_4-2.png")

    # 5 size:1024*896, point_size: 3, opacity: 0, color: #2DEF4A
    vega_5 = vega_pointmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 3, "#2DEF4A", 0.0, "EPSG:4326")
    baseline5 = pointmap(res, vega_5)
    point_map5_1 = pointmap(res, vega_5)
    point_map5_2 = pointmap(res, vega_5)

    baseline_png5 = png_path + "point_map_nyc_5.png"
    save_png(baseline5, baseline_png5)
    save_png(point_map5_1, png_path + "test_point_map_nyc_5-1.png")
    save_png(point_map5_2, png_path + "test_point_map_nyc_5-2.png")

    # 6 size:200*200, point_size: 3, opacity: 0.5, color: #2DEF4A
    vega_6 = vega_pointmap(200, 200, [-73.998427, 40.730309, -73.954348, 40.780816], 3, "#2DEF4A", 0.5, "EPSG:4326")
    baseline6 = pointmap(res, vega_6)
    point_map6_1 = pointmap(res, vega_6)
    point_map6_2 = pointmap(res, vega_6)

    baseline_png6 = png_path + "point_map_nyc_6.png"
    save_png(baseline6, baseline_png6)
    save_png(point_map6_1, png_path + "test_point_map_nyc_6-1.png")
    save_png(point_map6_2, png_path + "test_point_map_nyc_6-2.png")

    spark.catalog.dropGlobalTempView("nyc_taxi")

    assert run_diff_png(baseline_png1, png_path + "test_point_map_nyc_1-1.png")
    assert run_diff_png(baseline_png1, png_path + "test_point_map_nyc_1-2.png")
    assert run_diff_png(baseline_png2, png_path + "test_point_map_nyc_2-1.png")
    assert run_diff_png(baseline_png2, png_path + "test_point_map_nyc_2-2.png")
    assert run_diff_png(baseline_png3, png_path + "test_point_map_nyc_3-1.png")
    assert run_diff_png(baseline_png3, png_path + "test_point_map_nyc_3-2.png")
    assert run_diff_png(baseline_png4, png_path + "test_point_map_nyc_4-1.png")
    assert run_diff_png(baseline_png4, png_path + "test_point_map_nyc_4-2.png")
    assert run_diff_png(baseline_png5, png_path + "test_point_map_nyc_5-1.png")
    assert run_diff_png(baseline_png5, png_path + "test_point_map_nyc_5-2.png")
    assert run_diff_png(baseline_png6, png_path + "test_point_map_nyc_6-1.png")
    assert run_diff_png(baseline_png6, png_path + "test_point_map_nyc_6-2.png")