def date_munge():
    """Derive calendar features from the Chicago crimes "Date" column and bound the cost.

    Imports the Chicago crimes sample, parses the "Date" column, appends
    Day/Month/Year/WeekNum/WeekDay/HourOfDay plus "Weekend" and "Season",
    drops "Date", describes the frame, and finally asserts upper bounds on the
    number of temporary frames and REST calls used, as reported by
    ``pyunit_utils.temp_ctr()`` and ``pyunit_utils.rest_ctr()``.

    The ``# /3/...`` and ``# /99/Rapids`` comments record the REST traffic the
    original author expected each statement to generate; they are
    documentation only and are not checked by the test.
    """
    crimes_path = pyunit_utils.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")
    # crimes_path = "smalldata/chicago/chicagoCrimes10k.csv.zip"

    # Counter baselines, taken before any frame work.
    tmps0 = pyunit_utils.temp_ctr()  # Expected 0
    rest0 = pyunit_utils.rest_ctr()  # Expected 0

    # /3/ImportFiles
    # /3/ParseSetup
    # /3/Parse
    crimes = h2o.import_file(path=crimes_path)

    # /3/Frames/chicagoCrimes10k.hex - head 10 rows, basic stats
    # /99/Rapids, parms: {ast=(tmp= py_1 (:= chicagoCrimes10k.hex (as.Date (cols_py chicagoCrimes10k.hex "Date") "%m/%d/%Y %I:%M:%S %p") 2 []))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=chicagoCrimes10k.hex}
    # /3/Frames/py_1, route: /3/Frames/(?<frameid>.*), parms: {frame_id=py_1, row_count=10}
    crimes["Date"] = crimes["Date"].as_date("%m/%d/%Y %I:%M:%S %p")

    # /99/Rapids, parms: {ast=(tmp= py_2 (append py_1 (day (cols_py py_1 "Date")) "Day"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_1}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_2, row_count=10}
    crimes["Day"] = crimes["Date"].day()

    # /99/Rapids, parms: {ast=(tmp= py_3 (append py_2 (+ (month (cols_py py_2 "Date")) 1) "Month"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_2}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_3, row_count=10}
    crimes["Month"] = crimes["Date"].month() + 1  # Since H2O indexes from 0

    # /99/Rapids, parms: {ast=(tmp= py_4 (:= py_3 (+ (year (cols_py py_3 "Date")) 1900) 17 []))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_3}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_4, row_count=10}
    crimes["Year"] = crimes["Date"].year() + 1900  # Start of epoch is 1900

    # /99/Rapids, parms: {ast=(tmp= py_5 (append py_4 (week (cols_py py_4 "Date")) "WeekNum"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_4}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_5, row_count=10}
    crimes["WeekNum"] = crimes["Date"].week()

    # /99/Rapids, parms: {ast=(tmp= py_6 (append py_5 (dayOfWeek (cols_py py_5 "Date")) "WeekDay"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_5}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_6, row_count=10}
    crimes["WeekDay"] = crimes["Date"].dayOfWeek()

    # /99/Rapids, parms: {ast=(tmp= py_7 (append py_6 (hour (cols_py py_6 "Date")) "HourOfDay"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_6}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_7, row_count=10}
    crimes["HourOfDay"] = crimes["Date"].hour()

    # /99/Rapids, parms: {ast=(tmp= py_8 (append py_7 (| (== (cols_py py_7 "WeekDay") "Sun") (== (cols_py py_7 "WeekDay") "Sat")) "Weekend"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_7}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_8, row_count=10}
    crimes["Weekend"] = (crimes["WeekDay"] == "Sun") | (crimes["WeekDay"] == "Sat")

    # /99/Rapids, parms: {ast=(tmp= py_9 (append py_8 (cut (cols_py py_8 "Month") [0 2 5 7 10 12] ["Winter" "Spring" "Summer" "Autumn" "Winter"] FALSE TRUE 3) "Season"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_8}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_9, row_count=10}
    crimes["Season"] = crimes["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])

    # /99/Rapids, parms: {ast=(tmp= py_10 (cols py_9 -3))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_9}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_10, row_count=10}
    crimes = crimes.drop("Date")

    crimes.describe()

    # DELETE /3/DKV/(?<key>.*), parms: {key=py_10}

    # Read the temp counter first, then the REST counter, as the baselines were.
    ntmps = pyunit_utils.temp_ctr() - tmps0
    nrest = pyunit_utils.rest_ctr() - rest0
    # Pass message and value as separate arguments: wrapping them in an extra
    # pair of parentheses would print the repr of a tuple instead.
    print("Number of temps used:", ntmps)
    print("Number of RESTs used:", nrest)
    assert ntmps <= 15, "used %s temps, expected at most 15" % ntmps
    assert nrest <= 20, "used %s REST calls, expected at most 20" % nrest
# Example #2
0
def date_munge():
    """Check that the date-munging pipeline costs exactly 8 temps and 3 REST calls.

    The Chicago crimes sample is imported as frame "crimes"; calendar columns
    (Day, Month, Year, WeekNum, WeekDay, HourOfDay), a "Weekend" flag and a
    "Season" bucket are added, and "Date" is dropped. The running REST-call
    count since the import is printed after each stage, and the final temp and
    REST deltas are asserted to be exactly 8 and 3.
    """
    crimes_path = pyunit_utils.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")
    # crimes_path = "smalldata/chicago/chicagoCrimes10k.csv.zip"

    hc = h2o.connection()
    temps_at_start = pyunit_utils.temp_ctr()

    # Requests listed by the original author for the import:
    #   GET /3/ImportFiles
    #   POST /3/ParseSetup
    #   POST /3/Parse
    #   GET /3/Job/{job_id}  (multiple times)
    #   GET /3/Frames/crimes
    crimes = h2o.import_file(path=crimes_path, destination_frame="crimes")

    # REST baseline is taken after the import, so import traffic is excluded.
    rest_after_import = hc.requests_count

    def report_rest_calls():
        # Print how many requests the connection has issued since the import.
        print("# of REST calls used: %d" % (hc.requests_count - rest_after_import))

    crimes["Day"] = crimes["Date"].day()
    crimes["Month"] = crimes["Date"].month() + 1    # Since H2O indexes from 0
    crimes["Year"] = crimes["Date"].year() + 1900  # Start of epoch is 1900
    crimes["WeekNum"] = crimes["Date"].week()
    crimes["WeekDay"] = crimes["Date"].dayOfWeek()
    crimes["HourOfDay"] = crimes["Date"].hour()
    report_rest_calls()

    crimes["Weekend"] = (crimes["WeekDay"] == "Sun") | (crimes["WeekDay"] == "Sat")
    report_rest_calls()

    crimes["Season"] = crimes["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
    report_rest_calls()

    crimes = crimes.drop("Date")
    report_rest_calls()

    # Requests the original author recorded for describe():
    # NOTE(review): the trace below names chicagoCrimes10k.hex although the
    # frame is imported as "crimes" -- presumably recorded earlier; confirm.
    # POST /4/sessions
    # POST /99/Rapids  {ast:(tmp= py8 (cols (append
    #                        (tmp= py7 (append
    #                         (tmp= py6 (append
    #                          (tmp= py5 (append
    #                           (tmp= py4 (append
    #                            (tmp= py3 (:=
    #                             (tmp= py2 (append
    #                              (tmp= py1 (append crimes (day (cols_py chicagoCrimes10k.hex "Date")) "Day")
    #                               ) (+ (month (cols_py py1 "Date")) 1) "Month"))
    #                                 (+ (year (cols_py py2 "Date")) 1900) 17 []))
    #                                    (week (cols_py py3 "Date")) "WeekNum"))
    #                                    (dayOfWeek (cols_py py4 "Date")) "WeekDay"))
    #                                    (hour (cols_py py5 "Date")) "HourOfDay"))
    #                                 (| (== (cols_py py6 "WeekDay") "Sun")
    #                                    (== (cols_py py6 "WeekDay") "Sat")) "Weekend"))
    #                         (cut (cols_py py7 "Month") [0 2 5 7 10 12]
    #                           ["Winter" "Spring" "Summer" "Autumn" "Winter"] FALSE TRUE 3) "Season") -3))}
    # GET /3/Frames/py8
    crimes.describe()
    report_rest_calls()

    temps_used = pyunit_utils.temp_ctr() - temps_at_start
    rest_used = pyunit_utils.rest_ctr() - rest_after_import
    print("Number of temps used: %d" % temps_used)
    print("Number of RESTs used: %d" % rest_used)
    assert temps_used == 8
    assert rest_used == 3
def date_munge():
    """Build date-derived columns on the Chicago crimes sample and bound the cost.

    Steps: import the sample, parse "Date", append Day/Month/Year/WeekNum/
    WeekDay/HourOfDay, a "Weekend" flag and a "Season" bucket, drop "Date" and
    describe the frame. The temp-frame and REST-call counters from
    ``pyunit_utils`` are sampled before and after; the test asserts at most 10
    temps and fewer than 30 REST calls.

    Trace comments (``/3/...``, ``/99/Rapids``, ``DELETE``) list the requests
    the original author associated with each statement; nothing verifies them.
    """
    dataset = pyunit_utils.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")
    # dataset = "smalldata/chicago/chicagoCrimes10k.csv.zip"

    # Baselines, both expected to be 0.
    temps_before = pyunit_utils.temp_ctr()
    rest_before = pyunit_utils.rest_ctr()

    # /3/ImportFiles
    # /3/ParseSetup
    # /3/Parse
    crimes = h2o.import_file(path=dataset)

    # /3/Frames/chicagoCrimes10k.hex - head 10 rows, basic stats
    # /99/Rapids, parms: {ast=(tmp= py_1 (:= chicagoCrimes10k.hex (as.Date (cols_py chicagoCrimes10k.hex "Date") "%m/%d/%Y %I:%M:%S %p") 2 []))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=chicagoCrimes10k.hex}
    # /3/Frames/py_1, route: /3/Frames/(?<frameid>.*), parms: {frame_id=py_1, row_count=10}
    crimes["Date"] = crimes["Date"].as_date("%m/%d/%Y %I:%M:%S %p")

    # /99/Rapids, parms: {ast=(tmp= py_2 (append py_1 (day (cols_py py_1 "Date")) "Day"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_1}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_2, row_count=10}
    crimes["Day"] = crimes["Date"].day()

    # /99/Rapids, parms: {ast=(tmp= py_3 (append py_2 (+ (month (cols_py py_2 "Date")) 1) "Month"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_2}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_3, row_count=10}
    crimes["Month"] = crimes["Date"].month() + 1  # Since H2O indexes from 0

    # /99/Rapids, parms: {ast=(tmp= py_4 (:= py_3 (+ (year (cols_py py_3 "Date")) 1900) 17 []))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_3}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_4, row_count=10}
    crimes["Year"] = crimes["Date"].year() + 1900  # Start of epoch is 1900

    # /99/Rapids, parms: {ast=(tmp= py_5 (append py_4 (week (cols_py py_4 "Date")) "WeekNum"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_4}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_5, row_count=10}
    crimes["WeekNum"] = crimes["Date"].week()

    # /99/Rapids, parms: {ast=(tmp= py_6 (append py_5 (dayOfWeek (cols_py py_5 "Date")) "WeekDay"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_5}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_6, row_count=10}
    crimes["WeekDay"] = crimes["Date"].dayOfWeek()

    # /99/Rapids, parms: {ast=(tmp= py_7 (append py_6 (hour (cols_py py_6 "Date")) "HourOfDay"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_6}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_7, row_count=10}
    crimes["HourOfDay"] = crimes["Date"].hour()

    # /99/Rapids, parms: {ast=(tmp= py_8 (append py_7 (| (== (cols_py py_7 "WeekDay") "Sun") (== (cols_py py_7 "WeekDay") "Sat")) "Weekend"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_7}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_8, row_count=10}
    crimes["Weekend"] = (crimes["WeekDay"] == "Sun") | (crimes["WeekDay"] == "Sat")

    # /99/Rapids, parms: {ast=(tmp= py_9 (append py_8 (cut (cols_py py_8 "Month") [0 2 5 7 10 12] ["Winter" "Spring" "Summer" "Autumn" "Winter"] FALSE TRUE 3) "Season"))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_8}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_9, row_count=10}
    crimes["Season"] = crimes["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])

    # /99/Rapids, parms: {ast=(tmp= py_10 (cols py_9 -3))}
    # DELETE /3/DKV/(?<key>.*), parms: {key=py_9}
    # /3/Frames/(?<frameid>.*), parms: {frame_id=py_10, row_count=10}
    crimes = crimes.drop("Date")

    crimes.describe()

    # DELETE /3/DKV/(?<key>.*), parms: {key=py_10}

    # Sample the counters in the same order as the baselines: temps, then REST.
    temps_used = pyunit_utils.temp_ctr() - temps_before
    rest_used = pyunit_utils.rest_ctr() - rest_before
    print("Number of temps used: ", temps_used)
    print("Number of RESTs used: ", rest_used)
    assert temps_used <= 10
    assert rest_used < 30
# Example #4
0
def date_munge():
    """Verify the exact temp and REST cost of the date-feature pipeline.

    Imports the Chicago crimes sample into frame "crimes", adds Day, Month,
    Year, WeekNum, WeekDay, HourOfDay, Weekend and Season columns, then drops
    "Date". After each stage the number of requests issued on the connection
    since the import is printed. At the end the test asserts that exactly 8
    temps and 3 REST calls were used.
    """
    source_zip = pyunit_utils.locate("smalldata/chicago/chicagoCrimes10k.csv.zip")
    # source_zip = "smalldata/chicago/chicagoCrimes10k.csv.zip"

    conn = h2o.connection()
    temp_baseline = pyunit_utils.temp_ctr()

    # Import traffic as listed by the original author:
    # GET /3/ImportFiles
    # POST /3/ParseSetup
    # POST /3/Parse
    # GET /3/Job/{job_id}  (multiple times)
    # GET /3/Frames/crimes
    crimes = h2o.import_file(path=source_zip, destination_frame="crimes")

    # Measured from here on, so the import requests above are not counted.
    rest_baseline = conn.requests_count

    # Stage 1: calendar fields extracted from "Date".
    crimes["Day"] = crimes["Date"].day()
    crimes["Month"] = crimes["Date"].month() + 1  # Since H2O indexes from 0
    crimes["Year"] = crimes["Date"].year() + 1900  # Start of epoch is 1900
    crimes["WeekNum"] = crimes["Date"].week()
    crimes["WeekDay"] = crimes["Date"].dayOfWeek()
    crimes["HourOfDay"] = crimes["Date"].hour()
    print("# of REST calls used: %d" % (conn.requests_count - rest_baseline))

    # Stage 2: weekend flag.
    crimes["Weekend"] = (crimes["WeekDay"] == "Sun") | (crimes["WeekDay"] == "Sat")
    print("# of REST calls used: %d" % (conn.requests_count - rest_baseline))

    # Stage 3: bucket the month into seasons.
    crimes["Season"] = crimes["Month"].cut([0, 2, 5, 7, 10, 12], ["Winter", "Spring", "Summer", "Autumn", "Winter"])
    print("# of REST calls used: %d" % (conn.requests_count - rest_baseline))

    # Stage 4: the raw "Date" column is no longer needed.
    crimes = crimes.drop("Date")
    print("# of REST calls used: %d" % (conn.requests_count - rest_baseline))

    # Requests the original author recorded for describe():
    # NOTE(review): the trace below names chicagoCrimes10k.hex although the
    # frame is imported as "crimes" -- presumably recorded earlier; confirm.
    # POST /4/sessions
    # POST /99/Rapids  {ast:(tmp= py8 (cols (append
    #                        (tmp= py7 (append
    #                         (tmp= py6 (append
    #                          (tmp= py5 (append
    #                           (tmp= py4 (append
    #                            (tmp= py3 (:=
    #                             (tmp= py2 (append
    #                              (tmp= py1 (append crimes (day (cols_py chicagoCrimes10k.hex "Date")) "Day")
    #                               ) (+ (month (cols_py py1 "Date")) 1) "Month"))
    #                                 (+ (year (cols_py py2 "Date")) 1900) 17 []))
    #                                    (week (cols_py py3 "Date")) "WeekNum"))
    #                                    (dayOfWeek (cols_py py4 "Date")) "WeekDay"))
    #                                    (hour (cols_py py5 "Date")) "HourOfDay"))
    #                                 (| (== (cols_py py6 "WeekDay") "Sun")
    #                                    (== (cols_py py6 "WeekDay") "Sat")) "Weekend"))
    #                         (cut (cols_py py7 "Month") [0 2 5 7 10 12]
    #                           ["Winter" "Spring" "Summer" "Autumn" "Winter"] FALSE TRUE 3) "Season") -3))}
    # GET /3/Frames/py8
    crimes.describe()
    print("# of REST calls used: %d" % (conn.requests_count - rest_baseline))

    temp_delta = pyunit_utils.temp_ctr() - temp_baseline
    rest_delta = pyunit_utils.rest_ctr() - rest_baseline
    print("Number of temps used: %d" % temp_delta)
    print("Number of RESTs used: %d" % rest_delta)
    assert temp_delta == 8
    assert rest_delta == 3