コード例 #1
0
def dump_xml(obs1, xmldata, iteration):
    """Write the raw observation XML to a numbered file for debugging.

    Nothing is written unless ``obs1.dump_xml_flag`` is set.  The output
    file is named ``xml_dump<iteration>.xml`` and is opened with a relative
    path, so it lands in the current working directory; an existing file of
    the same name is overwritten.

    Args:
        obs1: settings object; only ``dump_xml_flag`` is read here.
        xmldata: payload to write.  The file is opened in binary mode, so
            this has to be bytes-like.
        iteration: value appended to the file name to tell dumps apart.
    """
    trace_print(1, "dumpxml_entry")
    if not obs1.dump_xml_flag:
        return
    trace_print(1, "dump_xml")
    dump_name = "xml_dump" + str(iteration) + ".xml"
    # The context manager releases the handle even when write() raises,
    # which the previous open()/close() pair did not.
    with open(dump_name, 'wb') as dump_file:
        dump_file.write(xmldata)
コード例 #2
0
def get_last_csv_row(st_file):
    """Return the last physical line of a CSV file.

    Args:
        st_file: path of the CSV file to read.

    Returns:
        The final line exactly as stored (including its trailing newline
        when the file has one), or "" when the file cannot be opened or
        holds no lines at all.
    """
    final_line = ""
    try:
        # errors="ignore" drops undecodable bytes instead of raising.
        with open(st_file, "r", encoding="utf-8", errors="ignore") as csv_1:
            # Stream the file so only one line is kept in memory; after the
            # loop final_line is the last line read.
            for final_line in csv_1:
                pass
    except OSError:
        # Only I/O failures are treated as "no file"; programming errors
        # and KeyboardInterrupt are no longer swallowed.
        trace_print(3, "csv file not found... continue...")
        return ""
    if not final_line:
        trace_print(3, "csv file is empty... continue...")
        return ""
    trace_print(1, "final line:", final_line)
    return final_line
コード例 #3
0
def main_obs_loop(obs1_list):
    """Run one pass of the main loop, then sleep for 60 seconds.

    When the wall clock minute is 59 the cut check is applied to every
    entry of obs1_list; on any other minute the pending scheduled jobs
    are run instead.
    """
    minute_now = datetime.now().minute
    if minute_now != 59:
        trace_print(1, "run pending")
        schedule.run_pending()
    else:
        # Last minute of the hour: see whether a new csv has to be cut.
        trace_print(1, "Num minutes running: ", str(minute_now))
        foreach_obs(obs_cut_csv_file, obs1_list)
    time.sleep(60)
コード例 #4
0
 def check_resume_file(obs_setting):
     """Look for an existing CSV file that collection can resume into.

     Glob filters are built for today and for 24 hours from now.  The
     later date is searched first; today's filter is only used when that
     search returns nothing.  The search result is returned unchanged.
     """
     now = datetime.now()
     next_day = now + timedelta(hours=24)
     trace_print(4, "station_id", obs_setting.station_id)
     pattern_today = create_station_glob_filter(obs_setting.station_id,
                                                "csv", now)
     # Per the original note, a station such as Guam or Hawaii might
     # actually be ahead of the local date, hence the second filter.
     pattern_next = create_station_glob_filter(obs_setting.station_id,
                                               "csv", next_day)
     found = hunt_for_noaa_csv_files(obs_setting.data_dir, pattern_next)
     if len(found) >= 1:
         return found
     return hunt_for_noaa_csv_files(obs_setting.data_dir, pattern_today)
コード例 #5
0
def obs_sanity_check(obs1, xml_data, data_row):
    """Check that the key observation columns carry a value.

    The first checked column whose text starts with "<no" (the
    '<no_value_provided>' placeholder) triggers a dump of the raw XML for
    post-mortem and ends the check.

    Args:
        obs1: settings object handed to dump_xml.
        xml_data: raw XML that produced data_row.
        data_row: parsed observation row, indexed by column position.

    Returns:
        False when a checked column is a placeholder, True otherwise.
    """
    # Column positions in data_row; per the original note these are
    # observation_time, wind_mph, wind_dir and wind_string.
    table_col_list = [9, 19, 17, 16]
    for col in table_col_list:
        if data_row[col].startswith("<no"):
            now = datetime.now()
            midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
            # Seconds since local midnight make the dump file name unique.
            seconds = (now - midnight).seconds
            # Force the dump for this one call, then put the caller's
            # setting back.  Resetting it to False unconditionally turned
            # off dumps the user had enabled.
            saved_flag = getattr(obs1, "dump_xml_flag", False)
            obs1.dump_xml_flag = True
            try:
                dump_xml(obs1, xml_data, seconds)
            finally:
                obs1.dump_xml_flag = saved_flag
            trace_print(4, "potential bad xml - see xml dump at ",
                        str(seconds))
            return False
        trace_print(1, "data checked: ", str(data_row[col]))
    return True
コード例 #6
0
def create_station_file_name2(
        station="https://w1.weather.gov/xml/current_obs/KDCA.xml", ext='csv'):
    """Build a data file name from a station's current observation time.

    The station XML is fetched and parsed, and the observation time in
    column 9 of the parsed row supplies the date parts of the name.

    Args:
        station: URL of the station XML.  The station id is taken from the
            four characters that precede the last four (".xml" in the
            default URL).
        ext: file extension, without the dot.

    Returns:
        A name of the form ``<id>_Y<year>_M<month>_D<day>_H<hour>.<ext>``,
        or "" when no usable XML was fetched.
    """
    w_xml = get_weather_from_NOAA(station)
    if not obs_check_xml_data(w_xml):
        return ""
    _headers, row = get_data_from_NOAA_xml(w_xml)
    obs_date = get_obs_time(row[9])

    station_id = station[-8:-4]
    # Format the stamp directly rather than splitting a formatted string:
    # the split raised when %p rendered as an empty string, and its target
    # names shadowed the builtin min().
    file_n = station_id + obs_date.strftime("_Y%Y_M%m_D%d_H%H") + "." + ext
    trace_print(4, "my_p", obs_date.strftime("%p"))
    return file_n
コード例 #7
0
def get_data_from_NOAA_xml(xmldata):
    """Parse NOAA observation XML into a header list and a matching row.

    Each direct child of the XML root contributes its tag and text.  The
    row is laid out in the order of the module-level csv_headers list, so
    it always has one entry per canonical header.

    Args:
        xmldata: XML document passed to ET.fromstring.

    Returns:
        A tuple (csv_headers, row).  For each header, row holds
        transform_observation(header, text) when the XML carried that tag,
        '<no_value_provided>' when it did not, and '' once every parsed
        value has been used up (as before, this also covers a document
        without children).

    Whatever ET.fromstring raises on malformed input propagates.
    """
    tree = ET.fromstring(xmldata)
    trace_print(4, "parsing NOAA xml")
    value_by_tag = {}
    unused_values = 0
    for child in tree:
        # Keep the first value when a tag is repeated.
        value_by_tag.setdefault(child.tag, child.text)
        unused_values += 1
    row = []
    for ch in csv_headers:
        if unused_values == 0:
            row.append('')
        elif ch in value_by_tag:
            # Look the value up by tag name.  Taking "the next value" by
            # position shifted every later column as soon as the feed held
            # an extra tag or a different tag order.
            row.append(transform_observation(ch, value_by_tag[ch]))
            unused_values -= 1
        else:
            row.append('<no_value_provided>')
    return csv_headers, row
コード例 #8
0
def duration_cut_check(t_last, hour_cycle):
    """Decide whether a new file has to be cut.

    Args:
        t_last: datetime of the last observation or cut.
        hour_cycle: cut interval in hours; values <= 0 disable the
            hour-based check.

    Returns:
        True when the current local date is later than the date of t_last,
        or when the hour-of-day difference is a non-zero multiple of
        hour_cycle; False otherwise.
    """
    trace_print(1, "Duration check")
    t_now = datetime.now()
    # Compare the date parts as ordered tuples.  Comparing month or day in
    # isolation misfired when t_last was dated ahead of now across a month
    # or year boundary (e.g. Jan 31 against Feb 1 gave 31 > 1).
    if t_now.year > t_last.year:
        trace_print(1, "Duration year check")
        return True
    if (t_now.year, t_now.month) > (t_last.year, t_last.month):
        trace_print(1, "Duration month check")
        return True
    if ((t_now.year, t_now.month, t_now.day) >
            (t_last.year, t_last.month, t_last.day)):
        trace_print(1, "Duration day check")
        return True
    hours_apart = t_now.hour - t_last.hour
    if hours_apart == 0:
        return False
    if hour_cycle > 0 and hours_apart % hour_cycle == 0:
        trace_print(1, "Duration cycle check at ", str(hour_cycle))
        return True
    return False
コード例 #9
0
def weather_obs_app_append(obs1):
    """Fetch the current observation and append it to the station file.

    The observation is not written when it matches the last row already in
    the file.

    Args:
        obs1: station settings; primary_station and station_file are read,
            prior_obs_time and current_obs_time are updated.

    Returns:
        False when no usable XML was fetched, otherwise None.
    """
    content = get_weather_from_NOAA(obs1.primary_station)
    if not obs_check_xml_data(content):
        return False
    xmld1 = get_data_from_NOAA_xml(content)
    dump_xml(obs1, content, datetime.now().minute)
    # If --resume is specified there may be no current time yet; in that
    # case the prior time is seeded from the incoming observation.  Only
    # the missing-attribute case is handled, so unrelated errors surface.
    try:
        obs1.prior_obs_time = obs1.current_obs_time
    except AttributeError:
        obs1.prior_obs_time = get_obs_time(xmld1[1][9])
    obs1.current_obs_time = get_obs_time(xmld1[1][9])
    trace_print(4, "current_obs_time(append):  ", str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(append): ", str(obs1.prior_obs_time))
    # Test whether the last row on file and the incoming row are equal.
    if duplicate_observation(obs1, xmld1[1]):
        trace_print(3, 'duplicate append, exit up')
        # error on double start
        obs1.prior_obs_time = obs1.current_obs_time
        return
    weather_csv_driver(obs1, 'a', obs1.station_file, xmld1[0], xmld1[1])
    return
コード例 #10
0
def check_parms1(obs_setting, args):
    """Apply the stand-alone command line options to one station setting.

    Args:
        obs_setting: settings object updated in place.
        args: parsed arguments; duration, cut, append and resume are read.

    Returns:
        True, always.

    Raises:
        ValueError: when args.duration is set but is not an integer string.
    """
    if args.duration:
        # Only --duration is read here.  The former extra read of
        # args.interval raised AttributeError, because the argument parser
        # defines no such option, and its result was never used.
        obs_setting.duration_interval = int(args.duration)
        trace_print(1, "duration interval: ", str(args.duration))
    if args.cut:
        obs_setting.set_cut_process()
        trace_print(1, "cut specified")
    if args.append:
        obs_setting.set_append_processing()
        trace_print(1, "append specified")
        # collect assumes append
    if args.resume:
        obs_setting.set_resume_processing()
        trace_print(1, "resume specified")
    return True
コード例 #11
0
def get_obs_time(obs_date):
    """Parse an observation time string into a datetime.

    Only the first 20 characters of obs_date are handed to parser.parse;
    per the original note the actual timezone is not important for the
    obs file output.  When the module-level obs_time_debug switch is on,
    the result is shifted by obs_debug_t_delta hours to set up a specific
    test.
    """
    trace_print(4, "Local observation time ( get_obs_time): ", obs_date)
    stamp = parser.parse(obs_date[:20])
    if obs_time_debug:
        # adjust stamp for specific test
        stamp = stamp + timedelta(hours=obs_debug_t_delta)
        trace_print(4, "Debug obs_date:", str(stamp))
    else:
        trace_print(4, "get_obs_time return()")
    return stamp
コード例 #12
0
def get_weather_from_NOAA(station):
    """Fetch a station's observation XML and trace its MD5 digest.

    Args:
        station: URL handed to urllib.request.urlopen.

    Returns:
        The response body as returned by response.read(), or the empty
        string "" when the request or the digest trace fails.  Other code
        in this file checks the length of the result before using it.
    """
    trace_print(4, "url request")
    try:
        with urllib.request.urlopen(station) as response:
            xml = response.read()
        trace_print(4, "xml md5: ", hashlib.md5(xml).hexdigest())
    except Exception as err:
        # Best effort: a failed fetch yields "".  Catching Exception rather
        # than everything lets KeyboardInterrupt and SystemExit stop the
        # program, and the cause is now part of the trace.
        trace_print(4, "URL request error", ": ", repr(err))
        xml = ""
    return xml
コード例 #13
0
def duplicate_observation(obs1, current_obs):
    """Tell whether the incoming observation is already the last CSV row.

    The last line of the station file is split on ',"' and field 7 (minus
    its closing quote) is compared with column 9 of the incoming row.

    Args:
        obs1: station settings; station_file names the CSV to inspect.
        current_obs: parsed observation row about to be written.

    Returns:
        True when the two observation strings are equal.  False when they
        differ, when the file has no usable last line, or when that line
        has too few fields to compare.
    """
    r_csv_file = get_obs_csv_path(obs1, obs1.station_file)
    last_one = get_last_csv_row(r_csv_file)
    if len(last_one) < 4:
        return False
    last_obs = last_one.split(',\"')
    # NOTE(review): field 7 of the stored line is compared with column 9 of
    # the new row; the two-column offset is kept as found - confirm against
    # the layout of the written file.
    if len(last_obs) < 8:
        # A short or truncated last line used to raise IndexError here.
        trace_print(1, "last_obs: too few fields, len ", str(len(last_obs)))
        return False
    # Drop the closing quote left behind by the split.
    last_obs_dt = last_obs[7][:-1]
    trace_print(1, "last_obs:", last_obs_dt, "len ", str(len(last_obs_dt)))
    trace_print(1, "current_obs: ", current_obs[6], " ", current_obs[9],
                "len ", str(len(current_obs[9])))
    if current_obs[9] == last_obs_dt:
        trace_print(1, "Is equal")
        return True
    return False
コード例 #14
0
def run_cut_operation(obs1, obs_cut_time):
    """Switch a station to a new CSV file and restart its collection job.

    A file name is derived from obs_cut_time, both observation time stamps
    on obs1 are reset to that time, a header-only file is written, the
    current scheduled job is cancelled and weather_obs_app_start is called
    to schedule a fresh one.
    """
    trace_print(4, "running cut operation")
    # Name the file from the cut time so that it matches the new day; the
    # original note says the last observation arrives around 11:50 and
    # ten minutes are added for the file create.
    new_file = create_station_file_name(obs1.station_id, "csv", obs_cut_time)
    obs1.station_file = new_file
    # A new day cycle begins: prior and current stamps restart together.
    obs1.prior_obs_time = obs1.current_obs_time = obs_cut_time
    trace_print(4, "New Station file (cut):", obs1.station_file)
    # Mode 'c' creates the file with the canonical headers and no row.
    weather_csv_driver(obs1, 'c', obs1.station_file, csv_headers, [])
    # The old job is dropped; later writes go to the reassigned file.
    schedule.cancel_job(obs1.job1)
    obs1.job1 = None
    cut_stamp = datetime.now().strftime("%A, %d. %B %Y %I:%M%p")
    trace_print(4, "Time of last cut:", cut_stamp)
    # Reschedules the job against the new file.
    weather_obs_app_start(obs1)
コード例 #15
0
def obs_check_xml_data(xmldata):
    """Return True when xmldata holds at least four items, else False.

    A trace line is emitted for data that is too short to process.
    """
    if len(xmldata) >= 4:
        return True
    trace_print(4, "No XML data to process")
    return False
コード例 #16
0
def weather_obs_init():
    """Parse the command line and build the list of station settings.

    Returns:
        A list of ObsSetting objects: one per non-blank line of the file
        named by --file when that option is given, otherwise a single
        entry built from --station.

    The process exits with status 4 when the --file argument cannot be
    read, or when neither --file nor --station is given.
    """
    # Named arg_parser so the module-level ``parser`` used by get_obs_time
    # is not shadowed inside this function.
    arg_parser = argparse.ArgumentParser(
        description='NOAA weather obsevation')
    arg_parser.add_argument('--init', help='Initialize CSV')
    arg_parser.add_argument('--station', help='URL of station')
    arg_parser.add_argument('--collect',
                            help='Run collectiion in background - Y/N',
                            action="store_true")
    arg_parser.add_argument('--append',
                            help='Append data to CSV file - specifed')
    arg_parser.add_argument('-d',
                            '--duration',
                            help='Duration cycle - default - 24 hours ')
    arg_parser.add_argument('-c', '--cut', action="store_true")
    arg_parser.add_argument('-x', '--xml', action="store_true")
    arg_parser.add_argument('-r',
                            '--resume',
                            help='resume append and cut',
                            action="store_true")
    arg_parser.add_argument('-j', '--json', help="generate json data to file")
    arg_parser.add_argument('-f',
                            '--file',
                            help="read stations from file specified")
    arg_parser.add_argument('--dir',
                            help='data directory offet- default is cwd ')
    args = arg_parser.parse_args()
    trace_print(1, "parsing args...")

    def check_resume_file(obs_setting):
        """Find a CSV file to resume: tomorrow's glob first, then today's."""
        today = datetime.now()
        tomorrow = today + timedelta(hours=24)
        trace_print(4, "station_id", obs_setting.station_id)
        today_glob = create_station_glob_filter(obs_setting.station_id, "csv",
                                                today)
        # Guam or Hawaii might be actually ahead.
        tomorrow_glob = create_station_glob_filter(obs_setting.station_id,
                                                   "csv", tomorrow)
        last_file = hunt_for_noaa_csv_files(obs_setting.data_dir,
                                            tomorrow_glob)
        if len(last_file) < 1:
            last_file = hunt_for_noaa_csv_files(obs_setting.data_dir,
                                                today_glob)
        return last_file

    def check_params2(obs_setting, args):
        """Set the station file name and processing flags for one station."""
        obs_setting.station_file = create_station_file_name2(
            obs_setting.primary_station)
        if args.dir:
            obs_setting.set_data_dir(args.dir)
        if obs_setting.append_data_specified == False:
            trace_print(4, "Station filename: ", obs_setting.station_file)
        # initialize a CSV until we prove we are appending.
        obs_setting.init_csv = True
        if args.init:
            obs_setting.set_init_processing(args.init)
        if obs_setting.append_data_specified == True:
            obs_setting.station_file = args.append
            obs_setting.init_csv = False
            if obs_setting.resume == True:
                trace_print(4, "resume here")
                # TODO - support yesterday, today, and tomorrow.
                # Guam is actually tomorrow in many cases, so resume will
                # not work if just today and yesterday are searched.
                # 24 hours +/- otherwise just create a new file.
                data_path = obs_setting.get_data_dir_path()
                trace_print(3, "data path ", data_path)
                obs_setting.station_file = check_resume_file(obs_setting)
                trace_print(3, "station_file", obs_setting.station_file)
                if len(obs_setting.station_file) < 4:
                    # Nothing to resume into: start a new file instead.
                    obs_setting.station_file = create_station_file_name2(
                        obs_setting.primary_station)
                    obs_setting.init_csv = True
                    obs_setting.append_data = False
                    obs_setting.append_data_specified = True
                    trace_print(3, "Resume - No file file on current day")
            trace_print(4, "Station id ( append ): ", obs_setting.station_file)
        if args.xml == True:
            obs_setting.set_xml_dump_flag(True)
        if args.collect:
            trace_print(4, "collect in station setting")
            obs_setting.collect_data = True
            if (obs_setting.init_csv == False
                    and obs_setting.append_data_specified == False):
                obs_setting.station_file = create_station_file_name2(
                    obs_setting.primary_station)
                trace_print(4, "Station filename (collect): ",
                            obs_setting.station_file)
        return True

    if args.file:
        try:
            with open(args.file, "r") as obs_file1:
                obs_entry_list = obs_file1.readlines()
        except OSError:
            # Stop here: carrying on used to fail a few lines later with a
            # NameError because no entries had been read.
            print("Unable to open: ", args.file)
            sys.exit(4)
        trace_print(4, str(obs_entry_list))
        # A station entry is the first 47 characters of a line; the slice
        # discards the newline and anything after it.  Blank lines are
        # skipped rather than turned into empty settings.
        setting_list = [
            ObsSetting(entry[0:47]) for entry in obs_entry_list
            if entry.strip()
        ]
        trace_print(4, str(setting_list))
        for entry in setting_list:
            check_parms1(entry, args)
            trace_print(4, "Station id:  ", entry.station_id)
            check_params2(entry, args)
        return setting_list
    # check station and fill out appropriate values
    if not args.station:
        trace_print(3, "Error: No station given - please use --station")
        trace_print(3, " see readme")
        sys.exit(4)
    obs_setting = ObsSetting(args.station)
    check_parms1(obs_setting, args)
    trace_print(4, "Station id:  ", obs_setting.station_id)
    check_params2(obs_setting, args)
    return [obs_setting]
コード例 #17
0
 def set_xml_dump_flag(self, flag):
     """Store flag as dump_xml_flag and trace the stored value."""
     self.dump_xml_flag = flag
     stored = str(self.dump_xml_flag)
     trace_print(7, "Dump xml flag: ", stored)
コード例 #18
0
def weather_obs_app_start(obs1):
    """Start collection for one station.

    Unless obs1.append_data is already True, the current observation is
    fetched and written to obs1.station_file in write mode, together with
    the header row.  When obs1.collect_data is True and no job is set yet,
    weather_collect_driver is scheduled for minute 20 of every hour.

    Returns False when no usable XML was fetched, otherwise None.
    """
    trace_print(3, "weather_obs_app_starT() enter ")
    # When appending and scheduling, skip straight to the scheduling part.
    if obs1.append_data != True:
        raw_xml = get_weather_from_NOAA(obs1.primary_station)
        if not obs_check_xml_data(raw_xml):
            return False
        headers, first_row = get_data_from_NOAA_xml(raw_xml)
        trace_print(4, "raw observation string: ", first_row[9])
        stamp = get_obs_time(first_row[9])
        obs1.prior_obs_time = obs1.current_obs_time = stamp
        trace_print(4, "current_obs_time(start):  ",
                    str(obs1.current_obs_time))
        trace_print(4, "prior_obs_time:(start) ", str(obs1.prior_obs_time))
        weather_csv_driver(obs1, 'w', obs1.station_file, headers, first_row)
        trace_print(4, "Initializing new file (app_start): ",
                    str(obs1.station_file))
        dump_xml(obs1, raw_xml, datetime.now().minute)
    if obs1.collect_data != True:
        return
    if obs1.job1:
        trace_print(4, "schedule job set - exit()")
        return
    trace_print(4, "schedule job @ ", str(obs1.primary_station), " -> ",
                str(obs1.station_file))
    obs1.append_data = True
    hourly = schedule.every().hour.at(":20")
    obs1.job1 = hourly.do(weather_collect_driver, obs1)
    return
コード例 #19
0
def weather_collect_driver(obs1):
    """Fetch one observation and append it to the station CSV file.

    Returns False when no usable XML was fetched.  Returns True otherwise,
    whether the row was appended or skipped as a duplicate of the last row
    on file.
    """
    trace_print(4, "weather_collect_driver")
    xmldata = get_weather_from_NOAA(obs1.primary_station)
    if not obs_check_xml_data(xmldata):
        return False
    header, row = get_data_from_NOAA_xml(xmldata)
    # The feed sometimes has unexpected output (missing wind values); the
    # check dumps the xml for post-mortem.  Its result is not used here.
    obs_sanity_check(obs1, xmldata, row)
    trace_print(4, "current_obs_time(driver_before):  ",
                str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(driver_before): ", str(obs1.prior_obs_time))
    # Shift the stamps used by the cut logic: current becomes prior and
    # the incoming observation becomes current.
    obs1.prior_obs_time = obs1.current_obs_time
    obs1.current_obs_time = get_obs_time(row[9])
    if obs1.prior_obs_time.hour == 23:
        # Around midnight the prior stamp is reset to the current one.
        trace_print(4, "Special driver processing at hour 23")
        obs1.prior_obs_time = obs1.current_obs_time
    trace_print(4, "current_obs_time(driver):  ", str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(driver): ", str(obs1.prior_obs_time))
    if duplicate_observation(obs1, row):
        trace_print(3, " duplicate in collect.  exiting...")
        return True
    weather_csv_driver(obs1, 'a', obs1.station_file, header, row)

    obs1.obs_iteration += 1
    dump_xml(obs1, xmldata, obs1.obs_iteration)
    return True
コード例 #20
0
def weather_csv_driver(obs1, mode, csv_file, w_header, w_row):
    """Write observation data to the station CSV file.

    Args:
        obs1: station settings, used to resolve the file location.
        mode: 'w' writes the header and the row to a new file, 'a' appends
            the row only, 'c' (cut) writes a new file with the header only.
        csv_file: file name; must be at least four characters long.
        w_header: header row.
        w_row: data row.

    Returns:
        True after a successful write, False when csv_file is too short
        or mode is not one of the three supported values.
    """
    trace_print(4, 'csv_driver')
    if len(csv_file) < 4:
        print("CSV file must contain station name")
        return False
    if mode not in ('w', 'a', 'c'):
        # Any other mode used to open (and possibly truncate) the file,
        # write nothing and still report success.
        trace_print(1, " mode is invalid")
        return False
    # A cut request is a plain write that stops after the header row.
    cut_mode = (mode == 'c')
    if cut_mode:
        mode = 'w'
    r_csv_file = get_obs_csv_path(obs1, csv_file)
    trace_print(4, "data_dir location: ", str(r_csv_file))
    # newline parm so that excel in windows doesn't have blank line in csv
    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
    with open(r_csv_file, mode, newline='') as weather_file:
        weather_writer = csv.writer(weather_file,
                                    delimiter=',',
                                    quotechar='"',
                                    quoting=csv.QUOTE_ALL)
        if mode == 'w':
            trace_print(4, "csv_driver: header")
            weather_writer.writerow(w_header)
            if not cut_mode:
                trace_print(4, "csv_driver: row_with_header")
                weather_writer.writerow(w_row)
        else:
            trace_print(4, "csv_drver: row_only")
            weather_writer.writerow(w_row)
    # The with block has already closed the file; no explicit close().
    csv_write_time = datetime.now()
    trace_print(4, "csv_write_time: ",
                csv_write_time.strftime("%A, %d. %B %Y %I:%M%p"))
    return True
コード例 #21
0
 def check_params2(obs_setting, args):
     """Set the station file name and processing flags for one station.

     Reads args.dir, args.init, args.append, args.xml and args.collect and
     updates obs_setting in place.  The order of the steps matters because
     later steps test flags that earlier steps set.

     Returns:
         True, always.
     """
     # Default file name, derived from the station URL by
     # create_station_file_name2.
     obs_setting.station_file = create_station_file_name2(
         obs_setting.primary_station)
     if (args.dir):
         obs_setting.set_data_dir(args.dir)
     # Earlier inline handling of the data directory, kept for reference:
     #    obs_setting.data_dir = args.dir
     #    if (os.path.exists( os.getcwd() + os.sep + obs_setting.data_dir)):
     #        trace_print(4, "data dir exists: ", str(obs_setting.data_dir))
     #    else:
     #        trace_print(1, "Data dir does not exist")
     #        os.mkdir( os.getcwd() + os.sep + obs_setting.data_dir)
     #        trace_print(1, " directory created")
     if (obs_setting.append_data_specified == False):
         trace_print(4, "Station filename: ", obs_setting.station_file)
     obs_setting.init_csv = True
     # initialize a CSV until we prove we are appending.
     if (args.init):
         obs_setting.set_init_processing(args.init)
     if (obs_setting.append_data_specified == True):
         # Appending: write to the file named by --append and do not
         # initialize a new CSV.
         obs_setting.station_file = args.append
         obs_setting.init_csv = False
         if (obs_setting.resume == True):
             trace_print(4, "resume here")
             #now = datetime.now()
             #file_id = obs_setting.station_id + "_Y" + str(now.year)
             # file_id = obs_setting.station_file
             # TODO - support yesterday, today, and tomorrow.
             # Guam is actually tomorrow in many cases
             # so resume will not work if just today and yesterday
             # 24 hours +/- otherwise just create a new file
             data_path = obs_setting.get_data_dir_path()
             trace_print(3, "data path ", data_path)
             # Resume replaces the --append name with whatever file
             # check_resume_file finds.
             obs_setting.station_file = check_resume_file(obs_setting)
             trace_print(3, "station_file", obs_setting.station_file)
             if (len(obs_setting.station_file) < 4):
                 # Nothing usable was found: fall back to a new file name
                 # and turn CSV initialization back on.
                 obs_setting.station_file = create_station_file_name2(
                     obs_setting.primary_station)
                 obs_setting.init_csv = True
                 obs_setting.append_data = False
                 # NOTE(review): this flag was already tested as True on
                 # entry to the enclosing branch, so the assignment looks
                 # redundant - confirm before removing.
                 obs_setting.append_data_specified = True
                 trace_print(3, "Resume - No file file on current day")
         trace_print(4, "Station id ( append ): ", obs_setting.station_file)
     if (args.xml == True):
         obs_setting.set_xml_dump_flag(True)
     if (args.collect):
         trace_print(4, "collect in station setting")
         obs_setting.collect_data = True
         # Collecting with neither init nor append in effect: build the
         # default file name again.
         if (obs_setting.init_csv
                 == False) and (obs_setting.append_data_specified == False):
             obs_setting.station_file = create_station_file_name2(
                 obs_setting.primary_station)
             trace_print(4, "Station filename (collect): ",
                         obs_setting.station_file)
     return True
コード例 #22
0
 def set_duration(self, duration):
     """Store duration, converted with int(), and trace the stored value.

     int() raises when duration cannot be converted.
     """
     interval = int(duration)
     self.duration_interval = interval
     trace_print(7, "duration interval: ", str(self.duration_interval))
コード例 #23
0
def weather_obs_app():
    """Run the application: initialize, then init / append / collect.

    The settings list comes from weather_obs_init.  The options of its
    first entry decide which phases run for every entry.  When collection
    is enabled this function does not return; it keeps calling
    main_obs_loop.
    """
    stations = weather_obs_init()
    # currently all options are same as first entry
    lead = stations[0]
    if lead.init_csv == True:
        trace_print(4, "Init... ")
        foreach_obs(weather_obs_app_start, stations)
    if lead.append_data_specified == True:
        if lead.resume == True:
            trace_print(1, "resume - with append")
        trace_print(1, "Appending data")
        # Resume switches init_csv back on when a new file has to be
        # created (next-day start), so it is tested again here: append
        # only when resuming into an existing file.
        if lead.init_csv == False:
            trace_print(4, "Append processing start")
            foreach_obs(weather_obs_app_append, stations)
    if lead.collect_data != True:
        return
    started = datetime.now()
    trace_print(4, "starting time: ",
                started.strftime("%A, %d. %B %Y %I:%M%p"))
    if lead.append_data_specified == True:
        foreach_obs(weather_obs_app_start, stations)
    minutes_left = 60 - started.minute
    trace_print(4, "minutes till the next hour: ", str(minutes_left))
    while True:
        main_obs_loop(stations)
コード例 #24
0
 def _trace(self, s, *t1):
     """Emit a level-4 trace line that starts with this object's station id.

     s and every item of t1 must be strings; they are concatenated without
     separators after a single leading space.
     """
     message = " " + s + ''.join(t1)
     trace_print(4, self.station_id, message)