def execute(self):
    """Drive the calculate-hexGrid page for each date and log every outcome.

    Looks up the ``calculate_hexGrid_migrate`` job id in ``jobs_dim``, opens a
    headless Chrome session and, for every date yielded by
    ``daterange(start_date, end_date)`` (2020-01-01 to 14 days before now),
    loads the calculate-hexGrid URL for that date and waits for an element
    with id ``done``. Each date is logged through ``dbt.log_gsod_job_run`` as
    ``'COMPLETED'`` or ``'FAILED'``.

    When ``st.DEBUG`` is truthy the local server URL is used and the loop
    stops as soon as it reaches 2020-01-10 (that date is not processed).

    Returns:
        True once the loop has finished.

    Raises:
        IndexError: if the ``jobs_dim`` query returns no rows.
    """
    # for the job_runs database
    start_time = dt.datetime.now()
    query = "SELECT Id FROM jobs_dim WHERE job_name='calculate_hexGrid_migrate'"
    job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]

    # prepare the options for the chrome driver
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('--no-sandbox')

    # date range to process: from 2020-01-01 to (date of migration - 14 days)
    start_date = dt.date(2020, 1, 1)
    end_date = (dt.datetime.now() - dt.timedelta(days=14)).date()

    # start chrome browser; `options=` replaces the deprecated
    # `chrome_options=` keyword
    browser = webdriver.Chrome(options=options)
    try:
        for this_date in daterange(start_date, end_date):
            if st.DEBUG:
                URL = ('http://127.0.0.1:8000/calculate-hexGrid/'
                       + str(this_date) + '/')
                if str(this_date) == '2020-01-10':
                    break
            else:  # PRODUCTION
                URL = ('https://portfolio.sinto-ling.ca/gsod/calculate-hexGrid/'
                       + str(this_date) + '/')
            print(URL)
            browser.get(URL)

            # give the page 20 minutes up front, then poll up to `delay`
            # more seconds for the "done" id to be generated
            time.sleep(1200)
            delay = 500
            try:
                myElem = WebDriverWait(browser, delay).until(
                    EC.presence_of_element_located((By.ID, 'done')))
                print("Calculations and API requests completed!")
                print(myElem.get_attribute('outerHTML'))
            except TimeoutException:
                print("Loading took too much time!", this_date)
                dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                                     'FAILED')
            else:
                # after each completion, take a 10 minute break
                dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                                     'COMPLETED')
                time.sleep(600)
    finally:
        # always end the WebDriver session, even if an unexpected error
        # interrupts the loop, so no Chrome process is left behind
        browser.quit()
    return True
def execute(self):
    """Load GHCND records for one station into the database and log the run.

    Requests data for station ``GHCND:USW00094054`` from 2020-01-01 up to
    14 days before now via ``get_request``, writes each page of results with
    ``write_to_db`` and records the run through ``dbt.log_gsod_job_run``.

    The reported result count decides how many pages of 1000 are needed. The
    first response is reused as page one, and later pages are requested with
    a final argument of ``page * 1000`` (presumably a result offset - confirm
    against ``get_request``). A page that cannot be fetched or parsed is
    counted as a failure and skipped, so the run is still logged.

    Logged status: ``'COMPLETED'`` for no failures, ``'SOME FAILURES - CHECK
    LOGS'`` for exactly one, ``'FAILED'`` for more than one.

    Returns:
        True, regardless of the logged status.

    Raises:
        IndexError: if the ``jobs_dim`` query returns no rows.
    """
    start_time = dt.datetime.now()
    station_id = 'GHCND:USW00094054'
    station = Station.objects.get(id=station_id)

    # full back-fill window; a rolling "now - 21 days" start was used
    # previously instead of the fixed start date
    start_date = dt.date(2020, 1, 1)
    end_date = (dt.datetime.now() - dt.timedelta(days=14)).date()

    # for the job_runs database
    query = "SELECT Id FROM jobs_dim WHERE job_name='load_ghcnd'"
    job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]
    job_var = str(start_date) + ' - ' + str(end_date)

    page_size = 1000

    def fetch_page(offset):
        # one request for this station/date window, decoded from JSON
        return json.loads(
            get_request('GHCND', 'station', [station_id], None, None,
                        str(start_date), str(end_date), offset))

    failed = 0
    try:
        response = fetch_page(0)
    except Exception as e:
        print('Error Response:', station_id, e)
        failed += 1
    else:
        try:
            num_results = response['metadata']['resultset']['count']
            results = response['results']
        except Exception as e2:
            print('Error in Result:', station_id, e2)
            failed += 1
        else:
            num_pages = math.ceil(num_results / page_size)

            # the first response already holds page one; write it directly
            # instead of requesting offset 0 a second time
            write_to_db(results, station)

            # if results over 1000, then keep going with the remaining pages
            for page in range(1, num_pages):
                offset = page * page_size
                try:
                    results = fetch_page(offset)['results']
                except Exception as e3:
                    # count the failure and continue so the job run below
                    # is always recorded
                    print('Error Response:', station_id, offset, e3)
                    failed += 1
                    continue
                write_to_db(results, station)

    # once all complete, write to job_run
    if failed == 0:
        dbt.log_gsod_job_run(job_id, job_var, start_time, 'COMPLETED')
    elif failed <= 1:
        dbt.log_gsod_job_run(job_id, job_var, start_time,
                             'SOME FAILURES - CHECK LOGS')
    else:
        dbt.log_gsod_job_run(job_id, job_var, start_time, 'FAILED')
    return True
def execute(self):
    """Build the hex grid for one fixed date, write it to JSON and log the run.

    Collects every ``Station`` except those in Alaska or Hawaii as a GeoJSON
    point feature and attaches the ``TMAX``/``TMIN`` ``GHCND`` values found
    for 2020-01-02 (each divided by 10). The features go to
    ``hc.hexgrid_constructor`` and the result is dumped to
    ``gsod/posts/hexGrid_<date>.json``. The write outcome is appended to
    ``gsod/seleniumLog/<today>.log``.

    The job run is logged as ``'COMPLETED'`` only when the file was written;
    a failed write or a ``TimeoutException`` is logged as ``'FAILED'``.

    Returns:
        True if the JSON file was written, otherwise False.

    Raises:
        IndexError: if the ``jobs_dim`` query returns no rows.
    """
    # for the job_runs database
    start_time = dte.datetime.now()
    query = "SELECT Id FROM jobs_dim WHERE job_name='calculate_hexGrid_migrate'"
    job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]

    log_file = 'gsod/seleniumLog/' + str(dte.datetime.now().date()) + '.log'

    bbox = [-126, 24, -66.5, 50]  # USA
    cell_side = 15

    # get ALL Weather Stations
    stations = Station.objects.all()
    data_types = ['TMAX', 'TMIN']  # ['PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN']

    # the single day whose GHCND values are loaded
    this_date = '2020-01-02'

    st_json = []
    for s in stations:
        if s.us_state == 'Alaska' or s.us_state == 'Hawaii':
            continue

        # create dictionary to load info to template view
        new_dict = {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                'coordinates': [s.longitude, s.latitude]
            },
            'properties': {}
        }

        # generate dict based on all listed data types
        for d in data_types:
            try:
                ghcnd = GHCND.objects.get(station__id=s.id,
                                          date=this_date,
                                          datatype=d)
            except Exception:
                # best effort: a station without a usable record for this
                # data type simply gets no such property
                continue
            else:
                # presumably stored in tenths of a unit - TODO confirm
                new_dict['properties'][d] = ghcnd.value / 10

        # add dict to list (stations with no values keep empty properties)
        st_json.append(new_dict)

    try:
        # (24 + 50) / 2 is the midpoint of the 24 and 50 bounds in bbox
        hex_grid = hc.hexgrid_constructor(bbox, cell_side, st_json, 8,
                                          (24 + 50) / 2)
        print("Calculations completed!")

        filename = 'hexGrid_' + str(this_date) + '.json'
        try:
            with open('gsod/posts/' + filename, 'w') as outfile:
                json.dump(hex_grid, outfile, indent=4)
        except Exception as e:
            print('POST write to file: Failed', e)
            with open(log_file, 'a') as outfile:
                outfile.write('POST write to file: Failed' + str(e) + '\n')
            status = False
        else:
            print('POST write to file: Success!')
            with open(log_file, 'a') as outfile:
                outfile.write('POST write to file: Success!' + '\n')
            status = True
    except TimeoutException:
        print("Loading took too much time!", this_date)
        dbt.log_gsod_job_run(job_id, str(this_date), start_time, 'FAILED')
        status = False
    else:
        # report the run as completed only when the output file was written;
        # a failed write must not be recorded as a success
        dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                             'COMPLETED' if status else 'FAILED')
    return status