Beispiel #1
0
    def execute(self):
        """Load GHCND records for one hard-coded station and log the job run.

        Requests the records of station ``GHCND:USW00094054`` between
        2020-01-01 and 14 days before today, writes every page of results
        through ``write_to_db`` and records the outcome with
        ``dbt.log_gsod_job_run``.

        Request/parse errors are printed and counted instead of raised, so
        the job run is always logged: no error -> 'COMPLETED', one error ->
        'SOME FAILURES - CHECK LOGS', more than one -> 'FAILED'.

        Returns:
            bool: always ``True``.
        """
        start_time = dt.datetime.now()
        station_id = 'GHCND:USW00094054'
        station = Station.objects.get(id=station_id)

        # migration window: fixed start date up to 14 days before today
        start_date = dt.date(2020, 1, 1)
        # start_date = (dt.datetime.now() - dt.timedelta(days=21)).date()
        end_date = (dt.datetime.now() - dt.timedelta(days=14)).date()

        # for the job_runs database
        query = "SELECT Id FROM jobs_dim WHERE job_name='load_ghcnd'"
        job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]
        job_var = str(start_date) + ' - ' + str(end_date)

        failed = 0
        try:
            response = json.loads(
                get_request('GHCND', 'station', [station_id], None, None,
                            str(start_date), str(end_date), 0))
        except Exception as e:
            print('Error Response:', station_id, e)
            failed += 1
        else:
            try:
                num_results = response['metadata']['resultset']['count']
                results = response['results']
                # print(num_results, results)
            except Exception as e2:
                print('Error in Result:', station_id, e2)
                failed += 1
            else:
                # the first response already holds the first page (offset 0),
                # so store it directly instead of requesting it again
                write_to_db(results, station)

                # NOTE(review): assumes get_request returns at most 1000
                # records per call and that its last argument is the result
                # offset - confirm against get_request.
                num_pages = math.ceil(num_results / 1000)
                for i in range(1, num_pages):
                    try:
                        response = json.loads(
                            get_request('GHCND', 'station', [station_id],
                                        None, None, str(start_date),
                                        str(end_date), i * 1000))
                        results = response['results']
                    except Exception as e3:
                        # a bad page must not abort the run: count it and
                        # carry on so the job run still gets logged below
                        print('Error in Page:', station_id, i * 1000, e3)
                        failed += 1
                        continue

                    # write to database
                    write_to_db(results, station)

        # once all complete, write to job_run
        if failed == 0:
            dbt.log_gsod_job_run(job_id, job_var, start_time, 'COMPLETED')
        elif failed <= 1:
            dbt.log_gsod_job_run(job_id, job_var, start_time,
                                 'SOME FAILURES - CHECK LOGS')
        else:
            dbt.log_gsod_job_run(job_id, job_var, start_time, 'FAILED')

        return True
    def execute(self):
        """Trigger the hex-grid calculation page for each date via headless Chrome.

        Iterates ``daterange(start_date, end_date)`` from 2020-01-01 to 14
        days before today, opens the calculate-hexGrid URL of every date in a
        headless Chrome browser and waits for an element with id ``done``.
        Each date is logged with ``dbt.log_gsod_job_run`` as 'COMPLETED' or,
        when the wait times out, 'FAILED'.

        When ``st.DEBUG`` is set the local server is used and the loop stops
        once it reaches 2020-01-10.

        The browser is always shut down when the method leaves, including
        when an exception is raised.

        Returns:
            bool: always ``True``.
        """
        # for the job_runs database
        start_time = dt.datetime.now()
        query = "SELECT Id FROM jobs_dim WHERE job_name='calculate_hexGrid_migrate'"
        job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]

        # prepare the option for the chrome driver
        options = webdriver.ChromeOptions()
        options.add_argument('headless')
        options.add_argument('--no-sandbox')

        # start chrome browser
        browser = webdriver.Chrome(chrome_options=options)

        try:
            # loop through dates
            start_date = dt.date(2020, 1, 1)
            end_date = (dt.datetime.now() -
                        dt.timedelta(days=14)).date()  # date of migration - 14
            for this_date in daterange(start_date, end_date):

                if st.DEBUG:
                    URL = 'http://127.0.0.1:8000/calculate-hexGrid/' + str(
                        this_date) + '/'
                    if str(this_date) == '2020-01-10':
                        break
                else:
                    # PRODUCTION
                    URL = 'https://portfolio.sinto-ling.ca/gsod/calculate-hexGrid/' + str(
                        this_date) + '/'

                print(URL)
                browser.get(URL)

                # fixed 20 minute pause, then wait up to 500 more seconds
                # for the "done" id to be generated
                time.sleep(1200)
                delay = 500
                try:
                    myElem = WebDriverWait(browser, delay).until(
                        EC.presence_of_element_located((By.ID, 'done')))
                    print("Calculations and API requests completed!")
                    print(myElem.get_attribute('outerHTML'))
                except TimeoutException:
                    print("Loading took too much time!", this_date)
                    dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                                         'FAILED')
                else:
                    # after each completion, take a 10 minute break
                    dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                                         'COMPLETED')
                    time.sleep(600)
        finally:
            # always release the driver and the browser process it started
            browser.quit()

        return True
    def execute(self):
        """Build the hex grid for one hard-coded date and write it to a JSON file.

        Collects one GeoJSON-style point feature per weather station (Alaska
        and Hawaii are skipped) holding the TMAX/TMIN values found for
        2020-01-02, passes the features to ``hc.hexgrid_constructor`` and
        dumps the result to ``gsod/posts/hexGrid_<date>.json``. The outcome
        of the file write is appended to the daily log file and the job run
        is recorded with ``dbt.log_gsod_job_run``.

        Stations without any value for the date are left out, and each
        station contributes exactly one feature.

        Returns:
            bool: ``True`` when the grid was written to file, ``False`` when
            the calculation timed out or the file could not be written.
        """
        # for the job_runs database
        start_time = dte.datetime.now()
        query = "SELECT Id FROM jobs_dim WHERE job_name='calculate_hexGrid_migrate'"
        job_id = [n for (n, ) in dbt.gsod_db_reader(query)][0]
        log_file = 'gsod/seleniumLog/' + str(
            dte.datetime.now().date()) + '.log'

        bbox = [-126, 24, -66.5, 50]  # USA
        cellSide = 15

        # get ALL Weather Stations
        stations = Station.objects.all()
        data_types = ['TMAX',
                      'TMIN']  # ['PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN']

        # the single day whose GHCND values are loaded
        this_date = '2020-01-02'

        st_json = []
        for s in stations:
            if s.us_state in ('Alaska', 'Hawaii'):
                continue
            # if s.us_state != 'Alaska':  # only get alaska
            #     continue

            # collect the value of every listed data type for this station
            properties = {}
            for d in data_types:
                try:
                    ghcnd = GHCND.objects.get(station__id=s.id,
                                              date=this_date,
                                              datatype=d)
                except Exception:
                    # best effort: a data type that cannot be fetched for
                    # this station/date is simply left out
                    continue
                # NOTE(review): presumably values are stored in tenths of a
                # unit - confirm against the GHCND model
                properties[d] = ghcnd.value / 10

            # add the station once, and only when it has at least one value
            # (appending inside the loop above would duplicate the feature
            # for every additional data type found)
            if properties:
                st_json.append({
                    'type': 'Feature',
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [s.longitude, s.latitude]
                    },
                    'properties': properties
                })

        try:
            # last argument: mid value of bbox[1] and bbox[3], i.e. (24 + 50) / 2
            hexGrid = hc.hexgrid_constructor(bbox, cellSide, st_json, 8,
                                             (bbox[1] + bbox[3]) / 2)
        except TimeoutException:
            print("Loading took too much time!", this_date)
            dbt.log_gsod_job_run(job_id, str(this_date), start_time, 'FAILED')
            return False

        print("Calculations completed!")
        filename = 'hexGrid_' + str(this_date) + '.json'
        try:
            with open('gsod/posts/' + filename, 'w') as outfile:
                json.dump(hexGrid, outfile, indent=4)
        except Exception as e:
            print('POST write to file: Failed', e)
            with open(log_file, 'a') as outfile:
                outfile.write('POST write to file: Failed' + str(e) + '\n')
            status = False
        else:
            print('POST write to file: Success!')
            with open(log_file, 'a') as outfile:
                outfile.write('POST write to file: Success!' + '\n')
            status = True

        # the logged job status has to agree with the returned status
        dbt.log_gsod_job_run(job_id, str(this_date), start_time,
                             'COMPLETED' if status else 'FAILED')

        return status
Beispiel #4
0
# part of the August 2020 data migration
# add gsod jobs for daily/weekly and migration
from gsod.oper import database_transactions as dbt

# register the load_ghcnd jobs first, then the calculate_hexGrid jobs;
# each entry pairs the job name with the label printed next to its result
job_labels = (
    ('load_ghcnd_migrate', 'Add load_ghcnd_migrate:'),
    ('load_ghcnd', 'Add load_ghcnd:'),
    ('calculate_hexGrid_migrate', 'Add calculate_hexGrid_migrate:'),
    ('calculate_hexGrid', 'Add calculate_hexGrid:'),
)
for job_name, label in job_labels:
    result = dbt.add_gsod_job(job_name)
    print(label, result)

# first check: print everything that is now in jobs_dim
query = 'SELECT * FROM jobs_dim'
result = dbt.gsod_db_reader(query)
print(result)

# second check: look up the id of one of the jobs by its name
query = "SELECT Id FROM jobs_dim WHERE job_name='load_ghcnd_migrate'"
matching_ids = [job_key for (job_key, ) in dbt.gsod_db_reader(query)]
job_id = matching_ids[0]
print(job_id)