def scrape_last_n_years_of_k_players(self,
                                     n,
                                     k,
                                     mode,
                                     singOrDubs="singles"):
    last_ten_years = get_dates()
    dates_by_year = get_dates_by_year()

    if mode not in self.modes:
        print("Error: Invalid mode. Options are " + str(self.modes))
        return

    filename = self.generate_filename(singOrDubs, mode)
    with open(filename, "w") as output_file:
        output_file.write(self.file_header[mode])

        searchedPlayers = []
        for year in last_ten_years[:n]:
            str_year = str(year)
            # dates_by_year presumably maps each year to the December day of
            # its final ranking release (the URL hardcodes month 12).
            next_url = ("https://www.atptour.com/en/rankings/" + singOrDubs +
                        "?rankDate=" + str_year + "-12-" + dates_by_year[year] +
                        "&rankRange=0-5000")

            if self.debug:
                print(year)
                print(next_url)

            self.browser.get(next_url)
            self.scrape_func[mode](k, output_file, next_url, str_year,
                                   searchedPlayers)

    self.browser.quit()
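# A minimal usage sketch of the method above; AtpScraper, its constructor, and
# the "bio" mode string are illustrative assumptions, not from the source.
scraper = AtpScraper(debug=True)
# Scrape the top 100 singles players from each of the last 5 year-end rankings.
scraper.scrape_last_n_years_of_k_players(n=5, k=100, mode="bio")

Example #2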
def transfer_files(start_date=None, end_date=None):
    """!
    Transfer daily ACCESS-G files from NCI to the network location.
    - Requires an NCI login and a private ssh key registered with NCI, or password input.
    - If using password input, run this from a terminal rather than an IDE's "run"
      (getpass needs an interactive prompt).

    Run without arguments to update - only transfer files newer than the newest file.

    @param start_date: starting date for files to download
    @param end_date: end date for files to download (not inclusive); defaults to today
    """
    # Evaluate the default at call time; a datetime.date.today() default in the
    # signature would be frozen at import time.
    if end_date is None:
        end_date = datetime.date.today()

    my_hostname = 'raijin.nci.org.au'
    my_username = '******'
    #my_password = getpass()
    private_key = '~/.ssh/id_rsa'

    if not start_date:
        start_date = get_start_date(settings.ACCESS_G_PATH)

    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)

    if start_date >= today or (start_date == yesterday
                               and datetime.datetime.now().hour < 8):
        # The previous day's 1200 file is uploaded to NCI at ~7.30am each day
        return print('ACCESS-G downloaded files are already up to date')

    dates = get_dates(start_date, end_date)

    with pysftp.Connection(host=my_hostname,
                           username=my_username,
                           private_key=private_key) as sftp:
        print("Connection succesfully established ... ")

        # Switch to a remote directory
        sftp.cwd('/g/data3/lb4/ops_aps2/access-g/0001/')

        nc_filename = 'accum_prcp.nc'
        hour = settings.ACCESS_HOUR

        # Local staging directory for downloads; the trimmed files are then
        # written to networkPath, which the original snippet never defines.
        # Assumed here to be the configured ACCESS-G destination.
        localPath = 'temp/'
        networkPath = settings.ACCESS_G_PATH

        for date in dates:
            new_file_name = settings.access_g_filename(date)
            remoteFilePath = date + '/' + hour + '/fc/sfc/' + nc_filename
            localFilePath = localPath + new_file_name
            sftp.get(remoteFilePath, localFilePath)

            australiaFile = limit_coordinates(localFilePath)
            australiaFile.to_netcdf(networkPath + new_file_name)

            print('File: ' + new_file_name + ' written')
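# If no ssh key is available, the commented-out getpass() line above suggests a
# password fallback; a minimal sketch using pysftp's password parameter (run
# from a real terminal so getpass can prompt):
from getpass import getpass
import pysftp

def connect_with_password(hostname, username):
    password = getpass('NCI password: ')
    return pysftp.Connection(host=hostname, username=username, password=password)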
Example #3
from time import time
import json

import dateutil  # project-local helper module providing get_dates()

MAX_REPORT_STALENESS = 300  # seconds between refreshes; value assumed for illustration
last_report_update = 0
report_dates = None
reports_json = None


def update_reports():
    global last_report_update, report_dates, reports_json

    # Skip the refresh while the cached reports are still fresh enough.
    if (time() - last_report_update) < MAX_REPORT_STALENESS:
        return

    report_dates = dateutil.get_dates()

    with open('config/reports.json') as reports_file:
        reports_json = json.load(reports_file)
        last_report_update = time()
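# Usage sketch: the first call loads config/reports.json; immediate repeat
# calls are no-ops until MAX_REPORT_STALENESS seconds have elapsed.
update_reports()
update_reports()  # no-op while the cache is fresh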
Example #4
import numpy as np
import matplotlib.pyplot as plt

import dates  # project-local module
# build_with_xtime is defined elsewhere in the project.

seconds_in_month = 30 * 24 * 60 * 60  # approximate month length; assumed value


def plot_dist_xtime(identifier="164952497", window_size=seconds_in_month):
    """
    :param identifier: id of user
    :param window_size: sliding window size, in seconds
    :return: activity of the first user and of the second
    """
    dates.create_single_file(identifier)
    plt.figure(figsize=(10, 7.), dpi=200)

    # User1 activity: x axis is time, y axis is activity, computed with a
    # sliding-window approach over the sorted message time differences.
    diffs, timing = dates.making_difference_sorted(
        dates.get_directed_dates(identifier, from_me=True))
    ans = build_with_xtime(
        timing, diffs, window_size)  # window_size is seconds only for xtime
    x_user1 = np.array(ans[0])
    y_user1 = np.array(ans[1])
    plt.plot(x_user1, y_user1, label="User1 activity", lw=2.)

    # User2 activity, computed the same way for messages from the other side.
    diffs, timing = dates.making_difference_sorted(
        dates.get_directed_dates(identifier, from_me=False))
    ans = build_with_xtime(timing, diffs, window_size)
    x_user2 = np.array(ans[0])
    y_user2 = np.array(ans[1])
    plt.plot(x_user2, y_user2, label="User2 activity", lw=2.)

    # Combined activity over all messages between the two users.
    diffs, timing = dates.making_difference_sorted(dates.get_dates(identifier))
    ans = build_with_xtime(timing, diffs, window_size)
    plt.plot(ans[0], ans[1], label="Summary activity", lw=2.)

    plt.legend()
    plt.xlabel("months", fontsize=27)
    plt.ylabel("frequency", fontsize=27)
    plt.show()

    # Difference in activity: interpolate user1 onto user2's time grid first.
    plt.figure(figsize=(10, 7.), dpi=200)
    y = np.interp(x_user2, x_user1, y_user1) - y_user2
    plt.plot(x_user2, y, label="difference in activity", lw=2.)
    plt.xlabel("months", fontsize=27)
    plt.ylabel("difference", fontsize=27)
    # print("vk.com/id{id} ignores {count} messages per month\n".format(
    #     id=identifier, count=np.mean(y) * window_size))
    plt.show()
    return [x_user1, y_user1], [x_user2, y_user2]
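# For orientation, a hypothetical sliding-window counter in the spirit of
# build_with_xtime (the real implementation lives elsewhere in the project;
# this sketch only illustrates the trailing-window idea):
def sliding_window_activity(timing, window_size):
    # timing: sorted message timestamps in seconds.
    xs, ys = [], []
    start = 0
    for i, t in enumerate(timing):
        # Slide the window start forward until it covers [t - window_size, t].
        while timing[start] < t - window_size:
            start += 1
        xs.append(t)
        ys.append(i - start + 1)  # messages inside the trailing window
    return xs, ys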
Example #5
import csv
import json
from contextlib import closing
from io import StringIO
from urllib.request import urlopen


def pull_data():
    dates_list = get_dates(LAST_DAY)

    for date in dates_list:
        csv_url = DATA_URL + date + '.csv'
        json_file = OUTPUT_PATH + date + '.json'

        with closing(urlopen(csv_url)) as infile:
            with open(json_file, 'w') as outfile:
                data = infile.read().decode('ascii', 'ignore')
                dict_reader = csv.DictReader(StringIO(data))

                # Only keep US states. Collect the rows first and dump them in
                # one call, so the output is valid JSON (writing a comma after
                # every row, as the original did, produces an unparseable file).
                rows = [row for row in dict_reader
                        if row['Province/State'] in LIST_OF_STATES]
                json.dump(rows, outfile, indent=4)
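# The module-level constants are not shown in the snippet; illustrative values
# only (the URL follows the JHU CSSE daily-reports layout this code appears to
# target, but treat all of these as assumptions):
DATA_URL = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/'
            'master/csse_covid_19_data/csse_covid_19_daily_reports/')
OUTPUT_PATH = 'output/'
LAST_DAY = '03-22-2020'
LIST_OF_STATES = {'Washington', 'New York', 'California'}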
Example #6
## Adds your timetable from `data.txt` to Google Calendar.
from __future__ import print_function

import argparse
import datetime
import getpass
import json
import os
import re
import sys

from icalendar import Calendar, Event

import build_event
import dates

WORKING_DAYS = dates.get_dates()

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input")
parser.add_argument("-o", "--output")
args = parser.parse_args()

DEBUG = False
GENERATE_ICS = True
# Matches time ranges like "9:30:AM-10:45:PM".
TIMETABLE_DICT_RE = '([0-9]{1,2}):([0-9]{1,2}):([AP])M-([0-9]{1,2}):([0-9]{1,2}):([AP])M'
timetable_dict_parser = re.compile(TIMETABLE_DICT_RE)

INPUT_FILENAME = args.input if args.input else "data.txt"
if not os.path.exists(INPUT_FILENAME):
    print("Input file", INPUT_FILENAME, "does not exist.")
    sys.exit(1)
Example #7
    def test_get_dates(self):
        result = dates.get_dates(self.start_date, self.end_date)
        assert result == self.dates
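# A hypothetical harness for the test method above; the fixture values and the
# end-exclusive convention are assumptions (consistent with the "not inclusive"
# end_date documented in transfer_files earlier):
import datetime
import unittest

import dates


class TestDates(unittest.TestCase):
    def setUp(self):
        self.start_date = datetime.date(2020, 1, 1)
        self.end_date = datetime.date(2020, 1, 4)
        self.dates = [datetime.date(2020, 1, day) for day in range(1, 4)]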
Example #8
import os

import json
import datetime
import sys

# this script works only with Python 3
if sys.version_info[0] != 3:
    print ("This script works only with Python 3")
    sys.exit(1)

import re
from icalendar import Calendar, Event

import dates
WORKING_DAYS = dates.get_dates()

import build_event
from update_subjects_json import update_sub_list

import argparse
import getpass

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input")
parser.add_argument("-o", "--output")
args = parser.parse_args()

DEBUG = False
GENERATE_ICS = True
TIMETABLE_DICT_RE = '([0-9]{1,2}):([0-9]{1,2}):([AP])M-([0-9]{1,2}):([0-9]{1,2}):([AP])M'
Example #9
from collections import namedtuple  # needed for RequestOneFlight below

import yaml

from functions import get_airports_raw_data, get_connections_from_stations_data, execute_request
from dates import get_dates

print('Starting main.py to run Ryanpy!')

RequestOneFlight = namedtuple('RequestOneFlight', ['orig', 'dest', 'date'])

with open("Config.yml", 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.SafeLoader)

date_from, date_to, duration = config['dates']['fromdate'], config['dates']['todate'], config['dates']['duration']
departure_airports = config['airports']['departureairports']

date_list = get_dates(date_from, date_to)
flight_list = []

data_connections = get_connections_from_stations_data(get_airports_raw_data())
# print(data_connections['FKB'])
for departure in departure_airports:
    print("Looking for connections starting from: {}.".format(departure))
    print("All destinations: {}.".format(data_connections[departure]))
    # TODO: build a request object that can be passed as a parameter to a function
    for item in data_connections[departure]:
        print("Looking for connections starting from: {} to {}.".format(departure, item))
        list_two_way = []
        requestOneFlight = RequestOneFlight(
            orig=departure,
            dest=item,
            date=date_list[0])
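# The script expects a Config.yml next to it; a hypothetical file matching the
# keys read above:
#
#   dates:
#     fromdate: '2020-06-01'
#     todate: '2020-06-30'
#     duration: 7
#   airports:
#     departureairports:
#       - FKB
#       - HHN

Example #10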
def test_get_dates():
    data = dates.get_dates()
    assert len(data) == 4
    for row in data:
        assert len(row) == 3
        assert isinstance(row[1], datetime.date)
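# The assertions imply dates.get_dates() returns four 3-field rows whose middle
# field is a datetime.date; a hypothetical stub consistent with the test:
import datetime

def get_dates():
    return [
        ('q1', datetime.date(2020, 1, 1), 10),
        ('q2', datetime.date(2020, 4, 1), 20),
        ('q3', datetime.date(2020, 7, 1), 30),
        ('q4', datetime.date(2020, 10, 1), 40),
    ]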
Example #11
def aggregate_netcdf(update_only=True,
                     start_date=None,
                     end_date=None,
                     smips=False,
                     accessg=False):

    # One of smips/accessg selects which product to aggregate; guard first so
    # the remaining logic never hits an unreachable else branch.
    if not (smips or accessg):
        return print('Run with smips=True or accessg=True')

    if smips:
        aggregate_file = aggregated_smips
        path = settings.SMIPS_DEST_PATH
        if not end_date:
            end_date = settings.yesterday
        files = settings.smips_filename
    else:  # accessg
        aggregate_file = aggregated_access_g
        path = settings.ACCESS_G_PATH
        if not end_date:
            end_date = datetime.date.today()
        files = settings.access_g_filename

    if update_only:
        if not start_date:
            if accessg:
                # The ACCESS-G cube stores time as days since 1900-01-01.
                nc = xr.open_dataset(path + aggregate_file,
                                     decode_times=False)
                latest = nc.time.values[-1]
                start = datetime.date(1900, 1, 1)
                start_date = start + datetime.timedelta(
                    int(latest)) + datetime.timedelta(days=1)
                nc.close()
                if start_date >= datetime.date.today():
                    return print('ACCESS-G aggregation is already up to date')

            else:  # smips
                nc = xr.open_dataset(path + aggregate_file)
                latest = nc.time.values[-1]
                start_date = convert_date(latest) + datetime.timedelta(days=1)
                nc.close()
                if start_date >= settings.yesterday:
                    return print('SMIPS aggregation is already up to date')

        dates = get_dates(start_date=start_date, end_date=end_date)
        files = [path + files(date) for date in dates]

    else:
        if smips:
            files = glob.glob(path + '*/*.nc')
        else:  # accessg; skip the stray cdo.nc file in the access-g directories
            files = glob.glob(path + '*/*12.nc')

    if not files:
        return print('File aggregation is up to date')

    add_to_netcdf_cube_from_files(end_date=end_date,
                                  cubename=aggregate_file,
                                  files=files)
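# Usage sketch, assuming the module-level names (settings, aggregated_smips,
# aggregated_access_g, and the helper functions) are in scope as in the source:
aggregate_netcdf(update_only=True, accessg=True)   # extend the ACCESS-G cube with new days
aggregate_netcdf(update_only=False, smips=True)    # rebuild the SMIPS cube from all files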