Example #1
0
def get_date(date):
    '''
    Parse a loosely formatted date value into a Europe/Kiev timestamp.

    The value is parsed with ``parsedate``. When that fails, every
    ``(pattern, replacement)`` pair in ``replace_dates`` is applied to the
    text with ``re.sub`` and parsing is attempted once more.

    Returns:
        * ``None`` when ``date`` is null, when the first parse succeeds
          with a year before 2017, or when the final timestamp is not
          within the last day, i.e. outside ``(now - 1 day, now]``.
        * ``now`` when the value cannot be parsed even after the
          replacements have been applied.
        * Otherwise the parsed timestamp, expressed in ``Europe/Kiev``.
    '''
    if pd.isnull(date):
        return None

    parsed = parsedate(date)

    if not parsed:
        # First attempt failed: rewrite the text and try once more.
        for rule in replace_dates:
            date = re.sub(rule[0], rule[1], date)
        parsed = pd.to_datetime(parsedate(date))
        if not parsed:
            return now
    else:
        if parsed.year < 2017:
            return None
        parsed = pd.to_datetime(parsed)

    if not parsed.tz:
        # Naive timestamps are taken to be Kiev wall-clock time.
        parsed = parsed.tz_localize('Europe/Kiev')
    elif now.tz_convert(parsed.tz) < parsed:
        # An aware timestamp lying in the future: drop its zone and
        # re-read the same wall-clock time as Kiev time.
        parsed = parsed.tz_localize(None).tz_localize('Europe/Kiev')

    parsed = parsed.tz_convert('Europe/Kiev')

    # Only timestamps from the last 24 hours (up to and including now)
    # are accepted.
    earliest = now - pd.DateOffset(days=1)
    is_recent = parsed > earliest and parsed <= now
    return parsed if is_recent else None
Example #2
0
def test_mktimerange_annual():
    """Annual ranges span 1 January to 31 December of the given year(s)."""

    # A single year expands to that whole year.
    single_year = mktimerange(TimeResolution.ANNUAL, parsedate('2019'))
    assert single_year == (
        Timestamp('2019-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )

    # Two years expand to the start of the first and the end of the second.
    year_span = mktimerange(
        TimeResolution.ANNUAL, parsedate('2010'), parsedate('2020'))
    assert year_span == (
        Timestamp('2010-01-01 00:00:00'),
        Timestamp('2020-12-31 00:00:00'),
    )
Example #3
0
def test_mktimerange_monthly():
    """Monthly ranges span the first to the last day of the given month(s)."""

    # A single month expands to that whole month.
    single_month = mktimerange(TimeResolution.MONTHLY, parsedate('2020-05'))
    assert single_month == (
        Timestamp('2020-05-01 00:00:00'),
        Timestamp('2020-05-31 00:00:00'),
    )

    # Two months expand to the start of the first and the end of the second.
    month_span = mktimerange(
        TimeResolution.MONTHLY, parsedate('2017-01'), parsedate('2019-12'))
    assert month_span == (
        Timestamp('2017-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )
Example #4
0
 def fn(datestr):
     """
     Return how long ago ``datestr`` was, as the difference to "now".

     ``datestr`` is converted with ``str()`` and parsed by ``parsedate``.
     The result is ``now - parsed``; the start and end of the calculation
     are logged at debug level.
     """
     logger.debug("Calculating the age of '%s'", datestr)

     # Parse once up front so the fallback below does not repeat the work.
     parsed = parsedate(str(datestr))

     try:
         ret = datetime.now() - parsed
     except TypeError:
         # Subtracting a naive and an aware datetime raises TypeError;
         # retry against a timezone-aware "now" in UTC.
         ret = datetime.now(timezone.utc) - parsed

     logger.debug("Age of '%s' is %r", datestr, ret)
     return ret
Example #5
0
def _filter_readings_by_date(df, date, time_resolution):
    """
    Restrict readings to a single date or to a ``from/to`` time interval.

    :param df: DataFrame of readings to filter.
    :param date: Either a single date(time) string or two of them
        separated by ``/``.
    :param time_resolution: Resolution of the request. For annual and
        monthly data the bounds are expanded with ``mktimerange`` and
        compared against the from/to date columns; otherwise the plain
        date column is compared.
    :return: The rows of ``df`` matching the date constraint.
    """
    if '/' in date:
        raw_from, raw_to = date.split('/')
        bounds = (parsedate(raw_from), parsedate(raw_to))
    else:
        bounds = (parsedate(date),)

    if time_resolution in (TimeResolution.ANNUAL, TimeResolution.MONTHLY):
        date_from, date_to = mktimerange(time_resolution, *bounds)
        expression = (
            date_from <= df[DWDMetaColumns.FROM_DATE.value]) & (
                df[DWDMetaColumns.TO_DATE.value] <= date_to)
    elif len(bounds) == 2:
        date_from, date_to = bounds
        expression = (
            date_from <= df[DWDMetaColumns.DATE.value]) & (
                df[DWDMetaColumns.DATE.value] <= date_to)
    else:
        expression = (bounds[0] == df[DWDMetaColumns.DATE.value])

    return df[expression]


def run():
    """
    Usage:
      dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>] [--debug]
      dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>] [--debug]
      dwd about [parameters] [resolutions] [periods] [--debug]
      dwd --version
      dwd (-h | --help)

    Options:
      --station=<station>           Comma-separated list of station identifiers
      --parameter=<parameter>       Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --persist                     Save and restore data to filesystem w/o going to the network
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --format=<format>             Output format. [Default: json]
      --version                     Show version information
      --debug                       Enable debug messages
      -h --help                     Show this screen


    Examples:

      # Get list of stations for daily climate summary data in JSON format
      dwd stations --parameter=kl --resolution=daily --period=recent

      # Get list of stations for daily climate summary data in CSV format
      dwd stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get daily climate summary data for stations 44 and 1048
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent

      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist

      # Limit output to specific date
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11

      # Acquire monthly data for 2020-05
      dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2020-05

      # Acquire monthly data from 2017-01 to 2019-12
      dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2017-01/2019-12

      # Acquire annual data for 2019
      dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2019

      # Acquire annual data from 2010 to 2020
      dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2010/2020

    """
    # The docstring above is the command line grammar handed to docopt, so
    # its text is part of the program's behaviour. Each usage pattern lists
    # "[--debug]" explicitly: docopt rejects options that appear only in the
    # "Options:" section and in none of the usage patterns.

    # Read command line options.
    options = normalize_options(
        docopt(run.__doc__, version=f'dwd {__version__}'))

    # Setup logging.
    log_level = logging.DEBUG if options.get('debug') else logging.INFO
    setup_logging(log_level)

    if options.about:
        about(options)
        return

    if options.stations:
        df = metadata_for_dwd_data(
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=options.period,
            write_file=options.persist,
        )

    elif options.readings:
        request = DWDStationRequest(
            station_ids=read_list(options.station),
            # TODO: Would like to say "climate_summary" instead of "kl" here.
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=read_list(options.period),
            humanize_column_names=True,
        )
        data = request.collect_data(
            write_file=options.persist,
            prefer_local=options.persist,
        )
        data = list(data)
        if not data:
            log.error('No data available for given constraints')
            sys.exit(1)
        df = pd.concat(data)

        # Filter by date or by time interval, if requested.
        if options.date:
            df = _filter_readings_by_date(
                df, options.date, request.time_resolution)

    # Make column names lowercase.
    df = df.rename(columns=str.lower)

    # Output as JSON.
    if options.format == 'json':
        output = df.to_json(orient='records', date_format='iso', indent=4)

    # Output as CSV.
    elif options.format == 'csv':
        # NOTE(review): the time part is hyphen-separated ("%H-%M-%S"),
        # unlike the ISO format used for JSON. Confirm this is intended.
        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H-%M-%S')

    # Output as XLSX.
    elif options.format == 'excel':
        # TODO: Obtain output file name from command line.
        log.info('Writing "output.xlsx"')
        df.to_excel('output.xlsx', index=False)
        return

    else:
        log.error('Output format must be one of "json", "csv", "excel".')
        sys.exit(1)

    print(output)
Example #6
0
def test_mktimerange_invalid():
    """Requesting a time range for daily resolution is not implemented."""

    daily_date = parsedate('2020-05-01')

    with pytest.raises(NotImplementedError):
        mktimerange(TimeResolution.DAILY, daily_date)