def get_date(date):
    """
    Parse a loosely formatted date and return it as a 'Europe/Kiev' timestamp.

    Relies on the module-level names ``parsedate``, ``replace_dates`` and ``now``.

    Returns ``None`` when ``date`` is null, when the first parse yields a year
    before 2017, or when the final timestamp falls outside the window
    ``(now - 1 day, now]``. Returns ``now`` when the value still cannot be
    parsed after the ``replace_dates`` substitutions have been applied.
    """
    if pd.isnull(date):
        return None

    stamp = parsedate(date)
    if stamp:
        if stamp.year < 2017:
            return None
        stamp = pd.to_datetime(stamp)
    else:
        # First attempt failed: rewrite the text with every
        # (pattern, replacement) rule, then try parsing once more.
        for rule in replace_dates:
            date = re.sub(rule[0], rule[1], date)
        stamp = pd.to_datetime(parsedate(date))

    if not stamp:
        return now

    if not stamp.tz:
        # Naive timestamps are interpreted as Kiev wall-clock time.
        stamp = stamp.tz_localize('Europe/Kiev')
    elif now.tz_convert(stamp.tz) < stamp:
        # An aware timestamp lying after `now` has its zone dropped and its
        # wall-clock value re-interpreted as Kiev time.
        stamp = stamp.tz_localize(None).tz_localize('Europe/Kiev')
    stamp = stamp.tz_convert('Europe/Kiev')

    earliest = now - pd.DateOffset(days=1)
    if stamp > earliest and stamp <= now:
        return stamp
    return None
def test_mktimerange_annual():
    """Annual ranges run from January 1st of the first year to December 31st of the last."""
    # A single year expands to that whole year.
    single_year = mktimerange(TimeResolution.ANNUAL, parsedate('2019'))
    assert single_year == (
        Timestamp('2019-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )

    # Two years expand to the start of the first and the end of the second.
    year_span = mktimerange(
        TimeResolution.ANNUAL, parsedate('2010'), parsedate('2020'))
    assert year_span == (
        Timestamp('2010-01-01 00:00:00'),
        Timestamp('2020-12-31 00:00:00'),
    )
def test_mktimerange_monthly():
    """Monthly ranges run from the first day of the first month to the last day of the last."""
    # A single month expands to that whole month.
    single_month = mktimerange(TimeResolution.MONTHLY, parsedate('2020-05'))
    assert single_month == (
        Timestamp('2020-05-01 00:00:00'),
        Timestamp('2020-05-31 00:00:00'),
    )

    # Two months expand to the start of the first and the end of the second.
    month_span = mktimerange(
        TimeResolution.MONTHLY, parsedate('2017-01'), parsedate('2019-12'))
    assert month_span == (
        Timestamp('2017-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )
def fn(datestr):
    """
    Return the age of ``datestr``, i.e. the current time minus the parsed date.

    ``datestr`` is converted with ``str()`` and parsed once by the module-level
    ``parsedate``. The subtraction is first attempted against a naive
    ``datetime.now()``; if that raises ``TypeError``, it is retried against
    ``datetime.now(timezone.utc)``.

    :param datestr: value whose string form is understood by ``parsedate``
    :return: the result of subtracting the parsed date from "now"
    :raises TypeError: if neither subtraction is possible
    """
    logger.debug("Calculating the age of '%s'", datestr)

    # Parse a single time; the fallback below reuses the same parsed value.
    moment = parsedate(str(datestr))
    try:
        ret = datetime.now() - moment
    except TypeError:
        # Subtracting an aware datetime from a naive one raises TypeError,
        # so retry with an aware "now".
        ret = datetime.now(timezone.utc) - moment

    # %r defers the repr() call until the debug record is actually emitted.
    logger.debug("Age of '%s' is %r", datestr, ret)
    return ret
def _filter_by_date(df, time_resolution, date_spec):
    """
    Restrict readings to a single date or to a ``from/to`` time interval.

    :param df: DataFrame holding the collected readings
    :param time_resolution: time resolution of the originating request
    :param date_spec: raw ``--date`` value; treated as an interval when it
        contains a slash, otherwise as a single date
    :return: the rows of ``df`` matching the date constraint
    """
    if '/' in date_spec:
        raw_from, raw_to = date_spec.split('/')
        bounds = (parsedate(raw_from), parsedate(raw_to))
    else:
        bounds = (parsedate(date_spec),)

    if time_resolution in (TimeResolution.ANNUAL, TimeResolution.MONTHLY):
        # Annual and monthly rows carry a from/to range; widen the requested
        # date(s) to a full range and keep the rows lying inside it.
        date_from, date_to = mktimerange(time_resolution, *bounds)
        mask = (
            date_from <= df[DWDMetaColumns.FROM_DATE.value]) & (
            df[DWDMetaColumns.TO_DATE.value] <= date_to)
    elif len(bounds) == 2:
        date_from, date_to = bounds
        mask = (date_from <= df[DWDMetaColumns.DATE.value]) & (
            df[DWDMetaColumns.DATE.value] <= date_to)
    else:
        mask = (bounds[0] == df[DWDMetaColumns.DATE.value])

    return df[mask]


def run():
    """
    Usage:
      dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>]
      dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>]
      dwd about [parameters] [resolutions] [periods]
      dwd --version
      dwd (-h | --help)

    Options:
      --station=<station>           Comma-separated list of station identifiers
      --parameter=<parameter>       Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --persist                     Save and restore data to filesystem w/o going to the network
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --format=<format>             Output format. [Default: json]
      --version                     Show version information
      --debug                       Enable debug messages
      -h --help                     Show this screen


    Examples:

      # Get list of stations for daily climate summary data in JSON format
      dwd stations --parameter=kl --resolution=daily --period=recent

      # Get list of stations for daily climate summary data in CSV format
      dwd stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get daily climate summary data for stations 44 and 1048
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent

      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist

      # Limit output to specific date
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11

      # Acquire monthly data for 2020-05
      dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2020-05

      # Acquire monthly data from 2017-01 to 2019-12
      dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2017-01/2019-12

      # Acquire annual data for 2019
      dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2019

      # Acquire annual data from 2010 to 2020
      dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2010/2020

    """

    # Read command line options.
    options = normalize_options(
        docopt(run.__doc__, version=f'dwd {__version__}'))

    # Setup logging.
    # NOTE(review): "--debug" is listed under Options but appears in no Usage
    # pattern above -- confirm the flag is actually accepted on the command line.
    log_level = logging.DEBUG if options.get('debug') else logging.INFO
    setup_logging(log_level)

    if options.about:
        about(options)
        return

    if options.stations:
        df = metadata_for_dwd_data(
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=options.period,
            write_file=options.persist,
        )

    elif options.readings:
        request = DWDStationRequest(
            station_ids=read_list(options.station),
            # TODO: Would like to say "climate_summary" instead of "kl" here.
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=read_list(options.period),
            humanize_column_names=True,
        )
        # With --persist, data is both written to and preferably read from disk.
        data = request.collect_data(
            write_file=options.persist,
            prefer_local=options.persist,
        )
        data = list(data)
        if not data:
            log.error('No data available for given constraints')
            sys.exit(1)
        df = pd.concat(data)

    # Date filtering only applies to readings.
    if options.readings and options.date:
        df = _filter_by_date(df, request.time_resolution, options.date)

    # Make column names lowercase.
    df = df.rename(columns=str.lower)

    # Output as JSON.
    if options.format == 'json':
        output = df.to_json(orient='records', date_format='iso', indent=4)

    # Output as CSV.
    elif options.format == 'csv':
        # NOTE(review): the time part is hyphen-separated ("%H-%M-%S"), not
        # colon-separated -- confirm this is intended before changing it.
        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H-%M-%S')

    # Output as XLSX.
    elif options.format == 'excel':
        # TODO: Obtain output file name from command line.
        log.info('Writing "output.xlsx"')
        df.to_excel('output.xlsx', index=False)
        return

    else:
        log.error('Output format must be one of "json", "csv", "excel".')
        sys.exit(1)

    print(output)
def test_mktimerange_invalid():
    """``mktimerange`` raises ``NotImplementedError`` for the DAILY resolution."""
    with pytest.raises(NotImplementedError):
        unsupported_date = parsedate('2020-05-01')
        mktimerange(TimeResolution.DAILY, unsupported_date)