예제 #1
0
def test_calc_grid_standard(lons, lats):
    """Check that calc_grid returns a dict whose mask matches the mesh shape.

    ``lons`` and ``lats`` are supplied by the caller (presumably pytest
    fixtures -- confirm against the fixture definitions of this test module).
    """
    # Shape of the 2-D mesh spanned by the coordinate vectors.
    expected_shape = np.meshgrid(lons, lats)[0].shape
    target_grid = calc_grid(lons, lats, decimals=4)
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    assert isinstance(target_grid, dict)
    assert target_grid['mask'].shape == expected_shape
예제 #2
0
def vic2nc(options, global_atts, domain_dict, fields):
    """Convert VIC output files (ascii, binary or netcdf) to netCDF format.

    Parameters
    ----------
    options : dict
        Run configuration. Keys read here: ``memory_mode``, ``chunksize``,
        ``out_directory``, ``input_files`` (glob pattern),
        ``input_file_format``, ``calendar``, ``start_date``, ``end_date``,
        ``time_segment``, ``out_file_prefix``, ``out_file_format``,
        ``snow_bands``, ``veg_tiles``, ``soil_layers``, ``precision`` and,
        depending on the input format, ``bin_start_date``,
        ``bin_end_date`` and ``bin_dt_sec``.
    global_atts : dict
        Global attributes passed to every segment as keyword arguments.
        When ``domain_dict`` is given, the key ``target_grid_file`` is added
        to this dict in place.
    domain_dict : dict
        Domain description (``filename`` and ``y_x_dims`` are read here). If
        falsy, a regular grid is built from the coordinates of the input
        files and the dimensions default to ``['lat', 'lon']``.
    fields : dict
        Mapping of field name to a dict with a ``column`` entry (scalar or
        sequence for multi-level fields) and optional ``type``,
        ``bin_dtype`` and ``bin_mult`` entries.

    Raises
    ------
    ValueError
        If the memory mode, the input file format or the time segment
        option is not recognised, or if a field lacks ``bin_dtype`` for
        binary input.
    """

    # determine run mode
    if (options['memory_mode'] == 'standard') \
            and (options['chunksize'] in ['all', 'All', 'ALL', 0]):
        memory_mode = 'big_memory'
    else:
        memory_mode = options['memory_mode']

    print("\n-------------------------------")
    print("Configuration File Options")
    print("-------------OPTIONS-------------")
    for pair in options.items():
        print("{0}: {1}".format(*pair))
    print('Fields: {0}'.format(", ".join(fields.keys())))
    if domain_dict:
        print("-------------DOMAIN--------------")
        for pair in domain_dict.items():
            print("{0}: {1}".format(*pair))
    print("--------GLOBAL_ATTRIBUTES--------")
    for pair in global_atts.items():
        print("{0}: {1}".format(*pair))
    print("--------RUN MODE--------")
    print('Memory Mode: {0}'.format(memory_mode))
    if memory_mode == 'standard':
        print('Chunksize={0}'.format(options['chunksize']))
    print("---------------------------------\n")

    # Fail early: an unrecognised mode would otherwise create the output
    # files but never write data to them or close them.
    if memory_mode not in ('big_memory', 'standard', 'original'):
        raise ValueError('Unknown memory mode: {0}'.format(memory_mode))
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Make output directory
    if not os.path.exists(options['out_directory']):
        os.makedirs(options['out_directory'])
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Make pairs (i.e. find inds)
    files = glob(options['input_files'])
    points = get_file_coords(files)
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Get target grid information
    if domain_dict:
        domain = read_domain(domain_dict)
        target_grid_file = path.split(domain_dict['filename'])[1]
        global_atts['target_grid_file'] = target_grid_file
    else:
        # must be a regular grid, build from file names
        # NOTE(review): arguments are passed as (lats, lons) while the tests
        # in this file call calc_grid(lons, lats, ...) -- confirm the
        # expected argument order against calc_grid's definition.
        domain = calc_grid(points.get_lats(), points.get_lons())
        domain_dict = {'y_x_dims': ['lat', 'lon']}
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Get grid index locations
    points = get_grid_inds(domain, points)
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Get timestamps
    file_format = options['input_file_format'].lower()
    is_binary = file_format == 'binary'

    if file_format == 'ascii':
        if ('bin_start_date' in options
                and 'bin_end_date' in options
                and 'bin_dt_sec' in options):
            vic_datelist, vic_ordtime = make_dates(
                options['bin_start_date'],
                options['bin_end_date'],
                options['bin_dt_sec'],
                calendar=options['calendar'])
        else:
            # No explicit time axis configured: read it from the first file
            vic_datelist = get_dates(files[0])
            vic_ordtime = date2num(vic_datelist, TIMEUNITS,
                                   calendar=options['calendar'])

    elif file_format in ['binary', 'netcdf']:
        vic_datelist, vic_ordtime = make_dates(options['bin_start_date'],
                                               options['bin_end_date'],
                                               options['bin_dt_sec'],
                                               calendar=options['calendar'])

    else:
        raise ValueError(
            'Unknown input file format: {}. Valid options are '
            'ascii, binary or netcdf'.format(options['input_file_format']))
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Determine time segmentation
    if options['start_date']:
        start_date = datetime.strptime(options['start_date'], TIMESTAMPFORM)
        if start_date < vic_datelist[0]:
            print("WARNING: Start date in configuration file is before "
                  "first date in file.")
            start_date = vic_datelist[0]
            print('WARNING: New start date is {0}'.format(start_date))
    else:
        start_date = vic_datelist[0]

    if options['end_date']:
        end_date = datetime.strptime(options['end_date'], TIMESTAMPFORM)
        if end_date > vic_datelist[-1]:
            print("WARNING: End date in configuration file is after "
                  "last date in file.")
            end_date = vic_datelist[-1]
            print('WARNING: New end date is {0}'.format(end_date))
    else:
        end_date = vic_datelist[-1]

    # Ordinal Time
    start_ord = date2num(start_date, TIMEUNITS, calendar=options['calendar'])
    end_ord = date2num(end_date, TIMEUNITS, calendar=options['calendar'])

    print("netCDF Start Date: {0}".format(start_date))
    print("netCDF End Date: {0}".format(end_date))

    num_segments, segment_dates = _segment_dates(
        options['time_segment'], start_date, end_date, start_ord, end_ord,
        options['calendar'])
    print("Number of files: {0}".format(len(segment_dates) - 1))
    assert len(segment_dates) == num_segments + 1

    # Make sure the first and last dates are start/end_date
    segment_dates[0] = start_date
    # The extra minute pushes the final bound past end_date so that the
    # bisect_left lookup below also includes the end_date timestep.
    segment_dates[-1] = end_date + timedelta(minutes=1)
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Setup Segments
    segments = deque()

    for num in pyrange(num_segments):
        # Segment time bounds
        t0 = segment_dates[num]
        t1 = segment_dates[num + 1]

        # Get segment inds
        i0 = bisect_left(vic_datelist, t0)
        i1 = bisect_left(vic_datelist, t1)

        # Make segment filename (with path)
        filename = path.join(
            options['out_directory'],
            _segment_filename(options['out_file_prefix'],
                              options['time_segment'], t0, t1))

        # Setup segment and initialize netcdf
        segment = Segment(num, i0, i1, options['out_file_format'],
                          filename, memory_mode=memory_mode)
        segment.nc_globals(**global_atts)
        segment.nc_time(t0, t1, vic_ordtime, options['calendar'])
        segment.nc_dimensions(snow_bands=options['snow_bands'],
                              veg_tiles=options['veg_tiles'],
                              soil_layers=options['soil_layers'])

        segment.nc_domain(domain)
        segment.nc_fields(fields,
                          domain_dict['y_x_dims'], options['precision'])

        print(repr(segment))
        segments.append(segment)
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    # Get column numbers and names (will help speed up reading)
    if options['precision'] == 'double':
        prec = NC_DOUBLE
    else:
        prec = NC_FLOAT

    names, usecols, dtypes, bin_dtypes, bin_mults = _column_specs(
        fields, prec, is_binary)

    print('setting point attributes (fileformat, names, usecols, and dtypes)')
    # pandas.read_table does not 'honor' the order of the columns in usecols
    # it simply uses them in ascending order. So the names need to be sorted
    # the same way. For example, if the columns in the VIC file are:
    # 3: prcp; 4: evap; 5: runoff; 6; baseflow; 7: sm1; 8: sm2; 9: sm3; 10: swe
    # and this is parsed from the configuration file as
    # usecols = [3, 4, 5, 6, 10, 7, 8, 9]
    # names=['prcp', 'evap', 'runoff', 'baseflow', 'swe', 'sm1', 'sm2', 'sm3']
    # then without sorting, the netcdf file will have the wrong variables.
    # The following ensures that the names are sorted in increasing column
    # order. Note that sorted(usecols) is not strictly necessary, since
    # apparently that is done in read_table, but it keeps the names and columns
    # in the same order.
    # NOTE(review): dtypes, bin_dtypes and bin_mults are NOT re-ordered along
    # with names/usecols; confirm whether the point readers expect them in
    # configuration order or in column order.
    names = [x for (y, x) in sorted(pyzip(usecols, names))]
    usecols = sorted(usecols)
    points.set_names(names)
    points.set_usecols(usecols)
    points.set_dtypes(dtypes)
    # set binary attributes
    if is_binary:
        points.set_bin_dtypes(bin_dtypes)
        points.set_bin_mults(bin_mults)
    points.set_fileformat(options['input_file_format'])
    print('done')
    # ---------------------------------------------------------------- #

    # ---------------------------------------------------------------- #
    if memory_mode == 'big_memory':
        # ------------------------------------------------------------ #
        # run in big memory mode
        for segment in segments:
            segment.allocate()

        while points:
            point = points.popleft()
            point.open()
            try:
                point.read()
            finally:
                # release the file handle even if reading fails
                point.close()

            for segment in segments:
                segment.nc_add_data_to_array(point)

        for segment in segments:
            segment.nc_write_data_from_array()
            segment.nc_close()
        # ------------------------------------------------------------ #

    elif memory_mode == 'standard':
        # ------------------------------------------------------------ #
        # Open VIC files and put data into netcdfs

        chunk = Plist()
        while points:
            point = points.popleft()
            point.open()
            try:
                point.read()
            finally:
                # release the file handle even if reading fails
                point.close()
            chunk.append(point)
            # NOTE(review): '>' flushes once the chunk holds chunksize + 1
            # points; confirm whether '>=' was intended.
            if len(chunk) > int(options['chunksize']) or len(points) == 0:
                for segment in segments:
                    segment.nc_add_data_standard(chunk)
                chunk = Plist()
            del point
        # ------------------------------------------------------------ #

        # ------------------------------------------------------------ #
        # Close the netcdf files
        for segment in segments:
            segment.nc_close()
        # ------------------------------------------------------------ #
    elif memory_mode == 'original':
        # ------------------------------------------------------------ #
        # Run in original memory mode (a.k.a. vic2nc.c mode)
        # Open all files
        for point in points:
            point.open()

        while segments:
            segment = segments.popleft()
            segment.allocate()
            count = segment.count

            for point in points:
                point.read(count)
                segment.nc_add_data_to_array(point)

            segment.nc_write_data_from_array()
            segment.nc_close()

        for point in points:
            point.close()
        # ------------------------------------------------------------ #


def _segment_dates(time_segment, start_date, end_date, start_ord, end_ord,
                   calendar):
    """Return ``(num_segments, segment_dates)`` for a segmentation option.

    ``segment_dates`` is a list of ``num_segments + 1`` boundaries; the caller
    overwrites the first and last entries with the actual start/end dates.
    Raises ValueError for an unknown ``time_segment``.
    """
    segment_dates = []
    if time_segment == 'day':
        # calendar insensitive
        # assumes TIMEUNITS is expressed in days (step of 1 == one day)
        # -- TODO confirm against the TIMEUNITS definition.
        # int() because range() does not accept the float from np.ceil.
        num_segments = int(np.ceil(end_ord - start_ord))
        if start_date.hour == 0:
            ordinals = np.arange(start_ord, end_ord + 1, 1)
        else:
            # allow start at time other than 0: the first boundary is the
            # start itself, later boundaries fall on whole ordinals.
            # (list.append returns None, so the boundaries are concatenated.)
            ordinals = np.concatenate(
                ([start_ord], np.arange(np.ceil(start_ord), end_ord + 1, 1)))
        segment_dates = list(num2date(ordinals, TIMEUNITS, calendar=calendar))
    elif time_segment == 'month':
        num_segments = (end_date.year - start_date.year) * 12 \
            + end_date.month - start_date.month + 1
        month = start_date.month
        year = start_date.year
        for _ in pyrange(num_segments + 1):
            segment_dates.append(datetime(year, month, 1))
            month += 1
            if month == 13:
                month = 1
                year += 1
    elif time_segment == 'year':
        num_segments = end_date.year - start_date.year + 1
        year = start_date.year
        for _ in pyrange(num_segments + 1):
            segment_dates.append(datetime(year, 1, 1))
            year += 1
    elif time_segment == 'decade':
        # floor division keeps the count an integer under true division
        num_segments = (end_date.year - start_date.year) // 10 + 1
        year = start_date.year
        for _ in pyrange(num_segments + 1):
            segment_dates.append(datetime(year, 1, 1))
            year += 10
    elif time_segment == 'all':
        num_segments = 1
        segment_dates = [start_date, end_date]
    else:
        raise ValueError(
            'Unknown timesegment options {0}'.format(time_segment))
    return num_segments, segment_dates


def _segment_filename(prefix, time_segment, t0, t1):
    """Return the output file name (without directory) for one segment."""
    if time_segment == 'all':
        stamp = "{0}-{1}".format(t0.strftime('%Y%m%d'),
                                 t1.strftime('%Y%m%d'))
    else:
        # 'decade' files are labelled with the first year of the segment.
        stamp_formats = {'day': '%Y-%m-%d',
                         'month': '%Y-%m',
                         'year': '%Y',
                         'decade': '%Y'}
        stamp = t0.strftime(stamp_formats[time_segment])
    return "{0}.{1}.nc".format(prefix, stamp)


def _per_level(value, nlevels):
    """Return ``value`` as a list with one entry per level.

    A list is returned unchanged (its length is not checked against
    ``nlevels``); any other value is repeated ``nlevels`` times.
    """
    if isinstance(value, list):
        return value
    return [value] * nlevels


def _column_specs(fields, prec, is_binary):
    """Collect per-column names, column numbers, dtypes and binary settings.

    Returns ``(names, usecols, dtypes, bin_dtypes, bin_mults)`` in the
    iteration order of ``fields``. The two binary lists stay empty unless
    ``is_binary`` is true. Raises ValueError when a field lacks ``bin_dtype``
    for binary input.
    """
    names = []
    usecols = []
    dtypes = []
    bin_dtypes = []
    bin_mults = []

    for name, field in fields.items():
        column = field['column']
        dtype = field['type'] if 'type' in field else prec

        if is_binary:
            if 'bin_dtype' not in field:
                raise ValueError('bin_dtype not in field: {}'.format(name))
            bin_mult = field['bin_mult'] if 'bin_mult' in field else 1.0

        if not np.isscalar(column):
            # multiple levels: one entry per level, named <name><level index>
            nlevels = len(column)
            for i, col in enumerate(column):
                names.append(name + str(i))
                usecols.append(col)
            dtypes.extend(_per_level(dtype, nlevels))
            if is_binary:
                bin_dtypes.extend(_per_level(field['bin_dtype'], nlevels))
                bin_mults.extend(_per_level(bin_mult, nlevels))
        else:
            # no levels
            names.append(name)
            usecols.append(column)
            dtypes.append(dtype)
            if is_binary:
                bin_dtypes.append(field['bin_dtype'])
                bin_mults.append(bin_mult)

    return names, usecols, dtypes, bin_dtypes, bin_mults
예제 #3
0
def test_calc_grid_standard(lons, lats):
    """Check that calc_grid returns a dict whose mask matches the mesh shape.

    ``lons`` and ``lats`` are supplied by the caller (presumably pytest
    fixtures -- confirm against the fixture definitions of this test module).
    """
    # Shape of the 2-D mesh spanned by the coordinate vectors.
    expected_shape = np.meshgrid(lons, lats)[0].shape
    target_grid = calc_grid(lons, lats, decimals=4)
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    assert isinstance(target_grid, dict)
    assert target_grid['mask'].shape == expected_shape