Code Example #1
File: utils.py  Project: OasisWang/yatsm
def csvfile_to_dataframe(input_file, date_format='%Y%j'):
    """ Return sorted filenames of images from input text file

    Args:
      input_file (str): text file of dates and files
      date_format (str): format of dates in file

    Returns:
      pd.DataFrame: pd.DataFrame of dates, sensor IDs, and filenames

    """
    df = pd.read_csv(input_file)

    # Guess and convert date field
    date_col = [i for i, n in enumerate(df.columns) if 'date' in n.lower()]
    if not date_col:
        raise KeyError('Could not find date column in input file')
    if len(date_col) > 1:
        logger.warning('Multiple date columns found in input CSV file. '
                       'Using %s' % df.columns[date_col[0]])
    date_col = df.columns[date_col[0]]

    df[date_col] = pd.to_datetime(
        df[date_col], format=date_format).map(lambda x: dt.toordinal(x))

    return df
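
The excerpt omits its module-level context. A minimal usage sketch, assuming the imports the body implies (pandas as pd, the datetime class bound to dt, a module logger) and a hypothetical example_images.csv whose date column uses the default '%Y%j' (year plus day-of-year) format:

import logging
from datetime import datetime as dt

import pandas as pd

logger = logging.getLogger(__name__)

# Hypothetical CSV contents:
#   date,sensor,filename
#   2000123,LT5,images/LT5_2000123_stack.gtif
df = csvfile_to_dataframe('example_images.csv')
print(df['date'].head())  # dates converted to proleptic ordinal integers
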
Code Example #2
File: utils.py  Project: valpasq/yatsm
def csvfile_to_dataframe(input_file, date_format='%Y%j'):
    """ Return sorted filenames of images from input text file

    Args:
      input_file (str): text file of dates and files
      date_format (str): format of dates in file

    Returns:
      pd.DataFrame: pd.DataFrame of dates, sensor IDs, and filenames

    """
    df = pd.read_csv(input_file)

    # Guess and convert date field
    date_col = [i for i, n in enumerate(df.columns) if 'date' in n.lower()]
    if not date_col:
        raise KeyError('Could not find date column in input file')
    if len(date_col) > 1:
        logger.warning('Multiple date columns found in input CSV file. '
                       'Using %s' % df.columns[date_col[0]])
    date_col = df.columns[date_col[0]]

    df[date_col] = pd.to_datetime(
        df[date_col], format=date_format).map(lambda x: dt.toordinal(x))

    return df
Code Example #3
File: cache.py  Project: jmorton/yatsm
def read_cache_file(cache_filename, image_IDs=None):
    """ Returns image data from a cache file

    If `image_IDs` is not None, this function will try to ensure that the data
    in the cache file come from the list of image IDs provided. If the cache
    file does not contain a list of image IDs, it will skip the check and
    return the cached data.

    Args:
      cache_filename (str): cache filename
      image_IDs (iterable, optional): list of image IDs corresponding to data
        in cache file. If not specified, function will not check for
        correspondence (default: None)

    Returns:
      np.ndarray, or None: Return Y as np.ndarray if possible and if the
        cache file passes the consistency check specified by `image_IDs`; else
        None

    """
    try:
        cache = np.load(cache_filename)
    except IOError:
        return None

    if _image_ID_str in cache.files and image_IDs is not None:
        if not np.array_equal(image_IDs, cache[_image_ID_str]):
            logger.warning('Cache file data in {f} do not match images '
                           'specified'.format(f=cache_filename))
            return None

    return cache['Y']
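
A usage sketch under the assumptions the excerpt implies: numpy imported as np, a module logger, and _image_ID_str as the module-level key naming the image-ID array inside the .npz cache archive. The cache filename, the IDs, and the assumed constant value below are hypothetical:

import logging

import numpy as np

logger = logging.getLogger(__name__)
_image_ID_str = 'image_IDs'  # assumption; the module constant's real value may differ

image_IDs = ['LT50120312000123', 'LT50120312000139']  # hypothetical IDs
Y = read_cache_file('yatsm_line100.npz', image_IDs=image_IDs)
if Y is None:
    print('Cache unreadable or inconsistent; read the images directly instead')
else:
    print(Y.shape)  # e.g. (n_bands, n_images, n_cols)
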
Code Example #4
File: cache.py  Project: nmatton/yatsm
def read_cache_file(cache_filename, image_IDs=None):
    """ Returns image data from a cache file

    If ``image_IDs`` is not None, this function will try to ensure that the
    data in the cache file come from the list of image IDs provided. If the
    cache file does not contain a list of image IDs, it will skip the check
    and return the cached data.

    Args:
        cache_filename (str): cache filename
        image_IDs (iterable, optional): list of image IDs corresponding to data
            in cache file. If not specified, function will not check for
            correspondence (default: None)

    Returns:
        np.ndarray, or None: Return Y as np.ndarray if possible and if the
            cache file passes the consistency check specified by ``image_IDs``,
            else None

    """
    try:
        cache = np.load(cache_filename)
    except IOError:
        return None

    if _image_ID_str in cache.files and image_IDs is not None:
        if not np.array_equal(image_IDs, cache[_image_ID_str]):
            logger.warning('Cache file data in {f} do not match images '
                           'specified'.format(f=cache_filename))
            return None

    return cache['Y']
Code Example #5
File: cache.py  Project: jmorton/yatsm
def test_cache(dataset_config):
    """ Test cache directory for ability to read from or write to

    Args:
      dataset_config (dict): dictionary of dataset configuration options

    Returns:
      (read_cache, write_cache): tuple of bools describing ability to read from
        and write to cache directory

    """
    # Try to find / use cache
    read_cache = False
    write_cache = False

    cache_dir = dataset_config.get('cache_line_dir')
    if cache_dir:
        # Test existence
        if os.path.isdir(cache_dir):
            if os.access(cache_dir, os.R_OK):
                read_cache = True
            if os.access(cache_dir, os.W_OK):
                write_cache = True
            if read_cache and not write_cache:
                logger.warning('Cache directory exists but is not writable')
        else:
            # If it doesn't already exist, can we create it?
            try:
                os.makedirs(cache_dir)
            except OSError:
                logger.warning('Could not create cache directory')
            else:
                read_cache = True
                write_cache = True

    logger.debug(
        'Attempt reading in from cache directory?: {b}'.format(b=read_cache))
    logger.debug(
        'Attempt writing to cache directory?: {b}'.format(b=write_cache))

    return read_cache, write_cache
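
A short sketch of the calling pattern, assuming os and a module logger are imported as in the source module; the directory path is hypothetical:

import logging
import os

logger = logging.getLogger(__name__)

dataset_config = {'cache_line_dir': '/tmp/yatsm_cache'}  # hypothetical path
read_cache, write_cache = test_cache(dataset_config)
if not (read_cache or write_cache):
    print('Proceeding without a cache')
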
Code Example #6
File: cache.py  Project: nmatton/yatsm
def test_cache(dataset_config):
    """ Test cache directory for ability to read from or write to

    Args:
        dataset_config (dict): dictionary of dataset configuration options

    Returns:
        tuple: tuple of bools describing ability to read from and write to
            cache directory

    """
    # Try to find / use cache
    read_cache = False
    write_cache = False

    cache_dir = dataset_config.get('cache_line_dir')
    if cache_dir:
        # Test existence
        if os.path.isdir(cache_dir):
            if os.access(cache_dir, os.R_OK):
                read_cache = True
            if os.access(cache_dir, os.W_OK):
                write_cache = True
            if read_cache and not write_cache:
                logger.warning('Cache directory exists but is not writable')
        else:
            # If it doesn't already exist, can we create it?
            try:
                os.makedirs(cache_dir)
            except OSError:
                logger.warning('Could not create cache directory')
            else:
                read_cache = True
                write_cache = True

    logger.debug('Attempt reading in from cache directory?: {b}'.format(
        b=read_cache))
    logger.debug('Attempt writing to cache directory?: {b}'.format(
        b=write_cache))

    return read_cache, write_cache
Code Example #7
File: config_parser.py  Project: jmorton/yatsm
def parse_config_file(config_file):
    """ Parses config file into dictionary of attributes """

    config = configparser.ConfigParser(allow_no_value=True)
    config.readfp(StringIO.StringIO(defaults))
    config.read(config_file)

    version = config.get('metadata', 'version')

    # Warn on difference in minor or major version
    mm_config_version = version.split('.')[0:2]
    mm_yatsm_version = __version__.split('.')[0:2]
    if mm_config_version[0] != mm_yatsm_version[0] or \
            mm_config_version[1] != mm_yatsm_version[1]:
        logger.warning('Config file version does not match YATSM version')
        logger.warning('    config file: v{v}'.format(v=version))
        logger.warning('    YATSM: v{v}'.format(v=__version__))

    dataset_config = {}
    yatsm_config = {}

    dataset_config.update(parse_dataset_config(config))
    yatsm_config.update(parse_algorithm_config(config))
    yatsm_config.update(parse_phenology_config(config))
    dataset_config.update(parse_classification_config(config))

    return (dataset_config, yatsm_config)
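
The snippet is Python 2 idiom (the StringIO module and ConfigParser.readfp) and leans on module-level names the excerpt omits: configparser, StringIO, defaults (a string of default INI options), __version__, a logger, and the four parse_* helpers. Assuming that context, a hedged call with a hypothetical INI file:

# example.ini is hypothetical; it needs at least a [metadata] section with a
# 'version' option, plus whatever sections the parse_* helpers expect.
dataset_config, yatsm_config = parse_config_file('example.ini')
print(sorted(dataset_config))
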
Code Example #8
File: utils.py  Project: OasisWang/yatsm
def iter_records(records, warn_on_empty=False, yield_filename=False):
    """ Iterates over records, returning result NumPy array

    Args:
      records (list): List containing filenames of results
      warn_on_empty (bool, optional): Log a warning if a result file contains
        no records (default: False)
      yield_filename (bool, optional): Yield the filename and the record

    Yields:
      np.ndarray or tuple: Result saved in record and the filename, if desired

    """
    n_records = len(records)

    for _i, r in enumerate(records):
        # Verbose progress
        if np.mod(_i, 100) == 0:
            logger.debug('{0:.1f}%'.format(_i / n_records * 100))
        # Open output
        try:
            rec = np.load(r)['record']
        except (ValueError, AssertionError, IOError) as e:
            logger.warning('Error reading a result file (may be corrupted) '
                           '({}): {}'.format(r, str(e)))
            continue

        if rec.shape[0] == 0:
            # No values in this file
            if warn_on_empty:
                logger.warning('Could not find results in {f}'.format(f=r))
            continue

        if yield_filename:
            yield rec, r
        else:
            yield rec
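
A usage sketch, assuming numpy as np and a module logger as in the excerpts above; the glob pattern for result files is hypothetical:

import glob

records = sorted(glob.glob('output/yatsm_r*.npz'))

# One structured 'record' array per readable, non-empty result file
for rec in iter_records(records, warn_on_empty=True):
    print(rec.shape)

# Or pair each record with the file it came from
for rec, fname in iter_records(records, yield_filename=True):
    print(fname, rec.shape)
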
Code Example #9
File: utils.py  Project: valpasq/yatsm
def iter_records(records, warn_on_empty=False, yield_filename=False):
    """ Iterates over records, returning result NumPy array

    Args:
      records (list): List containing filenames of results
      warn_on_empty (bool, optional): Log a warning if a result file contains
        no records (default: False)
      yield_filename (bool, optional): Yield the filename and the record

    Yields:
      np.ndarray or tuple: Result saved in record and the filename, if desired

    """
    n_records = len(records)

    for _i, r in enumerate(records):
        # Verbose progress
        if np.mod(_i, 100) == 0:
            logger.debug('{0:.1f}%'.format(_i / n_records * 100))
        # Open output
        try:
            rec = np.load(r)['record']
        except (ValueError, AssertionError, IOError) as e:
            logger.warning('Error reading a result file (may be corrupted) '
                           '({}): {}'.format(r, str(e)))
            continue

        if rec.shape[0] == 0:
            # No values in this file
            if warn_on_empty:
                logger.warning('Could not find results in {f}'.format(f=r))
            continue

        if yield_filename:
            yield rec, r
        else:
            yield rec
Code Example #10
File: cache.py  Project: jmorton/yatsm
def update_cache_file(images,
                      image_IDs,
                      old_cache_filename,
                      new_cache_filename,
                      line,
                      reader,
                      reader_kwargs={}):
    """ Modify an existing cache file to contain data within `images`

    This should be useful for updating a set of cache files to reflect
    modifications to the timeseries dataset without completely reading the
    data into another cache file.

    For example, the cache file could be updated to reflect the deletion of
    a misregistered or cloudy image. Another common example would be for
    updating cache files to include newly acquired observations.

    Note that this updater will not handle updating cache files to include
    new bands.

    Args:
      images (iterable): list of new image filenames
      image_IDs (iterable): list of new image identifying strings
      old_cache_filename (str): filename of cache file to update
      new_cache_filename (str): filename of new cache file which includes
        modified data
      line (int): the line of data to be updated
      reader (callable): GDAL or BIP image reader function from `yatsm.readers`
      reader_kwargs (dict): additional keyword arguments for `reader` other
        than the filenames to read and the line to read

    Raises:
      ValueError: Raise error if old cache file does not record `image_IDs`

    """
    images = np.asarray(images)
    image_IDs = np.asarray(image_IDs)

    # Cannot proceed if old cache file doesn't store filenames
    old_cache = np.load(old_cache_filename)
    if _image_ID_str not in old_cache.files:
        raise ValueError('Cannot update cache. '
                         'Old cache file does not store image IDs.')
    old_IDs = old_cache[_image_ID_str]
    old_Y = old_cache['Y']
    nband, _, ncol = old_Y.shape

    # Create new Y and add in values retained from old cache
    new_Y = np.zeros((nband, image_IDs.size, ncol), dtype=old_Y.dtype.type)
    new_IDs = np.zeros(image_IDs.size, dtype=image_IDs.dtype)

    # Check deletions -- find which indices to retain in new cache
    retain_old = np.where(np.in1d(old_IDs, image_IDs))[0]
    if retain_old.size == 0:
        logger.warning('No image IDs in common with the old cache file.')
    else:
        logger.debug('    retaining {r} of {n} images'.format(
            r=retain_old.size, n=old_IDs.size))
        # Find indices of old data to insert into new data
        idx_old_IDs = np.argsort(old_IDs)
        sorted_old_IDs = old_IDs[idx_old_IDs]
        idx_IDs = np.searchsorted(sorted_old_IDs,
                                  image_IDs[np.in1d(image_IDs, old_IDs)])

        retain_old = idx_old_IDs[idx_IDs]

        # Indices to insert into new data
        retain_new = np.where(np.in1d(image_IDs, old_IDs))[0]

        new_Y[:, retain_new, :] = old_Y[:, retain_old, :]
        new_IDs[retain_new] = old_IDs[retain_old]

    # Check additions -- find which indices we need to insert
    insert = np.where(np.in1d(image_IDs, old_IDs, invert=True))[0]

    if retain_old.size == 0 and insert.size == 0:
        raise ValueError('Cannot update cache file -- '
                         'no data retained or added')

    # Read in the remaining data from disk
    if insert.size > 0:
        logger.debug(
            'Inserting {n} new images into cache'.format(n=insert.size))
        insert_Y = reader(images[insert], line, **reader_kwargs)
        new_Y[:, insert, :] = insert_Y
        new_IDs[insert] = image_IDs[insert]

    np.testing.assert_equal(new_IDs, image_IDs)

    # Save
    write_cache_file(new_cache_filename, new_Y, image_IDs)
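
A sketch of an update run. Everything below is hypothetical: the filenames, the IDs, and the stub reader, which only has to match the reader(filenames, line, **reader_kwargs) call above and return an array matching the old cache's (n_bands, n_images, n_cols) layout. The old cache file must already exist and store image IDs:

import numpy as np

def stub_reader(filenames, line, **kwargs):
    # Hypothetical stand-in for a GDAL/BIP reader from yatsm.readers;
    # shape must agree with the old cache's band and column counts.
    return np.zeros((8, len(filenames), 250), dtype=np.int16)

images = ['img_2000139_stack.gtif', 'img_2000155_stack.gtif']  # hypothetical
image_IDs = ['LT50120312000139', 'LT50120312000155']           # hypothetical
update_cache_file(images, image_IDs,
                  'cache_line100_old.npz', 'cache_line100_new.npz',
                  line=100, reader=stub_reader)
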
Code Example #11
File: cache.py  Project: nmatton/yatsm
def update_cache_file(images, image_IDs,
                      old_cache_filename, new_cache_filename,
                      line, reader):
    """ Modify an existing cache file to contain data within `images`

    This should be useful for updating a set of cache files to reflect
    modifications to the timeseries dataset without completely reading the
    data into another cache file.

    For example, the cache file could be updated to reflect the deletion of
    a misregistered or cloudy image. Another common example would be for
    updating cache files to include newly acquired observations.

    Note that this updater will not handle updating cache files to include
    new bands.

    Args:
        images (iterable): list of new image filenames
        image_IDs (iterable): list of new image identifying strings
        old_cache_filename (str): filename of cache file to update
        new_cache_filename (str): filename of new cache file which includes
            modified data
        line (int): the line of data to be updated
        reader (callable): GDAL or BIP image reader function from
            :mod:`yatsm.io.stack_line_readers`

    Raises:
        ValueError: Raise error if old cache file does not record ``image_IDs``

    """
    images = np.asarray(images)
    image_IDs = np.asarray(image_IDs)

    # Cannot proceed if old cache file doesn't store filenames
    old_cache = np.load(old_cache_filename)
    if _image_ID_str not in old_cache.files:
        raise ValueError('Cannot update cache. '
                         'Old cache file does not store image IDs.')
    old_IDs = old_cache[_image_ID_str]
    old_Y = old_cache['Y']
    nband, _, ncol = old_Y.shape

    # Create new Y and add in values retained from old cache
    new_Y = np.zeros((nband, image_IDs.size, ncol),
                     dtype=old_Y.dtype.type)
    new_IDs = np.zeros(image_IDs.size, dtype=image_IDs.dtype)

    # Check deletions -- find which indices to retain in new cache
    retain_old = np.where(np.in1d(old_IDs, image_IDs))[0]
    if retain_old.size == 0:
        logger.warning('No image IDs in common with the old cache file.')
    else:
        logger.debug('    retaining {r} of {n} images'.format(
            r=retain_old.size, n=old_IDs.size))
        # Find indices of old data to insert into new data
        idx_old_IDs = np.argsort(old_IDs)
        sorted_old_IDs = old_IDs[idx_old_IDs]
        idx_IDs = np.searchsorted(sorted_old_IDs,
                                  image_IDs[np.in1d(image_IDs, old_IDs)])

        retain_old = idx_old_IDs[idx_IDs]

        # Indices to insert into new data
        retain_new = np.where(np.in1d(image_IDs, old_IDs))[0]

        new_Y[:, retain_new, :] = old_Y[:, retain_old, :]
        new_IDs[retain_new] = old_IDs[retain_old]

    # Check additions -- find which indices we need to insert
    insert = np.where(np.in1d(image_IDs, old_IDs, invert=True))[0]

    if retain_old.size == 0 and insert.size == 0:
        raise ValueError('Cannot update cache file -- '
                         'no data retained or added')

    # Read in the remaining data from disk
    if insert.size > 0:
        logger.debug('Inserting {n} new images into cache'.format(
            n=insert.size))
        insert_Y = reader.read_row(images[insert], line)
        new_Y[:, insert, :] = insert_Y
        new_IDs[insert] = image_IDs[insert]

    np.testing.assert_equal(new_IDs, image_IDs)

    # Save
    write_cache_file(new_cache_filename, new_Y, image_IDs)