def directory_to_streams(directory, config=None):
    """Read in a directory of data to a list of streams.

    Note:
    If the directory only includes files that are readable by this library
    then the task is rather simple. However, often times data directories
    include random subdirectories and/or zip files, which we try to crawl in
    a sensible fashion.

    The data are copied into a temporary directory next to ``directory`` and
    read from that copy; the copy is removed before returning. The paths
    reported for unprocessed files therefore point into that (already
    deleted) temporary copy.

    Args:
        directory (str):
            Directory of ground motion files (streams).
        config (dict):
            Configuration options.

    Returns:
        tuple: (List of obspy streams,
                List of unprocessed files,
                List of errors associated with trying to read unprocessed
                files).
    """
    # Use a temp dir so that we don't modify data on disk since that may not be
    # expected or desired in all cases. We create the temporary directory in
    # the parent directory, which permits using shutil.copytree to duplicate
    # the data prior to processing.
    #
    # Normalize the path first: with a trailing separator ("data/"),
    # os.path.dirname returns the directory itself, which would place the
    # temporary directory *inside* the tree that is about to be copied.
    source_dir = os.path.abspath(directory)
    intermediate_dir = tempfile.mkdtemp(dir=os.path.dirname(source_dir))
    temp_dir = os.path.join(intermediate_dir, "directory_to_streams")

    streams = []
    unprocessed_files = []
    unprocessed_file_errors = []
    try:
        shutil.copytree(source_dir, temp_dir)
        flatten_directory(temp_dir)
        # ---------------------------------------------------------------------
        # Read streams
        # ---------------------------------------------------------------------
        for file_path in glob.glob(os.path.join(temp_dir, "*")):
            file_name = os.path.basename(file_path)
            file_ext = os.path.splitext(file_name)[1].lower()
            if file_ext in EXT_IGNORE:
                continue
            try:
                logging.debug(f"Attempting to read: {file_path}")
                streams += read_data(file_path, config=config)
            except KeyboardInterrupt:
                # A user interrupt is not a per-file read failure; let it
                # abort the whole operation (cleanup still runs below).
                raise
            except BaseException as ex:
                # Best effort: remember the failure and keep reading the
                # remaining files.
                logging.info(f"Failed to read file: {file_name}")
                unprocessed_files.append(file_path)
                unprocessed_file_errors.append(ex)
    finally:
        # Always remove the temporary copy. A second attempt is made if the
        # first removal fails with an OSError; a second failure propagates.
        try:
            shutil.rmtree(intermediate_dir)
        except OSError:
            shutil.rmtree(intermediate_dir)

    return streams, unprocessed_files, unprocessed_file_errors
Exemple #2
0
def directory_to_streams(directory):
    """Read in a directory of data to a list of streams.

    Note:
    If the directory only includes files that are readable by this library
    then the task is rather simple. However, often times data directories
    include random subdirectories and/or zip files, which we try to crawl in
    a sensible fashion.

    The data are copied into a temporary directory and read from that copy;
    the copy is removed before returning, so the paths reported for
    unprocessed files point into a location that no longer exists.

    Args:
        directory (str):
            Directory of ground motion files (streams).

    Returns:
        tuple: (List of obspy streams,
                List of unprocessed files,
                List of errors associated with trying to read unprocessed
                files).
    """
    # Use a temp dir so that we don't modify data on disk since that may not be
    # expected or desired in all cases. shutil.copytree needs a destination
    # that does not exist yet, hence the child path inside the fresh temp dir.
    temp_root = tempfile.mkdtemp()
    temp_dir = os.path.join(temp_root, 'directory_to_streams')

    streams = []
    unprocessed_files = []
    unprocessed_file_errors = []
    try:
        shutil.copytree(directory, temp_dir)
        flatten_directory(temp_dir)
        # ---------------------------------------------------------------------
        # Read streams
        # ---------------------------------------------------------------------
        for file_path in glob.glob(os.path.join(temp_dir, "*")):
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext in EXT_IGNORE:
                continue
            try:
                logging.debug('Attempting to read: %s', file_path)
                streams += read_data(file_path)
            except Exception as ex:
                # Best effort: remember the failure and keep going.
                unprocessed_files.append(file_path)
                unprocessed_file_errors.append(ex)
    finally:
        # Remove the directory created by mkdtemp, not just the copied child;
        # otherwise an empty temp directory is leaked on every call. The root
        # always exists here, so cleanup cannot mask a copytree failure.
        shutil.rmtree(temp_root)

    return streams, unprocessed_files, unprocessed_file_errors
def directory_to_streams(directory):
    """Read in a directory of data to a list of streams.

    Note:
    If the directory only includes files that are readable by this library
    then the task is rather simple. However, often times data directories
    include random subdirectories and/or zip files, which we try to crawl in
    a sensible fashion.

    Reading happens on a temporary copy of ``directory`` that is deleted
    before this function returns; the unprocessed file paths in the result
    refer to that deleted copy.

    Args:
        directory (str):
            Directory of ground motion files (streams).

    Returns:
        tuple: (List of obspy streams,
                List of unprocessed files,
                List of errors associated with trying to read unprocessed
                files).
    """

    # Use a temp dir so that we don't modify data on disk since that may not be
    # expected or desired in all cases. Keep a handle on the directory that
    # mkdtemp actually created so that it can be removed in full afterwards.
    scratch_root = tempfile.mkdtemp()
    temp_dir = os.path.join(scratch_root, 'directory_to_streams')

    streams = []
    unprocessed_files = []
    unprocessed_file_errors = []
    try:
        shutil.copytree(directory, temp_dir)
        flatten_directory(temp_dir)
        # ---------------------------------------------------------------------
        # Read streams
        # ---------------------------------------------------------------------
        for file_path in glob.glob(os.path.join(temp_dir, "*")):
            file_ext = os.path.splitext(file_path)[1].lower()
            if file_ext in EXT_IGNORE:
                continue
            try:
                logging.debug('Attempting to read: %s', file_path)
                streams += read_data(file_path)
            except Exception as ex:
                # A file that cannot be read is recorded, not fatal.
                unprocessed_files.append(file_path)
                unprocessed_file_errors.append(ex)
    finally:
        # Deleting only temp_dir would leave the mkdtemp parent behind, and
        # would itself raise (hiding the real error) if copytree failed before
        # temp_dir was created. Removing the parent avoids both problems.
        shutil.rmtree(scratch_root)

    return streams, unprocessed_files, unprocessed_file_errors