Exemple #1
0
    def test_scan_function_and_scanner_class(self, all_files, image_path):
        """
        Test scan function and Scanner class (in one test to keep overhead of
        copying files down)
        """
        scanner = Scanner()
        path1 = image_path.parent / 'scan1.png'
        path2 = image_path.parent / 'scan2.png'
        # Copy files to a temp folder to avoid wildcard scans.
        with TemporaryWorkingDirectory():
            for filename in all_files:
                shutil.copy(filename, os.curdir)

            scanner.parse(os.curdir)
            scan(paths=os.curdir, plot=path1)

        scanner.plot(path2)
Exemple #2
0
    def test_scan_function_and_scanner_class(self):
        """
        Test scan function and Scanner class (in one test to keep overhead of
        copying files down)
        """
        scanner = Scanner()
        # Copy files to a temp folder to avoid wildcard scans.
        with TemporaryWorkingDirectory():
            for filename in self.all_files:
                shutil.copy(filename, os.curdir)

            scanner.parse(os.curdir)

            with ImageComparison(self.path, 'scan.png') as ic:
                scan(paths=os.curdir, plot=ic.name)

        with ImageComparison(self.path, 'scan.png') as ic:
            scanner.plot(ic.name)
Exemple #3
0
def main(argv=None):
    MatplotlibBackend.switch_backend("AGG", sloppy=False)
    parser = ArgumentParser(
        prog='obspy-sds-report', description=__doc__,
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        '-r', '--sds-root', dest='sds_root', required=True,
        help='Root folder of SDS archive.')
    parser.add_argument(
        '-o', '--output', dest='output', required=True,
        help='Full path (absolute or relative) of output files, without '
             'suffix (e.g. ``/tmp/sds_report``).')
    parser.add_argument(
        '-u', '--update', dest='update', default=False, action="store_true",
        help='Only update latency information, reuse previously computed list '
             'of streams to check and data percentage and gap count. Many '
             'other options e.g. regarding stream selection (``--id``, '
             ' ``--location``, ..) and time span of data quality checks '
             '(``--check-quality-days``) will be without effect if this '
             'option is specified. Only updating latency is significantly '
             'faster than a full analysis run, a normal use case is to do a '
             'full run once or twice per day and update latency every 5 or '
             'ten minutes. An exception is raised if an update is specified '
             'but the necessary file is not yet present.')
    parser.add_argument(
        '-l', '--location', dest='locations', action="append",
        help='Location codes to look for (e.g. ``""`` for empty location code '
             'or ``"00"``). This option can be provided multiple times and '
             'must be specified at least once for a full run (i.e. without '
             '``--update`` option). While network/station combinations are '
             'automatically discovered, only streams whose location codes are '
             'provided here will be discovered and taken into account and '
             'ultimately displayed.')
    parser.add_argument(
        '-c', '--channel', dest='channels', action="append",
        help='Channel codes to look for (e.g. specified three times with '
             '``HHZ``, ``EHZ`` and ``ELZ`` to cover all stations that serve '
             'a broad-band, short-period or low gain vertical channel). '
             'This option can be provided multiple times and must be '
             'specified at least once for a full run (i.e. without '
             '``--update`` option). Only one stream per '
             'network/station/location combination will be displayed, '
             'selected by the lowest latency.')
    parser.add_argument(
        '-i', '--id', dest='ids', action="append", default=[],
        help='SEED IDs of streams that should be included in addition to the '
             'autodiscovery of streams controlled by ``--location`` and '
             '``--channel`` options (e.g. ``IU.ANMO..LHZ``). '
             'This option can be provided multiple times.')
    parser.add_argument(
        '--skip', dest='skip', action="append", default=[],
        help='Networks or stations that should be skipped (e.g. ``IU`` or '
             '``IU.ANMO``). This option can be provided multiple times.')
    parser.add_argument(
        '-f', '--format', default="MSEED", choices=ENTRY_POINTS['waveform'],
        help='Waveform format of SDS archive. Should be "MSEED" in most '
             'cases. Use ``None`` or empty string for format autodection '
             '(slower and should not be necessary in most all cases). '
             'Warning: formats that do not support ``headonly`` '
             'option in ``read()`` operation will be significantly slower).')
    parser.add_argument(
        '--check-backwards-days', dest='check_back_days', default=30,
        type=int, help='Check for latency backwards for this many days.')
    parser.add_argument(
        '--check-quality-days', dest='check_quality_days', default=7,
        type=int, help='Calculate and plot data availability and number of '
                       'gaps for a period of this many days.')
    parser.add_argument(
        '--latency-warn', dest='latency_warn', default=3600,
        type=float, help='Latency warning threshold in seconds.')
    parser.add_argument(
        '--latency-warn-color', dest='latency_warn_color', default="#FFFF33",
        help='Latency warning threshold color (valid HTML color string).')
    parser.add_argument(
        '--latency-error', dest='latency_error', default=24*3600,
        type=float, help='Latency error threshold in seconds.')
    parser.add_argument(
        '--latency-error-color', dest='latency_error_color', default="#E41A1C",
        help='Latency error threshold color (valid HTML color string).')
    parser.add_argument(
        '--percentage-warn', dest='percentage_warn', default=99.5, type=float,
        help='Data availability percentage warning threshold (``0`` to '
             '``100``).')
    parser.add_argument(
        '--gaps-warn', dest='gaps_warn', default=20, type=int,
        help='Gap/overlap number warning threshold.')
    parser.add_argument(
        '--data-quality-warn-color', dest='data_quality_warn_color',
        default="#377EB8",
        help='Data quality (percentage/gap count) warning color '
             '(valid HTML color string).')
    parser.add_argument(
        '--outdated-color', dest='outdated_color', default="#808080",
        help='Color for streams that have no data in check range '
             '(valid HTML color string).')
    parser.add_argument(
        '--ok-color', dest='ok_color', default="#4DAF4A",
        help='Color for streams that pass all checks (valid HTML color '
             'string).')
    parser.add_argument(
        '--background-color', dest='background_color', default="#999999",
        help='Color for background of page (valid HTML color string).')
    parser.add_argument(
        '-V', '--version', action='version', version='%(prog)s ' + __version__)

    args = parser.parse_args(argv)

    now = UTCDateTime()
    stop_time = now - args.check_back_days * 24 * 3600
    client = Client(args.sds_root)
    dtype_streamfile = np.dtype("U10, U30, U10, U10, f8, f8, i8")
    availability_check_endtime = now - 3600
    availability_check_starttime = (
        availability_check_endtime - (args.check_quality_days * 24 * 3600))
    streams_file = args.output + ".txt"
    html_file = args.output + ".html"
    scan_file = args.output + ".png"
    if args.format.upper() == "NONE" or args.format == "":
        args.format = None

    # check whether to set up list of streams to check or use existing list
    # update list of streams once per day at nighttime
    if args.update:
        if not os.path.isfile(streams_file):
            msg = ("Update flag specified, but no output of previous full run "
                   "was present in the expected location (as determined by "
                   "``--output`` flag: {})").format(streams_file)
            raise IOError(msg)
        # use existing list of streams and availability information, just
        # update latency
        nslc = np.loadtxt(streams_file, delimiter=",", dtype=dtype_streamfile)
    else:
        if not args.locations or not args.channels:
            msg = ("At least one location code ``--location`` and at least "
                   "one channel code ``--channel`` must be specified.")
            raise ObsPyException(msg)
        nsl = set()
        # get all network/station combinations in SDS archive
        for net, sta in client.get_all_stations():
            if net in args.skip or ".".join((net, sta)) in args.skip:
                continue
            # for all combinations of user specified location and channel codes
            # check if data is in SDS archive
            for loc in args.locations:
                for cha in args.channels:
                    if client.has_data(net, sta, loc, cha):
                        # for now omit channel information, we only include the
                        # channel with lowest latency later on
                        nsl.add((net, sta, loc))
                        break
        nsl = sorted(nsl)
        nslc = []
        # determine which channel to check for each network/station/location
        # combination
        for net, sta, loc in nsl:
            latency = []
            # check latency of all channels that should be checked
            for cha in args.channels:
                latency_ = client.get_latency(net, sta, loc, cha,
                                              stop_time=stop_time)
                latency.append(latency_ or np.inf)
            # only include the channel with lowest latency in our stream list
            cha = args.channels[np.argmin(latency)]
            latency = np.min(latency)
            nslc.append((net, sta, loc, cha, latency))
        for id in args.ids:
            net, sta, loc, cha = id.split(".")
            latency = client.get_latency(net, sta, loc, cha,
                                         stop_time=stop_time)
            latency = latency or np.inf
            nslc.append((net, sta, loc, cha, latency))
        nslc_ = []
        # request and assemble availability information.
        # this takes pretty long (on network/slow file systems),
        # so we only do it during a full run here, not during update
        for net, sta, loc, cha, latency in nslc:
            percentage, gap_count = client.get_availability_percentage(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
            nslc_.append((net, sta, loc, cha, latency, percentage, gap_count))
        nslc = nslc_
        # write stream list and availability information to file
        nslc = np.array(sorted(nslc), dtype=dtype_streamfile)
        np.savetxt(streams_file, nslc, delimiter=",",
                   fmt=["%s", "%s", "%s", "%s", "%f", "%f", "%d"])
        # generate obspy-scan image
        files = []
        seed_ids = set()
        for nslc_ in nslc:
            net, sta, loc, cha, latency, _, _ = nslc_
            if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
                continue
            seed_ids.add(".".join((net, sta, loc, cha)))
            files += client._get_filenames(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
        scan(files, format=args.format, starttime=availability_check_starttime,
             endtime=availability_check_endtime, plot=scan_file, verbose=False,
             recursive=True, ignore_links=False, seed_ids=seed_ids,
             print_gaps=False)

    # request and assemble current latency information
    data = []
    for net, sta, loc, cha, latency, percentage, gap_count in nslc:
        if args.update:
            latency = client.get_latency(net, sta, loc, cha,
                                         stop_time=stop_time)
            latency = latency or np.inf
        data.append((net, sta, loc, cha, latency, percentage, gap_count))

    # separate out the long dead streams
    data_normal = []
    data_outdated = []
    for data_ in data:
        latency = data_[4]
        if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
            data_outdated.append(data_)
        else:
            data_normal.append(data_)

    # write html output to file
    html = _format_html(args, data_normal, data_outdated)
    with open(html_file, "wt") as fh:
        fh.write(html)
Exemple #4
0
def main(argv=None):
    MatplotlibBackend.switch_backend("AGG", sloppy=False)
    parser = ArgumentParser(
        prog='obspy-sds-report', description=__doc__,
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        '-r', '--sds-root', dest='sds_root', required=True,
        help='Root folder of SDS archive.')
    parser.add_argument(
        '-o', '--output', dest='output', required=True,
        help='Full path (absolute or relative) of output files, without '
             'suffix (e.g. ``/tmp/sds_report``).')
    parser.add_argument(
        '-u', '--update', dest='update', default=False, action="store_true",
        help='Only update latency information, reuse previously computed list '
             'of streams to check and data percentage and gap count. Many '
             'other options e.g. regarding stream selection (``--id``, '
             ' ``--location``, ..) and time span of data quality checks '
             '(``--check-quality-days``) will be without effect if this '
             'option is specified. Only updating latency is significantly '
             'faster than a full analysis run, a normal use case is to do a '
             'full run once or twice per day and update latency every 5 or '
             'ten minutes. An exception is raised if an update is specified '
             'but the necessary file is not yet present.')
    parser.add_argument(
        '-l', '--location', dest='locations', action="append",
        help='Location codes to look for (e.g. ``""`` for empty location code '
             'or ``"00"``). This option can be provided multiple times and '
             'must be specified at least once for a full run (i.e. without '
             '``--update`` option). While network/station combinations are '
             'automatically discovered, only streams whose location codes are '
             'provided here will be discovered and taken into account and '
             'ultimately displayed.')
    parser.add_argument(
        '-c', '--channel', dest='channels', action="append",
        help='Channel codes to look for (e.g. specified three times with '
             '``HHZ``, ``EHZ`` and ``ELZ`` to cover all stations that serve '
             'a broad-band, short-period or low gain vertical channel). '
             'This option can be provided multiple times and must be '
             'specified at least once for a full run (i.e. without '
             '``--update`` option). Only one stream per '
             'network/station/location combination will be displayed, '
             'selected by the lowest latency.')
    parser.add_argument(
        '-i', '--id', dest='ids', action="append", default=[],
        help='SEED IDs of streams that should be included in addition to the '
             'autodiscovery of streams controlled by ``--location`` and '
             '``--channel`` options (e.g. ``IU.ANMO..LHZ``). '
             'This option can be provided multiple times.')
    parser.add_argument(
        '--skip', dest='skip', action="append", default=[],
        help='Networks or stations that should be skipped (e.g. ``IU`` or '
             '``IU.ANMO``). This option can be provided multiple times.')
    parser.add_argument(
        '-f', '--format', default="MSEED", choices=ENTRY_POINTS['waveform'],
        help='Waveform format of SDS archive. Should be "MSEED" in most '
             'cases. Use ``None`` or empty string for format autodection '
             '(slower and should not be necessary in most all cases). '
             'Warning: formats that do not support ``headonly`` '
             'option in ``read()`` operation will be significantly slower).')
    parser.add_argument(
        '--check-backwards-days', dest='check_back_days', default=30,
        type=int, help='Check for latency backwards for this many days.')
    parser.add_argument(
        '--check-quality-days', dest='check_quality_days', default=7,
        type=int, help='Calculate and plot data availability and number of '
                       'gaps for a period of this many days.')
    parser.add_argument(
        '--latency-warn', dest='latency_warn', default=3600,
        type=float, help='Latency warning threshold in seconds.')
    parser.add_argument(
        '--latency-warn-color', dest='latency_warn_color', default="#FFFF33",
        help='Latency warning threshold color (valid HTML color string).')
    parser.add_argument(
        '--latency-error', dest='latency_error', default=24 * 3600,
        type=float, help='Latency error threshold in seconds.')
    parser.add_argument(
        '--latency-error-color', dest='latency_error_color', default="#E41A1C",
        help='Latency error threshold color (valid HTML color string).')
    parser.add_argument(
        '--percentage-warn', dest='percentage_warn', default=99.5, type=float,
        help='Data availability percentage warning threshold (``0`` to '
             '``100``).')
    parser.add_argument(
        '--gaps-warn', dest='gaps_warn', default=20, type=int,
        help='Gap/overlap number warning threshold.')
    parser.add_argument(
        '--data-quality-warn-color', dest='data_quality_warn_color',
        default="#377EB8",
        help='Data quality (percentage/gap count) warning color '
             '(valid HTML color string).')
    parser.add_argument(
        '--outdated-color', dest='outdated_color', default="#808080",
        help='Color for streams that have no data in check range '
             '(valid HTML color string).')
    parser.add_argument(
        '--ok-color', dest='ok_color', default="#4DAF4A",
        help='Color for streams that pass all checks (valid HTML color '
             'string).')
    parser.add_argument(
        '--background-color', dest='background_color', default="#999999",
        help='Color for background of page (valid HTML color string).')
    parser.add_argument(
        '-V', '--version', action='version', version='%(prog)s ' + __version__)

    args = parser.parse_args(argv)

    now = UTCDateTime()
    stop_time = now - args.check_back_days * 24 * 3600
    client = Client(args.sds_root)
    dtype_streamfile = np.dtype("U10, U30, U10, U10, f8, f8, i8")
    availability_check_endtime = now - 3600
    availability_check_starttime = (
        availability_check_endtime - (args.check_quality_days * 24 * 3600))
    streams_file = args.output + ".txt"
    html_file = args.output + ".html"
    scan_file = args.output + ".png"
    if args.format.upper() == "NONE" or args.format == "":
        args.format = None

    # check whether to set up list of streams to check or use existing list
    # update list of streams once per day at nighttime
    if args.update:
        if not Path(streams_file).is_file():
            msg = ("Update flag specified, but no output of previous full run "
                   "was present in the expected location (as determined by "
                   "``--output`` flag: {})").format(streams_file)
            raise IOError(msg)
        # use existing list of streams and availability information, just
        # update latency
        nslc = np.loadtxt(streams_file, delimiter=",", dtype=dtype_streamfile)
    else:
        if not args.locations or not args.channels:
            msg = ("At least one location code ``--location`` and at least "
                   "one channel code ``--channel`` must be specified.")
            raise ObsPyException(msg)
        nsl = set()
        # get all network/station combinations in SDS archive
        for net, sta in client.get_all_stations():
            if net in args.skip or ".".join((net, sta)) in args.skip:
                continue
            # for all combinations of user specified location and channel codes
            # check if data is in SDS archive
            for loc in args.locations:
                for cha in args.channels:
                    if client.has_data(net, sta, loc, cha):
                        # for now omit channel information, we only include the
                        # channel with lowest latency later on
                        nsl.add((net, sta, loc))
                        break
        nsl = sorted(nsl)
        nslc = []
        # determine which channel to check for each network/station/location
        # combination
        for net, sta, loc in nsl:
            latency = []
            # check latency of all channels that should be checked
            for cha in args.channels:
                latency_ = client.get_latency(
                    net, sta, loc, cha, stop_time=stop_time,
                    check_has_no_data=False)
                latency.append(latency_ or np.inf)
            # only include the channel with lowest latency in our stream list
            cha = args.channels[np.argmin(latency)]
            latency = np.min(latency)
            nslc.append((net, sta, loc, cha, latency))
        for id in args.ids:
            net, sta, loc, cha = id.split(".")
            latency = client.get_latency(
                net, sta, loc, cha, stop_time=stop_time,
                check_has_no_data=False)
            latency = latency or np.inf
            nslc.append((net, sta, loc, cha, latency))
        nslc_ = []
        # request and assemble availability information.
        # this takes pretty long (on network/slow file systems),
        # so we only do it during a full run here, not during update
        for net, sta, loc, cha, latency in nslc:
            percentage, gap_count = client.get_availability_percentage(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
            nslc_.append((net, sta, loc, cha, latency, percentage, gap_count))
        nslc = nslc_
        # write stream list and availability information to file
        nslc = np.array(sorted(nslc), dtype=dtype_streamfile)
        np.savetxt(streams_file, nslc, delimiter=",",
                   fmt=["%s", "%s", "%s", "%s", "%f", "%f", "%d"])
        # generate obspy-scan image
        files = []
        seed_ids = set()
        for nslc_ in nslc:
            net, sta, loc, cha, latency, _, _ = nslc_
            if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
                continue
            seed_ids.add(".".join((net, sta, loc, cha)))
            files += client._get_filenames(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
        scan(files, format=args.format, starttime=availability_check_starttime,
             endtime=availability_check_endtime, plot=scan_file, verbose=False,
             recursive=True, ignore_links=False, seed_ids=seed_ids,
             print_gaps=False)

    # request and assemble current latency information
    data = []
    for net, sta, loc, cha, latency, percentage, gap_count in nslc:
        if args.update:
            latency = client.get_latency(
                net, sta, loc, cha, stop_time=stop_time,
                check_has_no_data=False)
            latency = latency or np.inf
        data.append((net, sta, loc, cha, latency, percentage, gap_count))

    # separate out the long dead streams
    data_normal = []
    data_outdated = []
    for data_ in data:
        latency = data_[4]
        if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
            data_outdated.append(data_)
        else:
            data_normal.append(data_)

    # write html output to file
    html = _format_html(args, data_normal, data_outdated)
    with open(html_file, "wt") as fh:
        fh.write(html)