def verify(conf, type='data', report_file=None):
    """
    HDF file structure verifier.

    Parameters
    ----------
    conf : str
        configuration file name, including path

    type : str
        a string characterizing the data type (i.e. data_dark, data_white or data),
        defaulted to 'data'

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    logger, limits, quality_checks, feedback, feedback_pv, report_type = init(conf)
    aggregateq = Queue()
    args = limits[type], aggregateq, quality_checks, feedback, feedback_pv
    feed.feed_data(conf, logger, *args)

    bad_indexes = {}
    aggregate = aggregateq.get()

    if report_file is not None:
        report.report_results(logger, aggregate, type, None, report_file, report_type)
    report.add_bad_indexes(aggregate, type, bad_indexes)

    return bad_indexes
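
# Example usage (a minimal sketch, not part of the verifier API): the configuration
# path and report destination below are hypothetical, and the example assumes, per the
# docstring, that report_file accepts an open file object.
def _example_verify_structure():
    with open('/local/user/data_white.report', 'w') as report_file:
        bad_indexes = verify('/local/user/dqconfig.ini', type='data_white',
                             report_file=report_file)
    # bad_indexes maps the verified data type to the indexes of frames that failed checks
    return bad_indexes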
def verify(self, config, report_file=None, sequence=None):
    """
    This function starts the real-time verification process according to the given
    configuration.

    This function reads the configuration and initiates variables accordingly.
    It creates a Feed instance, starts the data feed, and waits to receive results in
    aggregateq. The results are then written into a report file.

    Parameters
    ----------
    config : str
        configuration file name, including path

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    sequence : list or int
        information about data sequence or number of frames

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    def get_decor(qc):
        # map quality check ids to the detector PVs that those checks require
        decor = {}
        if const.QUALITYCHECK_RATE_SAT in qc:
            decor[const.QUALITYCHECK_RATE_SAT] = detector + ":" + detector_basic + ":AcquireTime"
        return decor

    logger, limits, quality_checks, feedback, report_type, consumers = init(config)
    no_frames, aggregate_limit, detector, detector_basic, detector_image = adapter.parse_config(config)
    aggregateq = Queue()

    # address the special cases of quality checks when additional arguments are required
    decor = get_decor(quality_checks)
    if len(decor) == 0:
        self.feed = Feed()
    else:
        self.feed = FeedDecorator(decor)

    # the aggregate limit is the total number of frames to be verified
    aggregate_limit = no_frames
    args = limits, aggregateq, quality_checks, aggregate_limit, consumers, feedback, detector
    ack = self.feed.feed_data(no_frames, detector, detector_basic, detector_image,
                              logger, sequence, *args)
    if ack == 1:
        bad_indexes = {}
        aggregate = aggregateq.get()

        if report_file is not None:
            report.report_results(logger, aggregate, None, report_file, report_type)
        report.add_bad_indexes(aggregate, bad_indexes)

        return bad_indexes
def verify_file_ge(logger, file, limits, quality_checks, report_type, report_dir, consumers):
    """
    This method handles verification of data in a ge file type.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, which is 'data', and a result queue. After starting the
    process the function enqueues the data slice by slice into the data queue, until all
    data is queued. The last enqueued element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    consumers : dict
        a dictionary containing consumer processes to run, and their parameters

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    type = 'data'
    fp, nframes, fsize = utils.get_data_ge(logger, file)
    # data file is corrupted, error message is logged
    if fp is None:
        return None

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    for i in range(0, nframes):
        img = np.fromfile(fp, 'uint16', fsize)
        dataq.put(Data(const.DATA_STATUS_DATA, img, type))
        time.sleep(.2)
    dataq.put(Data(const.DATA_STATUS_END))

    # receive the results
    bad_indexes = {}
    aggregate = aggregateq.get()
    report.add_bad_indexes(aggregate, bad_indexes)
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file + '.report'
        else:
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'
        report.report_results(logger, aggregate, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
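
# Example usage (a hedged sketch): the logger setup, file path, limits, and quality check
# structure below are illustrative assumptions rather than values defined by this module.
def _example_verify_file_ge():
    import logging
    logger = logging.getLogger(__name__)
    limits = {'mean': {'low_limit': 100, 'high_limit': 5000}}   # hypothetical limit values
    quality_checks = {'data': ['mean']}                         # hypothetical check structure
    consumers = {}                                              # no extra consumer processes
    return verify_file_ge(logger, '/local/user/scan_0001.ge4', limits, quality_checks,
                          const.REPORT_NONE, None, consumers)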
def verify_file_hdf(logger, file, data_tags, limits, quality_checks, report_type, report_dir, consumers):
    """
    This method handles verification of data in an hdf type file.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, and a result queue. The data type can be 'data_dark',
    'data_white' or 'data'. After starting the process the function enqueues the data
    slice by slice into the data queue, until all data is queued. The last enqueued
    element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    data_tags : dict
        a dictionary of data_type/hdf tag

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    consumers : dict
        a dictionary containing consumer processes to run, and their parameters

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    def process_data(data_type):
        data_tag = data_tags[data_type]
        dt = fp[data_tag]
        for i in range(0, dt.shape[0]):
            data = Data(const.DATA_STATUS_DATA, dt[i], data_type)
            dataq.put(data)
            # add delay to slow down the flow up, so the flow down (results)
            # is handled in synch
            time.sleep(.1)

    fp, tags = utils.get_data_hdf(file)

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    # assume a fixed order of data types; this will determine indexes on the data
    if 'data_dark' in data_tags:
        process_data('data_dark')
    if 'data_white' in data_tags:
        process_data('data_white')
    if 'data' in data_tags:
        process_data('data')
    dataq.put(Data(const.DATA_STATUS_END))

    report_file = None
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file.rsplit(".")[0] + '.report'
        else:
            file = file.rsplit(".")[0]
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'

    # receive the results
    bad_indexes = {}
    aggregate = aggregateq.get()

    if report_file is not None:
        report.report_results(logger, aggregate, None, report_file, report_type)
    report.add_bad_indexes(aggregate, bad_indexes)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
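
# Example usage (a hedged sketch): the hdf tag mapping follows the '/exchange/<data type>'
# convention used elsewhere in this code, while the file path, limits, and quality check
# structure are illustrative assumptions.
def _example_verify_file_hdf():
    import logging
    logger = logging.getLogger(__name__)
    data_tags = {'data_dark': '/exchange/data_dark',
                 'data_white': '/exchange/data_white',
                 'data': '/exchange/data'}
    limits = {}            # hypothetical: per-check limit values parsed from configuration
    quality_checks = {}    # hypothetical: quality check structure parsed from configuration
    consumers = {}         # no extra consumer processes
    return verify_file_hdf(logger, '/local/user/experiment.h5', data_tags, limits,
                           quality_checks, const.REPORT_NONE, None, consumers)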
def verify(conf, folder, data_type, num_files, report_by_files=True):
    """
    This function discovers new files and evaluates data in the files.

    This is the main function called when the verifier application starts.
    It reads the configuration for the directory to monitor, for the pattern that
    represents a file extension to look for, and for the number of files that are
    expected for the experiment. The number of files configuration parameter is added
    for experiments that generate multiple files. In some cases the experiment data is
    collected into a single file, which is organized with data sets.

    The function calls the directory function that sets up the monitoring and returns a
    notifier. After the monitoring is initialized, it starts a loop that reads the global
    "*files*" queue and then the global "*results*" queue. If there is any new file, the
    file is removed from the queue, and the data in the file is validated by a sequence
    of validation methods. If there is any new result, the result is removed from the
    queue, the corresponding process is terminated, and the result is presented.
    (Currently it is printed on the console; later it will be pushed into an EPICS
    process variable.)

    The loop is interrupted when all expected processes have produced results. The number
    of expected processes is determined by the number of files and the number of
    validation functions.

    Parameters
    ----------
    conf : str
        configuration file name, including path

    folder : str
        monitored directory

    data_type : str
        defines which data type is being evaluated

    num_files : int
        number of files that will be processed

    report_by_files : boolean
        this variable directs how to present the bad indexes in a report. If True, the
        indexes are related to the files, and a filename is included in the report.
        Otherwise, the report contains a list of bad indexes.

    Returns
    -------
    bad_indexes : dict
        a dictionary or list containing bad indexes
    """
    logger, limits, quality_checks, extensions, report_type, consumers = init(conf)
    if not os.path.isdir(folder):
        logger.error('parameter error: directory ' + folder + ' does not exist')
        sys.exit(-1)
    notifier = directory(folder, extensions)

    interrupted = False
    file_list = []
    offset_list = []
    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=datahandler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    file_index = 0
    slice_index = 0
    while not interrupted:
        # the notifier will put a new file into the newFiles queue if one was detected
        notifier.process_events()
        if notifier.check_events():
            notifier.read_events()

        # check the newFiles queue for new entries and start verification
        # processing for each new file
        while not files.empty():
            file = files.get()
            if file.find('INTERRUPT') >= 0:
                # the calling function may use an 'INTERRUPT' command to stop
                # the monitoring and processing
                dataq.put(Data(const.DATA_STATUS_END))
                notifier.stop()
                interrupted = True
                break
            else:
                if file_index == 0:
                    report_file = file.rsplit(".")[0] + '.report'
                fp, tags = utils.get_data_hdf(file)
                data_tag = tags['/exchange/' + data_type]
                data = np.asarray(fp[data_tag])
                slice_index += data.shape[0]
                file_list.append(file)
                offset_list.append(slice_index)
                for i in range(0, data.shape[0]):
                    dataq.put(Data(const.DATA_STATUS_DATA, data[i], data_type))
                file_index += 1
                if file_index == num_files:
                    dataq.put(Data(const.DATA_STATUS_END))
                    notifier.stop()
                    interrupted = True
                    break

    aggregate = aggregateq.get()
    #report.report_results(logger, aggregate, data_type, None, report_file, report_type)

    bad_indexes = {}
    if report_by_files:
        report.add_bad_indexes_per_file(aggregate, bad_indexes, file_list, offset_list)
    else:
        report.add_bad_indexes(aggregate, bad_indexes)
    try:
        report_file = open(report_file, 'w')
        report.report_bad_indexes(bad_indexes, report_file)
    except Exception:
        logger.warning('Cannot open report file')

    return bad_indexes
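
# Example usage (a hedged sketch): the configuration file, monitored directory, and
# expected file count below are illustrative assumptions.
def _example_monitor_folder():
    # watch a hypothetical detector output directory for four hdf files holding 'data'
    # frames and collect the bad indexes reported per file
    return verify('/local/user/dqconfig.ini', '/local/user/detector_out', 'data', 4,
                  report_by_files=True)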
def verify_file_ge(logger, file, limits, quality_checks, report_type, report_dir):
    """
    This method handles verification of a ge file type.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, which is 'data', and a result queue. After starting the
    process the function enqueues the data slice by slice into the data queue, until all
    data is queued. The last enqueued element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    type = 'data'
    fp, nframes, fsize = utils.get_data_ge(logger, file)
    # data file is corrupted, error message is logged
    if fp is None:
        return None

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits['data'], aggregateq, quality_checks))
    p.start()

    for i in range(0, nframes):
        img = np.fromfile(fp, 'uint16', fsize)
        dataq.put(Data(img))
    dataq.put('all_data')

    bad_indexes = {}
    aggregate = aggregateq.get()
    report.add_bad_indexes(aggregate, type, bad_indexes)
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file + '.report'
        else:
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'
        report.report_results(logger, aggregate, type, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
def verify_file_hdf(logger, file, data_tags, limits, quality_checks, report_type, report_dir):
    """
    This method handles verification of an hdf type file.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, and a result queue. The data type can be 'data_dark',
    'data_white' or 'data'. After starting the process the function enqueues the data
    slice by slice into the data queue, until all data is queued. The last enqueued
    element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    data_tags : dict
        a dictionary of data_type/hdf tag

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    fp, tags = utils.get_data_hdf(file)

    queues = {}
    bad_indexes = {}
    # start a verification process for each data type present in the file
    for type in data_tags.keys():
        data_tag = data_tags[type]
        if data_tag in tags:
            queue = Queue()
            queues[type] = queue
            process_data(type, queue, fp, data_tag, limits, quality_checks)

    report_file = None
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file.rsplit(".")[0] + '.report'
        else:
            file = file.rsplit(".")[0]
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'

    # receive the results
    for type in queues.keys():
        queue = queues[type]
        aggregate = queue.get()
        report.add_bad_indexes(aggregate, type, bad_indexes)
        if report_file is not None:
            report.report_results(logger, aggregate, type, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
def verify(self, config, report_file=None, sequence=None):
    """
    This function starts the real-time verification process according to the given
    configuration.

    This function reads the configuration and initiates variables accordingly.
    It creates a Feed instance, starts the data feed, and waits to receive results in a
    report queue. The results are then written into a report file.

    Parameters
    ----------
    config : str
        configuration file name, including path

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    sequence : list or int
        information about data sequence or number of frames

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    feed_args, feed_kwargs, feedback, decor_map, logger, report_type = self.init(config)

    # init the pv feedback
    if feedback is not None:
        feedbackq = Queue()
        feedback_pvs = utils.get_feedback_pvs(feed_args[1])
        fb_args = {'feedback_pvs': feedback_pvs, 'detector': feed_kwargs['detector']}

        feedback_obj = fb.Feedback(feedbackq, feedback, **fb_args)
        # pass the logger to the feedback object
        if const.FEEDBACK_LOG in feedback:
            feedback_obj.set_logger(logger)

        feed_kwargs['feedbackq'] = feedbackq
        self.p = Process(target=feedback_obj.deliver, args=())
        self.p.start()

    reportq = Queue()

    # address the special cases of quality checks when additional arguments are required
    if decor_map is None:
        self.feed = Feed()
    else:
        self.feed = FeedDecorator(decor_map)

    ack = self.feed.feed_data(logger, reportq, *feed_args, **feed_kwargs)
    if ack == 1:
        bad_indexes = {}
        aggregate = reportq.get()

        if report_file is not None:
            report.report_results(logger, aggregate, None, report_file, report_type)
        report.add_bad_indexes(aggregate, bad_indexes)

        return bad_indexes
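
# Example usage (a hedged sketch): verify() above is an instance method, so it is called
# on an instance of the enclosing real-time verifier class; the instance, configuration
# path, frame count, and report destination below are hypothetical.
def _example_real_time_verify(verifier):
    # 'verifier' is assumed to be an instance of the enclosing class; the report
    # destination is assumed to accept an open file object, per the docstring
    with open('/local/user/realtime.report', 'w') as report_file:
        return verifier.verify('/local/user/dqconfig.ini', report_file=report_file,
                               sequence=100)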