def test_event_descriptor_insertion():
    # format some data keys for insertion
    data_keys = {'some_value': {'source': 'PV:pv1',
                                'shape': [1, 2],
                                'dtype': 'array'},
                 'some_other_val': {'source': 'PV:pv2',
                                    'shape': [],
                                    'dtype': 'number'},
                 'data_key3': {'source': 'PV:pv1',
                               'shape': [],
                               'dtype': 'number',
                               'external': 'FS:foobar'}}
    time = ttime.time()
    # test insert
    ev_desc_uid = mdsc.insert_descriptor(run_start_uid, data_keys, time,
                                         str(uuid.uuid4()))
    ev_desc_mds, = mdsc.find_descriptors(uid=ev_desc_uid)
    # make sure the sanitized event descriptor has no uid
    check_for_id(ev_desc_mds)

    # make sure the event descriptor is pointing to the correct run start
    referenced_run_start = ev_desc_mds['run_start']
    assert referenced_run_start.uid == run_start_uid
    assert ev_desc_mds['time'] == time

    for k in data_keys:
        for ik in data_keys[k]:
            assert ev_desc_mds.data_keys[k][ik] == data_keys[k][ik]
Exemple #2
0
def test_event_descriptor_insertion():
    # format some data keys for insertion
    data_keys = {'some_value': {'source': 'PV:pv1',
                                'shape': [1, 2],
                                'dtype': 'array'},
                 'some_other_val': {'source': 'PV:pv2',
                                    'shape': [],
                                    'dtype': 'number'},
                 'data_key3': {'source': 'PV:pv1',
                               'shape': [],
                               'dtype': 'number',
                               'external': 'FS:foobar'}}
    time = ttime.time()
    # test insert
    ev_desc_uid = mdsc.insert_descriptor(run_start_uid, data_keys, time,
                                         str(uuid.uuid4()))
    ev_desc_mds, = mdsc.find_descriptors(uid=ev_desc_uid)
    # make sure the sanitized event descriptor has no uid
    check_for_id(ev_desc_mds)

    # make sure the event descriptor is pointing to the correct run start
    referenced_run_start = ev_desc_mds['run_start']
    assert_equal(referenced_run_start.uid, run_start_uid)
    assert_equal(ev_desc_mds['time'], time)

    for k in data_keys:
        for ik in data_keys[k]:
            assert_equal(ev_desc_mds.data_keys[k][ik],
                         data_keys[k][ik])
def get_events(headers, fields=None, fill=True):
    """
    Get Events from given run(s).

    Parameters
    ----------
    headers : Header or iterable of Headers
        The headers to fetch the events for
    fields : list, optional
        whitelist of field names of interest; if None, all are returned
    fill : bool, optional
        Whether externally-stored data should be filled in. Defaults to True

    Yields
    ------
    event : Event
        The event, optionally with non-scalar data filled in
    """
    # A word about the 'fields' argument:
    # Notice that we assume that the same field name cannot occur in
    # more than one descriptor. We could relax this assumption, but
    # we current enforce it in bluesky, so it is safe for now.
    try:
        headers.items()
    except AttributeError:
        pass
    else:
        headers = [headers]

    if fields is None:
        fields = []
    fields = set(fields)

    for header in headers:
        descriptors = find_descriptors(header['start']['uid'])
        for descriptor in descriptors:
            all_fields = set(descriptor['data_keys'])
            if fields:
                discard_fields = all_fields - fields
            else:
                discard_fields = []
            if discard_fields == all_fields:
                continue
            for event in get_events_generator(descriptor):
                for field in discard_fields:
                    del event.data[field]
                    del event.timestamps[field]
                if fill:
                    fill_event(event)
                yield event
def test_find_events_smoke():

    num = 50
    rs, e_desc, data_keys = setup_syn()
    all_data = syn_data(data_keys, num)

    mdsc.bulk_insert_events(e_desc, all_data, validate=False)
    mdsc.insert_run_stop(rs, ttime.time(), uid=str(uuid.uuid4()))
    mdsc.clear_process_cache()

    # make sure the uid works
    next(mdsc.find_events(descriptor=e_desc))

    mdsc.clear_process_cache()
    descriptor, = mdsc.find_descriptors(uid=e_desc)

    mdsc.clear_process_cache()
    # make sure that searching by descriptor document works
    next(mdsc.find_events(descriptor=descriptor))
Exemple #5
0
def test_find_events_smoke():

    num = 50
    rs, e_desc, data_keys = setup_syn()
    all_data = syn_data(data_keys, num)

    mdsc.bulk_insert_events(e_desc, all_data, validate=False)
    mdsc.insert_run_stop(rs, ttime.time(), uid=str(uuid.uuid4()))
    mdsc.clear_process_cache()
    
    # make sure the uid works
    next(mdsc.find_events(descriptor=e_desc))
    
    mdsc.clear_process_cache()
    descriptor, = mdsc.find_descriptors(uid=e_desc)
    
    mdsc.clear_process_cache()
    # make sure that searching by descriptor document works
    next(mdsc.find_events(descriptor=descriptor))
def get_table(headers, fields=None, fill=True, convert_times=True):
    """
    Make a table (pandas.DataFrame) from given run(s).

    Parameters
    ----------
    headers : Header or iterable of Headers
        The headers to fetch the events for
    fields : list, optional
        whitelist of field names of interest; if None, all are returned
    fill : bool, optional
        Whether externally-stored data should be filled in. Defaults to True
    convert_times : bool, optional
        Whether to convert times from float (seconds since 1970) to
        numpy datetime64, using pandas. True by default.

    Returns
    -------
    table : pandas.DataFrame
    """
    # A word about the 'fields' argument:
    # Notice that we assume that the same field name cannot occur in
    # more than one descriptor. We could relax this assumption, but
    # we current enforce it in bluesky, so it is safe for now.
    try:
        headers.items()
    except AttributeError:
        pass
    else:
        headers = [headers]

    if fields is None:
        fields = []
    fields = set(fields)

    dfs = []
    for header in headers:
        descriptors = find_descriptors(header['start']['uid'])
        for descriptor in descriptors:
            all_fields = set(descriptor['data_keys'])
            if fields:
                discard_fields = all_fields - fields
            else:
                discard_fields = []
            if discard_fields == all_fields:
                continue
            is_external = _inspect_descriptor(descriptor)

            payload = get_events_table(descriptor)
            descriptor, data, seq_nums, times, uids, timestamps = payload
            df = pd.DataFrame(index=seq_nums)
            if convert_times:
                times = pd.to_datetime(
                    pd.Series(times), unit='s', utc=True).dt.tz_localize(TZ)
            df['time'] = times
            for field, values in six.iteritems(data):
                if field in discard_fields:
                    logger.debug('Discarding field %s', field)
                    continue
                if is_external[field] and fill:
                    logger.debug('filling data for %s', field)
                    # TODO someday we will have bulk retrieve in FS
                    values = [fs.retrieve(value) for value in values]
                df[field] = values
            dfs.append(df)
    if dfs:
        return pd.concat(dfs)
    else:
        # edge case: no data
        return pd.DataFrame()
    def __call__(self, **kwargs):
        """Given search criteria, find Headers describing runs.

        This function returns a list of dictionary-like objects encapsulating
        the metadata for a run -- start time, instruments used, and so on.
        In addition to the Parameters below, advanced users can specifiy
        arbitrary queries that are passed through to mongodb.

        Parameters
        ----------
        start_time : time-like, optional
            Include Headers for runs started after this time. Valid
            "time-like" representations are:
                - float timestamps (seconds since 1970), such as time.time()
                - '2015'
                - '2015-01'
                - '2015-01-30'
                - '2015-03-30 03:00:00'
                - Python datetime objects, such as datetime.datetime.now()
        stop_time: time-like, optional
            Include Headers for runs started before this time. See
            `start_time` above for examples.
        beamline_id : str, optional
            String identifier for a specific beamline
        project : str, optional
            Project name
        owner : str, optional
            The username of the logged-in user when the scan was performed
        scan_id : int, optional
            Integer scan identifier
        uid : str, optional
            Globally unique id string provided to metadatastore
        data_key : str, optional
            The alias (e.g., 'motor1') or PV identifier of data source

        Returns
        -------
        data : list
            Header objects

        Examples
        --------
        >>> DataBroker(start_time='2015-03-05', stop_time='2015-03-10')
        >>> DataBroker(data_key='motor1')
        >>> DataBroker(data_key='motor1', start_time='2015-03-05')
        """
        data_key = kwargs.pop('data_key', None)
        run_start = find_run_starts(**kwargs)
        if data_key is not None:
            node_name = 'data_keys.{0}'.format(data_key)

            query = {node_name: {'$exists': True}}
            descriptors = []
            for rs in run_start:
                descriptor = find_descriptors(run_start=rs, **query)
                for d in descriptor:
                    descriptors.append(d)
            # query = {node_name: {'$exists': True},
            #          'run_start_id': {'$in': [ObjectId(rs.id) for rs in run_start]}}
            # descriptors = find_descriptors(**query)
            result = []
            known_uids = deque()
            for descriptor in descriptors:
                if descriptor['run_start']['uid'] not in known_uids:
                    rs = descriptor['run_start']
                    known_uids.append(rs['uid'])
                    result.append(rs)
            run_start = result
        result = []
        for rs in run_start:
            result.append(Header.from_run_start(rs))
        return result