Code Example #1
# Assumed imports (paths follow the metadatastore project layout);
# setup_syn() and syn_data() are helpers defined elsewhere in the test
# module that create a synthetic run and synthetic event data.
import time as ttime
import uuid

from nose.tools import assert_true
from metadatastore import commands as mdsc


def test_bulk_table():
    num = 50
    rs, e_desc, data_keys = setup_syn()
    all_data = syn_data(data_keys, num)

    mdsc.bulk_insert_events(e_desc, all_data, validate=False)
    mdsc.insert_run_stop(rs, ttime.time(), uid=str(uuid.uuid4()))
    ret = mdsc.get_events_table(e_desc)
    descriptor, data_table, seq_nums, times, uids, timestamps_table = ret

    # Each data column should line up one-to-one with the sequence numbers.
    for vals in data_table.values():
        assert_true(all(s == v for s, v in zip(seq_nums, vals)))
Code Example #2
File: test_commands.py  Project: licode/metadatastore
def test_bulk_table():
    num = 50
    rs, e_desc, data_keys = setup_syn()
    all_data = syn_data(data_keys, num)

    mdsc.bulk_insert_events(e_desc, all_data, validate=False)
    mdsc.insert_run_stop(rs, ttime.time(), uid=str(uuid.uuid4()))
    ret = mdsc.get_events_table(e_desc)
    descriptor, data_table, seq_nums, times, uids, timestamps_table = ret

    for vals in data_table.values():
        assert all(s == v for s, v in zip(seq_nums, vals))
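Examples #1 and #2 show the same test; they differ only in assertion style (nose's assert_true in #1 versus a bare assert in #2). Both rely on the imports sketched in Example #1 and on the setup_syn/syn_data helpers defined elsewhere in test_commands.py.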
Code Example #3
File: databroker.py  Project: danielballan/databroker
# Module-level dependencies assumed by this excerpt (defined elsewhere
# in databroker.py): pandas as pd, six, a module 'logger', the
# filestore client 'fs', the timezone 'TZ', and the helpers
# find_descriptors, get_events_table, and _inspect_descriptor.
def get_table(headers, fields=None, fill=True, convert_times=True):
    """
    Make a table (pandas.DataFrame) from given run(s).

    Parameters
    ----------
    headers : Header or iterable of Headers
        The headers to fetch the events for
    fields : list, optional
        Whitelist of field names of interest; if None, all fields are returned
    fill : bool, optional
        Whether externally-stored data should be filled in. Defaults to True.
    convert_times : bool, optional
        Whether to convert times from float (seconds since 1970) to
        numpy datetime64, using pandas. True by default.

    Returns
    -------
    table : pandas.DataFrame
    """
    # A word about the 'fields' argument:
    # Notice that we assume that the same field name cannot occur in
    # more than one descriptor. We could relax this assumption, but
    # we currently enforce it in bluesky, so it is safe for now.
    try:
        headers.items()
    except AttributeError:
        pass
    else:
        # A single Header is dict-like (it has .items()), so wrap it in a
        # list; an iterable of Headers passes through unchanged.
        headers = [headers]

    if fields is None:
        fields = []
    fields = set(fields)

    dfs = []
    for header in headers:
        descriptors = find_descriptors(header['start']['uid'])
        for descriptor in descriptors:
            all_fields = set(descriptor['data_keys'])
            if fields:
                discard_fields = all_fields - fields
            else:
                # use a set so the comparison below is type-consistent
                discard_fields = set()
            if discard_fields == all_fields:
                continue
            is_external = _inspect_descriptor(descriptor)

            payload = get_events_table(descriptor)
            descriptor, data, seq_nums, times, uids, timestamps = payload
            df = pd.DataFrame(index=seq_nums)
            if convert_times:
                # Build the Series on the DataFrame's seq_num index so the
                # assignment below aligns correctly, and use tz_convert: the
                # result of to_datetime(..., utc=True) is already tz-aware.
                times = pd.to_datetime(
                    pd.Series(times, index=seq_nums),
                    unit='s', utc=True).dt.tz_convert(TZ)
            df['time'] = times
            for field, values in six.iteritems(data):
                if field in discard_fields:
                    logger.debug('Discarding field %s', field)
                    continue
                if is_external[field] and fill:
                    logger.debug('filling data for %s', field)
                    # TODO someday we will have bulk retrieve in FS
                    values = [fs.retrieve(value) for value in values]
                df[field] = values
            dfs.append(df)
    if dfs:
        return pd.concat(dfs)
    else:
        # edge case: no data
        return pd.DataFrame()
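
A minimal usage sketch for get_table follows. It is hypothetical: the db[-1] lookup follows the old DataBroker entry point, and 'temp' and 'motor' are made-up field names standing in for whatever data keys a real run records.

# Hypothetical usage; field names and the lookup API are illustrative.
from databroker import DataBroker as db

header = db[-1]                                      # most recent run
table = get_table(header, fields=['temp', 'motor'])  # assumed field names
print(table.head())  # DataFrame indexed by seq_num, with a 'time' column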