def test_bulk_table():
    """Bulk-insert synthetic events and verify the events-table round trip.

    Inserts ``num`` synthetic events in a single bulk call, closes the run,
    then fetches the events table and checks that every data column matches
    the sequence numbers element-wise (the synthetic data is presumably
    constructed so each value equals its seq_num — TODO confirm in syn_data).
    """
    num = 50
    rs, e_desc, data_keys = setup_syn()
    all_data = syn_data(data_keys, num)

    mdsc.bulk_insert_events(e_desc, all_data, validate=False)
    mdsc.insert_run_stop(rs, ttime.time(), uid=str(uuid.uuid4()))

    ret = mdsc.get_events_table(e_desc)
    descriptor, data_table, seq_nums, times, uids, timestamps_table = ret

    for vals in data_table.values():
        # Plain assert instead of nose's assert_true: pytest rewrites plain
        # asserts to report the failing values, and it drops the deprecated
        # nose dependency (the sibling copy of this test already does this).
        assert all(s == v for s, v in zip(seq_nums, vals))
def test_bulk_table():
    """Round-trip check: bulk-inserted events come back aligned with seq_nums.

    Builds synthetic data for a run, bulk-inserts it without validation,
    stops the run, and asserts each value in every data column equals the
    corresponding sequence number from the returned events table.
    """
    n_events = 50
    run_start, event_desc, data_keys = setup_syn()
    events = syn_data(data_keys, n_events)

    mdsc.bulk_insert_events(event_desc, events, validate=False)
    mdsc.insert_run_stop(run_start, ttime.time(), uid=str(uuid.uuid4()))

    (descriptor, data_table, seq_nums,
     times, uids, timestamps_table) = mdsc.get_events_table(event_desc)

    # Element-wise comparison: equivalent to asserting all(...) per column,
    # but pinpoints the first mismatching (seq, value) pair on failure.
    for column in data_table.values():
        for seq, value in zip(seq_nums, column):
            assert seq == value
def get_table(headers, fields=None, fill=True, convert_times=True):
    """
    Make a table (pandas.DataFrame) from given run(s).

    Parameters
    ----------
    headers : Header or iterable of Headers
        The headers to fetch the events for
    fields : list, optional
        whitelist of field names of interest; if None, all are returned
    fill : bool, optional
        Whether externally-stored data should be filled in. Defaults to True
    convert_times : bool, optional
        Whether to convert times from float (seconds since 1970) to
        numpy datetime64, using pandas. True by default.

    Returns
    -------
    table : pandas.DataFrame
    """
    # A word about the 'fields' argument:
    # Notice that we assume that the same field name cannot occur in
    # more than one descriptor. We could relax this assumption, but
    # we currently enforce it in bluesky, so it is safe for now.
    try:
        headers.items()
    except AttributeError:
        pass
    else:
        # A single Header (mapping-like) was passed; normalize to a list.
        headers = [headers]

    if fields is None:
        fields = []
    fields = set(fields)

    dfs = []
    for header in headers:
        descriptors = find_descriptors(header['start']['uid'])
        for descriptor in descriptors:
            all_fields = set(descriptor['data_keys'])
            if fields:
                discard_fields = all_fields - fields
            else:
                discard_fields = []
            if discard_fields == all_fields:
                # None of the requested fields live in this descriptor;
                # skip the (potentially expensive) event fetch entirely.
                continue
            is_external = _inspect_descriptor(descriptor)
            payload = get_events_table(descriptor)
            descriptor, data, seq_nums, times, uids, timestamps = payload
            df = pd.DataFrame(index=seq_nums)
            if convert_times:
                # BUG FIX: to_datetime(..., utc=True) returns a tz-aware
                # (UTC) series, so tz_localize(TZ) raises "Already tz-aware";
                # tz_convert is the correct call to shift into TZ.
                times = pd.to_datetime(
                    pd.Series(times), unit='s',
                    utc=True).dt.tz_convert(TZ)
            df['time'] = times
            for field, values in six.iteritems(data):
                if field in discard_fields:
                    logger.debug('Discarding field %s', field)
                    continue
                if is_external[field] and fill:
                    logger.debug('filling data for %s', field)
                    # TODO someday we will have bulk retrieve in FS
                    values = [fs.retrieve(value) for value in values]
                df[field] = values
            dfs.append(df)
    if dfs:
        return pd.concat(dfs)
    else:
        # edge case: no data
        return pd.DataFrame()