def test_bulk_events_to_event_page():
    """Round-trip check: a bulk_events dict converts to the same event_pages
    that pack_event_page builds directly."""
    run = event_model.compose_run()
    primary = run.compose_descriptor(
        data_keys={'motor': {'shape': [], 'dtype': 'number', 'source': '...'},
                   'image': {'shape': [512, 512], 'dtype': 'number',
                             'source': '...', 'external': 'FILESTORE:'}},
        name='primary')
    baseline = run.compose_descriptor(
        data_keys={'motor': {'shape': [], 'dtype': 'number', 'source': '...'}},
        name='baseline')
    resource = run.compose_resource(
        spec='TIFF', root='/tmp', resource_path='stack.tiff',
        resource_kwargs={})
    datum_a = resource.compose_datum(datum_kwargs={'slice': 5})
    datum_b = resource.compose_datum(datum_kwargs={'slice': 10})
    ev1 = primary.compose_event(
        data={'motor': 0, 'image': datum_a['datum_id']},
        timestamps={'motor': 0, 'image': 0},
        filled={'image': False},
        seq_num=1)
    ev2 = primary.compose_event(
        data={'motor': 0, 'image': datum_b['datum_id']},
        timestamps={'motor': 0, 'image': 0},
        filled={'image': False},
        seq_num=2)
    ev3 = baseline.compose_event(
        data={'motor': 0},
        timestamps={'motor': 0},
        seq_num=1)
    expected_primary = event_model.pack_event_page(ev1, ev2)
    expected_baseline = event_model.pack_event_page(ev3)
    pages = event_model.bulk_events_to_event_pages(
        {'primary': [ev1, ev2], 'baseline': [ev3]})
    assert tuple(pages) == (expected_primary, expected_baseline)
def bulk_to_pages(name, doc):
    """
    Converts bulk_events/datum to event/datum_page.
    """
    # Deprecated bulk document name -> modern page document name.
    key_map = {'bulk_events': 'event_page', 'bulk_datum': 'datum_page'}
    if name == 'bulk_events':
        doc = event_model.bulk_events_to_event_pages(doc)
    elif name == 'bulk_datum':
        doc = event_model.bulk_datum_to_datum_pages(doc)
    # Pair each converted page with its new document name.
    page_list = []
    for page in doc:
        page_list.append([key_map[name], page])
    return page_list
def export(gen, filepath, **kwargs):
    """
    Export a stream of documents to CSV file(s) and one JSON file of metadata.

    Creates {filepath}_meta.json and then {filepath}_{stream_name}.csv
    for every Event stream.

    The structure of the json is::

        {'start': {...},
        'descriptors':
            {'<stream_name>': [{...}, {...}, ...],
            ...},
        'stop': {...}}

    Parameters
    ----------
    gen : generator
        expected to yield (name, document) pairs
    filepath : str
        the filepath and filename suffix to use in the output files.
    **kwargs : kwargs
        kwargs to be passed to pandas.Dataframe.to_csv.

    Returns
    -------
    dest : tuple
        filepaths of generated files

    Raises
    ------
    RuntimeError
        if a second 'start' document is seen (one run per export).
    UnfilledData
        if an event_page contains unfilled (external) data.
    """
    meta = {}  # to be exported as JSON at the end
    meta['descriptors'] = defaultdict(list)  # map stream_name to descriptors
    files = {}  # map descriptor uid to file handle of CSV file
    desc_counters = defaultdict(itertools.count)
    has_header = set()  # a set of uids indicating if the file has a header
    kwargs.setdefault('header', True)
    initial_header_kwarg = kwargs['header']  # used later to set the headers
    kwargs.setdefault('index_label', 'time')
    kwargs.setdefault('mode', 'a')
    try:
        for name, doc in gen:
            if name == 'start':
                if 'start' in meta:
                    raise RuntimeError("This exporter expects documents from "
                                       "one run only.")
                meta['start'] = doc
            elif name == 'stop':
                meta['stop'] = doc
            elif name == 'descriptor':
                stream_name = doc.get('name')
                meta['descriptors'][stream_name].append(doc)
                # Key the counter by stream name, not descriptor uid: a uid
                # is unique, so keying on it always yielded 0 and a second
                # descriptor of the same stream collided on one filename.
                filepath_ = (f"{filepath}_{stream_name}_"
                             f"{next(desc_counters[stream_name])}.csv")
                files[doc['uid']] = open(filepath_, 'w+')
            elif name in ('event', 'bulk_events', 'event_page'):
                # Normalize all three event-document flavors to a list of
                # event_pages.
                if name == 'event':
                    event_pages = [event_model.pack_event_page(doc)]
                elif name == 'bulk_events':
                    # The document name is plural ('bulk_events'), matching
                    # event_model.bulk_events_to_event_pages and
                    # bulk_to_pages above; the old singular check never
                    # matched, so bulk documents crashed in the else branch.
                    event_pages = event_model.bulk_events_to_event_pages(doc)
                else:
                    event_pages = [doc]
                for event_page in event_pages:
                    if not all(event_page['filled'].values()):
                        # check that all event_page data is filled
                        unfilled_data = [field for field, filled
                                         in event_page['filled'].items()
                                         if not filled]
                        # Note: As of this writing, this is a slightly
                        # aspirational error message, as event_model.Filler
                        # has not been merged yet. May need to be revisited
                        # if it is renamed or kept elsewhere in the end.
                        raise UnfilledData(
                            f"unfilled data found in {unfilled_data}. "
                            'Try passing the parameter "gen" through '
                            '"event_model.Filler" first')
                    else:
                        event_data = pandas.DataFrame(event_page['data'],
                                                      index=event_page['time'])
                        event_data['seq_num'] = event_page['seq_num']
                        if initial_header_kwarg:
                            # Only the first write to a given file gets a
                            # header row.
                            kwargs['header'] = event_page['descriptor'] \
                                not in has_header
                        event_data.to_csv(files[event_page['descriptor']],
                                          **kwargs)
                        has_header.add(event_page['descriptor'])
    finally:
        for f in files.values():
            f.close()
        with open(f"{filepath}_meta.json", 'w') as f:
            json.dump(meta, f)
    return (f.name, ) + tuple(f.name for f in files.values())