def test_mantid_convert_tof_to_direct_energy_transfer():
    efixed = 1000 * sc.Unit('meV')
    in_ws = make_workspace('tof', emode='Direct', efixed=efixed)
    out_mantid = mantid_convert_units(in_ws,
                                      'energy_transfer',
                                      emode='Direct',
                                      efixed=efixed)

    in_da = scn.mantid.from_mantid(in_ws)
    out_scipp = scn.convert(data=in_da,
                            origin='tof',
                            target='energy_transfer',
                            scatter=True)

    # The conversion consists of multiplications and additions, thus the
    # relative error changes with the inputs. In this case, small tof yields
    # a large error due to the 1/tof**2 factor in the conversion.
    # rtol is chosen to account for the linearly changing tof in the input data.
    assert sc.allclose(
        out_scipp.coords['energy_transfer'],
        out_mantid.coords['energy_transfer'],
        rtol=sc.linspace(
            'energy_transfer', 1e-6, 1e-10,
            out_scipp.coords['energy_transfer'].sizes['energy_transfer']))
    assert sc.identical(out_scipp.coords['spectrum'], out_mantid.coords['spectrum'])
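
# A minimal, self-contained sketch (not part of the test above) of the
# variable-rtol pattern it relies on: sc.allclose accepts a Variable for rtol,
# so the tolerance can vary per element. All names and values below are
# illustrative.
import scipp as sc

ref = sc.linspace('energy_transfer', 1.0, 100.0, 5, unit='meV')
x = ref * (1.0 + 1e-11)  # perturbation below even the tightest tolerance
rtol = sc.linspace('energy_transfer', 1e-6, 1e-10, 5)
assert sc.allclose(x, ref, rtol=rtol, atol=sc.scalar(0., unit='meV'))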
def test_extract_energy_initial():
    from mantid.simpleapi import mtd
    mtd.clear()
    ds = scn.load(scn.data.get_path("CNCS_51936_event.nxs"),
                  mantid_args={"SpectrumMax": 1})
    assert sc.identical(ds.coords["incident_energy"],
                        sc.scalar(value=3.0, unit=sc.Unit("meV")))
def _extract_einitial(ws):
    if ws.run().hasProperty("Ei"):
        ei = ws.run().getProperty("Ei").value
    elif ws.run().hasProperty('EnergyRequest'):
        ei = ws.run().getProperty('EnergyRequest').value[-1]
    else:
        ei = 0
    return sc.scalar(ei, unit=sc.Unit("meV"))
def test_beamline_compute_l1(in_ws, in_da):
    out_mantid = in_ws.detectorInfo().l1() * sc.Unit('m')
    in_da = scn.mantid.from_mantid(in_ws)
    out_scipp = scn.L1(in_da)
    assert sc.allclose(out_scipp,
                       out_mantid,
                       rtol=1e-15 * sc.units.one,
                       atol=1e-15 * out_scipp.unit)
def test_field_properties(nexus_group: Tuple[Callable, LoadFromNexus]):
    resource, loader = nexus_group
    with resource(builder_with_events_monitor_and_log())() as f:
        field = nexus.NXroot(f, loader)['entry/events_0/event_time_offset']
        assert field.dtype == np.array(1).dtype
        assert field.name == '/entry/events_0/event_time_offset'
        assert field.shape == (6, )
        assert field.unit == sc.Unit('ns')
def __init__(self, stream_info: StreamInfo, buffer_size: int,
             data_queue: mp.Queue):
    self._buffer_mutex = threading.Lock()
    self._buffer_size = buffer_size
    self._name = stream_info.source_name
    self._data_queue = data_queue
    self._buffer_filled_size = 0
    self._data_array = sc.zeros(dims=[self._name],
                                shape=(buffer_size, ),
                                unit=sc.Unit("nanoseconds"),
                                dtype=np.int64)
def _create_metadata_buffer_array(name: str, unit: str, dtype: Any,
                                  buffer_size: int):
    return sc.DataArray(sc.zeros(dims=[name],
                                 shape=(buffer_size, ),
                                 unit=unit,
                                 dtype=dtype),
                        coords={
                            "time":
                            sc.zeros(dims=[name],
                                     shape=(buffer_size, ),
                                     unit=sc.Unit("nanoseconds"),
                                     dtype=np.dtype('datetime64[ns]'))
                        })
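
# A hypothetical usage sketch for the helper above (the log name "temperature"
# and the sizes are illustrative): the returned DataArray pairs a
# zero-initialised value buffer with a matching datetime64 "time" coordinate.
import numpy as np
import scipp as sc

buffer = _create_metadata_buffer_array("temperature", "K", np.float64, 1000)
assert buffer.sizes == {"temperature": 1000}
assert buffer.data.unit == sc.Unit("K")
assert buffer.coords["time"].unit == sc.Unit("ns")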
def _get_unit(attributes: h5py.AttributeManager, transform_name: str) -> sc.Unit:
    try:
        unit_str = attributes["units"]
    except KeyError:
        raise TransformationError(
            f"Missing units for transformation at {transform_name}")
    try:
        unit = sc.Unit(unit_str)
    except RuntimeError:
        raise TransformationError(f"Unrecognised units '{unit_str}' for "
                                  f"transformation at {transform_name}")
    return unit
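
# A hypothetical usage sketch for _get_unit, exercised against an in-memory
# HDF5 file; the group layout and dataset name are illustrative only.
import h5py
import scipp as sc

with h5py.File("in_memory.nxs", "w", driver="core", backing_store=False) as f:
    dset = f.create_dataset("transformations/translation", data=[1.0])
    dset.attrs["units"] = "m"
    assert _get_unit(dset.attrs, dset.name) == sc.Unit("m")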
def test_stream_object_as_transformation_results_in_warning():
    builder = NexusBuilder()
    builder.add_component(Source("source"))
    stream_path = "/entry/streamed_nxlog_transform"
    builder.add_stream(Stream(stream_path))
    builder.add_dataset_at_path("/entry/source/depends_on", stream_path, {})

    with pytest.warns(UserWarning):
        loaded_data, _ = _load_nexus_json(builder.json_string)

    # A 0 distance translation is used in place of the streamed transformation
    default = [0, 0, 0]
    assert np.allclose(loaded_data["source_position"].values, default)
    assert loaded_data["source_position"].unit == sc.Unit("m")
def make_variables_from_run_logs(ws):
    for property_name in ws.run().keys():
        units_string = ws.run()[property_name].units
        try:
            unit = additional_unit_mapping.get(units_string,
                                               sc.Unit(units_string))
        except RuntimeError:  # TODO catch UnitError once exposed from C++
            # Parsing the unit string failed
            unit = None

        values = deepcopy(ws.run()[property_name].value)

        if units_string and unit is None:
            warnings.warn(f"Workspace run log '{property_name}' "
                          f"has unrecognised units: '{units_string}'")
        if unit is None:
            unit = sc.units.one

        try:
            times = deepcopy(ws.run()[property_name].times)
            is_time_series = True
            dimension_label = "time"
        except AttributeError:
            times = None
            is_time_series = False
            dimension_label = property_name

        if np.isscalar(values):
            property_data = sc.scalar(values, unit=unit)
        else:
            property_data = sc.Variable(values=values,
                                        unit=unit,
                                        dims=[dimension_label])

        if is_time_series:
            # If the property has timestamps, create a DataArray
            data_array = sc.DataArray(data=property_data,
                                      coords={
                                          dimension_label:
                                          sc.Variable(dims=[dimension_label],
                                                      values=times)
                                      })
            yield property_name, sc.scalar(data_array)
        elif not np.isscalar(values):
            # If the property is multi-valued, create a wrapper single-value
            # variable. This prevents interference with global dimensions
            # for the output Dataset.
            yield property_name, sc.scalar(property_data)
        else:
            yield property_name, property_data
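
# A hypothetical usage sketch, reusing the test file referenced elsewhere in
# this section; assumes Mantid is available. Log names such as
# "proton_charge" are illustrative.
from mantid.simpleapi import Load
import scippneutron as scn

ws = Load(scn.data.get_path("CNCS_51936_event.nxs"))
logs = dict(make_variables_from_run_logs(ws))
# Time-series logs are wrapped in scalar variables; .value recovers the
# underlying DataArray with its "time" coordinate:
# proton_charge = logs["proton_charge"].value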
def test_extract_energy_final():
    # Efinal is often stored in a non-default parameter file
    parameters = {
        'IN16B': 'IN16B_silicon_311_Parameters.xml',
        'IRIS': 'IRIS_mica_002_Parameters.xml',
        'OSIRIS': 'OSIRIS_graphite_002_Parameters.xml',
        'BASIS': 'BASIS_silicon_311_Parameters.xml'
    }
    unsupported = [
        'ZEEMANS', 'MARS', 'IN10', 'IN13', 'IN16', 'VISION', 'VESUVIO'
    ]
    for instr in _all_indirect(blacklist=unsupported):
        out = _load_indirect_instrument(instr, parameters)
        ds = scn.from_mantid(out)
        efs = ds.coords["final_energy"]
        assert not sc.all(sc.isnan(efs)).value
        assert efs.unit == sc.Unit("meV")
def test_convert_tof_to_energy_elastic():
    tof = make_test_data(coords=('tof', 'Ltotal'), dataset=True)
    energy = scn.convert(tof, origin='tof', target='energy', scatter=True)
    check_tof_conversion_metadata(energy, 'energy', sc.units.meV)

    tof_in_seconds = sc.to_unit(tof.coords['tof'], 's')
    # e [J] = 1/2 m(n) [kg] (l [m] / tof [s])^2
    joule_to_mev = sc.to_unit(1.0 * sc.Unit('J'), sc.units.meV).value
    neutron_mass = sc.to_unit(m_n, sc.units.kg).value

    # Spectrum 0 is 11 m from the source
    for val, t in zip(energy.coords['energy']['spectrum', 0].values,
                      tof_in_seconds.values):
        np.testing.assert_almost_equal(
            val, joule_to_mev * neutron_mass / 2 * (11 / t)**2, val * 1e-3)
    # Spectrum 1
    L = 10.0 + math.sqrt(1.0 * 1.0 + 0.1 * 0.1)
    for val, t in zip(energy.coords['energy']['spectrum', 1].values,
                      tof_in_seconds.values):
        np.testing.assert_almost_equal(
            val, joule_to_mev * 0.5 * neutron_mass * (L / t)**2, val * 1e-3)
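
# A worked instance (not part of the test) of the formula in the comment
# above, for the illustrative values L = 11 m and tof = 1 ms; m_n (the
# neutron mass) is assumed here to come from scipp.constants.
import scipp as sc
from scipp.constants import m_n

L = 11.0 * sc.Unit('m')
t = 1.0e-3 * sc.Unit('s')
E = sc.to_unit(m_n / 2 * (L / t)**2, 'meV')
# v = L/t = 1.1e4 m/s, so E = 1/2 m_n v^2 evaluates to roughly 632 meV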
def _extract_efinal(ws, spec_dim):
    detInfo = ws.detectorInfo()
    specInfo = ws.spectrumInfo()
    ef = np.empty(shape=(specInfo.size(), ), dtype=float)
    ef[:] = np.nan
    analyser_ef = _get_instrument_efixed(workspace=ws)
    ids = detInfo.detectorIDs()
    for spec_index in range(len(specInfo)):
        detector_ef = None
        if specInfo.hasDetectors(spec_index):
            # Just like Mantid, we only take the first entry of the group.
            det_index = specInfo.getSpectrumDefinition(spec_index)[0][0]
            detector_ef = _try_except(op=ws.getEFixed,
                                      possible_except=RuntimeError,
                                      failure=None,
                                      detId=int(ids[det_index]))
        detector_ef = detector_ef if detector_ef is not None else analyser_ef
        if not detector_ef:
            # Cannot assign an Ef. May or may not be an error
            # - i.e. a diffraction detector, monitor, etc.
            continue
        ef[spec_index] = detector_ef
    return sc.Variable(dims=[spec_dim], values=ef, unit=sc.Unit("meV"))
"", start_time=self._start_time, stop_time=self._stop_time, nexus_structure=self._nexus_structure)) def seek(self, partition: TopicPartition): pass def offsets_for_times(self, partitions: List[TopicPartition]): self.queried_timestamp = partitions[0].offset return partitions # Short time to use for buffer emit and data_stream interval in tests # pass or fail fast! SHORT_TEST_INTERVAL = 100. * sc.Unit('milliseconds') # Small buffer of 20 events is sufficient for the tests TEST_BUFFER_SIZE = 20 TEST_STREAM_ARGS = { "kafka_broker": "broker", "topics": ["topic"], "interval": SHORT_TEST_INTERVAL, "event_buffer_size": TEST_BUFFER_SIZE, "slow_metadata_buffer_size": TEST_BUFFER_SIZE, "fast_metadata_buffer_size": TEST_BUFFER_SIZE, "chopper_buffer_size": TEST_BUFFER_SIZE, "consumer_type": ConsumerType.FAKE, "timeout": 10. * sc.units.s } # "timeout" arg: if something gets broken then this makes sure the
def test_default_unit():
    u = sc.Unit()
    assert u == sc.units.dimensionless
def _load_log_data_from_group(group: Group,
                              nexus: LoadFromNexus,
                              select=tuple()) -> Tuple[str, sc.Variable]:
    property_name = nexus.get_name(group)
    value_dataset_name = "value"
    time_dataset_name = "time"

    # TODO This is wrong if the log just has a single value. Can we check
    # the shape in advance?
    index = to_plain_index(["time"], select)
    try:
        values = nexus.load_dataset_from_group_as_numpy_array(group,
                                                              value_dataset_name,
                                                              index=index)
    except MissingDataset:
        if nexus.contains_stream(group):
            raise SkipSource("Log is missing value dataset but contains stream")
        raise BadSource(f"NXlog '{property_name}' has no value dataset")

    if values.size == 0:
        raise BadSource(f"NXlog '{property_name}' has an empty value dataset")

    unit = nexus.get_unit(nexus.get_dataset_from_group(group, value_dataset_name))
    try:
        unit = sc.Unit(unit)
    except sc.UnitError:
        warn(f"Unrecognized unit '{unit}' for value dataset "
             f"in NXlog '{group.name}'; setting unit as 'dimensionless'")
        unit = sc.units.dimensionless

    try:
        is_time_series = True
        dimension_label = "time"
        times = load_time_dataset(nexus,
                                  group,
                                  time_dataset_name,
                                  dim=dimension_label,
                                  index=index)
        if tuple(times.shape) != values.shape:
            raise BadSource(f"NXlog '{property_name}' has time and value "
                            f"datasets of different shapes")
    except MissingDataset:
        dimension_label = property_name
        is_time_series = False

    # TODO Is NXlog similar to NXdata such that we could use that for loading?
    if np.ndim(values) > 1:
        raise BadSource(f"NXlog '{property_name}' has {value_dataset_name} "
                        f"dataset with more than 1 dimension, handling "
                        f"this is not yet implemented")

    if np.ndim(values) == 0:
        property_data = sc.scalar(values,
                                  unit=unit,
                                  dtype=nexus.get_dataset_numpy_dtype(
                                      nexus.get_dataset_from_group(
                                          group, value_dataset_name)))
    else:
        property_data = sc.Variable(values=values,
                                    unit=unit,
                                    dims=[dimension_label],
                                    dtype=nexus.get_dataset_numpy_dtype(
                                        nexus.get_dataset_from_group(
                                            group, value_dataset_name)))

    if is_time_series:
        # If property has timestamps, create a DataArray
        data_array = {"data": property_data, "coords": {dimension_label: times}}
        return property_name, sc.scalar(sc.DataArray(**data_array))
    elif not np.isscalar(values):
        # If property is multi-valued, create a wrapper single
        # value variable. This prevents interference with
        # global dimensions for the output Dataset.
        return property_name, sc.scalar(property_data)
    return property_name, property_data
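
# A sketch of how the return value can be consumed, assuming `group` and
# `nexus` objects as in the signature above; the access pattern is the same
# scalar-wrapping convention used by make_variables_from_run_logs.
name, wrapped = _load_log_data_from_group(group, nexus)
# For a time-series log the result is a scalar variable wrapping a DataArray;
# .value recovers it, with values in .data and timestamps in .coords["time"]:
# log = wrapped.value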
def unit(self) -> Union[sc.Unit, None]:
    if 'units' in self.attrs:
        return sc.Unit(self._loader.get_unit(self._dataset))
    return None
async def _data_stream(
        data_queue: mp.Queue,
        worker_instruction_queue: mp.Queue,
        kafka_broker: str,
        topics: Optional[List[str]],
        interval: sc.Variable,
        event_buffer_size: int,
        slow_metadata_buffer_size: int,
        fast_metadata_buffer_size: int,
        chopper_buffer_size: int,
        run_info_topic: Optional[str] = None,
        start_at: StartTime = StartTime.NOW,
        end_at: StopTime = StopTime.NEVER,
        query_consumer: Optional["KafkaQueryConsumer"] = None,  # noqa: F821
        consumer_type: ConsumerType = ConsumerType.REAL,
        halt_after_n_data_chunks: int = np.iinfo(np.int32).max,  # for tests
        halt_after_n_warnings: int = np.iinfo(np.int32).max,  # for tests
        test_message_queue: Optional[mp.Queue] = None,  # for tests
        timeout: Optional[sc.Variable] = None,  # for tests
) -> Generator[sc.DataArray, None, None]:
    """
    Main implementation of data stream is extracted to this function so that
    fake consumers can be injected for unit tests
    """
    try:
        from ._consumer import (get_run_start_message, KafkaQueryConsumer)
        from ._data_consumption_manager import (data_consumption_manager,
                                                ManagerInstruction,
                                                InstructionType)
    except ImportError:
        raise ImportError(_missing_dependency_message)

    if topics is None and run_info_topic is None:
        raise ValueError("At least one of 'topics' and 'run_info_topic'"
                         " must be specified")

    # This is defaulted to None in the function signature
    # to avoid it having to be imported earlier
    if query_consumer is None:
        query_consumer = KafkaQueryConsumer(kafka_broker)

    # stream_info contains information on where to look for data and metadata.
    # The data from the start message is yielded as the first chunk of data.
    #
    # TODO: This should, in principle, not look any different from any other
    #       chunk of data; right now it seems it may be different
    #       (see https://github.com/scipp/scippneutron/issues/114).
    #
    # Generic data chunk structure: geometry, metadata, and event data are all
    # optional.
    # - the first data chunk will most probably contain no event data
    # - subsequent chunks can contain geometry info, if e.g. some pixels have
    #   moved
    # - metadata (e.g. sample environment) might be empty, if values have not
    #   changed
    stream_info = None
    run_id = ""
    run_title = "-"  # for display in widget
    stop_time_ms = None
    n_data_chunks = 0
    if run_info_topic is not None:
        # Search backwards to find the last run_start message
        run_start_info = get_run_start_message(run_info_topic, query_consumer)
        run_id = run_start_info.job_id
        run_title = run_start_info.run_name
        # The default value for stop_time in the message flatbuffer is 0;
        # it means that the field has not been populated
        if end_at == StopTime.END_OF_RUN and run_start_info.stop_time != 0:
            stop_time_ms = run_start_info.stop_time
        if topics is None:
            loaded_data, stream_info = _load_nexus_json(
                run_start_info.nexus_structure, get_start_info=True)
            topics = [stream.topic for stream in stream_info]
        else:
            loaded_data, _ = _load_nexus_json(run_start_info.nexus_structure,
                                              get_start_info=False)
        topics.append(run_info_topic)  # listen for stop run message
        yield loaded_data
        n_data_chunks += 1

    if start_at == StartTime.START_OF_RUN:
        start_time = run_start_info.start_time * sc.Unit("milliseconds")
    else:
        start_time = time.time() * sc.units.s

    # Convert to int and float as easier to pass to mp.Process
    # (sc.Variable would have to be serialised/deserialised)
    start_time_ms = int(sc.to_unit(start_time, "milliseconds").value)
    interval_s = float(sc.to_unit(interval, 's').value)

    # Start the process using the "spawn" method: on Linux the default is to
    # fork the Python interpreter, which is "problematic" in a multithreaded
    # process and can apparently even cause problems with multiprocessing's
    # own Queue.
    # See documentation:
    # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    # The PyTorch docs also mention the Queue problem:
    # https://pytorch.org/docs/stable/notes/multiprocessing.html
    data_collect_process = mp.get_context("spawn").Process(
        target=data_consumption_manager,
        args=(start_time_ms, stop_time_ms, run_id, topics, kafka_broker,
              consumer_type, stream_info, interval_s, event_buffer_size,
              slow_metadata_buffer_size, fast_metadata_buffer_size,
              chopper_buffer_size, worker_instruction_queue, data_queue,
              test_message_queue))
    try:
        data_stream_widget = DataStreamWidget(start_time_ms=start_time_ms,
                                              stop_time_ms=stop_time_ms,
                                              run_title=run_title)
        data_collect_process.start()

        # When testing, if something goes wrong, the while loop below can
        # become infinite. So we introduce a timeout.
        if timeout is not None:
            start_timeout = time.time()
            timeout_s = float(sc.to_unit(timeout, 's').value)
        n_warnings = 0
        while data_collect_process.is_alive() \
                and n_data_chunks < halt_after_n_data_chunks \
                and n_warnings < halt_after_n_warnings \
                and not data_stream_widget.stop_requested:
            if timeout is not None and (time.time() - start_timeout) > timeout_s:
                raise TimeoutError("data_stream timed out in test")
            try:
                new_data = data_queue.get_nowait()

                if isinstance(new_data, Warning):
                    # Raise warnings in this process so that they
                    # can be captured in tests
                    warn(new_data)
                    n_warnings += 1
                    continue
                elif isinstance(new_data, StopTimeUpdate):
                    data_stream_widget.set_stop_time(new_data.stop_time_ms)
                    if end_at == StopTime.END_OF_RUN:
                        worker_instruction_queue.put(
                            ManagerInstruction(InstructionType.UPDATE_STOP_TIME,
                                               new_data.stop_time_ms))
                    continue

                n_data_chunks += 1
                yield convert_from_pickleable_dict(new_data)
            except QueueEmpty:
                await asyncio.sleep(0.5 * interval_s)
    finally:
        # Ensure cleanup happens however the loop exits
        worker_instruction_queue.put(ManagerInstruction(InstructionType.STOP_NOW))
        if data_collect_process.is_alive():
            process_halt_timeout_s = 4.
            data_collect_process.join(process_halt_timeout_s)
        if data_collect_process.is_alive():
            data_collect_process.terminate()
        for queue in (data_queue, worker_instruction_queue, test_message_queue):
            _cleanup_queue(queue)
        data_stream_widget.set_stopped()
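
# A hypothetical usage sketch: the public coroutine wrapping _data_stream is
# assumed here to be exposed as scippneutron.data_stream and to default the
# buffer-size and queue arguments; the broker and topic names are invented.
import scippneutron as scn

async def show_live_data():
    async for chunk in scn.data_stream(kafka_broker="localhost:9092",
                                       topics=["instrument_events"]):
        # Each chunk is a scipp DataArray; accumulate, reduce, or plot it here
        print(chunk)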