def test_adaptive_energy_range_builder():
    """
    Check the range AdaptiveEnergyRangeBuilder builds for one unknown and two reference spectra.

    The spectra are parsed from in-memory text. The builder is expected to return two fit
    energies, 11765.0 through 11771.0, together with a four-element boolean selection in
    which only the two middle entries are True.
    """

    def parse_spectrum(spectrum_text):
        # Spectrum.read_file is handed a file-like object here, so wrap the text in StringIO
        return Spectrum.read_file(io.StringIO(spectrum_text))

    unknown = parse_spectrum(unknown_1_text)
    first_reference = parse_spectrum(reference_1_text)
    second_reference = parse_spectrum(reference_2_text)

    range_builder = AdaptiveEnergyRangeBuilder()
    fit_energies, fit_energy_indices = range_builder.build_range(
        unknown_spectrum=unknown,
        reference_spectrum_seq=[first_reference, second_reference],
    )

    assert fit_energies.shape == (2, )
    assert fit_energies.iloc[0] == 11765.0
    assert fit_energies.iloc[-1] == 11771.0

    expected_selection = [False, True, True, False]
    assert fit_energy_indices.tolist() == expected_selection
def build(cls, config):
    """
    Construct a fit task from a parsed configuration file.

    Reference spectra come from one of two sections:

    * ``[references]`` -- either a PRM file such as ::

          prm = path/to/one.prm

      or, when there is no ``prm`` option, whatever
      ``cls.build_reference_spectrum_list_from_config_file`` reads from the
      configuration, e.g. a list of one or more file globs such as ::

          arsenic_2_reference_spectra/*.e
          arsenic_3_reference_spectra/*.e

    * ``[reference_spectra]`` -- must contain a ``prm`` option.

    Unknown spectra are read from every file matched by the option names of the
    ``[data]`` section, each of which is treated as a file glob (``~`` is expanded).

    :param config: parsed configuration; must provide ``has_section``, ``has_option``,
        ``get`` and ``items``
    :return: an instance of ``cls`` built from the reference spectra, unknown spectra,
        energy range and component count range
    :raises Exception: if neither reference section is present, if ``[reference_spectra]``
        lacks ``prm``, or if the minimum/maximum component counts are not valid
    """
    log = logging.getLogger(name=str(cls))

    # read the reference section; the builders return (spectra, max count, min count)
    if config.has_section('references'):
        if config.has_option('references', 'prm'):
            prm_file_path = os.path.expanduser(config.get('references', 'prm'))
            reference_spectrum_list, max_cmp, min_cmp = \
                cls.build_reference_spectrum_list_from_prm_file(prm_file_path)
        else:
            reference_spectrum_list, max_cmp, min_cmp = \
                cls.build_reference_spectrum_list_from_config_file(config)
    elif config.has_section('reference_spectra'):
        if not config.has_option('reference_spectra', 'prm'):
            raise Exception('section [reference_spectra] is missing required parameter prm')
        # read the same option that was just checked ('prm'); reading 'pem' always failed
        prm_file_path = os.path.expanduser(config.get('reference_spectra', 'prm'))
        reference_spectrum_list, max_cmp, min_cmp = \
            cls.build_reference_spectrum_list_from_prm_file(prm_file_path)
    else:
        raise Exception('configuration file is missing section [references]')

    # fail here with a clear message rather than later with an unbound
    # component_count_range
    if not 0 < min_cmp <= max_cmp:
        raise Exception(
            'minimum and maximum component counts are not valid (min {}, max {})'.format(
                min_cmp, max_cmp))
    component_count_range = range(min_cmp, max_cmp + 1)
    log.info('component count range: {}'.format(component_count_range))

    energy_range = cls.get_energy_range_from_config(config)

    # read data files: option names in [data] are globs, option values are ignored
    unknown_spectrum_file_path_list = []
    for unknown_spectrum_glob, _ in config.items('data'):
        log.info('unknown spectrum glob: {}'.format(unknown_spectrum_glob))
        glob_pattern_expanded = os.path.expanduser(unknown_spectrum_glob)
        unknown_spectrum_file_path_list.extend(glob.glob(glob_pattern_expanded))
    log.info('found {} data files'.format(len(unknown_spectrum_file_path_list)))

    unknown_spectrum_list = []
    for unknown_spectrum_file_path in unknown_spectrum_file_path_list:
        log.info('reading data file {}'.format(unknown_spectrum_file_path))
        unknown_spectrum_list.append(Spectrum.read_file(unknown_spectrum_file_path))

    return cls(
        reference_spectrum_list=reference_spectrum_list,
        unknown_spectrum_list=unknown_spectrum_list,
        energy_range_builder=energy_range,
        component_count_range=component_count_range
    )
def test_arsenic_1(caplog, request):
    """
    Fit one known arsenic spectrum against all combinations of one to three references.

    Data and reference files are looked up in a directory that sits beside this test
    module and carries the module's file name without its extension
    (e.g. 'test_arsenic_fit'), in the subdirectories 'data' and 'reference'.
    See also: http://stackoverflow.com/questions/29627341/pytest-where-to-store-expected-data.

    :param caplog: pytest fixture, used to set the log level to INFO
    :param request: pytest fixture with information about the path to this test file
    :return:
    """
    caplog.set_level(logging.INFO)

    module_file_path = request.module.__file__
    log.info('test_arsenic_fit_fp: {}'.format(module_file_path))
    fixture_dir_path = os.path.splitext(module_file_path)[0]

    # every '*.e' file is used as a reference; substitute 'arsenate_*.e' to restrict the set
    reference_glob = os.path.join(fixture_dir_path, 'reference', '*.e')
    unknown_file_path = os.path.join(fixture_dir_path, 'data', 'OTT3_55_spot0.e')

    references = []
    for reference_file_path in glob(reference_glob):
        references.append(ReferenceSpectrum.read_file(reference_file_path))
    log.info(references)

    unknown = Spectrum.read_file(unknown_file_path)
    log.info(unknown)

    fit_task = AllCombinationFitTask(
        ls=LinearRegression,
        energy_range_builder=AdaptiveEnergyRangeBuilder(),
        reference_spectrum_list=references,
        unknown_spectrum_list=[unknown],
        best_fits_plot_limit=1,
        component_count_range=range(1, 3 + 1),
    )

    # plots are written to a throwaway directory that is removed when the block exits
    with tempfile.TemporaryDirectory() as plots_pdf_dp:
        fit_task.fit_all(plots_pdf_dp=plots_pdf_dp)

    best_fit = fit_task.fit_table[unknown].best_fit
    energy_shape = best_fit.interpolant_incident_energy.shape

    # every per-energy array of the best fit must line up with the interpolation energies
    assert energy_shape == best_fit.fit_spectrum_b.shape
    assert energy_shape == best_fit.unknown_spectrum_b.shape
    assert energy_shape == best_fit.residuals.shape

    assert 3 == len(best_fit.reference_spectra_seq)
def test_arsenic_1(caplog, request):
    """
    Test fits for known arsenic data and reference_spectra.

    Expect to find data and reference files in a directory that sits beside this test
    module and is named after it without the file extension (e.g. 'test_arsenic_fit'),
    in the subdirectories 'data' and 'reference'.
    See also: http://stackoverflow.com/questions/29627341/pytest-where-to-store-expected-data.

    :param caplog: pytest fixture, used to set the log level to INFO
    :param request: pytest fixture with information about the path to this test file
    :return:
    """
    # pytest's built-in caplog fixture spells this set_level, not setLevel
    caplog.set_level(logging.INFO)

    test_arsenic_fit_fp = request.module.__file__
    log.info('test_arsenic_fit_fp: {}'.format(test_arsenic_fit_fp))
    test_arsenic_fit_dir_path, _ = os.path.splitext(test_arsenic_fit_fp)

    # all '*.e' files are used as references; 'arsenate_*.e' would restrict the set
    reference_file_path_pattern = os.path.join(test_arsenic_fit_dir_path, 'reference', '*.e')
    data_file_path = os.path.join(test_arsenic_fit_dir_path, 'data', 'OTT3_55_spot0.e')

    reference_spectrum_list = [
        ReferenceSpectrum.read_file(file_path)
        for file_path in glob(reference_file_path_pattern)
    ]
    log.info(reference_spectrum_list)

    unknown_spectrum = Spectrum.read_file(data_file_path)
    log.info(unknown_spectrum)

    task = AllCombinationFitTask(
        energy_range_builder=AdaptiveEnergyRangeBuilder(),
        reference_spectrum_list=reference_spectrum_list,
        unknown_spectrum_list=[unknown_spectrum, ],
        component_count_range=range(1, 3+1)
    )
    task.fit_all()

    unknown_spectrum_fit = task.fit_table[unknown_spectrum]
    best_fit = unknown_spectrum_fit.best_fit

    # every per-energy array of the best fit must line up with the interpolation energies
    assert best_fit.interpolant_incident_energy.shape == best_fit.fit_spectrum_b.shape
    assert best_fit.interpolant_incident_energy.shape == best_fit.unknown_spectrum_b.shape
    assert best_fit.interpolant_incident_energy.shape == best_fit.residuals.shape

    assert 3 == len(best_fit.reference_spectra_seq)
def build_unknown_spectrum_list_from_config_file(config):
    """
    Read every spectrum file matched by the globs listed in the [data] section.

    Each option name in ``[data]`` is treated as a file glob (``~`` is expanded);
    the option values are ignored.

    :param config: parsed configuration providing ``items('data')``
    :return: list of the objects returned by ``Spectrum.read_file``, one per matched file
    :raises ConfigurationFileError: if the globs match no files at all
    """
    log = logging.getLogger(name=__name__)

    # first pass: expand all globs into one flat list of file paths
    data_file_paths = []
    for data_glob, _ in config.items('data'):
        log.info('unknown spectrum glob: {}'.format(data_glob))
        data_file_paths += glob(os.path.expanduser(data_glob))
    log.info('found {} data files'.format(len(data_file_paths)))

    # second pass: read each file that was found
    spectra = []
    for data_file_path in data_file_paths:
        log.info('reading data file {}'.format(data_file_path))
        spectra.append(Spectrum.read_file(data_file_path))

    if not spectra:
        raise ConfigurationFileError(
            'no spectrum files were found using globs "{}"'.format(config.items('data')))
    return spectra