Example #1
0
def test_adaptive_energy_range_builder():
    """
    Check the energy range AdaptiveEnergyRangeBuilder produces for one
    unknown spectrum and two reference spectra parsed from in-memory text.

    The fit range is expected to hold two energies, 11765.0 through 11771.0,
    and the index mask is expected to select only the two middle points.
    """
    unknown = Spectrum.read_file(io.StringIO(unknown_1_text))
    references = [
        Spectrum.read_file(io.StringIO(reference_text))
        for reference_text in (reference_1_text, reference_2_text)
    ]

    builder = AdaptiveEnergyRangeBuilder()
    fit_energies, fit_energy_indices = builder.build_range(
        unknown_spectrum=unknown,
        reference_spectrum_seq=references,
    )

    # two energies survive, bounded by the expected first and last values
    assert fit_energies.shape == (2,)
    assert fit_energies.iloc[0] == 11765.0
    assert fit_energies.iloc[-1] == 11771.0
    # the mask keeps the interior points and drops both end points
    assert fit_energy_indices.tolist() == [False, True, True, False]
Example #2
0
    def build(cls, config):
        """
        Build a fit task from a parsed configuration file.

        Reference spectra are taken from the first of these that applies:
          * section [references] with option ``prm`` (path to a PRM file)
          * section [references] without ``prm`` (handled by
            build_reference_spectrum_list_from_config_file)
          * section [reference_spectra] with the required option ``prm``

        Unknown spectra are read from every file matching the globs given as
        option names in section [data].

        :param config: configuration object providing has_section, has_option,
            get and items (presumably a configparser instance -- confirm
            against callers)
        :return: an instance of cls constructed from the reference spectra,
            the unknown spectra, the configured energy range and the
            component count range
        :raises Exception: if neither reference section is present, or if
            [reference_spectra] lacks the option ``prm``
        :raises ValueError: if the component counts do not satisfy
            0 < minimum <= maximum
        """
        log = logging.getLogger(name=str(cls))

        # read section [references]
        # support a PRM file such as
        #   prm = path/to/one.prm
        # or
        # a list of one or more file globs such as
        #   arsenic_2_reference_spectra/*.e
        #   arsenic_3_reference_spectra/*.e

        if config.has_section('references'):
            if config.has_option('references', 'prm'):
                prm_file_path = os.path.expanduser(config.get('references', 'prm'))
                reference_spectrum_list, max_cmp, min_cmp = cls.build_reference_spectrum_list_from_prm_file(prm_file_path)
            else:
                reference_spectrum_list, max_cmp, min_cmp = cls.build_reference_spectrum_list_from_config_file(config)
        elif config.has_section('reference_spectra'):
            if not config.has_option('reference_spectra', 'prm'):
                raise Exception('section [reference_spectra] is missing required parameter prm')
            # read the same option name that was just checked for
            prm_file_path = os.path.expanduser(config.get('reference_spectra', 'prm'))
            reference_spectrum_list, max_cmp, min_cmp = cls.build_reference_spectrum_list_from_prm_file(prm_file_path)
        else:
            raise Exception('configuration file is missing section [references]')

        # stop here on a bad range: continuing would leave
        # component_count_range unassigned when the task is constructed
        if not 0 < min_cmp <= max_cmp:
            raise ValueError(
                'minimum and maximum component counts are not valid: '
                'minimum {}, maximum {}'.format(min_cmp, max_cmp)
            )
        component_count_range = range(min_cmp, max_cmp + 1)
        log.info('component count range: {}'.format(component_count_range))

        energy_range = cls.get_energy_range_from_config(config)

        # read data files; the globs are the option names of section [data]
        unknown_spectrum_file_path_list = []
        for unknown_spectrum_glob, _ in config.items('data'):
            log.info('unknown spectrum glob: {}'.format(unknown_spectrum_glob))
            glob_pattern_expanded = os.path.expanduser(unknown_spectrum_glob)
            unknown_spectrum_file_path_list.extend(glob.glob(glob_pattern_expanded))
        log.info('found {} data files'.format(len(unknown_spectrum_file_path_list)))

        unknown_spectrum_list = []
        for unknown_spectrum_file_path in unknown_spectrum_file_path_list:
            log.info('reading data file {}'.format(unknown_spectrum_file_path))
            unknown_spectrum_list.append(Spectrum.read_file(unknown_spectrum_file_path))

        return cls(
            reference_spectrum_list=reference_spectrum_list,
            unknown_spectrum_list=unknown_spectrum_list,
            energy_range_builder=energy_range,
            component_count_range=component_count_range
        )
Example #3
0
def test_arsenic_1(caplog, request):
    """
    Fit one known arsenic spectrum against every available reference spectrum.

    Input files are looked up in a directory named after this test module
    (the module path without its extension), under 'reference' and 'data'.
    See also: http://stackoverflow.com/questions/29627341/pytest-where-to-store-expected-data.

    :param caplog: pytest fixture used to capture log records at INFO level
    :param request: pytest fixture with information about the path to this test file
    :return:
    """

    caplog.set_level(logging.INFO)

    module_file_path = request.module.__file__
    log.info('test_arsenic_fit_fp: {}'.format(module_file_path))
    fixture_dir_path = os.path.splitext(module_file_path)[0]

    # every '*.e' file takes part; an 'arsenate_*.e' pattern would restrict the set
    reference_glob = os.path.join(fixture_dir_path, 'reference', '*.e')
    unknown_file_path = os.path.join(fixture_dir_path, 'data', 'OTT3_55_spot0.e')

    reference_spectrum_list = []
    for reference_file_path in glob(reference_glob):
        reference_spectrum_list.append(ReferenceSpectrum.read_file(reference_file_path))
    log.info(reference_spectrum_list)

    unknown_spectrum = Spectrum.read_file(unknown_file_path)
    log.info(unknown_spectrum)

    task = AllCombinationFitTask(
        ls=LinearRegression,
        energy_range_builder=AdaptiveEnergyRangeBuilder(),
        reference_spectrum_list=reference_spectrum_list,
        unknown_spectrum_list=[unknown_spectrum],
        best_fits_plot_limit=1,
        component_count_range=range(1, 4),
    )

    # plots are written to a throwaway directory that is removed afterwards
    with tempfile.TemporaryDirectory() as plots_pdf_dp:
        task.fit_all(plots_pdf_dp=plots_pdf_dp)

        best_fit = task.fit_table[unknown_spectrum].best_fit

        # fitted, measured and residual arrays must line up with the energies
        for array_name in ('fit_spectrum_b', 'unknown_spectrum_b', 'residuals'):
            assert best_fit.interpolant_incident_energy.shape == getattr(best_fit, array_name).shape

        # the best fit is expected to use three reference spectra
        assert 3 == len(best_fit.reference_spectra_seq)
Example #4
0
def test_arsenic_1(caplog, request):
    """
    Test fits for known arsenic data and reference_spectra.

    Data and reference files are looked up in a directory named after this
    test module (the module path without its extension), under 'data' and
    'reference'.
    See also: http://stackoverflow.com/questions/29627341/pytest-where-to-store-expected-data.

    :param caplog: pytest fixture used to capture log records at INFO level
    :param request: pytest fixture with information about the path to this test file
    :return:
    """

    # set_level is the supported spelling on the caplog fixture
    caplog.set_level(logging.INFO)

    test_arsenic_fit_fp = request.module.__file__
    log.info('test_arsenic_fit_fp: {}'.format(test_arsenic_fit_fp))
    test_arsenic_fit_dir_path, _ = os.path.splitext(test_arsenic_fit_fp)

    # every '*.e' file takes part; an 'arsenate_*.e' pattern would restrict the set
    reference_file_path_pattern = os.path.join(test_arsenic_fit_dir_path, 'reference', '*.e')
    data_file_path = os.path.join(test_arsenic_fit_dir_path, 'data', 'OTT3_55_spot0.e')

    reference_spectrum_list = [
        ReferenceSpectrum.read_file(file_path)
        for file_path
        in glob(reference_file_path_pattern)
    ]
    log.info(reference_spectrum_list)

    unknown_spectrum = Spectrum.read_file(data_file_path)
    log.info(unknown_spectrum)

    task = AllCombinationFitTask(
        energy_range_builder=AdaptiveEnergyRangeBuilder(),
        reference_spectrum_list=reference_spectrum_list,
        unknown_spectrum_list=[unknown_spectrum, ],
        component_count_range=range(1, 3+1)
    )

    task.fit_all()

    unknown_spectrum_fit = task.fit_table[unknown_spectrum]

    # fitted, measured and residual arrays must line up with the energies
    assert unknown_spectrum_fit.best_fit.interpolant_incident_energy.shape == unknown_spectrum_fit.best_fit.fit_spectrum_b.shape
    assert unknown_spectrum_fit.best_fit.interpolant_incident_energy.shape == unknown_spectrum_fit.best_fit.unknown_spectrum_b.shape
    assert unknown_spectrum_fit.best_fit.interpolant_incident_energy.shape == unknown_spectrum_fit.best_fit.residuals.shape

    # the best fit is expected to use three reference spectra
    assert 3 == len(unknown_spectrum_fit.best_fit.reference_spectra_seq)
Example #5
0
def build_unknown_spectrum_list_from_config_file(config):
    """
    Read every unknown spectrum named by the globs in section [data].

    Each option name in section [data] is treated as a file glob; '~' is
    expanded before matching, and every matching file is read with
    Spectrum.read_file.

    :param config: configuration object providing items('data')
    :return: list of spectra, one per matching file
    :raises ConfigurationFileError: if the globs match no files
    """
    log = logging.getLogger(name=__name__)

    # collect matching paths first so the total can be logged before reading
    data_file_paths = []
    for data_glob, _ in config.items('data'):
        log.info('unknown spectrum glob: {}'.format(data_glob))
        data_file_paths.extend(glob(os.path.expanduser(data_glob)))
    log.info('found {} data files'.format(len(data_file_paths)))

    spectra = []
    for data_file_path in data_file_paths:
        log.info('reading data file {}'.format(data_file_path))
        spectra.append(Spectrum.read_file(data_file_path))

    if not spectra:
        raise ConfigurationFileError('no spectrum files were found using globs "{}"'.format(config.items('data')))
    return spectra