Esempio n. 1
0
    def process(self, **kwargs):
        """Process module."""
        old_observations = tuple(
            getattr(self, '_' + x) for x in self._obs_keys)
        if (kwargs.get('root', 'output') == 'output'
                and 'extra_times' in kwargs):
            obslist = (
                list(
                    zip(*([kwargs.get(k) for k in self._okeys] +
                          [[True for x in range(len(kwargs['times']))]]))) +
                list(
                    zip(*([kwargs.get('extra_' + k) for k in self._okeys] +
                          [[False
                            for x in range(len(kwargs['extra_times']))]]))))
            obslist.sort()

            self._all_observations = np.concatenate(
                [np.atleast_2d(np.array(x, dtype=object)) for x in obslist],
                axis=0).T
            for ki, key in enumerate(self._obs_keys):
                setattr(self, '_' + key, self._all_observations[ki])
        else:
            for key in list(
                    set(self._obs_keys) - set(['frequencies', 'observed'])):
                setattr(self, '_' + key, kwargs[key])
            self._frequencies = np.array([
                x / frequency_unit(y) if x is not None else None
                for x, y in zip(kwargs['frequencies'], kwargs['u_frequencies'])
            ])
            self._observed = np.full_like(kwargs['times'], True, dtype=bool)
            self._all_observations = tuple(
                getattr(self, '_' + x) for x in self._obs_keys)

        outputs = OrderedDict([
            ('all_' + x, getattr(self, '_' + x))
            for x in list(set(self._obs_keys) - set(['observed']))
        ])
        if any(not np.array_equal(x, y)
               for x, y in zip(old_observations, self._all_observations)):
            self._all_band_indices = np.array([
                (self._photometry.find_band_index(
                    b, telescope=t, instrument=i, mode=m, bandset=bs, system=s)
                 if f is None else -1)
                for ti, b, t, s, i, m, bs, f, uf, zps, msr, o in zip(
                    *self._all_observations)
            ])
            self._observation_types = np.array([
                'magcount' if
                ('countrate' in msr and 'magnitude' in msr and zp is not None
                 and self._fitter._prefer_fluxes) else
                self._photometry._band_kinds[bi] if bi >= 0 else 'fluxdensity'
                for bi, zp, msr in zip(self._all_band_indices,
                                       self._zeropoints, self._measures)
            ],
                                               dtype=object)
        outputs['all_band_indices'] = self._all_band_indices
        outputs['observation_types'] = self._observation_types
        outputs['observed'] = np.array(self._observed, dtype=bool)
        return outputs
Esempio n. 2
0
    def fit_data(self,
                 event_name='',
                 method=None,
                 iterations=None,
                 frack_step=20,
                 num_walkers=None,
                 num_temps=1,
                 burn=None,
                 post_burn=None,
                 fracking=True,
                 gibbs=False,
                 pool=None,
                 output_path='',
                 suffix='',
                 write=False,
                 upload=False,
                 upload_token='',
                 check_upload_quality=True,
                 convergence_type=None,
                 convergence_criteria=None,
                 save_full_chain=False,
                 extra_outputs=None):
        """Fit the data for a given event.

        Fitting performed using a combination of emcee and fracking.
        """
        if self._speak:
            speak('Fitting ' + event_name, self._speak)
        from mosfit.__init__ import __version__
        global model
        model = self._model
        prt = self._printer

        upload_model = upload and iterations > 0

        if pool is not None:
            self._pool = pool

        if upload:
            try:
                import dropbox
            except ImportError:
                if self._test:
                    pass
                else:
                    prt.message('install_db', error=True)
                    raise

        if not self._pool.is_master():
            try:
                self._pool.wait()
            except (KeyboardInterrupt, SystemExit):
                pass
            return (None, None, None)

        self._method = method

        if self._method == 'nester':
            self._sampler = Nester(self, model, iterations, burn, post_burn,
                                   num_walkers, convergence_criteria,
                                   convergence_type, gibbs, fracking,
                                   frack_step)
        else:
            self._sampler = Ensembler(self, model, iterations, burn, post_burn,
                                      num_temps, num_walkers,
                                      convergence_criteria, convergence_type,
                                      gibbs, fracking, frack_step)

        self._sampler.run(self._walker_data)

        prt.message('constructing')

        if write:
            if self._speak:
                speak(prt._strings['saving_output'], self._speak)

        if self._event_path:
            entry = Entry.init_from_file(catalog=None,
                                         name=self._event_name,
                                         path=self._event_path,
                                         merge=False,
                                         pop_schema=False,
                                         ignore_keys=[ENTRY.MODELS],
                                         compare_to_existing=False)
            new_photometry = []
            for photo in entry.get(ENTRY.PHOTOMETRY, []):
                if PHOTOMETRY.REALIZATION not in photo:
                    new_photometry.append(photo)
            if len(new_photometry):
                entry[ENTRY.PHOTOMETRY] = new_photometry
        else:
            entry = Entry(name=self._event_name)

        uentry = Entry(name=self._event_name)
        data_keys = set()
        for task in model._call_stack:
            if model._call_stack[task]['kind'] == 'data':
                data_keys.update(
                    list(model._call_stack[task].get('keys', {}).keys()))
        entryhash = entry.get_hash(keys=list(sorted(list(data_keys))))

        # Accumulate all the sources and add them to each entry.
        sources = []
        for root in model._references:
            for ref in model._references[root]:
                sources.append(entry.add_source(**ref))
        sources.append(entry.add_source(**self._DEFAULT_SOURCE))
        source = ','.join(sources)

        usources = []
        for root in model._references:
            for ref in model._references[root]:
                usources.append(uentry.add_source(**ref))
        usources.append(uentry.add_source(**self._DEFAULT_SOURCE))
        usource = ','.join(usources)

        model_setup = OrderedDict()
        for ti, task in enumerate(model._call_stack):
            task_copy = deepcopy(model._call_stack[task])
            if (task_copy['kind'] == 'parameter'
                    and task in model._parameter_json):
                task_copy.update(model._parameter_json[task])
            model_setup[task] = task_copy
        modeldict = OrderedDict([(MODEL.NAME, model._model_name),
                                 (MODEL.SETUP, model_setup),
                                 (MODEL.CODE, 'MOSFiT'),
                                 (MODEL.DATE, time.strftime("%Y/%m/%d")),
                                 (MODEL.VERSION, __version__),
                                 (MODEL.SOURCE, source)])

        self._sampler.prepare_output(check_upload_quality, upload)

        self._sampler.append_output(modeldict)

        umodeldict = deepcopy(modeldict)
        umodeldict[MODEL.SOURCE] = usource
        modelhash = get_model_hash(umodeldict,
                                   ignore_keys=[MODEL.DATE, MODEL.SOURCE])
        umodelnum = uentry.add_model(**umodeldict)

        if self._sampler._upload_model is not None:
            upload_model = self._sampler._upload_model

        modelnum = entry.add_model(**modeldict)

        samples, probs, weights = self._sampler.get_samples()

        extras = OrderedDict()
        samples_to_plot = self._sampler._nwalkers

        if isinstance(self._sampler, Nester):
            icdf = np.cumsum(np.concatenate(([0.0], weights)))
            draws = np.random.rand(samples_to_plot)
            indices = np.searchsorted(icdf, draws) - 1
        else:
            indices = list(range(samples_to_plot))

        ri = 0
        selected_extra = False
        for xi, x in enumerate(samples):
            ri = ri + 1
            prt.message('outputting_walker', [ri, len(samples)],
                        inline=True,
                        min_time=0.2)
            if xi in indices:
                output = model.run_stack(x, root='output')
                if extra_outputs is not None:
                    if not extra_outputs and not selected_extra:
                        extra_options = list(output.keys())
                        prt.message('available_keys')
                        for opt in extra_options:
                            prt.prt('- {}'.format(opt))
                        selected_extra = True
                    for key in extra_outputs:
                        new_val = output.get(key, [])
                        new_val = all_to_list(new_val)
                        extras.setdefault(key, []).append(new_val)
                for i in range(len(output['times'])):
                    if not np.isfinite(output['model_observations'][i]):
                        continue
                    photodict = {
                        PHOTOMETRY.TIME:
                        output['times'][i] + output['min_times'],
                        PHOTOMETRY.MODEL: modelnum,
                        PHOTOMETRY.SOURCE: source,
                        PHOTOMETRY.REALIZATION: str(ri)
                    }
                    if output['observation_types'][i] == 'magnitude':
                        photodict[PHOTOMETRY.BAND] = output['bands'][i]
                        photodict[PHOTOMETRY.
                                  MAGNITUDE] = output['model_observations'][i]
                        photodict[PHOTOMETRY.
                                  E_MAGNITUDE] = output['model_variances'][i]
                    elif output['observation_types'][i] == 'magcount':
                        if output['model_observations'][i] == 0.0:
                            continue
                        photodict[PHOTOMETRY.BAND] = output['bands'][i]
                        photodict[PHOTOMETRY.
                                  COUNT_RATE] = output['model_observations'][i]
                        photodict[PHOTOMETRY.
                                  E_COUNT_RATE] = output['model_variances'][i]
                        photodict[PHOTOMETRY.MAGNITUDE] = -2.5 * np.log10(
                            output['model_observations']
                            [i]) + output['all_zeropoints'][i]
                        photodict[PHOTOMETRY.E_UPPER_MAGNITUDE] = 2.5 * (
                            np.log10(output['model_observations'][i] +
                                     output['model_variances'][i]) -
                            np.log10(output['model_observations'][i]))
                        if (output['model_variances'][i] >
                                output['model_observations'][i]):
                            photodict[PHOTOMETRY.UPPER_LIMIT] = True
                        else:
                            photodict[PHOTOMETRY.E_LOWER_MAGNITUDE] = 2.5 * (
                                np.log10(output['model_observations'][i]) -
                                np.log10(output['model_observations'][i] -
                                         output['model_variances'][i]))
                    elif output['observation_types'][i] == 'fluxdensity':
                        photodict[PHOTOMETRY.FREQUENCY] = output[
                            'frequencies'][i] * frequency_unit('GHz')
                        photodict[PHOTOMETRY.FLUX_DENSITY] = output[
                            'model_observations'][i] * flux_density_unit('µJy')
                        photodict[PHOTOMETRY.E_LOWER_FLUX_DENSITY] = (
                            photodict[PHOTOMETRY.FLUX_DENSITY] -
                            (10.0**
                             (np.log10(photodict[PHOTOMETRY.FLUX_DENSITY]) -
                              output['model_variances'][i] / 2.5)) *
                            flux_density_unit('µJy'))
                        photodict[PHOTOMETRY.E_UPPER_FLUX_DENSITY] = (
                            10.0**(np.log10(photodict[PHOTOMETRY.FLUX_DENSITY])
                                   + output['model_variances'][i] / 2.5) *
                            flux_density_unit('µJy') -
                            photodict[PHOTOMETRY.FLUX_DENSITY])
                        photodict[PHOTOMETRY.U_FREQUENCY] = 'GHz'
                        photodict[PHOTOMETRY.U_FLUX_DENSITY] = 'µJy'
                    elif output['observation_types'][i] == 'countrate':
                        photodict[PHOTOMETRY.
                                  COUNT_RATE] = output['model_observations'][i]
                        photodict[PHOTOMETRY.E_LOWER_COUNT_RATE] = (
                            photodict[PHOTOMETRY.COUNT_RATE] -
                            (10.0**(np.log10(photodict[PHOTOMETRY.COUNT_RATE])
                                    - output['model_variances'][i] / 2.5)))
                        photodict[PHOTOMETRY.E_UPPER_COUNT_RATE] = (
                            10.0**(np.log10(photodict[PHOTOMETRY.COUNT_RATE]) +
                                   output['model_variances'][i] / 2.5) -
                            photodict[PHOTOMETRY.COUNT_RATE])
                        photodict[PHOTOMETRY.U_COUNT_RATE] = 's^-1'
                    if ('model_upper_limits' in output
                            and output['model_upper_limits'][i]):
                        photodict[PHOTOMETRY.UPPER_LIMIT] = bool(
                            output['model_upper_limits'][i])
                    if self._limiting_magnitude is not None:
                        photodict[PHOTOMETRY.SIMULATED] = True
                    if 'telescopes' in output and output['telescopes'][i]:
                        photodict[
                            PHOTOMETRY.TELESCOPE] = output['telescopes'][i]
                    if 'systems' in output and output['systems'][i]:
                        photodict[PHOTOMETRY.SYSTEM] = output['systems'][i]
                    if 'bandsets' in output and output['bandsets'][i]:
                        photodict[PHOTOMETRY.BAND_SET] = output['bandsets'][i]
                    if 'instruments' in output and output['instruments'][i]:
                        photodict[
                            PHOTOMETRY.INSTRUMENT] = output['instruments'][i]
                    if 'modes' in output and output['modes'][i]:
                        photodict[PHOTOMETRY.MODE] = output['modes'][i]
                    entry.add_photometry(compare_to_existing=False,
                                         check_for_dupes=False,
                                         **photodict)

                    uphotodict = deepcopy(photodict)
                    uphotodict[PHOTOMETRY.SOURCE] = umodelnum
                    uentry.add_photometry(compare_to_existing=False,
                                          check_for_dupes=False,
                                          **uphotodict)
            else:
                output = model.run_stack(x, root='objective')

            parameters = OrderedDict()
            derived_keys = set()
            pi = 0
            for ti, task in enumerate(model._call_stack):
                # if task not in model._free_parameters:
                #     continue
                if model._call_stack[task]['kind'] != 'parameter':
                    continue
                paramdict = OrderedDict(
                    (('latex', model._modules[task].latex()),
                     ('log', model._modules[task].is_log())))
                if task in model._free_parameters:
                    poutput = model._modules[task].process(
                        **{'fraction': x[pi]})
                    value = list(poutput.values())[0]
                    paramdict['value'] = value
                    paramdict['fraction'] = x[pi]
                    pi = pi + 1
                else:
                    if output.get(task, None) is not None:
                        paramdict['value'] = output[task]
                parameters.update({model._modules[task].name(): paramdict})
                # Dump out any derived parameter keys
                derived_keys.update(model._modules[task].get_derived_keys())

            for key in list(sorted(list(derived_keys))):
                if (output.get(key, None) is not None
                        and key not in parameters):
                    parameters.update({key: {'value': output[key]}})

            realdict = {REALIZATION.PARAMETERS: parameters}
            if probs is not None:
                realdict[REALIZATION.SCORE] = str(probs[xi])
            else:
                realdict[REALIZATION.SCORE] = str(
                    ln_likelihood(x) + ln_prior(x))
            realdict[REALIZATION.ALIAS] = str(ri)
            realdict[REALIZATION.WEIGHT] = str(weights[xi])
            entry[ENTRY.MODELS][0].add_realization(check_for_dupes=False,
                                                   **realdict)
            urealdict = deepcopy(realdict)
            uentry[ENTRY.MODELS][0].add_realization(check_for_dupes=False,
                                                    **urealdict)
        prt.message('all_walkers_written', inline=True)

        entry.sanitize()
        oentry = {self._event_name: entry._ordered(entry)}
        uentry.sanitize()
        ouentry = {self._event_name: uentry._ordered(uentry)}

        uname = '_'.join([self._event_name, entryhash, modelhash])

        if output_path and not os.path.exists(output_path):
            os.makedirs(output_path)

        if not os.path.exists(model.get_products_path()):
            os.makedirs(model.get_products_path())

        if write:
            prt.message('writing_complete')
            with open_atomic(
                    os.path.join(model.get_products_path(), 'walkers.json'),
                    'w') as flast, open_atomic(
                        os.path.join(
                            model.get_products_path(), self._event_name +
                            (('_' + suffix) if suffix else '') + '.json'),
                        'w') as feven:
                entabbed_json_dump(oentry, flast, separators=(',', ':'))
                entabbed_json_dump(oentry, feven, separators=(',', ':'))

            if save_full_chain:
                prt.message('writing_full_chain')
                with open_atomic(
                        os.path.join(model.get_products_path(), 'chain.json'),
                        'w') as flast, open_atomic(
                            os.path.join(
                                model.get_products_path(),
                                self._event_name + '_chain' +
                                (('_' + suffix) if suffix else '') + '.json'),
                            'w') as feven:
                    entabbed_json_dump(self._sampler._all_chain.tolist(),
                                       flast,
                                       separators=(',', ':'))
                    entabbed_json_dump(self._sampler._all_chain.tolist(),
                                       feven,
                                       separators=(',', ':'))

            if extra_outputs is not None:
                prt.message('writing_extras')
                with open_atomic(
                        os.path.join(model.get_products_path(), 'extras.json'),
                        'w') as flast, open_atomic(
                            os.path.join(
                                model.get_products_path(),
                                self._event_name + '_extras' +
                                (('_' + suffix) if suffix else '') + '.json'),
                            'w') as feven:
                    entabbed_json_dump(extras, flast, separators=(',', ':'))
                    entabbed_json_dump(extras, feven, separators=(',', ':'))

            prt.message('writing_model')
            with open_atomic(
                    os.path.join(model.get_products_path(), 'upload.json'),
                    'w') as flast, open_atomic(
                        os.path.join(
                            model.get_products_path(), uname +
                            (('_' + suffix) if suffix else '') + '.json'),
                        'w') as feven:
                entabbed_json_dump(ouentry, flast, separators=(',', ':'))
                entabbed_json_dump(ouentry, feven, separators=(',', ':'))

        if upload_model:
            prt.message('ul_fit', [entryhash, self._sampler._modelhash])
            upayload = entabbed_json_dumps(ouentry, separators=(',', ':'))
            try:
                dbx = dropbox.Dropbox(upload_token)
                dbx.files_upload(upayload.encode(),
                                 '/' + uname + '.json',
                                 mode=dropbox.files.WriteMode.overwrite)
                prt.message('ul_complete')
            except Exception:
                if self._test:
                    pass
                else:
                    raise

        if upload:
            for ce in self._converter.get_converted():
                dentry = Entry.init_from_file(catalog=None,
                                              name=ce[0],
                                              path=ce[1],
                                              merge=False,
                                              pop_schema=False,
                                              ignore_keys=[ENTRY.MODELS],
                                              compare_to_existing=False)

                dentry.sanitize()
                odentry = {ce[0]: uentry._ordered(dentry)}
                dpayload = entabbed_json_dumps(odentry, separators=(',', ':'))
                text = prt.message('ul_devent', [ce[0]], prt=False)
                ul_devent = prt.prompt(text, kind='bool', message=False)
                if ul_devent:
                    dpath = '/' + slugify(
                        ce[0] + '_' + dentry[ENTRY.SOURCES][0].get(
                            SOURCE.BIBCODE, dentry[ENTRY.SOURCES][0].get(
                                SOURCE.NAME, 'NOSOURCE'))) + '.json'
                    try:
                        dbx = dropbox.Dropbox(upload_token)
                        dbx.files_upload(
                            dpayload.encode(),
                            dpath,
                            mode=dropbox.files.WriteMode.overwrite)
                        prt.message('ul_complete')
                    except Exception:
                        if self._test:
                            pass
                        else:
                            raise

        return (entry, samples, probs)