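# Shared imports assumed by the examples below. GetData, InputLightCurve and
# delete_indexes are assumed to be provided by the surrounding astrorapid
# package; their exact module paths are not shown in this listing.
import os
import pickle

import numpy as np
import pandas as pd
from astropy.io import fits as afits

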
def read_light_curves_from_sql_database(data_release,
                                        fname,
                                        field_in='%',
                                        model_in='%',
                                        batch_size=100,
                                        offset=0,
                                        sort=True,
                                        passbands=('g', 'r'),
                                        known_redshift=True):
    print(fname)

    # Optional extra SQL filter, e.g.:
    # "AND (objid LIKE '%00' OR objid LIKE '%50' OR sim_type_index IN (51,61,62,63,64,84,90,91,93))"
    # or "AND sim_redshift_host < 0.5 AND sim_peakmag_r < 23"
    extrasql = ''
    getter = GetData(data_release)
    result = getter.get_lcs_data(columns=[
        'objid', 'ptrobs_min', 'ptrobs_max', 'sim_peakmag_r',
        'sim_redshift_host', 'mwebv', 'sim_dlmu', 'peakmjd', 'ra',
        'decl', 'hostgal_photoz', 'hostgal_photoz_err'
    ],
                                 field=field_in,
                                 model=model_in,
                                 snid='%',
                                 limit=batch_size,
                                 offset=offset,
                                 shuffle=False,
                                 sort=sort,
                                 extrasql=extrasql)

    store = pd.HDFStore(fname)

    for head, phot in result:
        # Unpack the header row; the order matches the columns requested above
        objid, ptrobs_min, ptrobs_max, peakmag, redshift, mwebv, dlmu, peakmjd, ra, dec, photoz, photozerr = head

        field, model, base, snid = objid.split('_')

        lc = getter.convert_pandas_lc_to_recarray_lc(phot, passbands=passbands)

        inputlightcurve = InputLightCurve(lc['mjd'],
                                          lc['flux'],
                                          lc['dflux'],
                                          lc['pb'],
                                          lc['zpt'],
                                          lc['photflag'],
                                          ra,
                                          dec,
                                          objid,
                                          redshift,
                                          mwebv,
                                          known_redshift=known_redshift,
                                          training_set_parameters={
                                              'class_number': int(model),
                                              'peakmjd': peakmjd
                                          })

        savepd = inputlightcurve.preprocess_light_curve()
        store.append(objid, savepd)

    store.close()
    print("saved %s" % fname)
def read_fits_file(args):
    head_file, phot_file, passbands, known_redshift, calculate_t0 = args

    getter = GetData()

    light_curves = {}

    header_HDU = afits.open(head_file)
    header_data = header_HDU[1].data

    for i, head in enumerate(header_data):
        class_num = head['SIM_TYPE_INDEX']
        snid = head['SNID']
        objid = '{}_{}'.format(class_num, snid)
        ptrobs_min = head['PTROBS_MIN']
        ptrobs_max = head['PTROBS_MAX']
        redshift = head['SIM_REDSHIFT_HOST']
        peakmjd = head['PEAKMJD']
        mwebv = head['MWEBV']
        ra = head['RA']
        if 'DEC' in header_data.names:
            dec = head['DEC']
        else:
            dec = head['DECL']
        print(i, len(header_data))

        try:
            phot_data = getter.get_light_curve_array(phot_file, ptrobs_min, ptrobs_max)

            lc = getter.convert_pandas_lc_to_recarray_lc(phot_data, passbands=passbands)

            inputlightcurve = InputLightCurve(lc['mjd'], lc['flux'], lc['dflux'], lc['pb'], lc['photflag'], ra,
                                              dec, objid, redshift, mwebv, known_redshift=known_redshift,
                                              training_set_parameters={'class_number': class_num, 'peakmjd': peakmjd},
                                              calculate_t0=calculate_t0)
            light_curves[objid] = inputlightcurve.preprocess_light_curve()
        except IndexError as e:
            print("No detections:", e)  # TODO: maybe do better error checking in future
        except AttributeError as e:
            print("phot_data is NoneType", e)  # TODO: maybe fix this later - rare case
        except ValueError as e:
            print("MCMC error while fitting t0", e)
        except Exception as e:
            print("Unspecified error", e, "Ignoring light curve", objid)

    return light_curves
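

# read_fits_file takes a single argument tuple, which suggests it is intended to
# be mapped over HEAD/PHOT file pairs in parallel. A sketch of that usage,
# assuming matching lists head_files and phot_files (the names are illustrative):
# import multiprocessing as mp
#
# args_list = [(head, phot, ('g', 'r'), True, True)
#              for head, phot in zip(head_files, phot_files)]
# with mp.Pool(processes=4) as pool:
#     results = pool.map(read_fits_file, args_list)
# light_curves = {}
# for result in results:
#     light_curves.update(result)

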
def get_real_ztf_training_data(class_name,
                               data_dir='data/real_ZTF_data_from_osc',
                               save_dir='data/saved_light_curves/',
                               pbs=('g', 'r'),
                               known_redshift=True,
                               nprocesses=1,
                               redo=False,
                               calculate_t0=True):
    """
    Get data from saved real ZTF data, with object names and types taken from the Open Supernova Catalog.
    """

    save_lc_filepath = os.path.join(save_dir,
                                    f"lc_classnum_{class_name}.pickle")
    if os.path.exists(save_lc_filepath) and not redo:
        with open(save_lc_filepath, "rb") as fp:  # Unpickling
            light_curves = pickle.load(fp)
    else:
        light_curves = {}
        data_filepath = os.path.join(
            data_dir, f"ZTF_data_{class_name}_osc-6-May-2020.pickle")
        with open(data_filepath, "rb") as fp:
            (mjds, passbands, mags, magerrs, photflags, zeropoints, dc_mags, dc_magerrs, magnrs,
             sigmagnrs, isdiffposs, ras, decs, objids, redshifts, mwebvs) = pickle.load(fp)

        for i, objid in enumerate(objids):
            if known_redshift and (redshifts[i] is None
                                   or np.isnan(redshifts[i])):
                print(
                    f"Skipping {objid} because redshift is unknown and known_redshift model is selected"
                )
                continue

            # Convert magnitudes to fluxes relative to the zeropoints and propagate the
            # magnitude errors: fluxerr = |flux * magerr * ln(10)/2.5|
            flux = 10.**(-0.4 * (mags[i] - zeropoints[i]))
            fluxerr = np.abs(flux * magerrs[i] * (np.log(10.) / 2.5))

            # Map ZTF numeric filter IDs to passband names (1 -> 'g', 2 -> 'r')
            passbands[i] = np.where(
                (passbands[i] == 1) | (passbands[i] == '1'), 'g', passbands[i])
            passbands[i] = np.where(
                (passbands[i] == 2) | (passbands[i] == '2'), 'r', passbands[i])

            # photflag 4096 marks a detection; the earliest detection is re-flagged as 6144 (the trigger)
            mjd_first_detection = min(mjds[i][photflags[i] == 4096])
            photflags[i][np.where(mjds[i] == mjd_first_detection)] = 6144

            # Remove points in filter ID 3 (ZTF i band), non-detections after the first
            # detection, and any NaN fluxes
            deleteindexes = np.where(
                ((passbands[i] == 3) | (passbands[i] == '3'))
                | ((mjds[i] > mjd_first_detection) & (photflags[i] == 0))
                | np.isnan(flux))
            if deleteindexes[0].size > 0:
                print("Deleting indexes {} at mjd {} and passband {}".format(
                    deleteindexes, mjds[i][deleteindexes],
                    passbands[i][deleteindexes]))
            mjd, passband, flux, fluxerr, zeropoint, photflag = delete_indexes(
                deleteindexes, mjds[i], passbands[i], flux, fluxerr,
                zeropoints[i], photflags[i])
            peakmjd = mjd[np.argmax(flux)]

            inputlightcurve = InputLightCurve(mjd,
                                              flux,
                                              fluxerr,
                                              passband,
                                              photflag,
                                              ras[i],
                                              decs[i],
                                              objid,
                                              redshifts[i],
                                              mwebvs[i],
                                              known_redshift=known_redshift,
                                              training_set_parameters={
                                                  'class_number': class_name,
                                                  'peakmjd': peakmjd
                                              },
                                              calculate_t0=calculate_t0)
            light_curves[objid] = inputlightcurve.preprocess_light_curve()

        with open(save_lc_filepath, "wb") as fp:
            pickle.dump(light_curves, fp)

    return light_curves
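

# Example call for the ZTF/OSC reader above; 'SNIa' is an illustrative class name
# and the directories simply repeat the function defaults.
# light_curves = get_real_ztf_training_data('SNIa',
#                                           data_dir='data/real_ZTF_data_from_osc',
#                                           save_dir='data/saved_light_curves/',
#                                           known_redshift=True, redo=False)

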
def read_light_curves_from_snana_fits_files(save_fname, head_files, phot_files, passbands=('g', 'r'), known_redshift=True):
    """ Save lightcurves from SNANA HEAD AND PHOT FITS files

    Parameters
    ----------
    save_fname : str
        Filename to save hdf5 file.
    dir_name : str
        Directory path of all SNANA HEAD and PHOT files.
    passbands : tuple
        passband filters.
    known_redshift : bool
        Whether to use redshift during training.

    """

    getter = GetData()

    store = pd.HDFStore(save_fname)

    for fileidx, headfilepath in enumerate(head_files):
        print(fileidx, headfilepath)
        # Check that the PHOT file corresponds to the HEAD file
        assert phot_files[fileidx].split('_')[-2] == head_files[fileidx].split('_')[-2]
        header_HDU = afits.open(head_files[fileidx])
        header_data = header_HDU[1].data

        for i, head in enumerate(header_data):
            model_num = head['SIM_TYPE_INDEX']
            snid = head['SNID']
            objid = 'field_{}_base_{}'.format(model_num, snid)
            ptrobs_min = head['PTROBS_MIN']
            ptrobs_max = head['PTROBS_MAX']
            peakmag_g = head['SIM_PEAKMAG_g']
            peakmag_r = head['SIM_PEAKMAG_r']
            redshift = head['SIM_REDSHIFT_HOST']
            dlmu = head['SIM_DLMU']
            peakmjd = head['PEAKMJD']
            mwebv = head['MWEBV']
            mwebv_err = head['MWEBV_ERR']
            ra = head['RA']
            if 'DEC' in header_data.names:
                dec = head['DEC']
            else:
                dec = head['DECL']
            photoz = head['HOSTGAL_PHOTOZ']
            photozerr = head['HOSTGAL_PHOTOZ_ERR']
            print(i, len(header_data))
            phot_data = getter.get_light_curve_array(phot_files[fileidx], ptrobs_min, ptrobs_max)

            lc = getter.convert_pandas_lc_to_recarray_lc(phot_data, passbands=passbands)

            inputlightcurve = InputLightCurve(lc['mjd'], lc['flux'], lc['dflux'], lc['pb'], lc['photflag'], ra,
                                              dec, objid, redshift, mwebv, known_redshift=known_redshift,
                                              training_set_parameters={'class_number': int(model_num), 'peakmjd': peakmjd})

            # TODO: work out why some light curves fail mcmc
            try:
                savepd = inputlightcurve.preprocess_light_curve()
            except Exception as e:
                print("Failed on object", objid, e)
                continue
            store.append(objid, savepd)

    store.close()
    print("saved %s" % save_fname)
def get_custom_data(class_num, data_dir, save_dir, passbands, known_redshift,
                    nprocesses, redo):
    """
    Get data from custom data files.
    You will need to write this function yourself, following the skeleton below.

    Parameters
    ----------
    class_num : int
        Class number. E.g. SNIa is 1. See helpers.py for lookup table.
        E.g. class_num = 1
    data_dir : str
        Directory where data is stored
        E.g. data_dir='data/ZTF_20190512/'
    save_dir : str
        Directory to save processed data
        E.g. save_dir='data/saved_light_curves/'
    passbands : tuple
        Passbands to use.
        E.g. passbands=('g', 'r')
    known_redshift : bool
        Whether to correct the light curves for cosmological time dilation using redshift.
    nprocesses : int or None
        Number of processes to use
    redo : bool
        Whether to redo reading the data and saving the processed data.


    Returns
    -------
    light_curves : dict of astropy.table.Table objects
        e.g. light_curves['objid1'] =
            passband   time       flux     fluxErr   photflag
              str1   float32    float32    float32    int32
            -------- -------- ----------- ---------- --------
                   g -46.8942  -48.926975  42.277767        0
                   g -43.9352  -105.35379   72.97575        0
                   g -35.9161  -46.264206    99.9172        0
                   g -28.9377  -28.978344  42.417065        0
                   g -25.9787  109.886566   46.03949        0
                   g -15.0399    -80.2485   80.38155        0
                   g -12.0218    93.51743  113.21529        0
                   g  -6.9585   248.88364 108.606865        0
                   g  -4.0411   341.41498  47.765404        0
                   g      0.0    501.7441   45.37485     6144
                 ...      ...         ...        ...      ...
                   r  40.9147   194.32494  57.836903     4096
                   r  59.9162    67.59185   45.66463     4096
                   r  62.8976    80.85155  44.356197     4096
                   r  65.8974   28.174305   44.75049     4096
                   r  71.8966  -18.790287 108.049774     4096
                   r  74.9297  -3.1707647  125.15057     4096
                   r  77.9341 -11.0205965 125.784676     4096
                   r  80.8576   129.65466   69.99305     4096
                   r  88.8922  -14.259436  52.917866     4096
                   r 103.8734   27.178356 115.537704     4096

    """
    # If the data has already been run and processed, load it. Otherwise read it and save it
    save_lc_filepath = os.path.join(save_dir,
                                    f"lc_classnum_{class_num}.pickle")
    if os.path.exists(save_lc_filepath) and not redo:
        with open(save_lc_filepath, "rb") as fp:  # Unpickling
            light_curves = pickle.load(fp)
    else:
        light_curves = {}
        # Read in the data from data_dir and, for each light curve, get the mjd, flux, fluxerr, passband and
        # photflag as 1D numpy arrays. Get the ra, dec, objid, redshift, mwebv, model_num and peak_mjd as
        # floats or strings. The known_redshift boolean (passed in above) sets whether the model uses redshift.

        # Enter your own data-reading code here that gets the mjds, fluxes, fluxerrs, passbands, photflags,
        # ras, decs, objids, redshifts, mwebvs, model_nums, peak_mjds for all the light curves from the data_dir
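        # Purely illustrative sketch of that reading step (the 'metadata.csv' file, the
        # per-object CSV files and their column names are assumptions about one possible layout):
        # meta = pd.read_csv(os.path.join(data_dir, 'metadata.csv'))
        # objids, ras, decs = meta['objid'].values, meta['ra'].values, meta['dec'].values
        # redshifts, mwebvs = meta['redshift'].values, meta['mwebv'].values
        # peakmjds = meta['peakmjd'].values
        # mjds, fluxes, fluxerrs, passbands, photflags = [], [], [], [], []
        # for objid in objids:
        #     lc = pd.read_csv(os.path.join(data_dir, f'{objid}.csv'))
        #     mjds.append(lc['mjd'].values)
        #     fluxes.append(lc['flux'].values)
        #     fluxerrs.append(lc['fluxerr'].values)
        #     passbands.append(lc['passband'].values)
        #     photflags.append(lc['photflag'].values)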

        # Once you have the required data information for each light curve, pass it into InputLightCurve with
        # something like the following code:
        for i, objid in enumerate(objids):
            inputlightcurve = InputLightCurve(mjds[i],
                                              fluxes[i],
                                              fluxerrs[i],
                                              passbands[i],
                                              photflags[i],
                                              ras[i],
                                              decs[i],
                                              objids[i],
                                              redshifts[i],
                                              mwebvs[i],
                                              known_redshift=known_redshift,
                                              training_set_parameters={
                                                  'class_number':
                                                  int(class_num),
                                                  'peakmjd': peakmjds[i]
                                              })
            light_curves[objid] = inputlightcurve.preprocess_light_curve()

        # If you think that reading the data is too slow, you may want to replace the for loop above with
        # multiprocessing. See the example function in get_training_data.py if you need help doing this.

        # Next, we save it:
        with open(save_lc_filepath, "wb") as fp:  # Pickling
            pickle.dump(light_curves, fp)

    return light_curves
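

# Example call for the skeleton above, using the illustrative values from its docstring:
# light_curves = get_custom_data(class_num=1, data_dir='data/ZTF_20190512/',
#                                save_dir='data/saved_light_curves/',
#                                passbands=('g', 'r'), known_redshift=True,
#                                nprocesses=1, redo=False)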