Example #1
# Assumed imports for this snippet: `models` and `microlensing_classifier`
# come from the LIA package; `lightcurve_path` and `features_path` are
# module-level settings defined elsewhere.
import os

import numpy as np
from natsort import natsorted


def classify_lightcurves():
    """Classify each .dat light curve in the R- and G-filter directories and
    save a copy into the classified/ tree, prefixed with its microlensing
    probability."""
    dataFileNamesR = []
    dataFileNamesG = []
    fileDirectoryR = lightcurve_path + 'Rfilter/'
    fileDirectoryG = lightcurve_path + 'Gfilter/'
    fileDirectoryClassifiedR = lightcurve_path + 'classified/Rfilter/'
    fileDirectoryClassifiedG = lightcurve_path + 'classified/Gfilter/'

    for f in os.listdir(fileDirectoryR):
        if f.endswith('.dat'):
            dataFileNamesR.append(f)

    for f in os.listdir(fileDirectoryG):
        if f.endswith('.dat'):
            dataFileNamesG.append(f)

    dataFileNamesR = natsorted(dataFileNamesR)
    dataFileNamesG = natsorted(dataFileNamesG)
    rf, pca = models.create_models(features_path + 'all_features.txt',
                                   features_path + 'pca_features.txt')
    class_results = []

    for filename in dataFileNamesR:
        data = np.loadtxt(fileDirectoryR + filename, usecols=np.arange(0, 3))
        mjd = [float(i) for i in data[:, 0]]
        mag = [float(i) for i in data[:, 1]]
        magerr = [float(i) for i in data[:, 2]]

        # Sort the three columns chronologically by MJD
        sosort = np.array([mjd, mag, magerr]).T
        sosort = sosort[sosort[:, 0].argsort()]
        mjd = sosort[:, 0]
        mag = sosort[:, 1]
        magerr = sosort[:, 2] + 0.0001  # small offset guards against zero uncertainties

        prediction, ml_pred = microlensing_classifier.predict(
            mag, magerr, rf, pca)[0:2]
        #print('filename: ', filename, 'prediction: ', prediction, 'ml_pred = ', ml_pred)
        result = [filename, prediction, ml_pred]
        class_results.append(result)
        # Strip the numpy brackets so the bare probability can prefix the file name
        ml_pred = str(ml_pred).replace("[", "").replace("]", "")
        location = fileDirectoryClassifiedR + ml_pred + str(filename)
        np.savetxt(location, data, fmt='%s')

    for filename in dataFileNamesG:
        data = np.loadtxt(fileDirectoryG + filename, usecols=np.arange(0, 3))
        mjd = [float(i) for i in data[:, 0]]
        mag = [float(i) for i in data[:, 1]]
        magerr = [float(i) for i in data[:, 2]]

        sosort = np.array([mjd, mag, magerr]).T
        sosort = sosort[sosort[:, 0].argsort()]
        mjd = sosort[:, 0]
        mag = sosort[:, 1]
        magerr = sosort[:, 2]

        try:
            prediction, ml_pred = microlensing_classifier.predict(
                mag, magerr, rf, pca)[0:2]

        except (ValueError):
            prediction, ml_pred = "NA", "NA"

        #print('filename: ', filename, 'prediction: ', prediction, 'ml_pred = ', ml_pred)
        result = [filename, prediction, ml_pred]
        class_results.append(result)
        ml_pred = str(ml_pred).replace("[", "").replace("]", "")
        location = fileDirectoryClassifiedG + ml_pred + str(filename)
        np.savetxt(location, data, fmt='%s')

        targetname = str(filename).replace("R.dat", "")
        targetname = targetname.replace("G.dat", "")
        print(targetname)
        #target = Target.objects.get(name=targetname)
        #target.save(extras={'Microlensing probability': {'probability': ml_pred, 'timestamp': datetime.datetime.now()}})

    return class_results
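
A minimal driver for Example #1 might look like the following; the two paths are hypothetical placeholders, and the classified/Rfilter/ and classified/Gfilter/ directories must exist before the run.

# Hypothetical setup; classify_lightcurves() reads these module-level paths.
lightcurve_path = '/data/lightcurves/'
features_path = '/data/features/'

results = classify_lightcurves()
for name, pred, prob in results:
    print(name, pred, prob)
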
Example #2
# Assumed context for this snippet: `dc_mag` and `load_external_model` come
# from the fink_science utilities; numpy as np, pandas as pd, os and warnings
# are imported at module level.
def mulens(
        fid, magpsf, sigmapsf, magnr, sigmagnr,
        magzpsci, isdiffpos, ndethist):
    """ Returns the predicted class (among microlensing, variable star,
    cataclysmic event, and constant event) & probability of an alert to be
    a microlensing event in each band using a Random Forest Classifier.

    Parameters
    ----------
    fid: Spark DataFrame Column
        Filter IDs (int)
    magpsf, sigmapsf: Spark DataFrame Columns
        Magnitude from PSF-fit photometry, and 1-sigma error
    magnr, sigmagnr: Spark DataFrame Columns
        Magnitude of nearest source in reference image PSF-catalog
        within 30 arcsec and 1-sigma error
    magzpsci: Spark DataFrame Column
        Magnitude zero point for photometry estimates
    isdiffpos: Spark DataFrame Column
        t => candidate is from positive (sci minus ref) subtraction
        f => candidate is from negative (ref minus sci) subtraction
    ndethist: Spark DataFrame Column
        Number of prior detections at the alert position (int)

    Returns
    -------
    out: pandas.Series
        Series of floats: the mean of the per-band microlensing probabilities
        if the event was classified as microlensing in both bands, 0.0
        otherwise.

    Examples
    ---------
    >>> from fink_science.utilities import concat_col
    >>> from pyspark.sql import functions as F

    >>> df = spark.read.load(ztf_alert_sample)

    # Required alert columns
    >>> what = [
    ...    'fid', 'magpsf', 'sigmapsf',
    ...    'magnr', 'sigmagnr', 'magzpsci', 'isdiffpos']

    # Use for creating temp name
    >>> prefix = 'c'
    >>> what_prefix = [prefix + i for i in what]

    # Append temp columns with historical + current measurements
    >>> for colname in what:
    ...    df = concat_col(df, colname, prefix=prefix)

    >>> args = [F.col(i) for i in what_prefix]
    >>> args += ['candidate.ndethist']
    >>> df = df.withColumn('new_mulens', mulens(*args))

    # Drop temp columns
    >>> df = df.drop(*what_prefix)

    >>> df.filter(df['new_mulens'] > 0.0).count()
    0
    """
    warnings.filterwarnings('ignore')

    # Load the Random Forest and PCA models
    curdir = os.path.dirname(os.path.abspath(__file__))
    model_path = curdir + '/data/models/'
    rf, pca = load_external_model(model_path)

    valid_index = np.arange(len(magpsf), dtype=int)

    # Keep only alerts with fewer than 100 detections in history
    mask = (ndethist.astype(int) < 100)

    # Keep only alerts with at least 20 valid measurements
    # (NaN entries fail the x == x test)
    mask *= magpsf.apply(lambda x: np.sum(np.array(x) == np.array(x))) >= 20

    to_return = np.zeros(len(magpsf), dtype=float)

    for index in valid_index[mask.values]:
        # Select only valid measurements (upper limits are NaN and fail x == x)
        maskNotNone = np.array(magpsf.values[index]) == np.array(magpsf.values[index])

        classes = []
        probs = []
        for filt in [1, 2]:  # ZTF filter IDs: 1 = g, 2 = r
            maskFilter = np.array(fid.values[index]) == filt
            m = maskNotNone * maskFilter

            # Reject if less than 10 measurements
            if np.sum(m) < 10:
                classes.append('')
                continue

            # Compute DC mag
            mag, err = np.array([
                dc_mag(i[0], i[1], i[2], i[3], i[4], i[5], i[6])
                for i in zip(
                    np.array(fid.values[index])[m],
                    np.array(magpsf.values[index])[m],
                    np.array(sigmapsf.values[index])[m],
                    np.array(magnr.values[index])[m],
                    np.array(sigmagnr.values[index])[m],
                    np.array(magzpsci.values[index])[m],
                    np.array(isdiffpos.values[index])[m])
            ]).T

            # Run the classifier
            output = microlensing_classifier.predict(mag, err, rf, pca)

            # Update the results.
            # Beware: in the FINK branch the output order has changed to
            # classification, p_cons, p_CV, p_ML, p_var = microlensing_classifier.predict(...)
            classes.append(str(output[0]))
            probs.append(float(output[3][0]))

        # Store the mean probability if ML is favoured in both bands, otherwise 0
        if np.all(np.array(classes) == 'ML'):
            to_return[index] = np.mean(probs)
        else:
            to_return[index] = 0.0

    return pd.Series(to_return)
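
Both versions of mulens convert ZTF difference photometry back to total (DC) magnitudes through a dc_mag helper before classification. Its implementation is not shown in these examples; the sketch below follows the standard ZTF flux reconstruction and is an assumption, not the verbatim fink_science code.

import numpy as np

def dc_mag(fid, magpsf, sigmapsf, magnr, sigmagnr, magzpsci, isdiffpos):
    """Sketch: rebuild the DC magnitude from difference photometry."""
    # fid is accepted for signature compatibility but unused in this sketch.
    # Reference and difference fluxes, both scaled to the science zero point
    ref_flux = 10 ** (0.4 * (magzpsci - magnr))
    diff_flux = 10 ** (0.4 * (magzpsci - magpsf))

    # 't' means the science-minus-reference subtraction was positive
    if isdiffpos == 't':
        dc_flux = ref_flux + diff_flux
    else:
        dc_flux = ref_flux - diff_flux

    # Propagate the magnitude errors through the flux sum
    k = np.log(10) / 2.5
    dc_sigflux = np.sqrt((diff_flux * k * sigmapsf) ** 2
                         + (ref_flux * k * sigmagnr) ** 2)

    dc_mag = magzpsci - 2.5 * np.log10(dc_flux)
    dc_sigmag = dc_sigflux / dc_flux / k
    return dc_mag, dc_sigmag
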
Example #3
def mulens(fid, magpsf, sigmapsf, magnr, sigmagnr, magzpsci, isdiffpos, rf,
           pca):
    """ Returns the predicted class (among microlensing, variable star,
    cataclysmic event, and constant event) & probability of an alert to be
    a microlensing event in each band using a Random Forest Classifier.

    Parameters
    ----------
    fid: Spark DataFrame Column
        Filter IDs (int)
    magpsf, sigmapsf: Spark DataFrame Columns
        Magnitude from PSF-fit photometry, and 1-sigma error
    magnr, sigmagnr: Spark DataFrame Columns
        Magnitude of nearest source in reference image PSF-catalog
        within 30 arcsec and 1-sigma error
    magzpsci: Spark DataFrame Column
        Magnitude zero point for photometry estimates
    isdiffpos: Spark DataFrame Column
        t => candidate is from positive (sci minus ref) subtraction
        f => candidate is from negative (ref minus sci) subtraction
    rf: RandomForestClassifier
        sklearn.ensemble._forest.RandomForestClassifier
    pca: PCA
        sklearn.decomposition._pca.PCA

    Returns
    -------
    out: list
        Class (string) and microlensing score (double) for each band, ordered
        as [class_band_1, ml_score_band_1, class_band_2, ml_score_band_2]

    Examples
    ---------
    >>> from fink_science.utilities import concat_col
    >>> from pyspark.sql.functions import col, udf

    # wrapper to pass broadcasted values
    >>> def mulens_wrapper(fid, magpsf, sigmapsf, magnr, sigmagnr, magzpsci, isdiffpos):
    ...     return mulens(fid, magpsf, sigmapsf, magnr, sigmagnr, magzpsci, isdiffpos, rfbcast.value, pcabcast.value)

    >>> df = spark.read.load(ztf_alert_sample)

    >>> schema = load_mulens_schema_twobands()

    # Required alert columns
    >>> what = [
    ...    'fid', 'magpsf', 'sigmapsf',
    ...    'magnr', 'sigmagnr', 'magzpsci', 'isdiffpos']

    # Use for creating temp name
    >>> prefix = 'c'
    >>> what_prefix = [prefix + i for i in what]

    # Append temp columns with historical + current measurements
    >>> for colname in what:
    ...    df = concat_col(df, colname, prefix=prefix)

    >>> curdir = os.path.dirname(os.path.abspath(__file__))
    >>> model_path = curdir + '/../data/models/'
    >>> rf, pca = load_external_model(model_path)
    >>> rfbcast = spark.sparkContext.broadcast(rf)
    >>> pcabcast = spark.sparkContext.broadcast(pca)

    >>> t = udf(mulens_wrapper, schema)
    >>> args = [col(i) for i in what_prefix]
    >>> df_mulens = df.withColumn('mulens', t(*args))

    # Drop temp columns
    >>> df_mulens = df_mulens.drop(*what_prefix)

    >>> df_mulens.agg({"mulens.ml_score_1": "min"}).collect()[0][0]
    0.0

    >>> df_mulens.agg({"mulens.ml_score_1": "max"}).collect()[0][0] < 1.0
    True
    """
    warnings.filterwarnings('ignore')

    # Select only valid measurements (not upper limits); missing history
    # entries are None, so compare element-wise against None
    maskNotNone = np.array(magpsf) != None  # noqa: E711 (element-wise on object array)

    out = []
    for filt in [1, 2]:  # ZTF filter IDs: 1 = g, 2 = r
        maskFilter = np.array(fid) == filt
        m = maskNotNone * maskFilter

        # Reject if less than 10 measurements
        if np.sum(m) < 10:
            out.extend(['', 0.0])
            continue

        # Compute DC mag
        mag, err = np.array([
            dc_mag(i[0], i[1], i[2], i[3], i[4], i[5], i[6]) for i in zip(
                np.array(fid)[m],
                np.array(magpsf)[m],
                np.array(sigmapsf)[m],
                np.array(magnr)[m],
                np.array(sigmagnr)[m],
                np.array(magzpsci)[m],
                np.array(isdiffpos)[m])
        ]).T

        # Run the classifier
        output = microlensing_classifier.predict(mag, err, rf, pca)

        # Update the results
        out.extend([str(output[0]), float(output[1][0])])

    return out
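
Both Spark examples obtain the trained models through load_external_model before broadcasting them to the executors. Its body is not shown here; a minimal sketch, assuming the Random Forest and PCA objects are stored as pickle files under model_path (the file names are hypothetical):

import pickle

def load_external_model(model_path):
    """Sketch only: the real fink_science loader and file names may differ."""
    with open(model_path + 'rf.sav', 'rb') as f:
        rf = pickle.load(f)
    with open(model_path + 'pca.sav', 'rb') as f:
        pca = pickle.load(f)
    return rf, pca
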
Example #4
def test_probability_prediction(value):
    # `value` plays the role of `self` from the original unittest class
    pred = microlensing_classifier.predict(mag, magerr, rf, pca)[1]
    value.assertTrue(
        0.4 <= pred <= 0.6,
        "Classifier failed, probability prediction not within range.")
Example #5
def test_predict(value):
    value.assertEqual(
        microlensing_classifier.predict(mag, magerr, rf, pca)[0], 'ML',
        "Classifier failed, predicted class is not correct.")
Example #6
# These light curves should include some ML events, among them OB190011:
# randi = [207194, 132119, 177748, 177461, 78283, 121424, 174610, 215315]
randi = np.arange(30000, 200000, 1)
for lc in randi:

    lc = int(lc)
    print(lc)
    try:
        time = hdf_files['dataset_photometry'][lc][:,9]
        mag = hdf_files['dataset_photometry'][lc][:,11]
        emag = hdf_files['dataset_photometry'][lc][:,12]
        back = hdf_files['dataset_photometry'][lc][:,-2]
        ppscale = hdf_files['dataset_photometry'][lc][:,-4]
        
        # `exptime` and `ind_ref` are defined in the surrounding script (not shown)
        mask = ((time > 1) & (mag > 1)
                & (np.abs(ppscale - exptime[ind_ref] / exptime) < 0.2)
                & (np.abs(back) < 250))
        order = time[mask].argsort()
        if np.median(mag[mask])<30:
            classification= microlensing_classifier.predict(time[mask][order],mag[mask][order],emag[mask][order], model)
            if float(classification[3][1])>0.6:
                plt.scatter(time[mask],mag[mask])
                plt.gca().invert_yaxis()
                plt.show()
                import pdb; pdb.set_trace()
    except Exception:
        # Skip light curves that fail to load or classify
        pass
import pdb; pdb.set_trace()
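
Example #6 references hdf_files, exptime, ind_ref, and model without defining them, and calls predict() with a (time, mag, emag, model) signature from a different branch of the classifier than Examples #1-#5. A sketch of the assumed setup, where every name is an inference from the snippet rather than the original script:

import h5py
import numpy as np
import matplotlib.pyplot as plt

# Hypothetical file and dataset names
hdf_files = h5py.File('field_photometry.hdf5', 'r')
exptime = hdf_files['dataset_exptime'][:]  # assumed per-exposure times
ind_ref = 0                                # assumed reference-exposure index

# `model` must match this branch's predict(time, mag, emag, model)
# signature; its loader is not shown in the source.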