Esempio n. 1
0
    def classify_detections(self):
        """Classify the detections of this light curve and update ``self.flag``.

        The method only ever changes ``self.flag`` away from ``'SL'``; every
        stage after the edge check is skipped once the flag is something else.

        Stages, in order:
          1. Detections for which ``self.is_edge_detection`` is true are
             dropped. If none remain the flag becomes ``'edge'``.
          2. If the most negative correlation is more than 1.5 times the best
             (positive) correlation in magnitude, the flag becomes
             ``'transit'``.
          3. Each remaining detection is correlated against a centroid
             template and its mirror image. Detections reaching
             ``Light_Curve.centroid_threshold`` are appended to
             ``self.centroid_detections``; if that removes every detection the
             flag becomes ``'centroid'``.
          4. Each surviving detection is correlated against
             ``self.best_template`` and against flare templates shifted by
             -5..5 samples, then appended to one of ``self.real_detections``,
             ``self.ambiguous_real``, ``self.ambiguous_flare`` or
             ``self.flare_detections``.
          5. With no real detections the flag becomes ``'ambiguousSL'``,
             ``'ambiguousFlare'`` or ``'flare'``; otherwise it is promoted to
             ``'highSL'`` when any real detection's correlation exceeds
             ``Light_Curve.beta``.

        Returns nothing; results are stored on the instance.
        """
        # find locations of non-edge detections
        nonedge_detections = [
            detection for detection in self.detection_indices
            if not self.is_edge_detection(self.valid_times[detection],
                                          self.start_edge, self.end_edge,
                                          self.left_edge, self.right_edge)
        ]

        # all detections are edge detections so we flag as 'edge'
        if not nonedge_detections:
            self.flag = 'edge'

        # flag as transit if there is a stronger negative correlation than a
        # positive one
        min_corr = min(self.correlations)
        if self.flag == 'SL' and abs(min_corr) > 1.5 * self.best_correlation:
            self.flag = 'transit'

        # check centroid of detection against template to see if this is a
        # valid detection
        candidate_detections = []
        if self.flag == 'SL':
            # The templates depend only on the window length, so build them
            # once instead of once per detection.
            centroid_template = mlc.offset_and_normalize(
                mlc.generate_centroid_template(1, self.window // 3))
            mirrored_template = np.flip(centroid_template)

            for detection in nonedge_detections:
                centroid_sample = mlc.offset_and_normalize(
                    self.flat_centroid[detection:detection + self.window])
                centroid_corr = scipy.signal.correlate(centroid_sample,
                                                       centroid_template,
                                                       mode='valid')[0]
                mirrored_corr = scipy.signal.correlate(centroid_sample,
                                                       mirrored_template,
                                                       mode='valid')[0]
                if (centroid_corr >= Light_Curve.centroid_threshold
                        or mirrored_corr >= Light_Curve.centroid_threshold):
                    self.centroid_detections.append(
                        (detection, self.best_template, None, None))
                else:
                    candidate_detections.append(detection)

            # flag as centroid if detections are centroid detections
            if not candidate_detections:
                self.flag = 'centroid'

        # find locations of all real and flare detections
        ambiguity = .05

        for detection in candidate_detections:
            time_window = self.valid_times[detection:detection + self.window]
            detection_sample = mlc.offset_and_normalize(
                self.flat_lcur[detection:detection + self.window])
            # This template is only stored with the detection record; the
            # correlation below is taken against self.best_template.
            gaussian_template = mlc.offset_and_normalize(
                mlc.generate_template(
                    1,
                    self.window // 3,
                    gaps=True,
                    detection_time=self.valid_times[detection +
                                                    self.window // 2],
                    times=time_window))
            gaussian_result = scipy.signal.correlate(detection_sample,
                                                     self.best_template,
                                                     mode='valid')[0]

            # Slide the flare template a few samples either side of the
            # detection and keep the best positive correlation.
            flare_result = 0
            best_flare_template = None
            best_flare_time_window = None
            for shift in range(-5, 6):
                window_start = detection + shift
                if window_start < 0:
                    # A negative slice start wraps around to the end of the
                    # array, which is not the intended shifted window.
                    continue
                window_stop = window_start + self.window
                flare_time_window = self.valid_times[window_start:window_stop]
                flare_template = mlc.offset_and_normalize(
                    mlc.generate_flare_template(
                        1,
                        self.window // 3,
                        gaps=True,
                        detection_time=self.valid_times[window_start +
                                                        self.window // 2],
                        times=flare_time_window))
                flare_window = mlc.offset_and_normalize(
                    self.flat_lcur[window_start:window_stop])
                flare_correlation = scipy.signal.correlate(flare_window,
                                                           flare_template,
                                                           mode='valid')[0]
                if flare_correlation > flare_result:
                    flare_result = flare_correlation
                    best_flare_template = flare_template
                    best_flare_time_window = flare_time_window

            # compare correlation to gaussian and flare templates, checking
            # for ambiguity between the two results
            record = (detection, gaussian_template, best_flare_template,
                      best_flare_time_window)
            is_ambiguous = abs(flare_result - gaussian_result) < ambiguity
            if flare_result > gaussian_result:
                bucket = (self.ambiguous_flare
                          if is_ambiguous else self.flare_detections)
            else:
                bucket = (self.ambiguous_real
                          if is_ambiguous else self.real_detections)
            bucket.append(record)

        # all detections are not SL detections so we flag as 'flare',
        # 'ambiguousFlare', 'ambiguousSL'
        if self.flag == 'SL' and not self.real_detections:
            if self.ambiguous_real:
                self.flag = 'ambiguousSL'
            elif self.ambiguous_flare:
                self.flag = 'ambiguousFlare'
            else:
                self.flag = 'flare'

        # check if the detections exceed the higher threshold
        if self.flag == 'SL' and any(
                self.correlations[record[0]] > Light_Curve.beta
                for record in self.real_detections):
            self.flag = 'highSL'
Esempio n. 2
0
def mf_pipeline(directory,
                result_foldername,
                mock=False,
                num_simulations=None):
    '''
    Pipeline runs a match filter on light curves and finds if the light curve
    matches a predetermined template.

    directory: string, directory where light curve .fits files are located;
        it is joined to file names by plain concatenation, so it must end
        with a path separator (only used when mock is False)
    result_foldername: string, location of resulting plots, data
    mock: bool, True if using mock data, False if using real light curves
    num_simulations: int, number of mock light curves to generate (required
        when mock is True)

    returns:
        mock=True: tuple (total_actual, total_predicted), two parallel lists
            holding the injected truth and the pipeline prediction for every
            simulation
        mock=False: dict mapping each flag name to the set of file names that
            received that flag

    raises: TypeError if mock is True and num_simulations is None
    '''
    if mock and num_simulations is None:
        raise TypeError("num_simulations must be an int when mock=True")

    kernel = 99
    # template bank: widths 15..60 in steps of 5, then 70..100 in steps of 10
    widths = [5 * j + 15 for j in range(10)] + [10 * j + 70 for j in range(4)]
    templates = [
        mlc.offset_and_normalize(mlc.generate_template(1, width))
        for width in widths
    ]

    if mock:
        return _run_mock_simulations(result_foldername, num_simulations,
                                     templates, kernel)
    return _process_fits_directory(directory, result_foldername, templates)


def _empty_bins(num_bins):
    '''Return a list of num_bins independent empty lists.'''
    return [[] for _ in range(num_bins)]


def _highest_bin(value, lower_edges):
    '''Return the largest index k with value >= lower_edges[k], else None.'''
    for k in range(len(lower_edges) - 1, -1, -1):
        if value >= lower_edges[k]:
            return k
    return None


def _run_mock_simulations(result_foldername, num_simulations, templates,
                          kernel):
    '''
    Simulate light curves, run the matched filter on each one and plot
    completeness per parameter bin every 1000 simulations.

    returns: tuple (total_actual, total_predicted)
    '''
    num_bins = 10
    counter = 0  # index used to name the diagnostic plot folders

    # create folder for resulting plots
    os.makedirs(result_foldername, exist_ok=True)

    # per-bin truth, prediction and parameter value for each variable
    i_actual, i_predicted, inclinations = (_empty_bins(num_bins)
                                           for _ in range(3))
    P_actual, P_predicted, periods = (_empty_bins(num_bins)
                                      for _ in range(3))
    mbh_actual, mbh_predicted, mbhs = (_empty_bins(num_bins)
                                       for _ in range(3))
    ms_actual, ms_predicted, mss = (_empty_bins(num_bins) for _ in range(3))
    noise_actual, noise_predicted, noises = (_empty_bins(num_bins)
                                             for _ in range(3))
    threshold_actual, threshold_predicted, thresholds = (
        _empty_bins(num_bins) for _ in range(3))

    total_actual = []
    total_predicted = []

    # generate bins (each entry is the lower edge of its bin)
    cosi_bins = [j / num_bins * .01 for j in range(num_bins)]
    P_bins = [
        np.e**(j * (np.log(27) - np.log(1)) / num_bins)
        for j in range(num_bins)
    ]
    mbh_bins = [15 * j / num_bins + 5 for j in range(num_bins)]
    ms_bins = [j / num_bins + 0.5 for j in range(num_bins)]
    noise_bins = np.array(
        [0, 50, 100, 150, 250, 500, 750, 1000, 1500, 2000])
    altered_noise_bins = noise_bins * 10**(-6) + 10**(-9)
    threshold_bins = np.array([.01 * j for j in range(10)])

    prefix = "./{}/".format(result_foldername)
    start = time.time()
    for z in range(1, num_simulations + 1):

        # randomly generate relevant parameters from known priors
        P = mlc.P_rng()
        M_BH = mlc.mbh_rng()
        M_S = mlc.ms_rng()
        i = mlc.i_rng()
        cosi = math.cos(i)
        noise = np.random.choice(altered_noise_bins)
        threshold = np.random.random() * .1

        # generate positive/negative signal at random
        pos_signal = np.random.choice([True, False])
        if pos_signal:
            lcur, EV, Beam, SL = mlc.generate_light_curve(P,
                                                          i,
                                                          M_BH,
                                                          M_S=M_S,
                                                          std=noise)
        else:
            lcur = mlc.generate_flat_signal(noise)
            # no injected components; avoids reusing a previous iteration's
            EV = Beam = SL = None

        # subtract median filter from signal and normalize for correlation
        # analysis
        flat_lcur = lcur - scipy.signal.medfilt(lcur, kernel)
        flat_lcur = mlc.offset_and_normalize(flat_lcur)

        best_result = None
        best_corr = 0
        best_correlations = None

        # perform cross-correlation for all template widths
        for template in templates:
            correlations = scipy.signal.correlate(flat_lcur, template)
            highest_corr = max(correlations)
            result = highest_corr > threshold

            # choose best correlation result so far
            if best_result is None or highest_corr > best_corr:
                best_corr = highest_corr
                best_result = result
                best_correlations = correlations

            # stop at the first template that yields a detection
            if result:
                break

        # plot some light curves and their correlations at random based on
        # prediction
        if (best_result != pos_signal and np.random.random() > .98) or (
                pos_signal and np.random.random() > .99):
            lc_folder = "./{}/lc{}".format(result_foldername, counter)
            if not os.path.exists(lc_folder):
                os.mkdir(lc_folder)
            mlc.plot_lc(lcur,
                        P,
                        M_BH,
                        i,
                        M_S,
                        filename="{}/lcur{}.pdf".format(lc_folder, counter),
                        EV=EV,
                        Beam=Beam,
                        SL=SL)
            mlc.plot_lc(flat_lcur,
                        P,
                        M_BH,
                        i,
                        M_S,
                        filename="{}/flat_lcur{}.pdf".format(
                            lc_folder, counter))
            mlc.plot_corr(best_correlations, P, M_BH, i, M_S, threshold,
                          noise, "{}/corr{}.pdf".format(lc_folder, counter))
            counter += 1

        # Bin the physical parameters. Each sample goes into exactly one bin
        # (the highest bin whose lower edge it reaches) and the parameter's
        # own value is stored alongside truth and prediction.
        if pos_signal or best_result:
            physical = (
                (cosi, i, cosi_bins, inclinations, i_actual, i_predicted),
                (P, P, P_bins, periods, P_actual, P_predicted),
                (M_BH, M_BH, mbh_bins, mbhs, mbh_actual, mbh_predicted),
                (M_S, M_S, ms_bins, mss, ms_actual, ms_predicted),
            )
            for key, stored, edges, values, actual, predicted in physical:
                k = _highest_bin(key, edges)
                if k is not None:
                    values[k].append(stored)
                    actual[k].append(pos_signal)
                    predicted[k].append(best_result)

        # threshold and noise are binned for every simulation
        k = _highest_bin(threshold, threshold_bins)
        if k is not None:
            thresholds[k].append(threshold)
            threshold_actual[k].append(pos_signal)
            threshold_predicted[k].append(best_result)

        # noise was drawn from altered_noise_bins, so an exact match exists
        for k in range(num_bins - 1, -1, -1):
            if noise == altered_noise_bins[k]:
                noises[k].append(noise)
                noise_actual[k].append(pos_signal)
                noise_predicted[k].append(best_result)
                break

        total_actual.append(pos_signal)
        total_predicted.append(best_result)

        # perform completeness analysis
        if z % 1000 == 0:
            print('{} simulations complete'.format(z))
            plot_completeness(r'$\alpha$',
                              threshold_bins,
                              threshold_actual,
                              threshold_predicted,
                              num_bins,
                              prefix + 'alpha',
                              scale=True)
            plot_completeness('Noise [ppm]',
                              noise_bins,
                              noise_actual,
                              noise_predicted,
                              num_bins,
                              prefix + 'noise',
                              scale=True)
            plot_completeness('cosi',
                              cosi_bins,
                              i_actual,
                              i_predicted,
                              num_bins,
                              prefix + 'cosi',
                              scale=True)
            plot_completeness('Period [days]', P_bins, P_actual, P_predicted,
                              num_bins, prefix + 'period')
            plot_completeness(r'$M_{BH} [M_{\odot}]$', mbh_bins, mbh_actual,
                              mbh_predicted, num_bins, prefix + 'mbh')
            plot_completeness(r'$M_{\star} [M_{\odot}]$', ms_bins, ms_actual,
                              ms_predicted, num_bins, prefix + 'ms')

    end = time.time()
    print("{} minutes".format(round((end - start) / 60, 2)))

    return total_actual, total_predicted


def _plot_full_light_curve(pdf, filename, lcur_object):
    '''Save a five-panel overview page of one light curve to pdf.'''
    fig, ax = plt.subplots(5, sharex=True, figsize=(6, 10))
    times = lcur_object.valid_times
    # the correlation panel is drawn against the first len(correlations) times
    num_corr = len(lcur_object.correlations)
    corr_span = [times[0], times[num_corr - 1]]

    # plot light curve
    ax[0].set_title(filename)
    ax[0].plot(times,
               lcur_object.valid_fluxes,
               'ko',
               rasterized=True,
               markersize=1)
    ax[0].set_ylabel('PDCSAP Flux')

    # plot flat light curve
    ax[1].plot(times,
               lcur_object.flat_lcur,
               'ko',
               rasterized=True,
               markersize=1)
    ax[1].set_ylabel('Relative Flux')

    # plot correlation with the alpha threshold (and beta for 'highSL')
    ax[2].plot(times[:num_corr],
               lcur_object.correlations,
               'ko',
               rasterized=True,
               markersize=1)
    ax[2].plot(corr_span, [Light_Curve.alpha, Light_Curve.alpha],
               '--',
               color='orange',
               rasterized=True)
    if lcur_object.flag == 'highSL':
        ax[2].plot(corr_span, [Light_Curve.beta, Light_Curve.beta],
                   'b--',
                   rasterized=True)
    ax[2].set_ylabel('Correlation')

    # plot centroid
    ax[3].plot(times,
               lcur_object.valid_centroid,
               'ko',
               rasterized=True,
               markersize=1)
    ax[3].set_ylabel('Centroid')

    # plot flat centroid
    ax[4].plot(times,
               lcur_object.flat_centroid,
               'ko',
               rasterized=True,
               markersize=1)
    ax[4].set_ylabel('Relative Centroid')
    ax[4].set_xlabel('Time [days]')

    plt.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


def _process_fits_directory(directory, result_foldername, templates):
    '''
    Run the pipeline on every .fits file in directory, write one PDF per
    positive light curve into the folder of its flag, and save a pie chart of
    the flag distribution.

    returns: dict mapping flag name to the set of file names with that flag
    '''
    flags = [
        'SL', 'edge', 'transit', 'flare', 'highSL', 'ambiguousSL',
        'ambiguousFlare', 'centroid'
    ]
    results = {flag: set() for flag in flags}

    # Always make sure every flag folder exists, also when the results folder
    # was left over from an earlier run.
    result_dir = directory + result_foldername
    for flag in flags:
        os.makedirs(result_dir + '/' + flag, exist_ok=True)

    counter = 1
    num_files = 0
    for filename in os.listdir(directory):
        num_files += 1
        if num_files % 500 == 0:
            print('{} files completed'.format(num_files))

        if not filename.endswith(".fits"):
            continue

        fits_file = directory + filename
        try:
            with fits.open(fits_file, mode="readonly",
                           memmap=False) as hdulist:
                tess_bjds = hdulist[1].data['TIME']
                pdcsap_fluxes = hdulist[1].data['PDCSAP_FLUX']
                centroid = hdulist[1].data['MOM_CENTR1']
        except Exception:
            # best effort: skip unreadable files, but let KeyboardInterrupt
            # and SystemExit propagate
            print('Could not open file: {}'.format(filename))
            continue

        # Create a light curve object with the file data and run pipeline on
        # it
        lcur_object = Light_Curve(tess_bjds, pdcsap_fluxes, centroid,
                                  templates)
        lcur_object.run_pipeline()
        if not lcur_object.result:
            continue

        # file the light curve under the flag the pipeline assigned to it
        flag = lcur_object.flag
        results[flag].add(filename)
        folder = '{}{}/{}'.format(directory, result_foldername, flag)
        pdf_path = '{}/light_curve{}.pdf'.format(folder, counter)
        counter += 1

        # the context manager closes the PDF even if plotting fails
        with pdfs.PdfPages(pdf_path) as pdf:
            _plot_full_light_curve(pdf, filename, lcur_object)

            # zoomed in plots on the location of each detection, by category
            categories = (
                ('SL', lcur_object.real_detections),
                ('Ambiguous SL', lcur_object.ambiguous_real),
                ('Ambiguous Flare', lcur_object.ambiguous_flare),
                ('Flare', lcur_object.flare_detections),
            )
            for label, detections in categories:
                for detection in detections:
                    plot_detection(detection[0], lcur_object.window, label,
                                   pdf, detection[1], detection[2],
                                   detection[3], lcur_object.valid_times,
                                   lcur_object.valid_fluxes,
                                   lcur_object.flat_lcur,
                                   lcur_object.correlations,
                                   lcur_object.valid_centroid,
                                   lcur_object.flat_centroid)

            # zoomed in plot on location of each centroid detection
            # NOTE(review): this call reads `best_correlations` while every
            # other plot reads `correlations` -- confirm against Light_Curve.
            for detection in lcur_object.centroid_detections:
                plot_detection(detection[0], lcur_object.window, 'Centroid',
                               pdf, detection[1], detection[2], detection[3],
                               lcur_object.valid_times,
                               lcur_object.valid_fluxes,
                               lcur_object.flat_lcur,
                               lcur_object.best_correlations,
                               lcur_object.valid_centroid,
                               lcur_object.flat_centroid)

    # make a pie chart of the distribution of light curves in each bin
    pie_slices = [len(results[flag]) for flag in flags]
    plt.figure()
    plt.title('Distribution of Positive Detections')
    plt.pie(pie_slices, labels=flags)
    plt.savefig(result_dir + "/distribution.pdf")
    plt.close()

    return results
Esempio n. 3
0
               rasterized=True,
               markersize=2)
    ax[4].set_ylabel('Rlative Centroid')

    plt.tight_layout()
    pdf.savefig(fig)
    plt.close()


# Ask for the output folder and create it (os.mkdir raises if it exists).
foldername = input("Input name of new results folder: ")
os.mkdir(foldername)

# Template bank: widths 15..60 in steps of 5, then 70..100 in steps of 10.
widths = [*range(15, 61, 5), *range(70, 101, 10)]
templates = []
for width in widths:
    raw_template = mlc.generate_template(1, width)
    templates.append(mlc.offset_and_normalize(raw_template))

for sector in range(15, 29):
    flags = [
        'SL', 'edge', 'transit', 'flare', 'highSL', 'ambiguousSL',
        'ambiguousFlare', 'centroid'
    ]
    results = {flag: set() for flag in flags}
    if not os.path.exists("{}/Sector{}".format(foldername, sector)):
        os.mkdir("{}/Sector{}".format(foldername, sector))
        os.mkdir("{}/Sector{}/SL_Files".format(foldername, sector))
        for flag in flags:
            os.mkdir("{}/Sector{}/{}".format(foldername, sector, flag))

    download_file = "tesscurl_sector_{}_lc.sh".format(sector)
    with open(download_file) as curl_file: