def classify_detections(self):
    '''
    Sort the detections of this light curve into categories and update
    self.flag accordingly.

    Reads self.detection_indices, self.valid_times, self.correlations,
    self.best_correlation, self.best_template, self.flat_lcur,
    self.flat_centroid, self.window and the edge attributes passed to
    self.is_edge_detection.

    Appends (detection, gaussian_template, flare_template, flare_times)
    tuples to self.real_detections, self.ambiguous_real,
    self.ambiguous_flare and self.flare_detections, and
    (detection, self.best_template, None, None) tuples to
    self.centroid_detections.

    self.flag is only downgraded from 'SL'; it may end up as 'edge',
    'transit', 'centroid', 'ambiguousSL', 'ambiguousFlare', 'flare' or
    'highSL'.

    returns: None
    '''
    # find locations of non-edge detections
    nonedge_detections = [
        detection for detection in self.detection_indices
        if not self.is_edge_detection(self.valid_times[detection],
                                      self.start_edge, self.end_edge,
                                      self.left_edge, self.right_edge)
    ]

    # all detections are edge detections so we flag as 'edge'
    if not nonedge_detections:
        self.flag = 'edge'

    # flag as transit if there is a stronger negative correlation than a
    # positive one (the minimum is only needed while the flag is still 'SL')
    if self.flag == 'SL':
        if abs(min(self.correlations)) > 1.5 * self.best_correlation:
            self.flag = 'transit'

    # check centroid of detection against template to see if this is a
    # valid detection
    candidate_detections = []
    if self.flag == 'SL':
        # the centroid templates depend only on the window size, so they are
        # built once instead of once per detection
        centroid_template = mlc.offset_and_normalize(
            mlc.generate_centroid_template(1, self.window // 3))
        flipped_centroid_template = np.flip(centroid_template)

        for detection in nonedge_detections:
            centroid_sample = mlc.offset_and_normalize(
                self.flat_centroid[detection:detection + self.window])
            centroid_corr = scipy.signal.correlate(
                centroid_sample, centroid_template, mode='valid')[0]
            flipped_corr = scipy.signal.correlate(
                centroid_sample, flipped_centroid_template, mode='valid')[0]

            if (centroid_corr >= Light_Curve.centroid_threshold
                    or flipped_corr >= Light_Curve.centroid_threshold):
                self.centroid_detections.append(
                    (detection, self.best_template, None, None))
            else:
                candidate_detections.append(detection)

        # flag as centroid if detections are centroid detections
        if not candidate_detections:
            self.flag = 'centroid'

    # find locations of all real and flare detections
    ambiguity = .05
    num_samples = len(self.valid_times)
    half_window = self.window // 2
    for detection in candidate_detections:
        time_window = self.valid_times[detection:detection + self.window]
        detection_sample = mlc.offset_and_normalize(
            self.flat_lcur[detection:detection + self.window])
        # NOTE(review): this gap-aware template is only stored in the output
        # tuple; the score below is computed against self.best_template.
        gaussian_template = mlc.offset_and_normalize(
            mlc.generate_template(
                1,
                self.window // 3,
                gaps=True,
                detection_time=self.valid_times[detection + half_window],
                times=time_window))
        gaussian_result = scipy.signal.correlate(
            detection_sample, self.best_template, mode='valid')[0]

        # slide the flare template up to 5 samples either side of the
        # detection and keep the best match
        flare_result = 0
        best_flare_template = None
        best_flare_time_window = None
        for shift in range(-5, 6):
            start = detection + shift
            center = start + half_window
            # skip shifts that fall off the data: a negative start would wrap
            # around to the end of the arrays, and a center past the last
            # sample would raise IndexError
            if start < 0 or center >= num_samples:
                continue
            stop = start + self.window
            flare_time_window = self.valid_times[start:stop]
            flare_template = mlc.offset_and_normalize(
                mlc.generate_flare_template(
                    1,
                    self.window // 3,
                    gaps=True,
                    detection_time=self.valid_times[center],
                    times=flare_time_window))
            flare_window = mlc.offset_and_normalize(
                self.flat_lcur[start:stop])
            flare_correlation = scipy.signal.correlate(
                flare_window, flare_template, mode='valid')[0]
            if flare_correlation > flare_result:
                flare_result = flare_correlation
                best_flare_template = flare_template
                best_flare_time_window = flare_time_window

        # compare correlation to gaussian and flare templates; results closer
        # than `ambiguity` go to the ambiguous list of the winning side
        entry = (detection, gaussian_template, best_flare_template,
                 best_flare_time_window)
        is_ambiguous = abs(flare_result - gaussian_result) < ambiguity
        if flare_result > gaussian_result:
            if is_ambiguous:
                self.ambiguous_flare.append(entry)
            else:
                self.flare_detections.append(entry)
        else:
            if is_ambiguous:
                self.ambiguous_real.append(entry)
            else:
                self.real_detections.append(entry)

    # all detections are not SL detections so we flag as 'flare',
    # 'ambiguousFlare', 'ambiguousSL'
    if self.flag == 'SL' and not self.real_detections:
        if self.ambiguous_real:
            self.flag = 'ambiguousSL'
        elif self.ambiguous_flare:
            self.flag = 'ambiguousFlare'
        else:
            self.flag = 'flare'

    # check if the detections exceed the higher threshold
    if self.flag == 'SL':
        for detection in self.real_detections:
            if self.correlations[detection[0]] > Light_Curve.beta:
                self.flag = 'highSL'
                break
def _mf_highest_bin(value, lower_edges):
    '''Return the index of the highest bin whose lower edge is <= value, or None if value is below every edge.'''
    for k in range(len(lower_edges) - 1, -1, -1):
        if value >= lower_edges[k]:
            return k
    return None


def _mf_run_mock(templates, result_foldername, num_simulations, kernel):
    '''Run the match filter on simulated light curves and plot completeness per parameter bin.'''
    num_bins = 10
    counter = 0

    # create folder for resulting plots
    os.makedirs(result_foldername, exist_ok=True)

    def empty_bins():
        return [[] for _ in range(num_bins)]

    # results arrays for completeness analysis of each variable
    i_actual, i_predicted, inclinations = empty_bins(), empty_bins(), empty_bins()
    P_actual, P_predicted, periods = empty_bins(), empty_bins(), empty_bins()
    mbh_actual, mbh_predicted, mbhs = empty_bins(), empty_bins(), empty_bins()
    ms_actual, ms_predicted, mss = empty_bins(), empty_bins(), empty_bins()
    noise_actual, noise_predicted, noises = empty_bins(), empty_bins(), empty_bins()
    threshold_actual, threshold_predicted, thresholds = (
        empty_bins(), empty_bins(), empty_bins())
    total_actual = []
    total_predicted = []

    # generate bins (each list holds the lower edge of every bin)
    cosi_bins = [j / num_bins * .01 for j in range(num_bins)]
    P_bins = [
        np.e**(j * (np.log(27) - np.log(1)) / num_bins)
        for j in range(num_bins)
    ]
    mbh_bins = [15 * j / num_bins + 5 for j in range(num_bins)]
    ms_bins = [j / num_bins + 0.5 for j in range(num_bins)]
    noise_bins = np.array(
        [0, 50, 100, 150, 250, 500, 750, 1000, 1500, 2000])
    altered_noise_bins = noise_bins * 10**(-6) + 10**(-9)
    threshold_bins = np.array([.01 * j for j in range(10)])

    prefix = "./{}/".format(result_foldername)
    start = time.time()
    for z in range(1, num_simulations + 1):
        # randomly generate relevant parameters from known priors
        P = mlc.P_rng()
        M_BH = mlc.mbh_rng()
        M_S = mlc.ms_rng()
        i = mlc.i_rng()
        cosi = math.cos(i)
        noise = np.random.choice(altered_noise_bins)
        threshold = np.random.random() * .1

        # generate positive/negative signal at random
        pos_signal = np.random.choice([True, False])
        EV = Beam = SL = None
        if pos_signal:
            lcur, EV, Beam, SL = mlc.generate_light_curve(P,
                                                          i,
                                                          M_BH,
                                                          M_S=M_S,
                                                          std=noise)
        else:
            lcur = mlc.generate_flat_signal(noise)

        # subtract median filter from signal and normalize for correlation
        # analysis
        flat_lcur = lcur - scipy.signal.medfilt(lcur, kernel)
        flat_lcur = mlc.offset_and_normalize(flat_lcur)

        # perform cross-correlation for all template widths, keeping the
        # strongest result so far (break on positive signal detection)
        best_result = None
        best_corr = None
        best_correlations = None
        for template in templates:
            correlations = scipy.signal.correlate(flat_lcur, template)
            highest_corr = max(correlations)
            result = highest_corr > threshold
            if best_corr is None or highest_corr > best_corr:
                best_corr = highest_corr
                best_result = result
                best_correlations = correlations
            if result:
                break

        # plot some light curves and their correlations at random based on
        # prediction
        if (best_result != pos_signal and np.random.random() > .98) or (
                pos_signal and np.random.random() > .99):
            lc_folder = "./{}/lc{}".format(result_foldername, counter)
            os.makedirs(lc_folder, exist_ok=True)
            mlc.plot_lc(lcur,
                        P,
                        M_BH,
                        i,
                        M_S,
                        filename="{}/lcur{}.pdf".format(lc_folder, counter),
                        EV=EV,
                        Beam=Beam,
                        SL=SL)
            mlc.plot_lc(flat_lcur,
                        P,
                        M_BH,
                        i,
                        M_S,
                        filename="{}/flat_lcur{}.pdf".format(
                            lc_folder, counter))
            mlc.plot_corr(best_correlations, P, M_BH, i, M_S, threshold,
                          noise, "{}/corr{}.pdf".format(lc_folder, counter))
            counter += 1

        # bin result depending on parameter values; each sample goes into
        # exactly one bin per parameter (the highest one it reaches)
        if pos_signal or best_result:
            binnings = (
                # (value compared to edges, value recorded, edges,
                #  samples, actual, predicted)
                (cosi, i, cosi_bins, inclinations, i_actual, i_predicted),
                (P, P, P_bins, periods, P_actual, P_predicted),
                (M_BH, M_BH, mbh_bins, mbhs, mbh_actual, mbh_predicted),
                (M_S, M_S, ms_bins, mss, ms_actual, ms_predicted),
                (threshold, threshold, threshold_bins, thresholds,
                 threshold_actual, threshold_predicted),
            )
            for key, value, edges, samples, actual, predicted in binnings:
                k = _mf_highest_bin(key, edges)
                if k is not None:
                    samples[k].append(value)
                    actual[k].append(pos_signal)
                    predicted[k].append(best_result)

            # noise is drawn from altered_noise_bins, so it is matched by
            # equality rather than by range
            noise_hits = np.flatnonzero(altered_noise_bins == noise)
            if noise_hits.size:
                k = int(noise_hits[-1])
                noises[k].append(noise)
                noise_actual[k].append(pos_signal)
                noise_predicted[k].append(best_result)

        total_actual.append(pos_signal)
        total_predicted.append(best_result)

        # perform completeness analysis every 1000 simulations
        if z % 1000 == 0:
            print('{} simulations complete'.format(z))
            plot_completeness(r'$\alpha$',
                              threshold_bins,
                              threshold_actual,
                              threshold_predicted,
                              num_bins,
                              prefix + 'alpha',
                              scale=True)
            plot_completeness('Noise [ppm]',
                              noise_bins,
                              noise_actual,
                              noise_predicted,
                              num_bins,
                              prefix + 'noise',
                              scale=True)
            plot_completeness('cosi',
                              cosi_bins,
                              i_actual,
                              i_predicted,
                              num_bins,
                              prefix + 'cosi',
                              scale=True)
            plot_completeness('Period [days]', P_bins, P_actual, P_predicted,
                              num_bins, prefix + 'period')
            plot_completeness(r'$M_{BH} [M_{\odot}]$', mbh_bins, mbh_actual,
                              mbh_predicted, num_bins, prefix + 'mbh')
            plot_completeness(r'$M_{\star} [M_{\odot}]$', ms_bins, ms_actual,
                              ms_predicted, num_bins, prefix + 'ms')

    end = time.time()
    print("{} minutes".format(round((end - start) / 60, 2)))
    return total_actual, total_predicted


def _mf_plot_overview_page(lcur_object, filename, pdf):
    '''Add a five-panel page (flux, flat flux, correlation, centroid, flat centroid) to pdf.'''
    times = lcur_object.valid_times
    correlations = lcur_object.correlations
    marker_style = dict(rasterized=True, markersize=1)

    fig, ax = plt.subplots(5, sharex=True, figsize=(6, 10))

    # plot light curve
    ax[0].set_title(filename)
    ax[0].plot(times, lcur_object.valid_fluxes, 'ko', **marker_style)
    ax[0].set_ylabel('PDCSAP Flux')

    # plot flat light curve
    ax[1].plot(times, lcur_object.flat_lcur, 'ko', **marker_style)
    ax[1].set_ylabel('Relative Flux')

    # plot correlation with the alpha threshold (and beta for 'highSL')
    corr_span = [times[0], times[len(correlations) - 1]]
    ax[2].plot(times[:len(correlations)], correlations, 'ko', **marker_style)
    ax[2].plot(corr_span, [Light_Curve.alpha, Light_Curve.alpha],
               '--',
               color='orange',
               rasterized=True)
    if lcur_object.flag == 'highSL':
        ax[2].plot(corr_span, [Light_Curve.beta, Light_Curve.beta],
                   'b--',
                   rasterized=True)
    ax[2].set_ylabel('Correlation')

    # plot centroid
    ax[3].plot(times, lcur_object.valid_centroid, 'ko', **marker_style)
    ax[3].set_ylabel('Centroid')

    # plot flat centroid
    ax[4].plot(times, lcur_object.flat_centroid, 'ko', **marker_style)
    ax[4].set_ylabel('Relative Centroid')
    ax[4].set_xlabel('Time [days]')

    plt.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


def _mf_plot_detection_pages(lcur_object, pdf):
    '''Add one zoomed-in page to pdf for every classified detection of lcur_object.'''
    # (page label, attribute holding the detections, attribute holding the
    # correlations). Attributes are looked up lazily inside the loop so a
    # category without detections never touches its correlation attribute.
    # NOTE(review): only the centroid pages use `best_correlations`; every
    # other category uses `correlations` -- confirm this is intended.
    groups = (
        ('SL', 'real_detections', 'correlations'),
        ('Ambiguous SL', 'ambiguous_real', 'correlations'),
        ('Ambiguous Flare', 'ambiguous_flare', 'correlations'),
        ('Flare', 'flare_detections', 'correlations'),
        ('Centroid', 'centroid_detections', 'best_correlations'),
    )
    for label, detections_attr, correlations_attr in groups:
        for detection in getattr(lcur_object, detections_attr):
            plot_detection(detection[0], lcur_object.window, label, pdf,
                           detection[1], detection[2], detection[3],
                           lcur_object.valid_times, lcur_object.valid_fluxes,
                           lcur_object.flat_lcur,
                           getattr(lcur_object, correlations_attr),
                           lcur_object.valid_centroid,
                           lcur_object.flat_centroid)


def _mf_run_on_directory(directory, result_foldername, templates):
    '''Run the match filter on every .fits file in directory and write one PDF per positive result.'''
    counter = 1
    num_files = 0
    flags = [
        'SL', 'edge', 'transit', 'flare', 'highSL', 'ambiguousSL',
        'ambiguousFlare', 'centroid'
    ]
    results = {flag: set() for flag in flags}

    # one sub-folder per flag; exist_ok tolerates a partially created tree
    result_root = directory + result_foldername
    os.makedirs(result_root, exist_ok=True)
    for flag in flags:
        os.makedirs(result_root + '/' + flag, exist_ok=True)

    for filename in os.listdir(directory):
        num_files += 1
        if num_files % 500 == 0:
            print('{} files completed'.format(num_files))
        if not filename.endswith(".fits"):
            continue

        fits_file = directory + filename
        try:
            with fits.open(fits_file, mode="readonly",
                           memmap=False) as hdulist:
                tess_bjds = hdulist[1].data['TIME']
                pdcsap_fluxes = hdulist[1].data['PDCSAP_FLUX']
                centroid = hdulist[1].data['MOM_CENTR1']
        except Exception:
            # best effort: an unreadable file must not stop the whole run
            print('Could not open file: {}'.format(filename))
            continue

        # Create a light curve object with the file data and run pipeline on
        # it
        lcur_object = Light_Curve(tess_bjds, pdcsap_fluxes, centroid,
                                  templates)
        lcur_object.run_pipeline()
        if not lcur_object.result:
            continue

        # file the result under the flag assigned to this light curve
        flag = lcur_object.flag
        results[flag].add(filename)
        folder = '{}{}/{}'.format(directory, result_foldername, flag)
        pdf_path = '{}/light_curve{}.pdf'.format(folder, counter)
        counter += 1

        # the context manager closes the PDF even if plotting fails
        with pdfs.PdfPages(pdf_path) as pdf:
            _mf_plot_overview_page(lcur_object, filename, pdf)
            _mf_plot_detection_pages(lcur_object, pdf)

    # make a pie chart of the distribution of light curves in each bin
    pie_slices = [len(results[flag]) for flag in flags]
    plt.figure()
    plt.title('Distribution of Positive Detections')
    plt.pie(pie_slices, labels=flags)
    plt.savefig(directory + result_foldername + "/distribution.pdf")
    plt.close()

    return results


def mf_pipeline(directory, result_foldername, mock=False, num_simulations=None):
    '''
    Pipeline runs a match filter on light curves and finds if the light curve
    matches a predetermined template.

    directory: string, directory where light curve .fits files are located;
        it is concatenated directly with file and folder names, so it must
        end with a path separator (unused when mock is True)
    result_foldername: string, location of resulting plots, data
    mock: bool, True if using mock data, False if using real light curves
    num_simulations: int, number of mock light curves to generate
        (required if mock)

    returns: if mock, a (total_actual, total_predicted) tuple of lists with
        one entry per simulation; otherwise a dict mapping each flag to the
        set of file names that received it

    raises: ValueError if mock is True and num_simulations is None
    '''
    if mock and num_simulations is None:
        raise ValueError('num_simulations must be provided when mock=True')

    # width of the median filter used to flatten mock light curves
    kernel = 99

    # generate templates of varying width: 15 to 60 bins in steps of 5, then
    # 70 to 100 bins in steps of 10
    widths = [5 * j + 15 for j in range(10)] + [10 * j + 70 for j in range(4)]
    templates = [
        mlc.offset_and_normalize(mlc.generate_template(1, width))
        for width in widths
    ]

    if mock:
        return _mf_run_mock(templates, result_foldername, num_simulations,
                            kernel)
    return _mf_run_on_directory(directory, result_foldername, templates)
rasterized=True, markersize=2) ax[4].set_ylabel('Rlative Centroid') plt.tight_layout() pdf.savefig(fig) plt.close() foldername = input("Input name of new results folder: ") os.mkdir(foldername) templates = [] widths = [5 * j + 15 for j in range(10)] + [10 * j + 70 for j in range(4)] for width in widths: templates.append(mlc.offset_and_normalize(mlc.generate_template(1, width))) for sector in range(15, 29): flags = [ 'SL', 'edge', 'transit', 'flare', 'highSL', 'ambiguousSL', 'ambiguousFlare', 'centroid' ] results = {flag: set() for flag in flags} if not os.path.exists("{}/Sector{}".format(foldername, sector)): os.mkdir("{}/Sector{}".format(foldername, sector)) os.mkdir("{}/Sector{}/SL_Files".format(foldername, sector)) for flag in flags: os.mkdir("{}/Sector{}/{}".format(foldername, sector, flag)) download_file = "tesscurl_sector_{}_lc.sh".format(sector) with open(download_file) as curl_file: