def detect(fname):
    # Flags stars whose per-segment sigmas are unusually spread out:
    # the std of a star's sigmas lies more than 3 sigma from the median std.
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4  # number of sigma columns (first 4 are id, dwarf flag, Teff, mag)
    sigmas = []
    stds = []
    ids = dl.get_column(data, 0)
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    for star in range(len(sigmas[0])):
        star_sigmas = []
        for sigma in range(0, n):
            if sigmas[sigma][star] != 'nan' and sigmas[sigma][star] != '':
                star_sigmas.append(float(sigmas[sigma][star]))
        stds.append(np.std(star_sigmas))
    stds_median = np.median(stds)
    stds_std = np.std(stds)
    bad_stars = []
    for i in range(len(stds)):
        if abs(stds[i] - stds_median) > 3 * stds_std:
            bad_stars.append(ids[i])
    out = ''
    for star in bad_stars:
        out = out + star + ','
    print fname + ' : ' + out
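# Minimal usage sketch (the file name below is an assumption; any evaluation CSV
# with columns [id, is_dwarf, teff, kepmag, sigma_1, ..., sigma_n], as written by
# evaluate(), should work):
#   detect('evaluations/evaluation_of_Decorrelatedphotometry2.csv')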
def plot(fname):
    # Histogram of the sigma values of each segment, using only the lowest 80%
    # of the valid values so a few very noisy stars do not stretch the x axis.
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4
    f, plots = plt.subplots((n + 1) // 2, 2, sharex=False, sharey=False)
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    for j in range(0, len(sigmas)):
        sigma = sigmas[j]
        good_sigmas = []
        for i in range(0, len(sigma)):
            if sigma[i] != 'nan':
                good_sigmas.append(float(sigma[i]))
        good_sigmas = np.sort(good_sigmas)
        good_sigmas = good_sigmas[0:int(.8 * len(good_sigmas))]
        hist = np.histogram(good_sigmas, bins=175)
        ax = plots[j // 2][j % 2]
        ax.bar(hist[1][:-1], hist[0], width=hist[1][1] - hist[1][0])
        ax.set_xlabel(r'$\sigma_{' + str(j + 1) + '}$', fontsize=40)
        ax.set_ylabel('$n$', fontsize=40)
        ax.text(ax.get_xlim()[1] * 0.9, ax.get_ylim()[1] * 0.75,
                r'$\sigma_{' + str(j + 1) + '}$', fontsize=40)
        ax.tick_params(axis='both', which='major', labelsize=20)
    f.set_size_inches(32, 20)
    f.savefig('histograms/sigma_histo_' + fname + '.png', dpi=300)
def plot(fname):
    # Scatter plot of log10(sigma) versus Kepler magnitude for each segment.
    # Dwarfs are drawn as diamonds, giants as circles, unknowns as crosses;
    # the point colour encodes Teff via color_T.
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    print data[0]
    n = len(data[0]) - 4
    print (n + 1) // 2
    f, plots = plt.subplots((n + 1) // 2, 2, sharex=False, sharey=False)
    dwarf_flags = dl.get_column(data, 1)
    kepmags = dl.get_column(data, 3)
    Teffs = dl.get_column(data, 2)
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    length = len(dl.get_column(data, 0))
    for i in range(0, length):
        done = []
        for seg in range(0, n):
            done.append(seg)
            is_dwarf = dwarf_flags[i]
            if is_dwarf == '1.0' or is_dwarf == '1':
                symbol = 'd'
            elif is_dwarf == '0.0' or is_dwarf == '0':
                symbol = 'o'
            else:
                symbol = 'x'
            x = seg // 2
            y = seg % 2
            plots[x][y].set_ylabel(r'$\log{\sigma_{' + str(seg + 1) + '}}$', fontsize=40)
            plots[x][y].set_xlabel('Kepler band magnitude', fontsize=20)
            plots[x][y].tick_params(axis='both', which='major', labelsize=20)
            plots[x][y].grid(True)
            plots[x][y].set_xlim([8.25, 17])
            plots[x][y].set_ylim([-4.5, 0])
            plots[x][y].text(15.25, -0.5, 'Segment ' + str(seg + 1), fontsize=30)
            if kepmags[i] != 'nan' and sigmas[seg][i] != 'nan' and sigmas[seg][i] != '':
                plots[x][y].scatter(float(kepmags[i]), np.log10(float(sigmas[seg][i])),
                                    marker=symbol, color=color_T(Teffs[i]))
        print fname + ' : ' + str(done) + ' : ' + str((i * 100.0) / length) + '%'
    f.set_size_inches(32, 32)
    f.savefig(fname + '.png', dpi=300)
def get_K2_info(star_id, plot_photo=False, plot_poly=False,
                photometry_dir='Decorrelatedphotometry2',
                targets_file='K2Campaign0targets.csv - K2Campaign0targets.csv',
                poly_fit_deg=0, subtract_fit=True):
    # Collects everything known about one K2 target: catalog info, the light
    # curve split into segments, optional polynomial fits and fit-subtracted data.
    info = dict()
    info['error'] = 'None'
    warnings.simplefilter('ignore', np.RankWarning)
    target_data = search_target_data(star_id, read_target_file(targets_file))
    if target_data[0] == 'not_found':
        info['error'] = 'Star is not a target.'
    else:
        info['star_info'] = target_data
        photo_data = read_K2_file(search_photometry_files(star_id, photometry_dir)[0])
        if photo_data[0] == 'no_data':
            info['error'] = 'No photo data'
            return info
        info['photo_data_by_segments'] = split_data_in_segments(photo_data)
        info['polynomial_fits'] = []
        if poly_fit_deg != 0 or plot_poly:
            for segment in info['photo_data_by_segments']:
                info['polynomial_fits'].append(polynomial_fit(segment, 0, 1, poly_fit_deg))
        info['subtraction_by_segments'] = []
        if subtract_fit:
            # Assumes the polynomial fits above were computed,
            # i.e. poly_fit_deg != 0 or plot_poly is set.
            for i in range(0, len(info['photo_data_by_segments'])):
                segment = info['photo_data_by_segments'][i]
                x = datalib.get_column(segment, 0)
                y = datalib.get_column(segment, 1)
                poly = info['polynomial_fits'][i]
                info['subtraction_by_segments'].append(subtract_poly_from_data(x, y, poly))
        random.seed()
        if plot_photo or plot_poly:
            for i in range(0, len(info['photo_data_by_segments'])):
                if plot_photo:
                    f_plot_photo(info['photo_data_by_segments'][i],
                                 [random.random(), random.random(), random.random()])
                if plot_poly:
                    f_plot_poly(info['polynomial_fits'][i],
                                datalib.get_column(info['photo_data_by_segments'][i], 0),
                                [random.random(), random.random(), random.random()])
            plt.show()
    return info
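# Minimal usage sketch (the EPIC id is a placeholder, not a real target;
# poly_fit_deg=4 mirrors the 4th-degree detrending used in evaluate()):
#   info = get_K2_info('<EPIC id>', poly_fit_deg=4)
#   if info['error'] == 'None':
#       detrended_segments = info['subtraction_by_segments']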
def plot(fname):
    # Histogram of the Kepler band magnitudes of all evaluated stars.
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    mags = dl.get_column(data, 3)
    # Keep only the magnitudes that are actually present
    # (deleting from the list while indexing over it would skip entries).
    mags = [float(m) for m in mags if m != 'nan']
    hist = np.histogram(mags, bins=100)
    plt.bar(hist[1][:-1], hist[0], width=hist[1][1] - hist[1][0])
    plt.xlabel('Kepler band magnitude', fontsize=50)
    plt.ylabel('$n$', fontsize=50)
    plt.tick_params(axis='both', which='major', labelsize=40)
    plt.gcf().set_size_inches(32, 20)
    plt.gcf().savefig(fname.replace('evaluations/', 'histograms/mag/mag_histo_') + '.png', dpi=300)
def medians(fname):
    setup_text_plots(fontsize=10, usetex=True)
    data = dl.read_dat(fname, ',')
    n = len(data[0]) - 4
    sigmas = []
    for sigma in range(0, n):
        sigmas.append(dl.get_column(data, 4 + sigma))
    medians = []
    for j in range(0, len(sigmas)):
        sigma = sigmas[j]
        good_sigmas = []
        for i in range(0, len(sigma)):
            if sigma[i] != 'nan':
                good_sigmas.append(float(sigma[i]))
        good_sigmas = np.sort(good_sigmas)
        good_sigmas = good_sigmas[0:int(.8 * len(good_sigmas))]
        medians.append(np.median(good_sigmas))
    return medians
def polynomial_fit(data, xcol, ycol, degree):
    # Fits a polynomial of the given degree to (xcol, ycol) after removing
    # points more than 4 sigma away (delete_n_sigma modifies x and y in place).
    x = datalib.get_column(data, xcol)
    y = datalib.get_column(data, ycol)
    delete_n_sigma(x, y, 4)
    return np.polyfit(x, y, degree)
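# The coefficients come straight from np.polyfit, so they can be evaluated with
# np.poly1d; a sketch mirroring how the fits are subtracted elsewhere in this code:
#   poly = np.poly1d(polynomial_fit(segment, 0, 1, 4))
#   detrended = [y[i] - poly(x[i]) for i in range(len(x))]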
# NOTE: the original header of this function was cut off; the signature and the
# guard for missing values below are assumed from how color_T is called elsewhere.
# It maps Teff to an RGB colour (red for cool stars, blue for hot ones), black if missing.
def color_T(Teff):
    if Teff == 'nan' or Teff == '':
        return [0, 0, 0]
    Teff = float(Teff)
    x = (Teff - 2265) / 5375.0
    return [-x * x + 1, -4 * x * (x - 1), -x * (x - 2)]


def label_d(is_dwarf):
    # Text label for the dwarf flag.
    if is_dwarf == '1.0':
        return 'Dwarf'
    elif is_dwarf == '0.0':
        return 'Giant'
    else:
        return 'Unknown'


data = dl.read_dat('evaluation.csv', ',')
f, plots = plt.subplots(3, 2, sharex=False, sharey=False)
dwarf_flags = dl.get_column(data, 1)
kepmags = dl.get_column(data, 3)
Teffs = dl.get_column(data, 2)
sigmas = []
for sigma in range(4, 9):
    sigmas.append(dl.get_column(data, sigma))
length = len(dl.get_column(data, 0))
for i in range(1, length):
    for seg in range(4, 9):
        is_dwarf = dwarf_flags[i]
        if is_dwarf == '1.0':
            symbol = 'd'
        elif is_dwarf == '0.0':
            symbol = 'o'
        else:
            symbol = 'x'
def f_plot_photo(data, col):
    x = datalib.get_column(data, 0)
    y = datalib.get_column(data, 1)
    plt.plot(x, y, '-', color=col)
import datalib as dl
import numpy as np
import matplotlib.pyplot as plt
from astroML.plotting import setup_text_plots

setup_text_plots(fontsize=10, usetex=True)
data = dl.read_dat('evaluation.csv', ',')
f, plots = plt.subplots(3, 2, sharex=False, sharey=False)
sigmas = []
for sigma in range(4, 9):
    sigmas.append(dl.get_column(data, sigma))
for j in range(0, len(sigmas)):
    sigma = sigmas[j]
    good_sigmas = []
    for i in range(1, len(sigma)):
        if sigma[i] != 'nan':
            good_sigmas.append(float(sigma[i]))
    good_sigmas = np.sort(good_sigmas)
    good_sigmas = good_sigmas[0:int(0.9 * len(good_sigmas))]
    hist = np.histogram(good_sigmas, bins=175)
    ax = plots[j // 2][j % 2]
    ax.bar(hist[1][:-1], hist[0], width=hist[1][1] - hist[1][0])
    ax.set_xlabel(r'$\sigma_' + str(j + 1) + '$', fontsize=40)
    ax.set_ylabel('$n$', fontsize=40)
    ax.text(15.25, -0.5,
import K2
import datalib
import matplotlib.pyplot as plt
import random
import numpy as np
import scipy as sp

KEPLER_CADENCE = 29.42  # Kepler long-cadence period, in minutes


def get_data(star_id, data):
    # Returns [is_dwarf, teff, kepmag] for the star, or ['nan'] * 3 if it is
    # not in the catalog.
    for row in data:
        if float(star_id) == float(row[0]):
            return row[1:4]
    return ['nan'] * 3


targets = datalib.get_column(
    K2.read_target_file('K2Campaign0targets.csv - K2Campaign0targets.csv'), 0)
del targets[0]  # drop the header row
result = [['EPIC', 'is_dwarf', 'teff', 'kepmag',
           'sigma_1', 'sigma_2', 'sigma_3', 'sigma_4', 'sigma_5']]
errors = []
quality = [['#EPIC', 'seg_1_ratio', 'seg_1_flag', 'seg_2_ratio', 'seg_2_flag',
            'seg_3_ratio', 'seg_3_flag', 'seg_4_ratio', 'seg_4_flag',
            'seg_5_ratio', 'seg_5_flag']]
tess_data = datalib.read_dat('k2tess.csv', ',')
del tess_data[0]
faint_data = datalib.read_dat('k2faint.csv', ',')
del faint_data[0]
for i in range(0, len(targets)):
    target = targets[i]
    print target + ' : ' + str(int((100.0 * i) / len(targets))) + '%'
def evaluate(folder_name):
    # Ignores NumPy warnings
    warnings.simplefilter('ignore', np.RankWarning)
    # Loads the K2 tess and K2 faint catalogs
    tess_data = dl.read_dat('k2tess.csv', ',')
    del tess_data[0]
    tess_data_ids = dl.get_column(tess_data, 0)
    tess_data_is_dwarf = dl.get_column(tess_data, 1)
    tess_data_teff = dl.get_column(tess_data, 2)
    tess_data_mag = dl.get_column(tess_data, 3)
    faint_data = dl.read_dat('k2faint.csv', ',')
    del faint_data[0]
    faint_data_ids = dl.get_column(faint_data, 0)
    faint_data_is_dwarf = dl.get_column(faint_data, 1)
    faint_data_teff = dl.get_column(faint_data, 2)
    faint_data_mag = dl.get_column(faint_data, 3)
    # 2d array that will be stored in a .csv file:
    # [[id, is_dwarf, teff, mag, sigma1, sigma2, ...], ...]
    output = []
    # Stars that could not be found in the tess and faint catalogs
    not_on_catalog = []
    dirlist = os.listdir(folder_name)
    for n in range(0, len(dirlist)):
        fname = dirlist[n]
        # 2d array containing the light curve: [[time, flux, segment], ...]
        data = dl.read_dat(folder_name + '/' + fname, ',')
        # Searches the K2 catalogs for info on this star
        star_id = ''
        is_dwarf = ''
        teff = 0
        mag = 0
        for i in range(0, len(tess_data_ids)):
            if str(tess_data_ids[i]).replace('.0', '') in fname:
                star_id = tess_data_ids[i]
                is_dwarf = tess_data_is_dwarf[i]
                teff = tess_data_teff[i]
                mag = tess_data_mag[i]
                # Drop the matched star so later files do not re-scan it
                del tess_data_ids[i], tess_data_mag[i], tess_data_teff[i], tess_data_is_dwarf[i]
                break
        if star_id == '':
            # If it was not found in the tess catalog, search the faint catalog
            for i in range(0, len(faint_data_ids)):
                if str(faint_data_ids[i]).replace('.0', '') in fname:
                    star_id = faint_data_ids[i]
                    is_dwarf = faint_data_is_dwarf[i]
                    teff = faint_data_teff[i]
                    mag = faint_data_mag[i]
                    del faint_data_ids[i], faint_data_mag[i], faint_data_teff[i], faint_data_is_dwarf[i]
                    break
        print star_id + ' : ' + str((n * 100.0 / len(dirlist))) + '%'  # progress report
        # The code only runs if the star is on a catalog and the light curve has data
        if star_id != '' and len(data) > 1:
            #
            # Computes standard deviations for each segment:
            #
            time = []
            flux = []
            segment = []
            for row in data:
                time.append(float(row[0]))
                flux.append(float(row[1]))
                segment.append(row[2])
            # Deletes >3 sigma points and points without a segment
            sigma = np.std(flux)
            avg = np.average(flux)
            i = 0
            end = len(time)
            while i < end:
                if abs(flux[i] - avg) > 3 * sigma or str(segment[i]) == 'nan':
                    del time[i], flux[i], segment[i]
                    end = len(time)
                else:
                    i += 1
            # Splits into segments
            n_seg = 0  # number of segments
            for i in range(1, len(segment)):
                if str(segment[-i]) != 'nan':
                    n_seg = int(float(segment[-i])) + 1
                    break
            time_seg = []
            flux_seg = []
            for i in range(0, n_seg):
                time_seg.append([])
                flux_seg.append([])
            for i in range(0, len(time)):
                if segment[i] != 'nan':
                    time_seg[int(float(segment[i]))].append(time[i])
                    flux_seg[int(float(segment[i]))].append(flux[i])
            # Subtracts a 4th-degree polynomial fit in each segment and computes the standard deviation
            sigmas = []
            for i in range(0, len(time_seg)):
                # Only use segments that have data and more than 50% of the points
                # expected for their time span
                if time_seg[i] != [] and len(time_seg[i]) > .5 * (time_seg[i][-1] - time_seg[i][0]) / KEPLER_CADENCE:
                    poly = np.poly1d(np.polyfit(time_seg[i], flux_seg[i], 4))
                    for j in range(0, len(time_seg[i])):
                        flux_seg[i][j] -= poly(time_seg[i][j])
                    sigmas.append(np.std(flux_seg[i]))
                else:
                    sigmas.append('nan')
            output.append([star_id, is_dwarf, teff, mag] + sigmas)
        else:
            not_on_catalog.append(fname)
    print not_on_catalog
    dl.write_dat(output, 'evaluations/evaluation_of_' + folder_name + '.csv', ',')
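# Usage sketch (the folder name is an assumption; any directory of light-curve
# CSVs laid out as [time, flux, segment] rows should work):
#   evaluate('Decorrelatedphotometry2')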
import datalib as dl
import numpy as np
import matplotlib.pyplot as plt

data = dl.read_dat('evaluation.csv', ',')
sigmas = []
for sigma in range(4, 9):
    sigmas.append(dl.get_column(data, sigma))
medians = []
for sigma in sigmas:
    good_sigmas = []
    for i in range(1, len(sigma)):
        # keep only entries that are present and positive (so their log10 is defined)
        if sigma[i] != 'nan' and float(sigma[i]) > 0:
            good_sigmas.append(float(sigma[i]))
    good_sigmas = np.sort(good_sigmas)
    good_sigmas = good_sigmas[0:int(0.8 * len(good_sigmas))]
    medians.append(np.median(good_sigmas))
plt.plot([1, 2, 3, 4, 5], medians)
plt.show()