def test_epoch(key, surv, user_choices):
    """
    Check if previously calculated GP fit survives selection cuts.

    One object of type `key` is drawn at random from `surv`, its raw light
    curve is read, the stored GP mean is loaded and the light curve is
    normalized, shifted to peak MJD and submitted to the epoch check.

    input:   key, str
             object type

             surv, dict
             dictionary of objects surviving basic cuts
             keys are types, values are lists of names; the drawn name is
             used as the path to the raw light curve

             user_choices, dict
             output from snclass.util.read_user_input
             NOTE: modified in place, 'path_to_lc' is overwritten with
             the drawn object

    output:  my_lc, LC object
             light curve object after the epoch check was run

             raw, dict
             raw data read for the drawn object, merged with user_choices
             and with 'n_samples' set to ['0']
    """
    # sample a random obj in the training sample
    indx = np.random.randint(0, len(surv[key]))
    name = surv[key][indx]

    # NOTE: the 'do_mcmc' flag used to be parsed here into a local that was
    # never used; it was dropped so a missing flag no longer aborts the check

    # update path to raw data
    user_choices['path_to_lc'] = [name]

    # read light curve raw data
    raw = read_snana_lc(user_choices)

    # update raw data with user choices
    raw.update(user_choices)

    # set number of samples to 0 (we are only interested in the mean for now)
    raw['n_samples'] = ['0']

    # initiate light curve object
    my_lc = LC(raw, user_choices)

    screen('Fitting SN' + raw['SNID:'][0], user_choices)

    # load GP fit
    # NOTE(review): the 'DES_SN' prefix is hard coded here while other
    # routines use user_choices['file_root'] -- confirm this is intended
    my_lc.load_fit_GP(user_choices['samples_dir'][0] + '/DES_SN' +
                      raw['SNID:'][0] + '_mean.dat')

    # normalize
    my_lc.normalize()

    # shift to peak mjd
    my_lc.mjd_shift()

    # check epoch requirements
    my_lc.check_epoch()

    return my_lc, raw
def classify_1obj(din):
    """
    Perform classification of 1 supernova.

    input: din, dict - keywords, value type:
                     user_input, dict -> output from read_user_input
                     name, str -> name of raw light curve file
                     type_number, dict -> translate between str and numerical
                                          classes identification
                     do_plot, bool -> if True produce plots,
                                      default is False
                     p1, dict -> keywords read by this function:
                         photo_dir: directory of GP fitted results
                                    for photo sample
                         obj_kpca: object whose transform() projects the
                                   sample matrix
                         spec_matrix, binary_types: passed to nneighbor
                         labels, plot_dir: only used when do_plot is True

    output: class_results:
                list -> [snid, true_type, prob_Ia]
            None is returned when the GP fit is missing in at least one
            filter or when the object fails the epoch cuts.

    raises: ValueError if the object passes all cuts but its type is not
            found in din['type_number'].
    """
    from snclass.functions import screen, nneighbor
    from snclass.util import translate_snid, read_snana_lc
    from snclass.treat_lc import LC

    user_input = din['user_input']
    p1 = din['p1']

    # update supernova name
    user_input['path_to_lc'] = [translate_snid(din['name'])[0]]

    # read raw data
    raw = read_snana_lc(user_input)

    # set true type (if several classes match, the last one wins)
    true_type = None
    type_value = raw[user_input['type_flag'][0]][0]
    for names in din['type_number']:
        if type_value in din['type_number'][names]:
            true_type = names

    # load GP fit and test epoch cuts
    new_lc = LC(raw, user_input)
    new_lc.user_choices['samples_dir'] = [p1['photo_dir']]
    new_lc.load_fit_GP(p1['photo_dir'] + din['name'])

    fitted_everywhere = all([len(new_lc.fitted['GP_fit'][fil]) > 0
                             for fil in user_input['filters']])

    fil_choice = user_input['ref_filter'][0]
    if fil_choice == 'None':
        fil_choice = None

    # a GP fit is required in every filter
    if not fitted_everywhere:
        return None

    new_lc.normalize(samples=True, ref_filter=fil_choice)
    new_lc.mjd_shift()
    new_lc.check_epoch()

    if not new_lc.epoch_cuts:
        return None

    # fail early and clearly instead of hitting an unbound local below
    if true_type is None:
        raise ValueError('Type ' + str(type_value) + ' of SN' +
                         str(raw['SNID:'][0]) +
                         ' not found in type_number.')

    screen(new_lc.raw['SNID:'][0], user_input)

    # build matrix lines
    new_lc.build_steps(samples=True)

    # transform samples
    small_matrix = new_lc.samples_for_matrix
    data_test = p1['obj_kpca'].transform(small_matrix)

    # classify samples
    new_label = nneighbor(data_test, p1['spec_matrix'],
                          p1['binary_types'], user_input)

    # calculate final probability: fraction of samples labelled '0'
    n_ia = sum(1 for item in new_label if item == '0')
    new_lc.prob_Ia = n_ia / float(user_input['n_samples'][0])

    # do_plot is optional, as documented
    if din.get('do_plot', False):
        # NOTE(review): plot_proj is not imported in this function, it is
        # expected to be available at module level -- confirm
        plot_proj(p1['spec_matrix'], data_test, p1['labels'], new_lc,
                  p1['plot_dir'], [0, 1], true_type)

    # print result to screen
    screen('SN' + new_lc.raw['SNID:'][0] +
           ', True type: ' + true_type + ', prob_Ia = ' +
           str(new_lc.prob_Ia), user_input)

    class_results = [new_lc.raw['SNID:'][0], true_type, new_lc.prob_Ia]

    return class_results
def set_lclist(params):
    """
    Build a list of all objects satisfying selection cuts and plot them.

    Every '*mean*' file in the fitted data directory is loaded; objects with
    a GP fit in all filters and passing the epoch cuts are written to
    <list_dir><sample>_<filters>_<epoch_min>_<epoch_max>_ref_<ref>.list

    If any fit raises ValueError, the offending names are written to
    'problematic_fits.dat' (current directory) and the program exits
    through sys.exit() without writing the list.

    input: params, dict
           keywords: plot_dir
                     path to store plots.
                     if None plots are not generated

                     fitted_data_dir
                     path to fitted data

                     list_dir
                     path to list directory

                     sample
                     'spec' or 'photo'

                     user_choices, dict
                     output from snclass.read_user_input
                     NOTE: modified in place ('path_to_lc', 'n_samples')
    """
    import os
    import sys

    from snclass.treat_lc import LC
    from snclass.util import translate_snid, read_snana_lc
    from snclass.functions import screen

    user_choices = params['user_choices']
    plot_dir = params['plot_dir']
    fitted_dir = params['fitted_data_dir']

    # create plot directory
    if plot_dir is not None and not os.path.isdir(plot_dir):
        os.makedirs(plot_dir)

    flist = os.listdir(fitted_dir)
    # set for O(1) lookup of the samples files inside the loop
    fitted_files = set(flist)

    photo_list = []
    problem = []
    cont = 0
    rfil = user_choices['ref_filter'][0]

    for obj in flist:
        # only GP mean files are considered, skipping backups ('~')
        # and names containing 'Y'
        if 'mean' not in obj or '~' in obj or 'Y' in obj:
            cont = cont + 1
            continue

        screen(obj, user_choices)
        rname = translate_snid(obj)[0]
        user_choices['path_to_lc'] = [rname]
        user_choices['n_samples'] = ['0']

        raw = read_snana_lc(user_choices)
        new_lc = LC(raw, user_choices)
        snid = raw['SNID:'][0]

        samples_name = user_choices['file_root'][0] + snid + '_samples.dat'
        if samples_name not in fitted_files:
            screen('Samples not found for SN' + snid, user_choices)
            continue

        new_lc.user_choices['n_samples'] = ['100']
        new_lc.user_choices['samples_dir'] = [fitted_dir]

        # the whole treatment stays inside the try: a ValueError raised at
        # any step marks the object as a problematic fit
        try:
            new_lc.load_fit_GP(fitted_dir + obj)

            n_fitted = sum(1 for fil in user_choices['filters']
                           if len(new_lc.fitted['GP_fit'][fil]) > 0)

            if n_fitted == len(user_choices['filters']):
                if rfil == 'None':
                    new_lc.normalize()
                else:
                    new_lc.normalize(ref_filter=rfil)
                new_lc.mjd_shift()
                new_lc.check_epoch()

                if new_lc.epoch_cuts:
                    photo_list.append(rname)

                    # only plot if not already done
                    if plot_dir is not None:
                        plot_name = plot_dir + 'SN' + snid + '.png'
                        if not os.path.isfile(plot_name):
                            new_lc.plot_fitted(file_out=plot_name)
                else:
                    screen('SN' + snid + ' did not satisfy' +
                           ' epoch cuts!\n', user_choices)
                    cont = cont + 1
            else:
                screen('SN' + snid + ' does not exist in ' +
                       'all filters!\n', user_choices)
                cont = cont + 1

        except ValueError:
            problem.append(rname)
            cont = cont + 1

    screen('Missed ' + str(cont) + ' SN.', user_choices)

    # store list of problematic fits and abort
    if problem:
        with open('problematic_fits.dat', 'w') as op2:
            for obj in problem:
                op2.write(obj + '\n')
        sys.exit()

    # set parameter for file name
    # negative lower limits are stored as their absolute value,
    # non-negative ones get a 'p' prefix
    epoch_low = int(user_choices['epoch_cut'][0])
    if epoch_low < 0:
        epoch_min = str(abs(epoch_low))
    else:
        epoch_min = 'p' + str(abs(epoch_low))

    epoch_max = str(int(user_choices['epoch_cut'][1]) - 1)

    filter_list = ''.join(user_choices['filters'])

    # save objs list
    if not os.path.isdir(params['list_dir']):
        os.makedirs(params['list_dir'])

    # NOTE(review): ref_filter is compared to the string 'None' above, so
    # this `is None` test looks like it can never be true and the file is
    # tagged '_ref_None'. Left untouched because changing it would rename
    # the output file -- confirm which name downstream code expects.
    ref_filter = user_choices['ref_filter'][0]
    if ref_filter is None:
        ref_fils = 'global'
    else:
        ref_fils = ref_filter

    list_name = params['list_dir'] + params['sample'] + '_' + filter_list + \
                '_' + epoch_min + '_' + epoch_max + '_ref_' + ref_fils + \
                '.list'

    with open(list_name, 'w') as op1:
        for item in photo_list:
            op1.write(item + '\n')
def select_GP(params, user_choices):
    """
    Select original objs to build a synthetic spectroscopic sample.

    For each type in params['draw_spec_samples'], objects are drawn at
    random from the surviving spectroscopic sample; one GP realization of
    each drawn object is saved in params['synthetic_dir'] and kept only if
    it passes the epoch cuts, until the requested number is reached.
    The program exits through sys.exit() when the number of objects failing
    the epoch cuts exceeds 10 times the population of that type.

    input: params, dict
           output from set_paramameters
           keywords read here: draw_spec_samples, surv_spec_names,
           spec_pop, synthetic_dir, fitted_data_dir

           user_choices, dict
           output from snclass.util.read_user_input
           NOTE: modified in place ('path_to_lc' is overwritten)
    """
    from snclass.util import translate_snid, read_snana_lc
    from snclass.functions import screen
    from snclass.treat_lc import LC
    from snclass.fit_lc_gptools import save_result

    import os
    import numpy as np
    import sys

    # set reference filter
    fil_choice = user_choices['ref_filter'][0]
    if fil_choice == 'None':
        fil_choice = None

    file_root = user_choices['file_root'][0]
    synth_prefix = params['synthetic_dir'] + '/'

    # select extra GP realizations in order to construct
    # a representative spec sample
    for key in params['draw_spec_samples']:
        n_wanted = params['draw_spec_samples'][key]
        cont = 0
        fail = 0

        # check if there are existing objs in this sample
        screen('... Check existing objs', user_choices)
        ready = []
        for obj in params['surv_spec_names'][key]:
            # NOTE(review): every other call in this module indexes the
            # result of translate_snid ([0]); here it is concatenated
            # directly with strings -- confirm which element is meant
            obj_id = translate_snid(obj)

            for j in range(n_wanted):
                mean_file = synth_prefix + file_root + str(j) + \
                            'X' + obj_id + '_mean.dat'

                if os.path.isfile(mean_file) and mean_file not in ready:
                    cont = cont + 1
                    ready.append(mean_file)
                    screen('Found ready SN ' + str(cont) + 'X' +
                           obj_id, user_choices)

        # NOTE(review): neither counter advances when the samples file is
        # missing or the fit is incomplete, so this loop never ends if no
        # drawn object is usable
        while cont < n_wanted:

            # draw one of the objs in the spec sample
            indx = np.random.randint(0, params['spec_pop'][key])
            name = params['surv_spec_names'][key][indx]

            user_choices['path_to_lc'] = [name]

            # read light curve raw data
            raw = read_snana_lc(user_choices)
            snid = raw['SNID:'][0]

            fitted_root = params['fitted_data_dir'] + file_root + snid
            if not os.path.isfile(fitted_root + '_samples.dat'):
                continue

            # initiate light curve object
            my_lc = LC(raw, user_choices)

            screen('Loading SN' + snid, user_choices)

            # load GP fit
            my_lc.user_choices['n_samples'] = ['100']
            my_lc.user_choices['samples_dir'] = [params['fitted_data_dir']]
            my_lc.load_fit_GP(fitted_root + '_mean.dat')

            n_fitted = sum(1 for fil in user_choices['filters']
                           if len(my_lc.fitted['GP_fit'][fil]) > 0)
            if n_fitted != len(user_choices['filters']):
                continue

            # normalize
            my_lc.normalize(samples=True, ref_filter=fil_choice)

            # shift to peak mjd
            my_lc.mjd_shift()

            # check epoch requirements
            my_lc.check_epoch()

            if not my_lc.epoch_cuts:
                screen('Failed to pass epoch cuts!\n', user_choices)
                fail = fail + 1

                # give up after too many failures
                if fail > 10 * params['spec_pop'][key]:
                    cont = 100000
                    sys.exit()
                continue

            screen('... Passed epoch cuts', user_choices)
            screen('... ... This is SN type ' +
                   raw[user_choices['type_flag'][0]][0] +
                   ' number ' + str(cont + 1) + ' of ' +
                   str(n_wanted), user_choices)

            # draw one realization, the same index is used in all filters
            size = len(my_lc.fitted['realizations'][
                user_choices['filters'][0]])
            indx2 = np.random.randint(0, size)

            for fil in user_choices['filters']:
                # single-argument print(): same output on Python 2 and 3
                print('... ... ... filter ' + fil)
                raw['GP_fit'][fil] = my_lc.fitted['realizations'][fil][indx2]
                raw['GP_std'][fil] = my_lc.fitted['GP_std'][fil]
                raw['xarr'][fil] = my_lc.fitted['xarr'][fil]

            # set new file root
            raw['file_root'] = [file_root + str(cont) + 'X']
            raw['samples_dir'] = [synth_prefix]
            save_result(raw)

            synth_mean = synth_prefix + file_root + str(cont) + \
                         'X' + snid + '_mean.dat'

            # check epoch for this realization
            new_lc = LC(raw, user_choices)
            new_lc.load_fit_GP(synth_mean)
            new_lc.normalize(ref_filter=fil_choice)
            new_lc.mjd_shift()
            new_lc.check_epoch()

            if new_lc.epoch_cuts:
                cont = cont + 1
            else:
                screen('Samples failed to pass epoch cuts!\n', user_choices)
                # discard the rejected realization
                os.remove(synth_mean)

            print('\n')
def build_sample(params):
    """
    Build a directory holding all raw data passing selection cuts.

    Every '*samples*' file found in the photometric and spectroscopic
    fitted directories is translated to its raw light curve; the raw file is
    copied to params['raw_dir'] when the GP fit exists in all filters and
    the object passes the epoch cuts.

    input: params, dict
           keywords: 'raw_dir' -> new directory to be created
                     'photo_dir' -> photometric LC fitted with GP
                     'spec_dir' -> spectroscopic LC fitted with GP
                     'user_choices' -> output from
                                       snclass.util.read_user_input
                                       NOTE: modified in place
                                       ('path_to_lc', 'n_samples')
    """
    import os
    import shutil

    from snclass.util import read_snana_lc, translate_snid
    from snclass.treat_lc import LC
    from snclass.functions import screen

    user_choices = params['user_choices']

    # directories come from params (they were referenced as bare,
    # undefined names before)
    raw_dir = params['raw_dir']
    photo_dir = params['photo_dir']
    spec_dir = params['spec_dir']

    # create data directory
    if not os.path.isdir(raw_dir):
        os.makedirs(raw_dir)

    # read fitted light curves; each listing is kept with its own
    # directory so the right path is used even if both listings are equal
    fitted_sets = [(photo_dir, os.listdir(photo_dir)),
                   (spec_dir, os.listdir(spec_dir))]

    for fit_dir, sn_set in fitted_sets:
        for obj in sn_set:
            if 'samples' not in obj or '~' in obj or 'Y' in obj:
                continue

            screen(obj, user_choices)

            rname = translate_snid(obj)[0]
            user_choices['path_to_lc'] = [rname]
            user_choices['n_samples'] = ['0']

            # read raw data
            raw = read_snana_lc(user_choices)
            new_lc = LC(raw, user_choices)

            # load GP fit
            new_lc.load_fit_GP(fit_dir + user_choices['file_root'][0] +
                               raw['SNID:'][0] + '_mean.dat')

            n_fitted = sum(1 for fil in user_choices['filters']
                           if len(new_lc.fitted['GP_fit'][fil]) > 0)
            if n_fitted != len(user_choices['filters']):
                continue

            # treat light curve
            # the string 'None' means no reference filter, as in the
            # other routines of this module
            ref_filter = user_choices['ref_filter'][0]
            if ref_filter == 'None':
                new_lc.normalize()
            else:
                new_lc.normalize(ref_filter=ref_filter)
            new_lc.mjd_shift()
            new_lc.check_basic()
            new_lc.check_epoch()

            # check epoch cuts
            data_path = user_choices['path_to_obs'][0]
            if new_lc.epoch_cuts:
                shutil.copy2(data_path + rname, raw_dir + rname)
            else:
                screen('... SN' + raw['SNID:'][0] +
                       ' fail to pass epoch cuts!', user_choices)
def classify_test(test_name, matrix, user_input,
                  test_dir='test_samples/', csamples=True):
    """
    Classify one photometric supernova using a trained KernelPCA matrix.

    input: test_name, str
           name of mean GP fit file

           matrix, snclass.matrix.DataMatrix object
           trained KernelPCA matrix

           user_input, dict
           output from snclass.util.read_user_input
           NOTE: modified in place; 'path_to_lc' is overwritten and,
           when the object passes the epoch cuts, 'samples_dir' is set
           to [test_dir]. 'n_samples' is always restored on exit.

           test_dir, str, optional
           name of directory to store samples from test light curve
           Default is 'test_samples/'

           csamples, bool, optional
           If True, fit GP object and generate sample file as output
           otherwise reads samples from file
           Default is True

    return: new_lc, snclass.treat_lc.LC object
            updated with test projections and, if csamples is True,
            probability of being Ia
            None is returned if the object fails the epoch cuts.
    """
    # update path to raw light curve
    user_input['path_to_lc'] = [translate_snid(test_name, 'FLUXCAL')[0]]

    # store number of samples for latter tests
    nsamples = user_input['n_samples'][0]

    # reset the number of samples for preliminary tests
    user_input['n_samples'] = ['0']

    try:
        # read raw data
        raw = read_snana_lc(user_input)

        # load GP fit and test epoch cuts
        new_lc = LC(raw, user_input)
        new_lc.load_fit_GP(user_input['samples_dir'][0] + test_name)
        new_lc.normalize()
        new_lc.mjd_shift()
        new_lc.check_epoch()
    finally:
        # always give the caller back its number of samples; it used to
        # stay at '0' when the epoch cuts failed or a step above raised
        user_input['n_samples'] = [nsamples]

    if not new_lc.epoch_cuts:
        return None

    # update test sample directory
    user_input['samples_dir'] = [test_dir]

    # update user choices (number of samples already restored above)
    new_lc.user_choices = user_input

    # fit GP or normalize/shift fitted mean
    test_matrix = test_samples(new_lc, calc_samples=bool(csamples))

    # project test
    new_lc.test_proj = matrix.transf_test.transform(test_matrix)

    # classify
    new_lc.new_label = nneighbor(new_lc.test_proj, matrix.low_dim_matrix,
                                 matrix.sntype, matrix.user_choices)

    if csamples:
        # fraction of samples labelled '0'
        # (labels live on new_lc; a bare `new_label` was never defined)
        n_ia = sum(1 for item in new_lc.new_label if item == '0')
        new_lc.prob_Ia = n_ia / float(nsamples)

    return new_lc
def check_file(self, filename, epoch=True, ref_filter=None):
    """
    Construct one line of the data matrix.

    The GP fit stored under `filename` is loaded, normalized and shifted to
    peak MJD; when the object is accepted its SNID is appended to self.snid.

    input:   filename, str
             file of raw data for 1 supernova

             epoch, bool - optional
             If true, check if SN satisfies epoch cuts
             Default is True

             ref_filter, str - optional
             Reference filter for peak MJD calculation
             Default is None

    output:  (obj_line, redshift, obj_class) if the object is accepted,
             where obj_line concatenates the fluxes of all filters
             None if the object fails the epoch cuts
    """
    choices = self.user_choices

    screen('Fitting ' + filename, choices)

    # translate identifier
    raw_name = translate_snid(filename, choices['photon_flag'][0])[0]
    choices['path_to_lc'] = [raw_name]

    # read raw data and set up the light curve object
    raw = read_snana_lc(choices)
    lc_obj = LC(raw, choices)

    # GP fit -> normalization -> shift to peak mjd
    lc_obj.load_fit_GP(choices['samples_dir'][0] + filename)
    lc_obj.normalize(ref_filter=ref_filter)
    lc_obj.mjd_shift()

    # epoch requirements are only tested on demand
    if epoch:
        lc_obj.check_epoch()
    else:
        lc_obj.epoch_cuts = True

    # rejected object: report and leave
    if not lc_obj.epoch_cuts:
        screen('... Failed to pass epoch cuts!', choices)
        screen('\n', choices)
        return None

    # build data matrix lines
    lc_obj.build_steps()

    # one flat line, filters in the order given by the user
    obj_line = []
    for fil in choices['filters']:
        obj_line.extend(lc_obj.flux_for_matrix[fil])

    redshift = raw[choices['redshift_flag'][0]][0]
    obj_class = raw[choices['type_flag'][0]][0]

    self.snid.append(raw['SNID:'][0])

    return obj_line, redshift, obj_class
def main(args):
    """
    Construct 'fake' training.

    Use photometric simulated sample to guess proportions between
    spectroscopic sample classes.

    input: args
           object with attributes
           input -> user input file, passed to read_user_input
           dir   -> directory where the synthetic spec data is stored
                    (created if it does not exist)
    """
    # read user input
    user_choices = read_user_input(args.input)

    epoch_tag = user_choices['epoch_cut'][0] + '_' + \
                user_choices['epoch_cut'][1]

    ##########################################################################
    # Spec

    # build complete spec list
    screen('Building spectroscopic sample.', user_choices)

    user_choices['sample_cut'] = ['1', '3', '21', '22', '23', '32', '33']
    name_slist = 'spec_' + epoch_tag + '.list'
    choose_sn(user_choices, output_file=name_slist)

    # check population according to type
    # the list just written is read back, as done for the photo sample
    # (a fixed 'spec.list' was read before, which is not the file
    # written above)
    spec_pop = check_pop(name_slist, user_choices)

    # count spec classes surviving selection cuts
    surv_spec = check_fitted(user_choices['samples_dir'][0], user_choices)

    ##########################################################################
    # Photo

    # build complete photo list
    screen('Build photometric samples.', user_choices)

    user_choices['sample_cut'] = ['-9']
    name_plist = 'photo_' + epoch_tag + '.list'
    choose_sn(user_choices, output_file=name_plist)

    # check population according to type
    photo_pop = check_pop(name_plist, user_choices)
    photo_frac = calc_fraction(photo_pop)

    ##########################################################################
    # Building fake training sample

    screen('Checking compatibility.', user_choices)

    # construct number of SN expected in spec sample
    total_spec = sum(spec_pop.values())
    spec_num = {}
    for item in photo_pop:
        spec_num[item] = int(np.round(total_spec * photo_frac[item]))

    # construct synthetic spec data directory
    synthetic_dir = args.dir
    if not os.path.isdir(synthetic_dir):
        os.makedirs(synthetic_dir)

    # collect gp objects
    gp_objs = {}

    # run through all types
    for key in spec_num:

        if key not in surv_spec:
            continue

        # start cont of failed tries
        fail = 0

        # check which objs and samples were already calculated
        ready = check_mean_GP(key, surv_spec, spec_num, user_choices,
                              synthetic_dir)

        cont = len(ready)

        while cont < spec_num[key]:

            # test_epoch returns the light curve object and its raw data
            my_lc, raw_data = test_epoch(key, surv_spec, user_choices)

            file_root = user_choices['file_root'][0]
            snid = raw_data['SNID:'][0]

            mean_name = synthetic_dir + '/' + file_root + snid + '_mean.dat'

            screen('... This is SN type ' + raw_data['SIM_NON1a:'][0] +
                   ' number ' + str(len(ready) + 1) + ' of ' +
                   str(spec_num[key]), user_choices)

            if my_lc.epoch_cuts and mean_name not in ready:
                # original fit passes the cuts and was not used yet:
                # copy its mean file
                ready.append(mean_name)
                cont = len(ready)

                shutil.copy2(user_choices['samples_dir'][0] + '/' +
                             file_root + snid + '_mean.dat', mean_name)
                screen('\n', user_choices)

            else:
                # build GP object
                raw, gp_objs = setup_gp(raw_data, user_choices, gp_objs)

                # set new file root
                raw['file_root'] = [file_root + str(len(ready)) + 'X']
                raw['samples_dir'] = [synthetic_dir + '/']
                save_result(raw)

                # cont equals len(ready) here, so this is the file
                # written by save_result above
                synth_mean = synthetic_dir + '/' + file_root + str(cont) + \
                             'X' + raw['SNID:'][0] + '_mean.dat'

                # check epoch for this realization
                new_lc = LC(raw, user_choices)
                new_lc.load_fit_GP(synth_mean)
                new_lc.normalize()
                new_lc.mjd_shift()
                new_lc.check_epoch()

                if new_lc.epoch_cuts:
                    ready.append(synth_mean)
                    cont = len(ready)
                    screen('\n', user_choices)

                else:
                    # discard the rejected realization
                    os.remove(synth_mean)
                    fail = fail + 1
                    screen(str(fail) + ' samples failed to pass ' +
                           'epoch cuts!\n', user_choices)

                    # give up on this type after too many failures
                    # (replaces the `cont = 100000` sentinel, which only
                    # ended the loop for targets below 100000)
                    if fail > 10 * spec_num[key]:
                        break