Ejemplo n.º 1
0
def test_epoch(key, surv, user_choices):
    """
    Check if previously calculated GP fit survives selection cuts.

    One object of type `key` is drawn at random from `surv`, its raw
    light curve is read, the stored GP mean fit is loaded and the light
    curve is normalized, shifted to peak MJD and checked for epoch cuts.

    input: key, str
           object type

           surv, dict
           dictionary of objects surviving basic cuts
           keys are types, values are sequences of names
           (the drawn name is used as 'path_to_lc')

           user_choices, dict
           output from snclass.util.read_user_input
           NOTE: modified in place, 'path_to_lc' is overwritten
           with the drawn object

    output: my_lc, LC object
            updated light curve object after checked for epoch cuts

            raw, dict
            raw light curve data updated with user_choices and
            with 'n_samples' set to ['0']
    """
    # sample a random obj in the training sample
    # (np.random.randint excludes the upper bound, so indx is always valid)
    indx = np.random.randint(0, len(surv[key]))
    name = surv[key][indx]

    # update path to raw data
    user_choices['path_to_lc'] = [name]

    # read light curve raw data
    raw = read_snana_lc(user_choices)

    # update raw data with user choices
    raw.update(user_choices)

    # set number of samples to 0 (we are only interested in the mean for now)
    raw['n_samples'] = ['0']

    # initiate light curve object
    my_lc = LC(raw, user_choices)

    snid = raw['SNID:'][0]
    screen('Fitting SN' + snid, user_choices)

    # load GP fit
    # NOTE(review): the 'DES_SN' prefix is hard-coded here, other functions
    # build this name from user_choices['file_root'] -- confirm it matches
    my_lc.load_fit_GP(user_choices['samples_dir'][0] + '/DES_SN' + snid +
                      '_mean.dat')

    # normalize
    my_lc.normalize()

    # shift to peak mjd
    my_lc.mjd_shift()

    # check epoch requirements
    my_lc.check_epoch()

    return my_lc, raw
Ejemplo n.º 2
0
def select_GP(params, user_choices):
    """
    Select original objs to build a synthetic spectroscopic sample.

    For each type in params['draw_spec_samples'] the synthetic mean files
    already on disk are counted first. After that, objs are drawn at
    random from the spectroscopic sample and one GP realization of each
    drawn obj is saved as a new synthetic obj, until the requested number
    is reached. A realization is only kept if it passes the epoch cuts.

    input: params, dict
           output from set_paramameters
           keywords used here: 'draw_spec_samples', 'surv_spec_names',
           'spec_pop', 'synthetic_dir', 'fitted_data_dir'

           user_choices, dict
           output from snclass.util.read_user_input
           'path_to_lc' is overwritten for every drawn obj

    The process is terminated through sys.exit() once the number of objs
    failing the epoch cuts is larger than 10 times the population of the
    type in the spectroscopic sample.
    """
    from snclass.util import translate_snid, read_snana_lc
    from snclass.functions import screen
    from snclass.treat_lc import LC
    from snclass.fit_lc_gptools import save_result

    import os
    import numpy as np
    import sys

    # the string 'None' in the user input stands for "no reference filter"
    fil_choice = user_choices['ref_filter'][0]
    if fil_choice == 'None':
        fil_choice = None

    # select extra GP realizations in order to construct
    # a representative spec sample
    for key in params['draw_spec_samples'].keys():
        cont = 0
        fail = 0

        # count synthetic objs of this type which are already on disk
        screen('... Check existing objs', user_choices)
        ready = []
        for obj in params['surv_spec_names'][key]:
            # NOTE(review): other callers index the output of translate_snid
            # with [0] -- confirm a plain string is returned here
            obj_id = translate_snid(obj)

            for j in xrange(params['draw_spec_samples'][key]):
                mean_file = params['synthetic_dir'] + '/' + \
                            user_choices['file_root'][0] + str(j) + \
                            'X' + obj_id + '_mean.dat'

                if not os.path.isfile(mean_file) or mean_file in ready:
                    continue

                cont += 1
                ready.append(mean_file)
                screen('Found ready SN ' + str(cont) + 'X' + obj_id,
                       user_choices)

        while cont < params['draw_spec_samples'][key]:

            # draw one of the objs in the spec sample
            indx = np.random.randint(0, params['spec_pop'][key])
            user_choices['path_to_lc'] = [
                params['surv_spec_names'][key][indx]
            ]

            # read light curve raw data
            raw = read_snana_lc(user_choices)

            # objs without a stored samples file are skipped and redrawn
            # (such draws are not counted as failures)
            samples_file = params['fitted_data_dir'] + \
                           user_choices['file_root'][0] + \
                           raw['SNID:'][0] + '_samples.dat'
            if not os.path.isfile(samples_file):
                continue

            # initiate light curve object
            my_lc = LC(raw, user_choices)

            screen('Loading SN' + raw['SNID:'][0], user_choices)

            # load GP fit
            my_lc.user_choices['n_samples'] = ['100']
            my_lc.user_choices['samples_dir'] = [params['fitted_data_dir']]
            fitted_mean = params['fitted_data_dir'] + \
                          user_choices['file_root'][0] + \
                          raw['SNID:'][0] + '_mean.dat'
            my_lc.load_fit_GP(fitted_mean)

            # a fit is required in every filter, otherwise redraw
            has_fit = [len(my_lc.fitted['GP_fit'][fil]) > 0
                       for fil in user_choices['filters']]
            if not all(has_fit):
                continue

            # normalize
            my_lc.normalize(samples=True, ref_filter=fil_choice)

            # shift to peak mjd
            my_lc.mjd_shift()

            # check epoch requirements
            my_lc.check_epoch()

            if not my_lc.epoch_cuts:
                screen('Failed to pass epoch cuts!\n', user_choices)
                fail += 1

            else:
                screen('... Passed epoch cuts', user_choices)
                screen('... ... This is SN type ' +
                       raw[user_choices['type_flag'][0]][0] +
                       ' number ' + str(cont + 1) + ' of ' +
                       str(params['draw_spec_samples'][key]), user_choices)

                # draw one realization, the same index is used in all filters
                first_filter = user_choices['filters'][0]
                size = len(my_lc.fitted['realizations'][first_filter])
                indx2 = np.random.randint(0, size)

                for fil in user_choices['filters']:
                    print('... ... ... filter ' + fil)

                    realizations = my_lc.fitted['realizations'][fil]
                    raw['GP_fit'][fil] = realizations[indx2]
                    raw['GP_std'][fil] = my_lc.fitted['GP_std'][fil]
                    raw['xarr'][fil] = my_lc.fitted['xarr'][fil]

                # set new file root and store the realization
                raw['file_root'] = [user_choices['file_root'][0] +
                                    str(cont) + 'X']
                raw['samples_dir'] = [params['synthetic_dir'] + '/']
                save_result(raw)

                # check epoch for this realization
                new_lc = LC(raw, user_choices)
                synthetic_mean = params['synthetic_dir'] + '/' + \
                                 user_choices['file_root'][0] + str(cont) + \
                                 'X' + raw['SNID:'][0] + '_mean.dat'
                new_lc.load_fit_GP(synthetic_mean)
                new_lc.normalize(ref_filter=fil_choice)
                new_lc.mjd_shift()
                new_lc.check_epoch()

                if new_lc.epoch_cuts:
                    cont += 1
                else:
                    # discard the realization which was just written
                    screen('Samples failed to pass epoch cuts!\n',
                           user_choices)
                    os.remove(synthetic_mean)
                print('\n')

            # give up after too many objs failing the epoch cuts
            if fail > 10 * params['spec_pop'][key]:
                cont = 100000
                sys.exit()
Ejemplo n.º 3
0
def build_sample(params):
    """
    Build a directory holding all raw data passing selection cuts.

    Every '*samples*' file found in the photometric and spectroscopic
    fit directories is translated to its raw data file. The stored GP
    mean fit is loaded and, if a fit exists in all filters and the
    epoch cuts are satisfied, the raw data file is copied to the new
    directory.

    input: params, dict
           keywords:  'raw_dir' -> new directory to be created
                      'photo_dir' -> photometric LC fitted with GP
                      'spec_dir' -> spectroscopic LC fitted with GP
                      'user_choices' -> output from
                                        snclass.util.read_user_input
                                        ('path_to_lc' and 'n_samples'
                                        are overwritten in place)

    Directory names are concatenated directly with file names, so they
    are expected to end with a path separator.
    """
    import os
    import shutil
    from snclass.util import read_snana_lc, translate_snid
    from snclass.treat_lc import LC
    from snclass.functions import screen

    user_choices = params['user_choices']

    # create data directory
    if not os.path.isdir(params['raw_dir']):
        os.makedirs(params['raw_dir'])

    # the string 'None' in the user input stands for "no reference filter"
    ref_filter = user_choices['ref_filter'][0]
    if ref_filter == 'None':
        ref_filter = None

    # read fitted light curves, keeping each listing paired with the
    # directory it came from
    fit_sets = [(params[dir_key], os.listdir(params[dir_key]))
                for dir_key in ('photo_dir', 'spec_dir')]

    for fit_dir, sn_set in fit_sets:
        for obj in sn_set:
            # only sample files, skipping backup files ('~') and
            # names containing 'Y'
            if 'samples' not in obj or '~' in obj or 'Y' in obj:
                continue

            screen(obj, user_choices)

            rname = translate_snid(obj)[0]
            user_choices['path_to_lc'] = [rname]
            user_choices['n_samples'] = ['0']

            # read raw data
            raw = read_snana_lc(user_choices)
            new_lc = LC(raw, user_choices)

            # load GP fit
            new_lc.load_fit_GP(fit_dir + user_choices['file_root'][0] +
                               raw['SNID:'][0] + '_mean.dat')

            # a fit is required in every filter
            has_fit = [len(new_lc.fitted['GP_fit'][fil]) > 0
                       for fil in user_choices['filters']]
            if not all(has_fit):
                continue

            # treat light curve
            new_lc.normalize(ref_filter=ref_filter)
            new_lc.mjd_shift()
            new_lc.check_basic()
            new_lc.check_epoch()

            # check epoch cuts
            if new_lc.epoch_cuts:
                data_path = user_choices['path_to_obs'][0]
                shutil.copy2(data_path + rname, params['raw_dir'] + rname)
            else:
                screen('... SN' + raw['SNID:'][0] +
                       ' fail to pass epoch cuts!', user_choices)
0
def classify_test(test_name,
                  matrix,
                  user_input,
                  test_dir='test_samples/',
                  csamples=True):
    """
    Classify one photometric supernova using a trained KernelPCA matrix.

    input: test_name, str
           name of mean GP fit file

           matrix, snclass.matrix.DataMatrix object
           trained KernelPCA matrix

           user_input, dict
           output from snclass.util.read_user_input
           NOTE: modified in place. 'path_to_lc' is always overwritten
           and 'samples_dir' is replaced by test_dir when the epoch
           cuts are satisfied. 'n_samples' is restored before returning.

           test_dir, str, optional
           name of directory to store samples from test light curve
           Default is 'test_samples/'

           csamples, bool, optional
           If True, fit GP object and generate sample file as output
           otherwise reads samples from file
           Default is True

    return: new_lc, snclass.treat_lc.LC object
            updated with test projections and new labels
            the probability of being Ia is only set if csamples is True

            None is returned if the epoch cuts are not satisfied
    """
    # update path to raw light curve
    user_input['path_to_lc'] = [translate_snid(test_name, 'FLUXCAL')[0]]

    # store number of samples for latter tests
    nsamples = user_input['n_samples'][0]

    # reset the number of samples for preliminary tests
    user_input['n_samples'] = ['0']

    # read raw data
    raw = read_snana_lc(user_input)

    # load GP fit and test epoch cuts
    new_lc = LC(raw, user_input)
    new_lc.load_fit_GP(user_input['samples_dir'][0] + test_name)
    new_lc.normalize()
    new_lc.mjd_shift()
    new_lc.check_epoch()

    if not new_lc.epoch_cuts:
        # give the requested number of samples back to the caller,
        # otherwise the next call would start from 0 samples
        user_input['n_samples'] = [nsamples]
        return None

    # update test sample directory
    user_input['samples_dir'] = [test_dir]

    # update user choices
    new_lc.user_choices = user_input

    # update number of samples
    new_lc.user_choices['n_samples'] = [nsamples]

    # fit GP or normalize/shift fitted mean
    test_matrix = test_samples(new_lc, calc_samples=bool(csamples))

    # project test
    new_lc.test_proj = matrix.transf_test.transform(test_matrix)

    # classify
    new_lc.new_label = nneighbor(new_lc.test_proj, matrix.low_dim_matrix,
                                 matrix.sntype, matrix.user_choices)

    if csamples:
        # fraction of samples labelled '0' (the Ia class)
        n_ia = sum(1 for item in new_lc.new_label if item == '0')
        new_lc.prob_Ia = n_ia / float(nsamples)

    return new_lc
Ejemplo n.º 5
0
    def check_file(self, filename, epoch=True, ref_filter=None):
        """
        Construct one line of the data matrix.

        The raw light curve behind `filename` is read, its stored GP fit
        is loaded, normalized and shifted to peak MJD. If the object is
        accepted, its SNID is appended to self.snid.

        input:   filename, str
                 file of raw data for 1 supernova

                 epoch, bool - optional
                 If true, check if SN satisfies epoch cuts
                 otherwise the object is always accepted
                 Default is True

                 ref_filter, str - optional
                 Reference filter for peak MJD calculation
                 Default is None

        output:  obj_line, redshift, obj_class
                 fluxes for the data matrix in all filters (list),
                 redshift and type as stored in the raw data

                 None is returned if the epoch cuts are not satisfied
        """
        choices = self.user_choices

        screen('Fitting ' + filename, choices)

        # translate identifier into the path to the raw light curve
        lc_path = translate_snid(filename, choices['photon_flag'][0])[0]
        choices['path_to_lc'] = [lc_path]

        # read raw data and initiate light curve object
        raw = read_snana_lc(choices)
        lc_obj = LC(raw, choices)

        # load GP fit, normalize and shift to peak mjd
        lc_obj.load_fit_GP(choices['samples_dir'][0] + filename)
        lc_obj.normalize(ref_filter=ref_filter)
        lc_obj.mjd_shift()

        # without an epoch check every object is accepted
        if not epoch:
            lc_obj.epoch_cuts = True
        else:
            lc_obj.check_epoch()

        if not lc_obj.epoch_cuts:
            screen('... Failed to pass epoch cuts!', choices)
            screen('\n', choices)
            return None

        # build data matrix lines
        lc_obj.build_steps()

        # concatenate the fluxes of all filters, in filter order
        obj_line = []
        for fil in choices['filters']:
            obj_line.extend(lc_obj.flux_for_matrix[fil])

        redshift = raw[choices['redshift_flag'][0]][0]
        obj_class = raw[choices['type_flag'][0]][0]

        self.snid.append(raw['SNID:'][0])

        return obj_line, redshift, obj_class
Ejemplo n.º 6
0
def main(args):
    """
    Construct 'fake' training.

    Use photometric simulated sample to guess proportions between
    spectroscopic sample classes.

    input: args, object with attributes
           input -> name of user input file
                    (passed to read_user_input)
           dir -> directory to store the synthetic spectroscopic
                  sample, created if it does not exist
    """
    # read user input
    user_choices = read_user_input(args.input)

    ##########################################################################
    # Spec

    # build complete spec list
    screen('Building spectroscopic sample.', user_choices)
    user_choices['sample_cut'] = ['1', '3', '21', '22', '23', '32', '33']
    name_slist = 'spec_' + user_choices['epoch_cut'][0] + '_' + \
                 user_choices['epoch_cut'][1] + '.list'
    spec_list = choose_sn(user_choices, output_file=name_slist)

    # check population according to type
    # (reads the list written above, as done for the photometric sample)
    spec_pop = check_pop(name_slist, user_choices)

    # count spec classes surviving selection cuts
    surv_spec = check_fitted(user_choices['samples_dir'][0], user_choices)

    ##########################################################################
    # Photo

    #build complete photo list
    screen('Build photometric samples.', user_choices)
    user_choices['sample_cut'] = ['-9']
    name_plist = 'photo_' + user_choices['epoch_cut'][0] + '_' + \
                 user_choices['epoch_cut'][1] + '.list'
    photo_list = choose_sn(user_choices, output_file=name_plist)

    # check population according to type
    photo_pop = check_pop(name_plist, user_choices)
    photo_frac = calc_fraction(photo_pop)

    ##########################################################################
    # Building fake training sample

    screen('Checking compatibility.', user_choices)

    # construct number of SN expected in spec sample:
    # total spec population split according to the photometric fractions
    spec_num = {}
    for item in photo_pop.keys():
        spec_num[item] = int(np.round(sum(spec_pop.values()) * photo_frac[item]))

    #construct synthetic spec data directory
    synthetic_dir = args.dir
    if not os.path.isdir(synthetic_dir):
        os.makedirs(synthetic_dir)

    # collect gp objects
    gp_objs = {}

    #run through all types
    for key in spec_num.keys():

        # start cont of failed tries
        fail = 0

        if key in surv_spec.keys():

            # check which objs and samples were already calculated
            ready = check_mean_GP(key, surv_spec, spec_num, user_choices, synthetic_dir)

            cont = len(ready)

            while cont < spec_num[key]:

                # my_lc is the tuple (LC object, raw data dictionary)
                my_lc = test_epoch(key, surv_spec, user_choices)

                mean_name = synthetic_dir + '/' + \
                            user_choices['file_root'][0] + my_lc[1]['SNID:'][0] + \
                            '_mean.dat'

                screen('... This is SN type ' + my_lc[1]['SIM_NON1a:'][0] + \
                       ' number ' + str(len(ready) + 1) + ' of ' +
                       str(spec_num[key]), user_choices)

                if my_lc[0].epoch_cuts and mean_name not in ready:
                    # the original mean fit is used as it is
                    ready.append(mean_name)
                    cont = len(ready)

                    shutil.copy2(user_choices['samples_dir'][0] + '/' + \
                                 user_choices['file_root'][0] + \
                                 my_lc[1]['SNID:'][0] + '_mean.dat',
                                 mean_name)
                    screen('\n', user_choices)

                else:
                    # obj failed the epoch cuts or is already in the sample

                    # build GP object
                    raw, gp_objs = setup_gp(my_lc[1], user_choices, gp_objs)

                    # set new file root
                    raw['file_root'] = [user_choices['file_root'][0] + \
                                        str(len(ready)) + 'X']
                    raw['samples_dir'] = [synthetic_dir + '/']
                    save_result(raw)

                    # check epoch for this realization
                    # (cont is equal to len(ready) here, so this is the
                    # file written by save_result)
                    new_lc = LC(raw, user_choices)
                    new_lc.load_fit_GP(synthetic_dir + '/' + \
                                       user_choices['file_root'][0] + str(cont) + \
                                       'X' + raw['SNID:'][0] + '_mean.dat')
                    new_lc.normalize()
                    new_lc.mjd_shift()
                    new_lc.check_epoch()

                    if new_lc.epoch_cuts:
                        ready.append(synthetic_dir + '/' +
                                     user_choices['file_root'][0] + str(cont) + \
                                     'X' + raw['SNID:'][0] + '_mean.dat')
                        cont = len(ready)
                        screen('\n', user_choices)

                    else:
                        # discard the realization which was just written
                        os.remove(synthetic_dir + '/' + \
                                  user_choices['file_root'][0] + str(cont) + \
                                  'X' + raw['SNID:'][0] + '_mean.dat')
                        fail = fail + 1
                        screen(str(fail) + ' samples failed to pass epoch cuts!\n', user_choices)

                        # too many failures: force the while loop to stop
                        if fail > 10 * spec_num[key]:
                            cont = 100000