Example #1
0
def apply_sn_classifier(xml_fpath):
    """ Apply Dovi SN classifier to the source described in the given vosource.xml filepath

    """
    # Parse the vosource XML and extract features for every signal found.
    sigs = []
    importer = generators_importers.from_xml(sigs)
    importer.generate(xml_handle=os.path.expandvars(xml_fpath))
    importer.sig.add_features_to_xml_string(importer.signals_list)

    # importer.sig.xml_string is the feature-annotated XML (a string),
    # which EventData accepts directly in place of a filepath.
    datamodel = mlens3.EventData(importer.sig.xml_string)

    # x_sdict keys include: feat_gen_date, src_id, ra, features, feature_docs,
    # dec, dec_rms, class, ra_rms, ts
    sn = snlc_classifier.Dovi_SN(datamodel=datamodel,
                                 x_sdict=importer.sig.x_sdict,
                                 doplot=False)

    return sn.final_results
def apply_sn_classifier(xml_fpath):
    """ Apply Dovi SN classifier to the source described in the given vosource.xml filepath

    """
    # Feature generation: build a signal importer, feed it the expanded
    # filepath, and fold the computed features back into an XML string.
    collected_signals = []
    xml_gen = generators_importers.from_xml(collected_signals)
    xml_gen.generate(xml_handle=os.path.expandvars(xml_fpath))
    xml_gen.sig.add_features_to_xml_string(xml_gen.signals_list)

    # Wrap the feature-added XML string (xml_gen.sig.xml_string) in the
    # datamodel object the classifier expects.
    event_data = mlens3.EventData(xml_gen.sig.xml_string)

    # x_sdict carries per-source metadata such as src_id, ra/dec, features,
    # feature_docs, class, ts.
    dovi = snlc_classifier.Dovi_SN(datamodel=event_data,
                                   x_sdict=xml_gen.sig.x_sdict,
                                   doplot=False)
    return dovi.final_results
    def generate_arff_using_asasdat(self, data_fpaths=[], include_arff_header=False, arff_output_fp=None, skip_class=False):
        """ Given a list of ASAS data file filepaths, for each source/file:
        - choose the optimal aperture, depending upon median magnitude,
        - exclude bad/flagged epochs
        - generate features from timeseries (placing in intermediate XML-string format)
        - collect resulting features for all given sources, and place in ARFF style file
              which will later be read by ML training/classification code.
              
        Partially adapted from: TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()
        """
        from get_colors_for_tutor_sources import Parse_Nomad_Colors_List
        import tutor_database_project_insert
        adt = tutor_database_project_insert.ASAS_Data_Tools(pars=self.pars)
        adt.frame_limitmags = self.retrieve_limitmags_from_pkl()

        sys.path.append(os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
        #sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + '/Software/feature_extract/Code/extractors'))
        #print os.environ.get("TCP_DIR")
        import arffify

        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract/Code'))
        import db_importer
        from data_cleaning import sigmaclip_sdict_ts
        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract'))
        from Code import generators_importers

        master_list = []
        master_features_dict = {}
        all_class_list = []
        master_classes_dict = {}

        for dat_fpath in data_fpaths:
            ### This truncates at file extension '.'
            #new_srcid = "'" + dat_fpath[dat_fpath.rfind('/')+1:dat_fpath.rfind('.')] + "'"
            #### This is for ACVS files, which have '.' in the source-name:
            new_srcid = "'" + dat_fpath[dat_fpath.rfind('/')+1:] + "'"
            ts_str = open(dat_fpath).read()
            source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
            mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
            try:
                xml_str = self.form_xml_string(mag_data_dict)
            except:
                print "FAILED:form_xml_string()", dat_fpath
                continue # skip this source
            #from get_colors_for_tutor_sources import Parse_Nomad_Colors_List
            #ParseNomadColorsList = Parse_Nomad_Colors_List(fpath=os.path.abspath(os.environ.get("TCP_DIR") + '/Data/best_nomad_src_list'))
            ParseNomadColorsList = Parse_Nomad_Colors_List(fpath='/home/dstarr/src/TCP/Data/best_nomad_src_for_asas_kepler')

            ### Generate the features:
            signals_list = []
            gen = generators_importers.from_xml(signals_list)
            if 1:
                #import pdb; pdb.set_trace()
                #print
                new_xml_str = ParseNomadColorsList.get_colors_for_srcid(xml_str=xml_str, srcid=new_srcid)

            gen.generate(xml_handle=new_xml_str)
            gen.sig.add_features_to_xml_string(signals_list)                
            gen.sig.x_sdict['src_id'] = new_srcid
            dbi_src = db_importer.Source(make_dict_if_given_xml=False)
            dbi_src.source_dict_to_xml(gen.sig.x_sdict)

            xml_fpath = dbi_src.xml_string

            a = arffify.Maker(search=[], skip_class=skip_class, local_xmls=True, convert_class_abrvs_to_names=False, flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False)
            out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid, xml_fpath=xml_fpath)

            master_list.append(out_dict)
            all_class_list.append(out_dict['class'])
            master_classes_dict[out_dict['class']] = 0
            for feat_tup in out_dict['features']:
                master_features_dict[feat_tup] = 0 # just make sure there is this key in the dict.  0 is filler


        master_features = master_features_dict.keys()
        master_classes = master_classes_dict.keys()
        a = arffify.Maker(search=[], skip_class=skip_class, local_xmls=True, 
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False,
                          dorun=False, add_srcid_to_arff=True)
        a.master_features = master_features
        a.all_class_list = all_class_list
        a.master_classes = master_classes
        a.master_list = master_list


        a.write_arff(outfile=arff_output_fp, \
                     remove_sparse_classes=True, \
                     n_sources_needed_for_class_inclusion=1,
                     include_header=include_arff_header,
                     use_str_srcid=True)#, classes_arff_str='', remove_sparse_classes=False)
Example #4
0
     - e.g. .bashrc/execute for bash:
                export TCP_DIR=/home/pteluser/src/TCP/
"""

import os, sys

sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                                      'Software/feature_extract'))
sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
              'Software/feature_extract/Code'))

from Code import generators_importers, feature_interfaces

signals_list = []

# Parse the vosource XML and compute features for every signal it contains.
gen = generators_importers.from_xml(signals_list)
gen.generate(xml_handle="../../Data/source_5.xml", register=False)
gen.sig.add_features_to_xml_string(gen.signals_list)

interface = feature_interfaces.feature_interface
noisify_extr = interface.request_extractor('noisify') # grab the noisifying extractor from the interface

# Register each signal without re-initializing it.  Direct iteration replaces
# the original index-based `for i in range(len(signals_list))` anti-idiom.
for signal in signals_list:
	signal.register_signal(initialize=False)

interface.notify(noisify_extr)

def fetch_noisified(signal, band):
	"""Return the cached 'noisify' extractor result for *band* on *signal*."""
	# Walk signal.properties: data -> band -> 'inter' -> 'noisify' -> .result
	return signal.properties['data'][band]['inter']['noisify'].result
            result_dict = self.PTF_LCSN_classifier(data, z=z, dz=dz)
        #except:
        #    result_dict = {}
        self.final_results = result_dict

if __name__ == '__main__':

    ##### To (re)generate features for given vosource.xml, it's easiest to use db_importer.py:
    #     The result from this (among other things) is a feature-added xml_string
    # Make the TCP feature-extraction packages importable (assumes the
    # TCP_DIR environment variable is set; None here would raise TypeError).
    sys.path.append(os.environ.get("TCP_DIR") + '/Software/feature_extract')
    sys.path.append(
        os.environ.get("TCP_DIR") + '/Software/feature_extract/Code')
    #import db_importer
    from Code import generators_importers
    # Parse a hard-coded vosource XML and generate features for its signals.
    signals_list = []
    gen = generators_importers.from_xml(signals_list)
    gen.generate(xml_handle=os.path.expandvars(
        "$HOME/scratch/sn_tutor_vosource_xmls/vosource_21293.xml"))
    gen.sig.add_features_to_xml_string(gen.signals_list)
    #NOTE: this is a string:   gen.sig.xml_string

    # For TESTING:

    sys.path.append(
        os.environ.get("TCP_DIR") +
        '/Software/feature_extract/Code/extractors')
    import mlens3  # for microlensing classification
    #d = mlens3.EventData(os.path.abspath(os.environ.get("TCP_DIR") + "/Data/vosource_tutor12881.xml"))
    #d = mlens3.EventData(os.path.abspath(os.environ.get("TCP_DIR") + "/Data/vosource_sn_withsdssfeats_2004az.xml"))
    # Wrap the feature-added XML string in an EventData datamodel object.
    d = mlens3.EventData(gen.sig.xml_string)
    #d = mlens3.EventData(os.path.abspath("/tmp/test_feature_algorithms.VOSource.xml"))
    def generate_arff_using_asasdat(self, data_fpaths=None, include_arff_header=False, arff_output_fp=None):
        """ Given a list of LINEAR data file filepaths, for each source/file:
        - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
        - exclude bad/flagged epochs
        - generate features from timeseries (placing in intermediate XML-string format)
        - collect resulting features for all given sources, and place in ARFF style file
              which will later be read by ML training/classification code.

        Partially adapted from: TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()
        """
        # Avoid the shared-mutable-default pitfall (original default was []):
        if data_fpaths is None:
            data_fpaths = []
        import tutor_database_project_insert
        # NOTE(review): original passed `pars=pars`, an undefined global;
        # sibling implementations pass self.pars, so that is used here.
        adt = tutor_database_project_insert.ASAS_Data_Tools(pars=self.pars)
        adt.frame_limitmags = self.retrieve_limitmags_from_pkl()

        # Make the feature-extraction / ARFF packages importable:
        sys.path.append(os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
        import arffify

        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract/Code'))
        import db_importer
        from data_cleaning import sigmaclip_sdict_ts
        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract'))
        from Code import generators_importers

        master_list = []            # one arff out_dict per source
        master_features_dict = {}   # keys = union of feature tuples seen
        all_class_list = []         # class of every source, in order
        master_classes_dict = {}    # keys = distinct classes seen

        for dat_fpath in data_fpaths:
            # srcid is the file basename without its '.dat' extension.
            new_srcid = dat_fpath[dat_fpath.rfind('/')+1:dat_fpath.rfind('.dat')]
            # Read and close the timeseries file (original leaked the handle):
            ts_file = open(dat_fpath)
            try:
                ts_str = ts_file.read()
            finally:
                ts_file.close()
            source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
            """mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
            """
            # FIXME: the aperture-filter call above is intentionally disabled
            # (it is ASAS-specific) and no LINEAR equivalent exists yet, so
            # mag_data_dict is UNDEFINED here and the next line raises
            # NameError if this work-in-progress method is actually run.
            xml_str = self.form_xml_string(mag_data_dict)

            ### Generate the features:
            signals_list = []
            gen = generators_importers.from_xml(signals_list)
            gen.generate(xml_handle=xml_str)
            gen.sig.add_features_to_xml_string(signals_list)
            gen.sig.x_sdict['src_id'] = new_srcid
            dbi_src = db_importer.Source(make_dict_if_given_xml=False)
            dbi_src.source_dict_to_xml(gen.sig.x_sdict)

            # NOTE: despite the name, this is an XML *string*, not a filepath.
            xml_fpath = dbi_src.xml_string

            a = arffify.Maker(search=[], skip_class=False, local_xmls=True, convert_class_abrvs_to_names=False, flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False)
            out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid, xml_fpath=xml_fpath)

            master_list.append(out_dict)
            all_class_list.append(out_dict['class'])
            master_classes_dict[out_dict['class']] = 0
            for feat_tup in out_dict['features']:
                master_features_dict[feat_tup] = 0 # just make sure there is this key in the dict.  0 is filler

        master_features = master_features_dict.keys()
        master_classes = master_classes_dict.keys()
        # Second Maker instance does the actual ARFF writing:
        a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False,
                          dorun=False, add_srcid_to_arff=True)
        a.master_features = master_features
        a.all_class_list = all_class_list
        a.master_classes = master_classes
        a.master_list = master_list

        a.write_arff(outfile=arff_output_fp, \
                     remove_sparse_classes=True, \
                     n_sources_needed_for_class_inclusion=1,
                     include_header=include_arff_header,
                     use_str_srcid=True)
Example #7
0
    def generate_arff_using_asasdat(self,
                                    data_fpaths=None,
                                    include_arff_header=False,
                                    arff_output_fp=None):
        """ Given a list of LINEAR data file filepaths, for each source/file:
        - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
        - exclude bad/flagged epochs
        - generate features from timeseries (placing in intermediate XML-string format)
        - collect resulting features for all given sources, and place in ARFF style file
              which will later be read by ML training/classification code.

        Partially adapted from: TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()
        """
        # Avoid the shared-mutable-default pitfall (original default was []):
        if data_fpaths is None:
            data_fpaths = []
        import tutor_database_project_insert
        # NOTE(review): original passed `pars=pars`, an undefined global;
        # sibling implementations pass self.pars, so that is used here.
        adt = tutor_database_project_insert.ASAS_Data_Tools(pars=self.pars)
        adt.frame_limitmags = self.retrieve_limitmags_from_pkl()

        # Make the feature-extraction / ARFF packages importable:
        sys.path.append(
            os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
        import arffify

        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract/Code'))
        import db_importer
        from data_cleaning import sigmaclip_sdict_ts
        sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                      'Software/feature_extract'))
        from Code import generators_importers

        master_list = []            # one arff out_dict per source
        master_features_dict = {}   # keys = union of feature tuples seen
        all_class_list = []         # class of every source, in order
        master_classes_dict = {}    # keys = distinct classes seen

        for dat_fpath in data_fpaths:
            # srcid is the file basename without its '.dat' extension.
            new_srcid = dat_fpath[dat_fpath.rfind('/') +
                                  1:dat_fpath.rfind('.dat')]
            # Read and close the timeseries file (original leaked the handle):
            ts_file = open(dat_fpath)
            try:
                ts_str = ts_file.read()
            finally:
                ts_file.close()
            source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
            """mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
            """
            # FIXME: the aperture-filter call above is intentionally disabled
            # (it is ASAS-specific) and no LINEAR equivalent exists yet, so
            # mag_data_dict is UNDEFINED here and the next line raises
            # NameError if this work-in-progress method is actually run.
            xml_str = self.form_xml_string(mag_data_dict)

            ### Generate the features:
            signals_list = []
            gen = generators_importers.from_xml(signals_list)
            gen.generate(xml_handle=xml_str)
            gen.sig.add_features_to_xml_string(signals_list)
            gen.sig.x_sdict['src_id'] = new_srcid
            dbi_src = db_importer.Source(make_dict_if_given_xml=False)
            dbi_src.source_dict_to_xml(gen.sig.x_sdict)

            # NOTE: despite the name, this is an XML *string*, not a filepath.
            xml_fpath = dbi_src.xml_string

            a = arffify.Maker(search=[],
                              skip_class=False,
                              local_xmls=True,
                              convert_class_abrvs_to_names=False,
                              flag_retrieve_class_abrvs_from_TUTOR=False,
                              dorun=False)
            out_dict = a.generate_arff_line_for_vosourcexml(
                num=new_srcid, xml_fpath=xml_fpath)

            master_list.append(out_dict)
            all_class_list.append(out_dict['class'])
            master_classes_dict[out_dict['class']] = 0
            for feat_tup in out_dict['features']:
                master_features_dict[
                    feat_tup] = 0  # just make sure there is this key in the dict.  0 is filler

        master_features = master_features_dict.keys()
        master_classes = master_classes_dict.keys()
        # Second Maker instance does the actual ARFF writing:
        a = arffify.Maker(search=[],
                          skip_class=False,
                          local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False,
                          dorun=False,
                          add_srcid_to_arff=True)
        a.master_features = master_features
        a.all_class_list = all_class_list
        a.master_classes = master_classes
        a.master_list = master_list

        a.write_arff(outfile=arff_output_fp, \
                     remove_sparse_classes=True, \
                     n_sources_needed_for_class_inclusion=1,
                     include_header=include_arff_header,
                     use_str_srcid=True)