def apply_sn_classifier(xml_fpath):
    """Run the Dovi supernova classifier on one vosource.xml source.

    xml_fpath -- path to a vosource.xml file; environment variables
                 (e.g. $TCP_DIR) are expanded before reading.

    Returns the classifier's final_results structure.
    """
    sigs = []
    xml_gen = generators_importers.from_xml(sigs)
    xml_gen.generate(xml_handle=os.path.expandvars(xml_fpath))
    # Fold the derived features back into the XML representation;
    # xml_gen.sig.xml_string is then a feature-annotated XML string.
    xml_gen.sig.add_features_to_xml_string(xml_gen.signals_list)
    event_data = mlens3.EventData(xml_gen.sig.xml_string)
    classifier = snlc_classifier.Dovi_SN(datamodel=event_data,
                                         x_sdict=xml_gen.sig.x_sdict,
                                         doplot=False)
    return classifier.final_results
def apply_sn_classifier(xml_fpath):
    """Apply the Dovi SN classifier to the source described by the given
    vosource.xml filepath and return its final results.
    """
    sig_list = []
    importer = generators_importers.from_xml(sig_list)
    importer.generate(xml_handle=os.path.expandvars(xml_fpath))
    importer.sig.add_features_to_xml_string(importer.signals_list)
    # importer.sig.xml_string now holds the feature-annotated XML string.
    result = snlc_classifier.Dovi_SN(
        datamodel=mlens3.EventData(importer.sig.xml_string),
        x_sdict=importer.sig.x_sdict,
        doplot=False)
    return result.final_results
def generate_arff_using_asasdat(self, data_fpaths=[], include_arff_header=False, arff_output_fp=None, skip_class=False): """ Given a list of ASAS data file filepaths, for each source/file: - choose the optimal aperture, depending upon median magnitude, - exclude bad/flagged epochs - generate features from timeseries (placing in intermediate XML-string format) - collect resulting features for all given sources, and place in ARFF style file which will later be read by ML training/classification code. Partially adapted from: TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs() """ from get_colors_for_tutor_sources import Parse_Nomad_Colors_List import tutor_database_project_insert adt = tutor_database_project_insert.ASAS_Data_Tools(pars=self.pars) adt.frame_limitmags = self.retrieve_limitmags_from_pkl() sys.path.append(os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData') #sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + '/Software/feature_extract/Code/extractors')) #print os.environ.get("TCP_DIR") import arffify sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \ 'Software/feature_extract/Code')) import db_importer from data_cleaning import sigmaclip_sdict_ts sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \ 'Software/feature_extract')) from Code import generators_importers master_list = [] master_features_dict = {} all_class_list = [] master_classes_dict = {} for dat_fpath in data_fpaths: ### This truncates at file extension '.' #new_srcid = "'" + dat_fpath[dat_fpath.rfind('/')+1:dat_fpath.rfind('.')] + "'" #### This is for ACVS files, which have '.' 
in the source-name: new_srcid = "'" + dat_fpath[dat_fpath.rfind('/')+1:] + "'" ts_str = open(dat_fpath).read() source_intermed_dict = adt.parse_asas_ts_data_str(ts_str) mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict) try: xml_str = self.form_xml_string(mag_data_dict) except: print "FAILED:form_xml_string()", dat_fpath continue # skip this source #from get_colors_for_tutor_sources import Parse_Nomad_Colors_List #ParseNomadColorsList = Parse_Nomad_Colors_List(fpath=os.path.abspath(os.environ.get("TCP_DIR") + '/Data/best_nomad_src_list')) ParseNomadColorsList = Parse_Nomad_Colors_List(fpath='/home/dstarr/src/TCP/Data/best_nomad_src_for_asas_kepler') ### Generate the features: signals_list = [] gen = generators_importers.from_xml(signals_list) if 1: #import pdb; pdb.set_trace() #print new_xml_str = ParseNomadColorsList.get_colors_for_srcid(xml_str=xml_str, srcid=new_srcid) gen.generate(xml_handle=new_xml_str) gen.sig.add_features_to_xml_string(signals_list) gen.sig.x_sdict['src_id'] = new_srcid dbi_src = db_importer.Source(make_dict_if_given_xml=False) dbi_src.source_dict_to_xml(gen.sig.x_sdict) xml_fpath = dbi_src.xml_string a = arffify.Maker(search=[], skip_class=skip_class, local_xmls=True, convert_class_abrvs_to_names=False, flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False) out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid, xml_fpath=xml_fpath) master_list.append(out_dict) all_class_list.append(out_dict['class']) master_classes_dict[out_dict['class']] = 0 for feat_tup in out_dict['features']: master_features_dict[feat_tup] = 0 # just make sure there is this key in the dict. 
0 is filler master_features = master_features_dict.keys() master_classes = master_classes_dict.keys() a = arffify.Maker(search=[], skip_class=skip_class, local_xmls=True, convert_class_abrvs_to_names=False, flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False, add_srcid_to_arff=True) a.master_features = master_features a.all_class_list = all_class_list a.master_classes = master_classes a.master_list = master_list a.write_arff(outfile=arff_output_fp, \ remove_sparse_classes=True, \ n_sources_needed_for_class_inclusion=1, include_header=include_arff_header, use_str_srcid=True)#, classes_arff_str='', remove_sparse_classes=False)
- e.g. .bashrc/execute for bash: export TCP_DIR=/home/pteluser/src/TCP/ """ import os, sys sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \ 'Software/feature_extract')) sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \ 'Software/feature_extract/Code')) from Code import generators_importers, feature_interfaces signals_list = [] gen = generators_importers.from_xml(signals_list) gen.generate(xml_handle="../../Data/source_5.xml",register=False) gen.sig.add_features_to_xml_string(gen.signals_list) interface = feature_interfaces.feature_interface noisify_extr = interface.request_extractor('noisify') # grab the noisifying extractor from the interface for i in range(len(signals_list)): signal = signals_list[i] signal.register_signal(initialize=False) interface.notify(noisify_extr) def fetch_noisified(signal, band): noisified = signal.properties['data'][band]['inter']['noisify'].result return noisified
result_dict = self.PTF_LCSN_classifier(data, z=z, dz=dz) #except: # result_dict = {} self.final_results = result_dict if __name__ == '__main__': ##### To (re)generate features for given vosource.xml, it's easiest to use db_importer.py: # The result from this (among other things) is a feature-added xml_string sys.path.append(os.environ.get("TCP_DIR") + '/Software/feature_extract') sys.path.append( os.environ.get("TCP_DIR") + '/Software/feature_extract/Code') #import db_importer from Code import generators_importers signals_list = [] gen = generators_importers.from_xml(signals_list) gen.generate(xml_handle=os.path.expandvars( "$HOME/scratch/sn_tutor_vosource_xmls/vosource_21293.xml")) gen.sig.add_features_to_xml_string(gen.signals_list) #NOTE: this is a string: gen.sig.xml_string # For TESTING: sys.path.append( os.environ.get("TCP_DIR") + '/Software/feature_extract/Code/extractors') import mlens3 # for microlensing classification #d = mlens3.EventData(os.path.abspath(os.environ.get("TCP_DIR") + "/Data/vosource_tutor12881.xml")) #d = mlens3.EventData(os.path.abspath(os.environ.get("TCP_DIR") + "/Data/vosource_sn_withsdssfeats_2004az.xml")) d = mlens3.EventData(gen.sig.xml_string) #d = mlens3.EventData(os.path.abspath("/tmp/test_feature_algorithms.VOSource.xml"))
def generate_arff_using_asasdat(self, data_fpaths=None, include_arff_header=False, arff_output_fp=None):
    """ Given a list of LINEAR data file filepaths, for each source/file:
    - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
    - exclude bad/flagged epochs
    - generate features from timeseries (placing in intermediate XML-string format)
    - collect resulting features for all given sources, and place in ARFF style file
      which will later be read by ML training/classification code.

    Partially adapted from:
      TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()

    data_fpaths         -- list of LINEAR timeseries data filepaths (default: none)
    include_arff_header -- if True, write the ARFF header block into the output
    arff_output_fp      -- open file object the ARFF content is written to
    """
    # NOTE(fix): was data_fpaths=[] -- a shared mutable default argument.
    if data_fpaths is None:
        data_fpaths = []
    import tutor_database_project_insert
    adt = tutor_database_project_insert.ASAS_Data_Tools(pars=pars)
    adt.frame_limitmags = self.retrieve_limitmags_from_pkl()
    sys.path.append(os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
    import arffify
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                                    'Software/feature_extract/Code'))
    import db_importer
    from data_cleaning import sigmaclip_sdict_ts
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                                    'Software/feature_extract'))
    from Code import generators_importers
    master_list = []
    master_features_dict = {}
    all_class_list = []
    master_classes_dict = {}
    for dat_fpath in data_fpaths:
        new_srcid = dat_fpath[dat_fpath.rfind('/')+1:dat_fpath.rfind('.dat')]
        dat_file = open(dat_fpath)
        try:
            ts_str = dat_file.read()
        finally:
            dat_file.close()  # NOTE(fix): file handle was never closed
        source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
        # FIXME: the ASAS aperture-selection step below was disabled
        # (left as a no-op string in the original); a LINEAR-specific
        # equivalent is needed.  As written, mag_data_dict is UNDEFINED
        # and the next line raises NameError at runtime:
        #   mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
        xml_str = self.form_xml_string(mag_data_dict)
        ### Generate the features:
        signals_list = []
        gen = generators_importers.from_xml(signals_list)
        gen.generate(xml_handle=xml_str)
        gen.sig.add_features_to_xml_string(signals_list)
        gen.sig.x_sdict['src_id'] = new_srcid
        dbi_src = db_importer.Source(make_dict_if_given_xml=False)
        dbi_src.source_dict_to_xml(gen.sig.x_sdict)
        xml_fpath = dbi_src.xml_string
        a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False)
        out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid,
                                                        xml_fpath=xml_fpath)
        master_list.append(out_dict)
        all_class_list.append(out_dict['class'])
        master_classes_dict[out_dict['class']] = 0
        for feat_tup in out_dict['features']:
            master_features_dict[feat_tup] = 0 # just make sure there is this key in the dict.  0 is filler
    master_features = master_features_dict.keys()
    master_classes = master_classes_dict.keys()
    a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False,
                      add_srcid_to_arff=True)
    a.master_features = master_features
    a.all_class_list = all_class_list
    a.master_classes = master_classes
    a.master_list = master_list
    a.write_arff(outfile=arff_output_fp, \
                 remove_sparse_classes=True, \
                 n_sources_needed_for_class_inclusion=1,
                 include_header=include_arff_header,
                 use_str_srcid=True)
def generate_arff_using_asasdat(self, data_fpaths=None,
                                include_arff_header=False,
                                arff_output_fp=None):
    """ Given a list of LINEAR data file filepaths, for each source/file:
    - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
    - exclude bad/flagged epochs
    - generate features from timeseries (placing in intermediate XML-string format)
    - collect resulting features for all given sources, and place in ARFF style file
      which will later be read by ML training/classification code.

    Partially adapted from:
      TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()

    data_fpaths         -- list of LINEAR timeseries data filepaths (default: none)
    include_arff_header -- if True, write the ARFF header block into the output
    arff_output_fp      -- open file object the ARFF content is written to
    """
    # NOTE(fix): was data_fpaths=[] -- a shared mutable default argument.
    if data_fpaths is None:
        data_fpaths = []
    import tutor_database_project_insert
    adt = tutor_database_project_insert.ASAS_Data_Tools(pars=pars)
    adt.frame_limitmags = self.retrieve_limitmags_from_pkl()
    sys.path.append(
        os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
    import arffify
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                                    'Software/feature_extract/Code'))
    import db_importer
    from data_cleaning import sigmaclip_sdict_ts
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + \
                                    'Software/feature_extract'))
    from Code import generators_importers
    master_list = []
    master_features_dict = {}
    all_class_list = []
    master_classes_dict = {}
    for dat_fpath in data_fpaths:
        new_srcid = dat_fpath[dat_fpath.rfind('/') + 1:dat_fpath.rfind('.dat')]
        dat_file = open(dat_fpath)
        try:
            ts_str = dat_file.read()
        finally:
            dat_file.close()  # NOTE(fix): file handle was never closed
        source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
        # FIXME: the ASAS aperture-selection step below was disabled
        # (left as a no-op string in the original); a LINEAR-specific
        # equivalent is needed.  As written, mag_data_dict is UNDEFINED
        # and the next line raises NameError at runtime:
        #   mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
        xml_str = self.form_xml_string(mag_data_dict)
        ### Generate the features:
        signals_list = []
        gen = generators_importers.from_xml(signals_list)
        gen.generate(xml_handle=xml_str)
        gen.sig.add_features_to_xml_string(signals_list)
        gen.sig.x_sdict['src_id'] = new_srcid
        dbi_src = db_importer.Source(make_dict_if_given_xml=False)
        dbi_src.source_dict_to_xml(gen.sig.x_sdict)
        xml_fpath = dbi_src.xml_string
        a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False,
                          dorun=False)
        out_dict = a.generate_arff_line_for_vosourcexml(
            num=new_srcid, xml_fpath=xml_fpath)
        master_list.append(out_dict)
        all_class_list.append(out_dict['class'])
        master_classes_dict[out_dict['class']] = 0
        for feat_tup in out_dict['features']:
            master_features_dict[
                feat_tup] = 0  # just make sure there is this key in the dict.  0 is filler
    master_features = master_features_dict.keys()
    master_classes = master_classes_dict.keys()
    a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False,
                      dorun=False, add_srcid_to_arff=True)
    a.master_features = master_features
    a.all_class_list = all_class_list
    a.master_classes = master_classes
    a.master_list = master_list
    a.write_arff(outfile=arff_output_fp, \
                 remove_sparse_classes=True, \
                 n_sources_needed_for_class_inclusion=1,
                 include_header=include_arff_header,
                 use_str_srcid=True)