def _get_timeseries(self):
    if self.use_xml:
        if not self.gotxml or self.outname is None:
            print "don't have the xml to build the timeseries"
            return
        try:
            self.b = db_importer.Source(xml_handle=self.outname)
        except:
            print "timeseries import failed. Check your XML file. Maybe rm %s" % self.outname
            return
        kk = self.b.ts.keys()
        ind = 0
        if len(kk) > 1:
            print "note: lots of photometry keys to choose from...using the first FIXME"
            # Prefer an R-band key if one exists; otherwise fall back to the first key:
            ind = -1
            for i, k in enumerate(kk):
                if k[0].lower() == 'r':
                    ind = i
                    break
            if ind == -1:
                ind = 0
        self.photkey = kk[ind]
        print "phot key = ", kk[ind]  ## FIXME...maybe want to choose V band first
        self.ts = self.b.ts
    else:
        if self.rec_array is None:
            print "must give me a recarray!"
            return
        self.photkey = "V"
        self.ts = {self.photkey: self.rec_array}
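# Usage sketch for _get_timeseries(), assuming a hypothetical owner object
# ("lc" and its class are illustrative, not names from this codebase); the
# attributes set below are exactly the ones the method reads:
#
#   lc.use_xml = True
#   lc.gotxml = True
#   lc.outname = "source_5.xml"   # vosource XML written earlier
#   lc.rec_array = None
#   lc._get_timeseries()          # fills lc.ts and lc.photkey
#   t = lc.ts[lc.photkey]['t']    # epochs of the chosen band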
def parse_xmls_write_dats(self, do_write=True):
    """ Parse ts data from vosource xmls, write into .dat files
    """
    import glob
    srcid_list = []
    for proj_id, dirpath in self.pars['xml_dirs'].iteritems():
        xml_fpaths = glob.glob("%s/*xml" % (dirpath))
        for xml_fpath in xml_fpaths:
            src_id = int(xml_fpath[xml_fpath.rfind('/') + 1:xml_fpath.rfind('.')]) - 100000000
            srcid_list.append((proj_id, src_id))
            if do_write:
                ### Parse the timeseries:
                signals_list = []
                gen = generators_importers.from_xml(signals_list)
                ### This is taken from from_xml.py::generate():
                gen.signalgen = {}
                gen.sig = db_importer.Source(xml_handle=xml_fpath, doplot=False,
                                             make_xml_if_given_dict=False)
                ### Here we assume only one filter (true for proj_id=[123,126]):
                t = gen.sig.x_sdict['ts'].values()[0]['t']
                m = gen.sig.x_sdict['ts'].values()[0]['m']
                m_err = gen.sig.x_sdict['ts'].values()[0]['m_err']
                dat_fpath = "%s/%d.dat" % (self.pars['dat_dirpath'], src_id)
                fp = open(dat_fpath, 'w')
                for i in xrange(len(t)):
                    fp.write("%lf %lf %lf\n" % (t[i], m[i], m_err[i]))
                fp.close()
    return srcid_list
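# Usage sketch for parse_xmls_write_dats(), assuming the owning object carries
# a pars dict with 'xml_dirs' ({proj_id: dirpath} of vosource XMLs named
# <src_id + 100000000>.xml) and 'dat_dirpath'; "conv" and the paths below are
# hypothetical:
#
#   conv.pars = {'xml_dirs': {123: '/path/to/xmls'},
#                'dat_dirpath': '/path/to/dats'}
#   srcid_list = conv.parse_xmls_write_dats(do_write=True)
#   # srcid_list is [(proj_id, src_id), ...]; one t/m/m_err .dat per source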
def generate_simptimeseries_xmls(self, simbad_ptf_dict={}):
    """ Using the entries in the given dict, run db_importer.py stuff
    and generate new .xmls in some dir.
    """
    for simbad_otype, sim_dict in simbad_ptf_dict.iteritems():
        for ptf_shortname, ptf_dict in sim_dict.iteritems():
            orig_fpath = os.path.expandvars("%s/%s.xml" % (
                self.pars['out_xmls_dirpath'], ptf_shortname))
            s = db_importer.Source(xml_handle=orig_fpath)
            out_str = s.source_dict_to_xml__simptimeseries(s.x_sdict)
            temp_xml_fpath = "%s/simpt_%s.xml" % (
                self.pars['out_xmls_simpt_dirpath'], ptf_shortname)
            fp = open(temp_xml_fpath, 'w')
            fp.write(out_str)
            fp.close()
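# Usage sketch for generate_simptimeseries_xmls(): the dict maps SIMBAD object
# type -> {PTF shortname -> source dict}; only the shortname keys are used, to
# locate <out_xmls_dirpath>/<shortname>.xml. The names below are hypothetical:
#
#   self.generate_simptimeseries_xmls(
#       simbad_ptf_dict={'RRLyr': {'PTF09xyz': {}}})
#   # writes <out_xmls_simpt_dirpath>/simpt_PTF09xyz.xml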
def generate(self, xml_handle="/home/maxime/feature_extract/Code/source_5.xml",
             make_xml_if_given_dict=True, register=True):
    self.signalgen = {}
    self.sig = db_importer.Source(xml_handle=xml_handle, doplot=False,
                                  make_xml_if_given_dict=make_xml_if_given_dict)
    self.sdict = self.sig.x_sdict
    # This adds/fills:
    #   self.signalgen[<filters>,multiband]{'input':{filled},'features':{empty},'inter':{empty}}
    # See (1) at EOF for output from the function below.
    self.set_outputs()
    self.storer.store(self.signalgen, self.signals_list, register=register)
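# Usage sketch for generate(), mirroring the calls made elsewhere in this
# module (the xml_handle path is illustrative):
#
#   signals_list = []
#   gen = generators_importers.from_xml(signals_list)
#   gen.generate(xml_handle="/path/to/source_5.xml")
#   # signals_list now holds the stored per-filter feature dicts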
def _load_dotastro_data(fname="013113-7829.1.xml"):
    """loader for dotastro xml files"""
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") +
                                    'Software/feature_extract/Code'))
    import db_importer
    b = db_importer.Source(xml_handle=fname)
    kk = b.ts.keys()
    ind = 0
    photkey = kk[ind]
    ts = b.ts
    x0 = np.array(ts[photkey]['t'])
    y = np.array(ts[photkey]['m'])
    dy = np.array(ts[photkey]['m_err'])
    name = fname.split(".xml")[0]
    return x0, y, dy, name
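# Example call for _load_dotastro_data(); the TCP_DIR environment variable
# must point at the TCP checkout so the in-function db_importer import works,
# and the XML file here is the function's own default:
#
#   x0, y, dy, name = _load_dotastro_data(fname="013113-7829.1.xml")
#   print name, len(x0), "epochs"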
def insert_vosource_info_into_table(self, fpath):
    """ Given a vosource.xml fpath, parse XML info using db_importer.py,
    and INSERT corresponding rows into table.
    """
    dbi_src = db_importer.Source(make_dict_if_given_xml=True,
                                 make_xml_if_given_dict=False,
                                 doplot=False,
                                 xml_handle=fpath)
    src_id = dbi_src.x_sdict['src_id']
    sci_class = dbi_src.x_sdict['class']
    if len(sci_class) == 0:
        print 'No class:', src_id
        return
    if sci_class not in self.pars['interested_sci_classes']:
        print 'Not a sci-class of interest:', src_id, sci_class
        return
    insert_str = ("INSERT INTO %s (src_id, epoch_id, n_epochs, class_final, fpath) "
                  "VALUES (%d, 0, %d, '%s', '%s')" % (
                      self.pars['table_name'], src_id,
                      len(dbi_src.x_sdict['ts'].values()[0]['t']),
                      sci_class, fpath))
    self.cursor.execute(insert_str)
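# Design note: the INSERT above interpolates values directly into the SQL
# string. A sketch of a safer, parameterized equivalent (assuming a
# MySQLdb-style DB-API cursor; table names cannot be parameterized, so that
# part remains interpolated):
#
#   n_epochs = len(dbi_src.x_sdict['ts'].values()[0]['t'])
#   insert_str = ("INSERT INTO " + self.pars['table_name'] +
#                 " (src_id, epoch_id, n_epochs, class_final, fpath)"
#                 " VALUES (%s, 0, %s, %s, %s)")
#   self.cursor.execute(insert_str, (src_id, n_epochs, sci_class, fpath))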
    rdb_name=ingest_tools.pars['rdb_name_2'])
srcdbt = ingest_tools.Source_Database_Tools(
    ingest_tools.pars, None, None,
    rdb_host_ip=ingest_tools.pars['rdb_host_ip_4'],
    rdb_user=ingest_tools.pars['rdb_user_4'],
    rdb_name=ingest_tools.pars['rdb_name_4'])
feat_db = feature_extraction_interface.Feature_database()
feat_db.initialize_mysql_connection(
    rdb_host_ip=ingest_tools.pars['rdb_features_host_ip'],
    rdb_user=ingest_tools.pars['rdb_features_user'],
    rdb_name=ingest_tools.pars['rdb_features_db_name'],
    feat_lookup_tablename=ingest_tools.pars['feat_lookup_tablename'],
    feat_values_tablename=ingest_tools.pars['feat_values_tablename'])
dbi_src = db_importer.Source(make_dict_if_given_xml=False)
#rfv = Rdb_Form_VOsource(ingest_tools.pars, rdbt, srcdbt, feat_db, dbi_src)
#rfv.get_vosource_url_for_srcid(src_id)
#sys.exit()
import SimpleXMLRPCServer
server = SimpleXMLRPCServer.SimpleXMLRPCServer(("lyra.berkeley.edu", 34583))
#server = SimpleXMLRPCServer.SimpleXMLRPCServer(("192.168.1.65", 34583))
server.register_instance(Rdb_Form_VOsource(ingest_tools.pars, rdbt, srcdbt,
                                            feat_db, dbi_src))
server.register_multicall_functions()
server.register_introspection_functions()
server.serve_forever()
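# Client-side sketch for the XML-RPC server above, using the Python 2 stdlib
# xmlrpclib; listMethods() works because introspection functions are
# registered, and any method Rdb_Form_VOsource exposes can then be invoked:
#
#   import xmlrpclib
#   proxy = xmlrpclib.ServerProxy("http://lyra.berkeley.edu:34583")
#   print proxy.system.listMethods()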
def generate_arff_using_raw_xml(xml_str):
    """ This generates an arff (which contains features) from a raw
    vosource XML string.
    """
    master_list = []
    master_features_dict = {}
    all_class_list = []
    master_classes_dict = {}
    new_srcid = 1
    include_arff_header = True

    ### Generate the features, silencing stdout chatter from the generators:
    tmp_stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')
    signals_list = []
    gen = generators_importers.from_xml(signals_list)
    gen.generate(xml_handle=xml_str)
    gen.sig.add_features_to_xml_string(signals_list)
    gen.sig.x_sdict['src_id'] = new_srcid
    dbi_src = db_importer.Source(make_dict_if_given_xml=False)
    dbi_src.source_dict_to_xml(gen.sig.x_sdict)
    sys.stdout.close()
    sys.stdout = tmp_stdout

    xml_fpath = dbi_src.xml_string
    a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False)
    out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid,
                                                    xml_fpath=xml_fpath)
    master_list.append(out_dict)
    all_class_list.append(out_dict['class'])
    master_classes_dict[out_dict['class']] = 0
    for feat_tup in out_dict['features']:
        master_features_dict[feat_tup] = 0  # just ensure the key exists; 0 is filler

    master_features = master_features_dict.keys()
    master_classes = master_classes_dict.keys()
    a = arffify.Maker(search=[], skip_class=True, local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False,
                      add_srcid_to_arff=True)
    a.master_features = master_features
    a.all_class_list = all_class_list
    a.master_classes = master_classes
    a.master_list = master_list

    fp_out = cStringIO.StringIO()
    a.write_arff(outfile=fp_out,
                 remove_sparse_classes=True,
                 n_sources_needed_for_class_inclusion=1,
                 include_header=include_arff_header,
                 use_str_srcid=True)
    arff_str = fp_out.getvalue()
    return arff_str
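# Usage sketch for generate_arff_using_raw_xml(); the filename is
# hypothetical, and the assumption (consistent with how gen.generate() is
# called elsewhere in this module) is that xml_handle accepts a raw vosource
# XML string as well as a filepath:
#
#   raw_xml = open("some_vosource.xml").read()
#   arff_str = generate_arff_using_raw_xml(raw_xml)
#   open("single_source.arff", "w").write(arff_str)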
def generate_arff_using_asasdat(self, data_fpaths=[], include_arff_header=False,
                                arff_output_fp=None):
    """ Given a list of LINEAR data file filepaths, for each source/file:
    - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
    - exclude bad/flagged epochs
    - generate features from timeseries (placing in intermediate XML-string format)
    - collect resulting features for all given sources, and place in ARFF style
      file which will later be read by ML training/classification code.

    Partially adapted from:
    TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()
    """
    import tutor_database_project_insert
    adt = tutor_database_project_insert.ASAS_Data_Tools(pars=pars)
    adt.frame_limitmags = self.retrieve_limitmags_from_pkl()

    sys.path.append(os.environ.get('TCP_DIR') + '/Software/feature_extract/MLData')
    import arffify
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") +
                                    'Software/feature_extract/Code'))
    import db_importer
    from data_cleaning import sigmaclip_sdict_ts
    sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") +
                                    'Software/feature_extract'))
    from Code import generators_importers

    master_list = []
    master_features_dict = {}
    all_class_list = []
    master_classes_dict = {}
    for dat_fpath in data_fpaths:
        new_srcid = dat_fpath[dat_fpath.rfind('/') + 1:dat_fpath.rfind('.dat')]
        ts_str = open(dat_fpath).read()
        source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
        ### FIXME: the ASAS aperture selection below is disabled; a LINEAR
        ### equivalent is needed, otherwise mag_data_dict is undefined here:
        #mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)
        xml_str = self.form_xml_string(mag_data_dict)

        ### Generate the features:
        signals_list = []
        gen = generators_importers.from_xml(signals_list)
        gen.generate(xml_handle=xml_str)
        gen.sig.add_features_to_xml_string(signals_list)
        gen.sig.x_sdict['src_id'] = new_srcid
        dbi_src = db_importer.Source(make_dict_if_given_xml=False)
        dbi_src.source_dict_to_xml(gen.sig.x_sdict)
        xml_fpath = dbi_src.xml_string

        a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False)
        out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid,
                                                        xml_fpath=xml_fpath)
        master_list.append(out_dict)
        all_class_list.append(out_dict['class'])
        master_classes_dict[out_dict['class']] = 0
        for feat_tup in out_dict['features']:
            master_features_dict[feat_tup] = 0  # just ensure the key exists; 0 is filler

    master_features = master_features_dict.keys()
    master_classes = master_classes_dict.keys()
    a = arffify.Maker(search=[], skip_class=False, local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False, dorun=False,
                      add_srcid_to_arff=True)
    a.master_features = master_features
    a.all_class_list = all_class_list
    a.master_classes = master_classes
    a.master_list = master_list
    a.write_arff(outfile=arff_output_fp,
                 remove_sparse_classes=True,
                 n_sources_needed_for_class_inclusion=1,
                 include_header=include_arff_header,
                 use_str_srcid=True)
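# Usage sketch for generate_arff_using_asasdat(); note the FIXME inside the
# loop must be resolved (a LINEAR analog of filter_best_ts_aperture()) before
# this runs end to end. Paths and filenames below are hypothetical:
#
#   import glob
#   fp = open("linear_sources.arff", "w")
#   self.generate_arff_using_asasdat(data_fpaths=glob.glob("/path/to/*.dat"),
#                                    include_arff_header=True,
#                                    arff_output_fp=fp)
#   fp.close()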