def __init__(self, pars={}):
    self.pars = pars
    # Only build the (expensive) populator and Postgres server when the LBL
    # check is requested; .get() keeps the default empty pars dict from
    # raising a KeyError.
    if self.pars.get('do_lbl_check', False):
        self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
            use_postgre_ptf=True)
        self.PTFPostgreServer = ptf_master.PTF_Postgre_Server(
            pars=ingest_tools.pars,
            rdbt=self.DiffObjSourcePopulator.rdbt)
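# Usage sketch (the owning class name, LBLCheckWrapper, is hypothetical):
# the 'do_lbl_check' flag gates construction of the populator and
# Postgres-server singletons:
#
#     wrapper = LBLCheckWrapper(pars={'do_lbl_check': True})
#     wrapper.PTFPostgreServer   # only set when do_lbl_check is True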
def __init__(self, schema_str=""):
    self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=True)
    # Connection parameters come from the populator's xrsio parameter dict.
    self.db = MySQLdb.connect(
        host=self.DiffObjSourcePopulator.xrsio.pars['rdb_host_ip_2'],
        user=self.DiffObjSourcePopulator.xrsio.pars['rdb_user'],
        db=self.DiffObjSourcePopulator.xrsio.pars['rdb_name_2'],
        port=self.DiffObjSourcePopulator.xrsio.pars['rdb_port_2'])
    self.cursor = self.db.cursor()
    self.schema_str = schema_str
    self.class_schema_definition_dicts = self.init_class_schema(
        self.schema_str)
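# Usage sketch (SchemaWrapper is a hypothetical name for the owning class;
# init_class_schema() is assumed to parse schema_str into the per-plugin
# class-schema definition dicts):
#
#     wrapper = SchemaWrapper(schema_str=open('class_schema.sql').read())
#     wrapper.cursor.execute("SHOW TABLES")
#     print wrapper.class_schema_definition_dicts.keys()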
def initialize_classes(self):
    """ Load other singleton classes.

    NOTE: much of this is adapted from snclassifier_testing_wrapper.py,
          which is adapted from get_classifications_for_caltechid.py..__main__()
    """
    import get_classifications_for_caltechid
    import ingest_tools
    import ptf_master

    self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=True)
    self.PTFPostgreServer = ptf_master.PTF_Postgre_Server(
        pars=ingest_tools.pars,
        rdbt=self.DiffObjSourcePopulator.rdbt)
    self.Get_Classifications_For_Ptfid = \
        get_classifications_for_caltechid.GetClassificationsForPtfid(
            rdbt=self.DiffObjSourcePopulator.rdbt,
            PTFPostgreServer=self.PTFPostgreServer,
            DiffObjSourcePopulator=self.DiffObjSourcePopulator)
    self.Caltech_DB = get_classifications_for_caltechid.CaltechDB()
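# Usage sketch (hypothetical): initialize_classes() is intended to be called
# once after construction, so the singletons can be shared by later calls:
#
#     wrapper = ClassificationWrapper()        # hypothetical owning class
#     wrapper.initialize_classes()
#     wrapper.Get_Classifications_For_Ptfid    # singleton, now available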
import os
import sys
import MySQLdb

sys.path.append(
    os.path.abspath(os.environ.get("TCP_DIR") + 'Software/ingest_tools'))
import ptf_master
import ingest_tools  # just needed to set PDB breakpoints

pars = {
    'mysql_user': "******",
    'mysql_hostname': "192.168.1.25",
    'mysql_database': 'object_test_db',
    'mysql_port': 3306}

db = MySQLdb.connect(host=pars['mysql_hostname'],
                     user=pars['mysql_user'],
                     db=pars['mysql_database'],
                     port=pars['mysql_port'])
cursor = db.cursor()

DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
    use_postgre_ptf=True)
print "DONE: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator()"

# NOTE: the list order corresponds to the order of INSERT into the MySQL RDB:
test_objs = [
    {'obj_id': 0, 'ra': 224.55255677, 'dec': 18.44210702,
     'realbogus': 0.9, 'flux': 1803.0, 't_val': 0.0},
]
for dict_elem in test_objs:
    ra = dict_elem['ra']
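# Hedged sketch of how the loop above might continue: one parameterized
# INSERT per test object.  The table and column names below are assumptions
# for illustration, not taken from the real object_test_db schema:
#
#     for dict_elem in test_objs:
#         cursor.execute("INSERT INTO ptf_events "
#                        "(ra, decl, realbogus, flux, t) "
#                        "VALUES (%s, %s, %s, %s, %s)",
#                        (dict_elem['ra'], dict_elem['dec'],
#                         dict_elem['realbogus'], dict_elem['flux'],
#                         dict_elem['t_val']))
#     db.commit()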
def do_classification(self, vosource_list, class_schema_definition_dicts,
                      do_logging=False):
    """ Given a list of VOSource XML strings, or filepaths, this generates
    classifications by calling WEKA and other classifier code.
    Returns information in classification dictionaries.

    TODO: maybe only do the vosource XML parsing once.
    TODO: DiffObjSourcePopulator usage is very KLUDGY since
          - it opens an rdbt connection,
          - it imports many modules,
          - etc.
          -> so, we should find a better way to pass in / reference a static
             object which has access to all this stuff.
    """
    if len(vosource_list) == 0:
        return ({}, {})

    plugin_classification_dict = {}  # This is returned.
    for src_id, vosource_xml_str in vosource_list:
        plugin_classification_dict[src_id] = {}

    ##### WEKA Classification:
    # TODO: run a full WEKA .model classification as well as a couple of
    #       tailored n_epochs .model classifications.
    # TODO: maybe pass a .model into this function.
    if do_logging:
        print "before: self.arffmaker.populate_features_and_classes_using_local_xmls()"
    self.arffmaker.populate_features_and_classes_using_local_xmls(
        srcid_xml_tuple_list=vosource_list)
    try:
        n_epochs_fromfeats = self.arffmaker.master_list[0]['features'][
            ('n_points', 'float')]
    except:
        print "EXCEPT: self.arffmaker.master_list[0]['features'][('n_points', 'float')] : Empty array?"
        n_epochs_fromfeats = 0
    if do_logging:
        print "before: n_epochs_fromfeats > 1.0 try/except"
    if n_epochs_fromfeats > 1.0:
        # The mlens3, Dovi SN and General schemas are handled by dedicated
        # classifiers below; everything else goes through WEKA.
        class_schema_name_list = self.class_schema_definition_dicts.keys()
        class_schema_name_list.remove('mlens3 MicroLens')
        class_schema_name_list.remove('Dovi SN')
        class_schema_name_list.remove('General')
        for class_schema_name in class_schema_name_list:
            try:
                plugin_classification_dict__general = \
                    self.get_class_probs_using_jvm_weka_instance(
                        vosource_list, plugin_name=class_schema_name)
                for src_id, plugin_dict in \
                        plugin_classification_dict__general.iteritems():
                    plugin_classification_dict[src_id].update(plugin_dict)
            except:
                print "EXCEPT: Calling get_class_probs_using_jvm_weka_instance()"
    if do_logging:
        print "after: n_epochs_fromfeats > 1.0 try/except"

    ##### Microlensing classification:
    for src_id, vosource_xml_str in vosource_list:
        # TODO: I need to create a google-pseudo-fp for this string:
        if do_logging:
            print "before: mlens3.EventData(vosource_xml_str)"
        d = mlens3.EventData(vosource_xml_str)
        if do_logging:
            print "before: mlens3.Mlens(datamodel=d, doplot=False)"
        ## Run the fitter (doplot is off so this runs without pylab):
        m = mlens3.Mlens(datamodel=d, doplot=False)
        ## prob_mlens should be between 0 and 1; anything above 0.8 is a
        ## pretty sure bet:
        #prob_mlens = m.final_results["probabilities"]["single-lens"]
        plugin_classification_dict[src_id][
            'mlens3 MicroLens'] = m.final_results

        ##### Nat/Dovi SN case:
        if do_logging:
            print "before: sn_classifier.Dovi_SN(datamodel=d, doplot=False)"
        sn = sn_classifier.Dovi_SN(datamodel=d, doplot=False)
        plugin_classification_dict[src_id]['Dovi SN'] = sn.final_results
    if do_logging:
        print "after: for src_id, vosource_xml_str in vosource_list"
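    # Illustrative shape of plugin_classification_dict at this point; the
    # src_id, plugin names and probabilities are invented for the example:
    #
    #   {100059071: {'mlens3 MicroLens': <m.final_results dict>,
    #                'Dovi SN':          <sn.final_results dict>,
    #                'SDSS Periodic':    {'probabilities':
    #                    {'RR Lyrae': {'prob': 0.72, 'prob_weight': 1.0},
    #                     'W UMa':    {'prob': 0.18, 'prob_weight': 1.0}}}}}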
    ##### Combined / Final Classification:
    # TODO: if mlens prob >= 0.8 and weka_prob[0] < 0.8, then mlens is the
    #       primary class (otherwise, if mlens >= 0.6, incorporate it by
    #       probability as either the 2nd or 3rd class).
    # TODO: combine info from previous classifications to make a final
    #       classification, i.e. use plugin_classification_dict{} to make a
    #       3-element class_probs_dict{<srcid>: [1, 2, 3]}.
    # TODO: get a class_id for mlens3.
    # TODO: extend this when other classification modules are used as well.
    # NOTE: class_probs_dict is used by
    #       generate_insert_classification_using_vosource_list() to INSERT
    #       classifications into the RDB.
    class_probs_dict = {}
    for src_id, a_dict in plugin_classification_dict.iteritems():
        class_probs_dict[src_id] = []
        for plugin_name, plugin_dict in a_dict.iteritems():
            prob_list = []
            for class_name, class_dict in plugin_dict.get(
                    'probabilities', {}).iteritems():
                # TODO: get the MLENS class_id from somewhere!!!
                class_id = self.class_schema_definition_dicts[plugin_name][
                    'class_name_id_dict'][class_name]
                temp_dict = {
                    'schema_id': self.class_schema_definition_dicts[
                        plugin_name]['schema_id'],
                    'class_id': class_id,
                    'class_name': class_name,
                    'plugin_name': plugin_name,
                    'prob': class_dict['prob'],
                    'prob_weight': class_dict['prob_weight']}
                prob_list.append((class_dict['prob'], temp_dict))
            # NOTE: in the WEKA case we rank the top three probabilities as
            #       class_ranks 0,1,2; otherwise we just pass on each
            #       probability with class_rank=0.
            if self.class_schema_definition_dicts[plugin_name][
                    'predicts_multiple_classes']:
                prob_list.sort(reverse=True)
                for i, (prob_float, prob_dict) in enumerate(prob_list[:3]):
                    prob_dict['class_rank'] = i
                    class_probs_dict[src_id].append(prob_dict)
            else:
                for i, (prob_float, prob_dict) in enumerate(prob_list):
                    prob_dict['class_rank'] = i
                    class_probs_dict[src_id].append(prob_dict)
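    # Illustrative class_probs_dict entry after the ranking above (all
    # values invented for the example):
    #
    #   class_probs_dict[100059071] == [
    #       {'schema_id': 1, 'class_id': 7, 'class_name': 'RR Lyrae',
    #        'plugin_name': 'SDSS Periodic', 'prob': 0.72,
    #        'prob_weight': 1.0, 'class_rank': 0},
    #       ...]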
    # 2) step in and make sure general_classif_dict{} is created below.
    # 3) make sure the classification & schema TABLEs can take the new
    #    general/overview class & schema (update ingest.pars).
    # 4) TEST and migrate changes to ipengine nodes, then run for a recent
    #    PTF night.

    # KLUDGE: this is ugly (importing ptf_master.py from within here); the
    #         Diff_Obj_Source_Populator class should exist in a separate
    #         module / file.
    if do_logging:
        print "before: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator"
    DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=False)
    if do_logging:
        print "after: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator"

    # We can assume at this point that len(vosource_list) > 0:
    src_id = int(vosource_list[0][0])
    select_str = """SELECT id, realbogus, ujd, source_test_db.srcid_lookup.ra,
       source_test_db.srcid_lookup.decl
FROM object_test_db.obj_srcid_lookup
JOIN object_test_db.ptf_events
     ON (object_test_db.ptf_events.id = object_test_db.obj_srcid_lookup.obj_id)
JOIN source_test_db.srcid_lookup USING (src_id)
WHERE survey_id=3 AND src_id=%d""" % (src_id)
    if do_logging:
        print select_str
    DiffObjSourcePopulator.rdbt.cursor.execute(select_str)
    rdb_rows = DiffObjSourcePopulator.rdbt.cursor.fetchall()
    if do_logging:
        print "after select .execute()"
    general_classif_source_dict = {
        'obj_id': [],
        'realbogus': [],
        'ujd': [],
        'ra': rdb_rows[0][3],
        'dec': rdb_rows[0][4],
        'src_id': src_id}
    for row in rdb_rows:
        general_classif_source_dict['obj_id'].append(row[0])
        general_classif_source_dict['realbogus'].append(row[1])
        general_classif_source_dict['ujd'].append(row[2])
    if do_logging:
        print "before: Get_Classifications_For_Ptfid = get_classifications_for_caltechid.GetClassifications"
    #PTFPostgreServer = ptf_master.PTF_Postgre_Server(pars=ingest_tools.pars,
    #                                rdbt=DiffObjSourcePopulator.rdbt)
    PTFPostgreServer = None
    Get_Classifications_For_Ptfid = \
        get_classifications_for_caltechid.GetClassificationsForPtfid(
            rdbt=DiffObjSourcePopulator.rdbt)
    if do_logging:
        print "before: general_classif_dict = get_classifications_for_tcp_marked_variables.get_overall"
    general_classif_dict = get_classifications_for_tcp_marked_variables.\
        get_overall_classification_without_repopulation(
            DiffObjSourcePopulator,
            PTFPostgreServer,
            Get_Classifications_For_Ptfid,
            Caltech_DB=None,
            matching_source_dict=general_classif_source_dict)
    if do_logging:
        print "after: general_classif_dict = get_classifications_for_tcp_marked_variables.get_overall"
    DiffObjSourcePopulator.rdbt.cursor.close()

    if general_classif_dict.has_key('science_class'):
        class_type = general_classif_dict['science_class']
    else:
        class_type = general_classif_dict['overall_type']
    try:
        table_class_id = class_schema_definition_dicts['General'][
            'class_list'].index(class_type)
    except:
        # 0 is the "other" class, which may represent new periodic classes
        # which haven't been added to ingest_tools.py..pars[
        # 'class_schema_definition_dicts']['General']['class_list'].
        table_class_id = 0
    class_probs_dict[src_id].append({
        'class_id': table_class_id,
        'class_name': general_classif_dict['overall_type'],
        'class_rank': 0,
        'plugin_name': 'General',
        'prob': general_classif_dict.get('class_prob', 1.0),
        'prob_weight': 1.0,
        'schema_id': class_schema_definition_dicts['General']['schema_id']})

    # TODO: then update the (class_probs_dict, plugin_classification_dict)
    #       information so that these classifications can be INSERTed into
    #       the MySQL table.
    # TODO: a new schema will need to be defined which allows INSERT of new
    #       classification schema.
    # NOTE: this will eventually be called 1 stack up, using the singly
    #       passed plugin_classification_dict{}.
    if do_logging:
        print "(end of) do_classification()"
    return (class_probs_dict, plugin_classification_dict)
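# Usage sketch for do_classification() (hypothetical: assumes `classifier`
# is an instance of the class owning this method, with arffmaker and
# class_schema_definition_dicts already initialized):
#
#     vosource_list = [(100059071, open('100059071.xml').read())]
#     (class_probs_dict, plugin_classification_dict) = \
#         classifier.do_classification(
#             vosource_list, classifier.class_schema_definition_dicts,
#             do_logging=True)
#     # class_probs_dict:            {src_id: [ranked classification dicts]}
#     # plugin_classification_dict:  {src_id: {plugin_name: results dict}}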