def __init__(self, pars={}):
    self.pars = pars
    # Only build the (expensive) populator and Postgres server when the LBL
    # check is requested; .get() keeps the default empty pars dict from
    # raising a KeyError.
    if self.pars.get('do_lbl_check', False):
        self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
            use_postgre_ptf=True)
        self.PTFPostgreServer = ptf_master.PTF_Postgre_Server(
            pars=ingest_tools.pars,
            rdbt=self.DiffObjSourcePopulator.rdbt)
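# Usage sketch (the owning class name, LBLCheckWrapper, is hypothetical):
# the 'do_lbl_check' flag gates construction of the populator and
# Postgres-server singletons:
#
#     wrapper = LBLCheckWrapper(pars={'do_lbl_check': True})
#     wrapper.PTFPostgreServer   # only set when do_lbl_check is True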
def __init__(self, schema_str=""):
    self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=True)
    # Connection parameters come from the populator's xrsio parameter dict.
    self.db = MySQLdb.connect(
        host=self.DiffObjSourcePopulator.xrsio.pars['rdb_host_ip_2'],
        user=self.DiffObjSourcePopulator.xrsio.pars['rdb_user'],
        db=self.DiffObjSourcePopulator.xrsio.pars['rdb_name_2'],
        port=self.DiffObjSourcePopulator.xrsio.pars['rdb_port_2'])
    self.cursor = self.db.cursor()
    self.schema_str = schema_str
    self.class_schema_definition_dicts = self.init_class_schema(
        self.schema_str)
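# Usage sketch (SchemaWrapper is a hypothetical name for the owning class;
# init_class_schema() is assumed to parse schema_str into the per-plugin
# class-schema definition dicts):
#
#     wrapper = SchemaWrapper(schema_str=open('class_schema.sql').read())
#     wrapper.cursor.execute("SHOW TABLES")
#     print wrapper.class_schema_definition_dicts.keys()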
def initialize_classes(self):
    """ Load other singleton classes.

    NOTE: much of this is adapted from snclassifier_testing_wrapper.py,
          which is adapted from get_classifications_for_caltechid.py..__main__()
    """
    import get_classifications_for_caltechid
    import ingest_tools
    import ptf_master

    self.DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=True)
    self.PTFPostgreServer = ptf_master.PTF_Postgre_Server(
        pars=ingest_tools.pars,
        rdbt=self.DiffObjSourcePopulator.rdbt)
    self.Get_Classifications_For_Ptfid = \
        get_classifications_for_caltechid.GetClassificationsForPtfid(
            rdbt=self.DiffObjSourcePopulator.rdbt,
            PTFPostgreServer=self.PTFPostgreServer,
            DiffObjSourcePopulator=self.DiffObjSourcePopulator)
    self.Caltech_DB = get_classifications_for_caltechid.CaltechDB()
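# Usage sketch (hypothetical): initialize_classes() is intended to be called
# once after construction, so the singletons can be shared by later calls:
#
#     wrapper = ClassificationWrapper()        # hypothetical owning class
#     wrapper.initialize_classes()
#     wrapper.Get_Classifications_For_Ptfid    # singleton, now available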
import os
import sys
import MySQLdb

sys.path.append(
    os.path.abspath(os.environ.get("TCP_DIR") + 'Software/ingest_tools'))
import ptf_master
import ingest_tools  # just needed to set PDB breakpoints

pars = {
    'mysql_user': "******",
    'mysql_hostname': "192.168.1.25",
    'mysql_database': 'object_test_db',
    'mysql_port': 3306}

db = MySQLdb.connect(host=pars['mysql_hostname'],
                     user=pars['mysql_user'],
                     db=pars['mysql_database'],
                     port=pars['mysql_port'])
cursor = db.cursor()

DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
    use_postgre_ptf=True)
print "DONE: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator()"

# NOTE: the list order corresponds to the order of INSERT into the MySQL RDB:
test_objs = [
    {'obj_id': 0, 'ra': 224.55255677, 'dec': 18.44210702,
     'realbogus': 0.9, 'flux': 1803.0, 't_val': 0.0},
]
for dict_elem in test_objs:
    ra = dict_elem['ra']
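# Hedged sketch of how the loop above might continue: one parameterized
# INSERT per test object.  The table and column names below are assumptions
# for illustration, not taken from the real object_test_db schema:
#
#     for dict_elem in test_objs:
#         cursor.execute("INSERT INTO ptf_events "
#                        "(ra, decl, realbogus, flux, t) "
#                        "VALUES (%s, %s, %s, %s, %s)",
#                        (dict_elem['ra'], dict_elem['dec'],
#                         dict_elem['realbogus'], dict_elem['flux'],
#                         dict_elem['t_val']))
#     db.commit()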
def do_classification(self, vosource_list, class_schema_definition_dicts,
                      do_logging=False):
    """ Given a list of VOSource XML strings, or filepaths, this generates
    classifications by calling WEKA and other classifier code.
    Returns information in classification dictionaries.

    TODO: maybe only do the vosource XML parsing once.
    TODO: DiffObjSourcePopulator usage is very KLUDGY since
          - it opens an rdbt connection,
          - it imports many modules,
          - etc.
          -> so, we should find a better way to pass in / reference a static
             object which has access to all this stuff.
    """
    if len(vosource_list) == 0:
        return ({}, {})

    plugin_classification_dict = {}  # This is returned.
    for src_id, vosource_xml_str in vosource_list:
        plugin_classification_dict[src_id] = {}

    ##### WEKA Classification:
    # TODO: run a full WEKA .model classification as well as a couple of
    #       tailored n_epochs .model classifications.
    # TODO: maybe pass a .model into this function.
    if do_logging:
        print "before: self.arffmaker.populate_features_and_classes_using_local_xmls()"
    self.arffmaker.populate_features_and_classes_using_local_xmls(
        srcid_xml_tuple_list=vosource_list)
    try:
        n_epochs_fromfeats = self.arffmaker.master_list[0]['features'][
            ('n_points', 'float')]
    except:
        print "EXCEPT: self.arffmaker.master_list[0]['features'][('n_points', 'float')] : Empty array?"
        n_epochs_fromfeats = 0
    if do_logging:
        print "before: n_epochs_fromfeats > 1.0 try/except"
    if n_epochs_fromfeats > 1.0:
        # The mlens3, Dovi SN and General schemas are handled by dedicated
        # classifiers below; everything else goes through WEKA.
        class_schema_name_list = self.class_schema_definition_dicts.keys()
        class_schema_name_list.remove('mlens3 MicroLens')
        class_schema_name_list.remove('Dovi SN')
        class_schema_name_list.remove('General')
        for class_schema_name in class_schema_name_list:
            try:
                plugin_classification_dict__general = \
                    self.get_class_probs_using_jvm_weka_instance(
                        vosource_list, plugin_name=class_schema_name)
                for src_id, plugin_dict in \
                        plugin_classification_dict__general.iteritems():
                    plugin_classification_dict[src_id].update(plugin_dict)
            except:
                print "EXCEPT: Calling get_class_probs_using_jvm_weka_instance()"
    if do_logging:
        print "after: n_epochs_fromfeats > 1.0 try/except"

    ##### Microlensing classification:
    for src_id, vosource_xml_str in vosource_list:
        # TODO: I need to create a google-pseudo-fp for this string:
        if do_logging:
            print "before: mlens3.EventData(vosource_xml_str)"
        d = mlens3.EventData(vosource_xml_str)
        if do_logging:
            print "before: mlens3.Mlens(datamodel=d, doplot=False)"
        ## Run the fitter (doplot is off so this runs without pylab):
        m = mlens3.Mlens(datamodel=d, doplot=False)
        ## prob_mlens should be between 0 and 1; anything above 0.8 is a
        ## pretty sure bet:
        #prob_mlens = m.final_results["probabilities"]["single-lens"]
        plugin_classification_dict[src_id][
            'mlens3 MicroLens'] = m.final_results

        ##### Nat/Dovi SN case:
        if do_logging:
            print "before: sn_classifier.Dovi_SN(datamodel=d, doplot=False)"
        sn = sn_classifier.Dovi_SN(datamodel=d, doplot=False)
        plugin_classification_dict[src_id]['Dovi SN'] = sn.final_results
    if do_logging:
        print "after: for src_id, vosource_xml_str in vosource_list"
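    # Illustrative shape of plugin_classification_dict at this point; the
    # src_id, plugin names and probabilities are invented for the example:
    #
    #   {100059071: {'mlens3 MicroLens': <m.final_results dict>,
    #                'Dovi SN':          <sn.final_results dict>,
    #                'SDSS Periodic':    {'probabilities':
    #                    {'RR Lyrae': {'prob': 0.72, 'prob_weight': 1.0},
    #                     'W UMa':    {'prob': 0.18, 'prob_weight': 1.0}}}}}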
    ##### Combined / Final Classification:
    # TODO: if mlens prob >= 0.8 and weka_prob[0] < 0.8, then mlens is the
    #       primary class (otherwise, if mlens >= 0.6, incorporate it by
    #       probability as either the 2nd or 3rd class).
    # TODO: combine info from previous classifications to make a final
    #       classification, i.e. use plugin_classification_dict{} to make a
    #       3-element class_probs_dict{<srcid>: [1, 2, 3]}.
    # TODO: get a class_id for mlens3.
    # TODO: extend this when other classification modules are used as well.
    # NOTE: class_probs_dict is used by
    #       generate_insert_classification_using_vosource_list() to INSERT
    #       classifications into the RDB.
    class_probs_dict = {}
    for src_id, a_dict in plugin_classification_dict.iteritems():
        class_probs_dict[src_id] = []
        for plugin_name, plugin_dict in a_dict.iteritems():
            prob_list = []
            for class_name, class_dict in plugin_dict.get(
                    'probabilities', {}).iteritems():
                # TODO: get the MLENS class_id from somewhere!!!
                class_id = self.class_schema_definition_dicts[plugin_name][
                    'class_name_id_dict'][class_name]
                temp_dict = {
                    'schema_id': self.class_schema_definition_dicts[
                        plugin_name]['schema_id'],
                    'class_id': class_id,
                    'class_name': class_name,
                    'plugin_name': plugin_name,
                    'prob': class_dict['prob'],
                    'prob_weight': class_dict['prob_weight']}
                prob_list.append((class_dict['prob'], temp_dict))
            # NOTE: in the WEKA case we rank the top three probabilities as
            #       class_ranks 0,1,2; otherwise we just pass on each
            #       probability with class_rank=0.
            if self.class_schema_definition_dicts[plugin_name][
                    'predicts_multiple_classes']:
                prob_list.sort(reverse=True)
                for i, (prob_float, prob_dict) in enumerate(prob_list[:3]):
                    prob_dict['class_rank'] = i
                    class_probs_dict[src_id].append(prob_dict)
            else:
                for i, (prob_float, prob_dict) in enumerate(prob_list):
                    prob_dict['class_rank'] = i
                    class_probs_dict[src_id].append(prob_dict)
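    # Illustrative class_probs_dict entry after the ranking above (all
    # values invented for the example):
    #
    #   class_probs_dict[100059071] == [
    #       {'schema_id': 1, 'class_id': 7, 'class_name': 'RR Lyrae',
    #        'plugin_name': 'SDSS Periodic', 'prob': 0.72,
    #        'prob_weight': 1.0, 'class_rank': 0},
    #       ...]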
    # 2) step in and make sure general_classif_dict{} is created below.
    # 3) make sure the classification & schema TABLEs can take the new
    #    general/overview class & schema (update ingest.pars).
    # 4) TEST and migrate changes to ipengine nodes, then run for a recent
    #    PTF night.

    # KLUDGE: this is ugly (importing ptf_master.py from within here); the
    #         Diff_Obj_Source_Populator class should exist in a separate
    #         module / file.
    if do_logging:
        print "before: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator"
    DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator(
        use_postgre_ptf=False)
    if do_logging:
        print "after: DiffObjSourcePopulator = ptf_master.Diff_Obj_Source_Populator"

    # We can assume at this point that len(vosource_list) > 0:
    src_id = int(vosource_list[0][0])
    select_str = """SELECT id, realbogus, ujd, source_test_db.srcid_lookup.ra,
       source_test_db.srcid_lookup.decl
FROM object_test_db.obj_srcid_lookup
JOIN object_test_db.ptf_events
     ON (object_test_db.ptf_events.id = object_test_db.obj_srcid_lookup.obj_id)
JOIN source_test_db.srcid_lookup USING (src_id)
WHERE survey_id=3 AND src_id=%d""" % (src_id)
    if do_logging:
        print select_str
    DiffObjSourcePopulator.rdbt.cursor.execute(select_str)
    rdb_rows = DiffObjSourcePopulator.rdbt.cursor.fetchall()
    if do_logging:
        print "after select .execute()"
    general_classif_source_dict = {
        'obj_id': [],
        'realbogus': [],
        'ujd': [],
        'ra': rdb_rows[0][3],
        'dec': rdb_rows[0][4],
        'src_id': src_id}
    for row in rdb_rows:
        general_classif_source_dict['obj_id'].append(row[0])
        general_classif_source_dict['realbogus'].append(row[1])
        general_classif_source_dict['ujd'].append(row[2])
    if do_logging:
        print "before: Get_Classifications_For_Ptfid = get_classifications_for_caltechid.GetClassifications"
    #PTFPostgreServer = ptf_master.PTF_Postgre_Server(pars=ingest_tools.pars,
    #                                rdbt=DiffObjSourcePopulator.rdbt)
    PTFPostgreServer = None
    Get_Classifications_For_Ptfid = \
        get_classifications_for_caltechid.GetClassificationsForPtfid(
            rdbt=DiffObjSourcePopulator.rdbt)
    if do_logging:
        print "before: general_classif_dict = get_classifications_for_tcp_marked_variables.get_overall"
    general_classif_dict = get_classifications_for_tcp_marked_variables.\
        get_overall_classification_without_repopulation(
            DiffObjSourcePopulator,
            PTFPostgreServer,
            Get_Classifications_For_Ptfid,
            Caltech_DB=None,
            matching_source_dict=general_classif_source_dict)
    if do_logging:
        print "after: general_classif_dict = get_classifications_for_tcp_marked_variables.get_overall"
    DiffObjSourcePopulator.rdbt.cursor.close()

    if general_classif_dict.has_key('science_class'):
        class_type = general_classif_dict['science_class']
    else:
        class_type = general_classif_dict['overall_type']
    try:
        table_class_id = class_schema_definition_dicts['General'][
            'class_list'].index(class_type)
    except:
        # 0 is the "other" class, which may represent new periodic classes
        # which haven't been added to ingest_tools.py..pars[
        # 'class_schema_definition_dicts']['General']['class_list'].
        table_class_id = 0
    class_probs_dict[src_id].append({
        'class_id': table_class_id,
        'class_name': general_classif_dict['overall_type'],
        'class_rank': 0,
        'plugin_name': 'General',
        'prob': general_classif_dict.get('class_prob', 1.0),
        'prob_weight': 1.0,
        'schema_id': class_schema_definition_dicts['General']['schema_id']})

    # TODO: then update the (class_probs_dict, plugin_classification_dict)
    #       information so that these classifications can be INSERTed into
    #       the MySQL table.
    # TODO: a new schema will need to be defined which allows INSERT of new
    #       classification schema.
    # NOTE: this will eventually be called 1 stack up, using the singly
    #       passed plugin_classification_dict{}.
    if do_logging:
        print "(end of) do_classification()"
    return (class_probs_dict, plugin_classification_dict)
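# Usage sketch for do_classification() (hypothetical: assumes `classifier`
# is an instance of the class owning this method, with arffmaker and
# class_schema_definition_dicts already initialized):
#
#     vosource_list = [(100059071, open('100059071.xml').read())]
#     (class_probs_dict, plugin_classification_dict) = \
#         classifier.do_classification(
#             vosource_list, classifier.class_schema_definition_dicts,
#             do_logging=True)
#     # class_probs_dict:            {src_id: [ranked classification dicts]}
#     # plugin_classification_dict:  {src_id: {plugin_name: results dict}}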