Esempio n. 1
0
    def _get_timeseries(self):
        """Populate ``self.ts`` and ``self.photkey`` with the timeseries to use.

        When ``self.use_xml`` is true, the XML file named by ``self.outname``
        is loaded through ``db_importer.Source`` (kept on ``self.b``) and its
        ``ts`` dict becomes ``self.ts``.  If more than one photometry key is
        present, the first key starting with 'r' (case-insensitive) is
        selected, falling back to the first key.  When ``self.use_xml`` is
        false, ``self.rec_array`` is exposed under the fixed key "V".

        On any failure a message is printed and the method returns None
        without setting ``self.ts``.
        """
        if not self.use_xml:
            if self.rec_array is None:
                print("must give me a recarray!")
                return
            self.photkey = "V"
            self.ts = {self.photkey: self.rec_array}
            return

        if not self.gotxml or self.outname is None:
            print("dont have the xml to build the timeseries")
            return
        try:
            self.b = db_importer.Source(xml_handle=self.outname)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            print("timeseries import failed. Check your XML file. Maybe rm %s"
                  % self.outname)
            return

        # list() so the keys can be indexed on Python 2 and Python 3 alike.
        kk = list(self.b.ts.keys())
        if not kk:
            # Without this guard kk[ind] below would raise IndexError.
            print("timeseries import failed. No photometry keys in %s"
                  % self.outname)
            return

        ind = 0
        if len(kk) > 1:
            print("note: lots of phototometry keys to choose from...using the first FIXME")
            for i, k in enumerate(kk):
                # k[:1] instead of k[0]: an empty key must not raise.
                if k[:1].lower() == 'r':
                    ind = i
                    break

        self.photkey = kk[ind]
        # Two spaces after '=' reproduce the output of the former
        # ``print "phot key = ", key`` statement.
        print("phot key =  %s" % (self.photkey,))  ## FIXME...maybe want to choose V band first
        self.ts = self.b.ts
    def parse_xmls_write_dats(self, do_write=True):
        """ Parse ts data from vosource xmls, write into .dat files

        Every ``*xml`` file in the directories of ``self.pars['xml_dirs']``
        (a ``{proj_id: dirpath}`` mapping) is visited.  The source id is the
        integer file name (extension removed) minus 100000000.

        Args:
            do_write: when true, each XML is parsed with
                ``db_importer.Source`` and the first timeseries found is
                written to ``<self.pars['dat_dirpath']>/<src_id>.dat`` as one
                "t m m_err" line per epoch.  When false, only the id list is
                built and no XML is parsed.

        Returns:
            List of ``(proj_id, src_id)`` tuples, one per XML file found.

        Raises:
            ValueError: if an XML file name is not an integer.
        """
        import glob
        import os

        srcid_list = []
        # .items() yields the same pairs as .iteritems() and also exists on
        # Python 3.
        for proj_id, dirpath in self.pars['xml_dirs'].items():
            for xml_fpath in glob.glob("%s/*xml" % (dirpath)):
                stem = os.path.splitext(os.path.basename(xml_fpath))[0]
                src_id = int(stem) - 100000000
                srcid_list.append((proj_id, src_id))

                if not do_write:
                    continue

                ### parse the timeseries
                signals_list = []
                gen = generators_importers.from_xml(signals_list)
                ###  This is taken from from_xml.py::generate():
                gen.signalgen = {}
                gen.sig = db_importer.Source(xml_handle=xml_fpath,
                                             doplot=False,
                                             make_xml_if_given_dict=False)
                ### Here we assume only one filter (true for proj_id=[123,126]):
                first_ts = list(gen.sig.x_sdict['ts'].values())[0]
                t = first_ts['t']
                m = first_ts['m']
                m_err = first_ts['m_err']

                dat_fpath = "%s/%d.dat" % (self.pars['dat_dirpath'], src_id)

                # The context manager closes the file even if a write fails.
                with open(dat_fpath, 'w') as fp:
                    # m and m_err are indexed (not zipped) so that a length
                    # mismatch still raises instead of truncating silently.
                    for i, t_i in enumerate(t):
                        fp.write("%lf %lf %lf\n" % (t_i, m[i], m_err[i]))
        return srcid_list
Esempio n. 3
0
 def generate_simptimeseries_xmls(self, simbad_ptf_dict=None):
     """ Using the entries in given dict, run db_importer.py stuff and generate new .xmls in some dir.

     Args:
         simbad_ptf_dict: two-level mapping; only the keys of the inner
             dicts (PTF short names) are used.  For each short name,
             ``<out_xmls_dirpath>/<shortname>.xml`` is loaded and its
             "simptimeseries" XML rendering is written to
             ``<out_xmls_simpt_dirpath>/simpt_<shortname>.xml`` (both
             directories come from ``self.pars``).  None or an empty
             mapping is a no-op.
     """
     if simbad_ptf_dict is None:
         return
     for sim_dict in simbad_ptf_dict.values():
         for ptf_shortname in sim_dict:
             orig_fpath = os.path.expandvars("%s/%s.xml" % (self.pars['out_xmls_dirpath'], ptf_shortname))
             s = db_importer.Source(xml_handle=orig_fpath)
             out_str = s.source_dict_to_xml__simptimeseries(s.x_sdict)
             temp_xml_fpath = "%s/simpt_%s.xml" % (self.pars['out_xmls_simpt_dirpath'], ptf_shortname)
             # The context manager closes the file even if the write fails.
             with open(temp_xml_fpath, 'w') as fp:
                 fp.write(out_str)
 def generate(self,
              xml_handle="/home/maxime/feature_extract/Code/source_5.xml",
              make_xml_if_given_dict=True,
              register=True):
     """Load a source from ``xml_handle`` and pass its signals to the storer.

     Resets ``self.signalgen``, builds a ``db_importer.Source`` (plotting
     disabled) on ``self.sig``, mirrors its ``x_sdict`` on ``self.sdict``,
     calls ``self.set_outputs()`` and finally stores ``self.signalgen``
     together with ``self.signals_list``.

     Args:
         xml_handle: forwarded to ``db_importer.Source``.
         make_xml_if_given_dict: forwarded to ``db_importer.Source``.
         register: forwarded to ``self.storer.store``.
     """
     self.signalgen = {}
     source_kwargs = dict(xml_handle=xml_handle,
                          doplot=False,
                          make_xml_if_given_dict=make_xml_if_given_dict)
     self.sig = db_importer.Source(**source_kwargs)
     self.sdict = self.sig.x_sdict
     # set_outputs() adds/fills
     # self.signalgen[<filters>,multiband]{'input':{filled},'features':{empty},'inter':{empty}}
     # see (1) at EOF for output from above function
     self.set_outputs()
     self.storer.store(self.signalgen, self.signals_list, register=register)
def _load_dotastro_data(fname="013113-7829.1.xml"):
    """loader for dotastro xml files

    Args:
        fname: XML file handed to ``db_importer.Source``.

    Returns:
        Tuple ``(t, m, m_err, name)``: three numpy arrays built from the
        't', 'm' and 'm_err' entries of the first timeseries key, and
        ``fname`` truncated at its first ".xml".

    Raises:
        IndexError: if the source holds no timeseries key.
        ImportError: if ``db_importer`` cannot be found.
    """
    tcp_dir = os.environ.get("TCP_DIR")
    if tcp_dir is not None:
        # os.path.join supplies the separator whether or not TCP_DIR ends
        # with '/'; plain concatenation produced ".../tcpSoftware/..." when
        # it did not.
        code_dir = os.path.abspath(
            os.path.join(tcp_dir, 'Software', 'feature_extract', 'Code'))
        # Avoid growing sys.path on every call.
        if code_dir not in sys.path:
            sys.path.append(code_dir)
    import db_importer

    b = db_importer.Source(xml_handle=fname)
    ts = b.ts
    # list() so the first key can be taken on Python 2 and Python 3 alike.
    photkey = list(ts.keys())[0]
    x0 = np.array(ts[photkey]['t'])
    y = np.array(ts[photkey]['m'])
    dy = np.array(ts[photkey]['m_err'])
    name = fname.split(".xml")[0]
    return x0, y, dy, name
 def insert_vosource_info_into_table(self, fpath):
     """ Given a vosource.xml fpath, parse XML info using db_importer.py, and
     INSERT corresponding rows into table.
     """
     dbi_src = db_importer.Source(make_dict_if_given_xml=True,
                                  make_xml_if_given_dict=False,
                                  doplot=False,
                                  xml_handle=fpath)
     src_id = dbi_src.x_sdict['src_id']
     sci_class = dbi_src.x_sdict['class']
     if len(sci_class) == 0:
         print 'No class:', src_id
         return
     if not sci_class in self.pars['interested_sci_classes']:
         print 'Not a sci-class of interest:', src_id, sci_class
         return
     insert_str = "INSERT INTO %s (src_id, epoch_id, n_epochs, class_final, fpath) VALUES (%d, 0, %d, '%s', '%s')" % (\
                                self.pars['table_name'],
                                src_id,
                                len(dbi_src.x_sdict['ts'].values()[0]['t']),
                                sci_class, 
                                fpath)
     self.cursor.execute(insert_str)
Esempio n. 7
0
                rdb_name=ingest_tools.pars['rdb_name_2'])
    srcdbt = ingest_tools.Source_Database_Tools(\
                ingest_tools.pars, None, None, \
                rdb_host_ip=ingest_tools.pars['rdb_host_ip_4'], \
                rdb_user=ingest_tools.pars['rdb_user_4'],\
                rdb_name=ingest_tools.pars['rdb_name_4'])

    feat_db = feature_extraction_interface.Feature_database()
    feat_db.initialize_mysql_connection(\
                    rdb_host_ip=ingest_tools.pars['rdb_features_host_ip'],\
                    rdb_user=ingest_tools.pars['rdb_features_user'], \
                    rdb_name=ingest_tools.pars['rdb_features_db_name'], \
        feat_lookup_tablename=ingest_tools.pars['feat_lookup_tablename'], \
        feat_values_tablename=ingest_tools.pars['feat_values_tablename'])

    dbi_src = db_importer.Source(make_dict_if_given_xml=False)

    #rfv = Rdb_Form_VOsource(ingest_tools.pars, rdbt, srcdbt, feat_db, dbi_src)
    #rfv.get_vosource_url_for_srcid(src_id)
    #sys.exit()
    import SimpleXMLRPCServer
    server = SimpleXMLRPCServer.SimpleXMLRPCServer(\
                          ("lyra.berkeley.edu", \
                           34583))
    #server = SimpleXMLRPCServer.SimpleXMLRPCServer(\
    #                      ("192.168.1.65", \
    #                       34583))
    server.register_instance(Rdb_Form_VOsource(ingest_tools.pars, rdbt, srcdbt, feat_db, dbi_src))
    server.register_multicall_functions()
    server.register_introspection_functions()
    server.serve_forever()
Esempio n. 8
0
def generate_arff_using_raw_xml(xml_str):
    """ This generates an arff, which contains features

    Features are generated for the single source described by ``xml_str``
    (forwarded as ``xml_handle`` to the ``from_xml`` generator), the source
    is given src_id 1, and the result is rendered through ``arffify.Maker``.
    Anything printed while the features are generated is discarded.

    Args:
        xml_str: value handed to ``gen.generate(xml_handle=...)``.

    Returns:
        The ARFF text, header included, as a string.
    """
    new_srcid = 1
    include_arff_header = True

    ### Generate the features:
    saved_stdout = sys.stdout
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        try:
            signals_list = []
            gen = generators_importers.from_xml(signals_list)
            gen.generate(xml_handle=xml_str)
            gen.sig.add_features_to_xml_string(signals_list)
            gen.sig.x_sdict['src_id'] = new_srcid
            dbi_src = db_importer.Source(make_dict_if_given_xml=False)
            dbi_src.source_dict_to_xml(gen.sig.x_sdict)
        finally:
            # Always hand stdout back, even when feature generation raises;
            # otherwise every later print would vanish into devnull.
            sys.stdout = saved_stdout

    # NOTE(review): despite the keyword name, this is the ``xml_string``
    # attribute -- presumably XML text rather than a file path; confirm.
    xml_fpath = dbi_src.xml_string

    a = arffify.Maker(search=[],
                      skip_class=False,
                      local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False,
                      dorun=False)
    out_dict = a.generate_arff_line_for_vosourcexml(num=new_srcid,
                                                    xml_fpath=xml_fpath)

    master_list = [out_dict]
    all_class_list = [out_dict['class']]
    master_classes_dict = {out_dict['class']: 0}
    # The dicts only serve as sets of unique keys; 0 is filler.
    master_features_dict = {}
    for feat_tup in out_dict['features']:
        master_features_dict[feat_tup] = 0

    # list() keeps these real lists on Python 3 as well.
    master_features = list(master_features_dict.keys())
    master_classes = list(master_classes_dict.keys())
    a = arffify.Maker(search=[],
                      skip_class=True,
                      local_xmls=True,
                      convert_class_abrvs_to_names=False,
                      flag_retrieve_class_abrvs_from_TUTOR=False,
                      dorun=False,
                      add_srcid_to_arff=True)
    a.master_features = master_features
    a.all_class_list = all_class_list
    a.master_classes = master_classes
    a.master_list = master_list

    fp_out = cStringIO.StringIO()
    a.write_arff(outfile=fp_out,
                 remove_sparse_classes=True,
                 n_sources_needed_for_class_inclusion=1,
                 include_header=include_arff_header,
                 use_str_srcid=True)#, classes_arff_str='', remove_sparse_classes=False)
    arff_str = fp_out.getvalue()
    return arff_str
Esempio n. 9
0
    def generate_arff_using_asasdat(self,
                                    data_fpaths=None,
                                    include_arff_header=False,
                                    arff_output_fp=None):
        """ Given a list of LINEAR data file filepaths, for each source/file:
        - choose the optimal aperture, depending upon median magnitude <---only for ASAS!!!
        - exclude bad/flagged epochs
        - generate features from timeseries (placing in intermediate XML-string format)
        - collect resulting features for all given sources, and place in ARFF style file
              which will later be read by ML training/classification code.

        Partially adapted from: TCP/Software/citris33/arff_generation_master_using_generic_ts_data.py:get_dat_arffstrs()

        Args:
            data_fpaths: iterable of ``.dat`` file paths; the file name
                without ``.dat`` becomes the source id.  None means no files.
            include_arff_header: forwarded to ``write_arff`` as
                ``include_header``.
            arff_output_fp: forwarded to ``write_arff`` as ``outfile``.

        Returns:
            None; the ARFF content is written through ``write_arff``.
        """
        if data_fpaths is None:
            data_fpaths = []

        import tutor_database_project_insert
        # ``pars`` is a module-level name, not an attribute of self.
        adt = tutor_database_project_insert.ASAS_Data_Tools(pars=pars)
        adt.frame_limitmags = self.retrieve_limitmags_from_pkl()

        tcp_dir = os.environ.get('TCP_DIR')

        def _extend_sys_path(*parts):
            # No-op when TCP_DIR is unset: the imports below then rely on
            # whatever sys.path already contains.
            if tcp_dir is None:
                return
            # os.path.join supplies the separator whether or not TCP_DIR
            # ends with '/'.
            path = os.path.abspath(os.path.join(tcp_dir, *parts))
            # Avoid growing sys.path on every call.
            if path not in sys.path:
                sys.path.append(path)

        _extend_sys_path('Software', 'feature_extract', 'MLData')
        import arffify

        _extend_sys_path('Software', 'feature_extract', 'Code')
        import db_importer
        from data_cleaning import sigmaclip_sdict_ts  # not used below
        _extend_sys_path('Software', 'feature_extract')
        from Code import generators_importers

        master_list = []
        # The two dicts only serve as sets of unique keys; 0 is filler.
        master_features_dict = {}
        all_class_list = []
        master_classes_dict = {}

        for dat_fpath in data_fpaths:
            dat_fname = os.path.basename(dat_fpath)
            if dat_fname.endswith('.dat'):
                new_srcid = dat_fname[:-len('.dat')]
            else:
                # Keep the whole name instead of chopping its last character.
                new_srcid = dat_fname
            with open(dat_fpath) as dat_fp:
                ts_str = dat_fp.read()
            source_intermed_dict = adt.parse_asas_ts_data_str(ts_str)
            # Without this call mag_data_dict is undefined and the next
            # statement raises NameError.
            # NOTE(review): aperture selection is ASAS-specific; LINEAR data
            # still needs an equivalent function -- confirm this is the
            # intended input for form_xml_string().
            mag_data_dict = adt.filter_best_ts_aperture(source_intermed_dict)

            xml_str = self.form_xml_string(mag_data_dict)

            ### Generate the features:
            signals_list = []
            gen = generators_importers.from_xml(signals_list)
            gen.generate(xml_handle=xml_str)
            gen.sig.add_features_to_xml_string(signals_list)
            gen.sig.x_sdict['src_id'] = new_srcid
            dbi_src = db_importer.Source(make_dict_if_given_xml=False)
            dbi_src.source_dict_to_xml(gen.sig.x_sdict)

            xml_fpath = dbi_src.xml_string

            a = arffify.Maker(search=[],
                              skip_class=False,
                              local_xmls=True,
                              convert_class_abrvs_to_names=False,
                              flag_retrieve_class_abrvs_from_TUTOR=False,
                              dorun=False)
            out_dict = a.generate_arff_line_for_vosourcexml(
                num=new_srcid, xml_fpath=xml_fpath)

            master_list.append(out_dict)
            all_class_list.append(out_dict['class'])
            master_classes_dict[out_dict['class']] = 0
            for feat_tup in out_dict['features']:
                master_features_dict[feat_tup] = 0

        # list() keeps these real lists on Python 3 as well.
        master_features = list(master_features_dict.keys())
        master_classes = list(master_classes_dict.keys())
        a = arffify.Maker(search=[],
                          skip_class=False,
                          local_xmls=True,
                          convert_class_abrvs_to_names=False,
                          flag_retrieve_class_abrvs_from_TUTOR=False,
                          dorun=False,
                          add_srcid_to_arff=True)
        a.master_features = master_features
        a.all_class_list = all_class_list
        a.master_classes = master_classes
        a.master_list = master_list

        a.write_arff(outfile=arff_output_fp,
                     remove_sparse_classes=True,
                     n_sources_needed_for_class_inclusion=1,
                     include_header=include_arff_header,
                     use_str_srcid=True)#, classes_arff_str='', remove_sparse_classes=False)