Code example #1
 def test_CasJobs_submitJob(self):
     jobId = CasJobs.submitJob(sql=CasJobs_TestQuery + " into MyDB." +
                               CasJobs_TestTableName1,
                               context=CasJobs_TestDatabase)
     jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
     df = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName1,
                               context="MyDB",
                               format="csv")
     self.assertNotEqual(jobId, "")
Code example #2
def testCasJobsSubmit(token):
    tbl="mriiscplanck1_"+str(randint(0,1000000))

    sql="""select top 10 galaxyId,snapnum,stellarmass into """+tbl+""" from MRIIscPlanck1"""

    jobId= CasJobs.submitJob(sql,context="Henriques2015a",token=token)
    print("jobId=",jobId)
    jobDesc = CasJobs.waitForJob(jobId)
    print(jobDesc)
Code example #3
    def wait_jobdone(self, jobId):
        jobStatus = CasJobs.getJobStatus(jobId)['Status']
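        # CasJobs status codes (per the SciServer docs): 0 = ready, 1 = started,
        # 2 = canceling, 3 = cancelled, 4 = failed, 5 = finished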
        while jobStatus < 2:
            time.sleep(10)
            jobStatus = CasJobs.getJobStatus(jobId)['Status']
        if jobStatus in [2, 3, 4]:
            raise RuntimeError("Job is canceled or failed")
        print("Job {} is finished".format(jobId))

        return jobStatus
Code example #4
def createQueries(username, password, amount, table_name, context):
    CasJobs_Query = "SELECT top " + str(
        amount
    ) + " statement into [myscratch:default]." + table_name + " FROM sdssweblogs.SqlLog WHERE CHARINDEX('FROM PhotoObjAll', statement) > 0 AND CHARINDEX('p.dec BETWEEN', statement) > 0 AND CHARINDEX('p.ra BETWEEN', statement) > 0 AND access='Skyserver.Search.SQL'"
    login(username, password)

    jobId = CasJobs.submitJob(sql=CasJobs_Query, context=context)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    print(
        "Data has been saved on sciserver at context myscratch with tablename "
        + table_name)
Code example #5
def metadata_worker():
    while True:
        global consumer
        token = consumer.poll(0)
        # file_name = input("Input file name \n")
        if token is None:
            print("all done")
            break
        else:
            token = msgpack.unpackb(token, raw=True)
            # raw=True decodes msgpack strings as bytes, so compare with a bytes literal
            if token[0] == b'metadata':
                # reconstructed SQL: the original "INSERT Successful INTO TABLE ..." is not valid T-SQL
                CasJobs.executeQuery("INSERT INTO test_table VALUES ('Successful')",
                                     context='MyDB',
                                     format='json')
Code example #6
def testCasJobsQuery(token):
    sql="""select top 10 galaxyId,snapnum,stellarmass from MRIIscPlanck1"""

    queryResponse=CasJobs.executeQuery(sql,context="Henriques2015a",token=token)
    gals= pandas.read_csv(queryResponse,index_col=None)
    print(gals)
    return gals
Code example #7
 def test_CasJobs_getNumpyArrayFromQuery(self):
     #CasJobs.getNumpyArrayFromQuery
     array = CasJobs.getNumpyArrayFromQuery(queryString=CasJobs_TestQuery,
                                            context=CasJobs_TestDatabase)
     newArray = pandas.read_csv(StringIO(CasJobs_TestTableCSV),
                                index_col=None).to_numpy()  # .as_matrix() was removed in pandas 1.0
     self.assertEqual(array.all(), newArray.all())
Code example #8
File: retrieve.py  Project: Physarah/craftutils
def retrieve_sdss_photometry(ra: float, dec: float):
    """
    Retrieve SDSS photometry for a given field, in a 0.2 x 0.2 degree box centred on the passed
    coordinates. (Note: the width of the box is in RA degrees, not corrected for spherical distortion.)
    :param ra: Right Ascension of the centre of the desired field, in degrees.
    :param dec: Declination of the centre of the desired field, in degrees.
    :return: Retrieved photometry table, as a pandas dataframe, if successful; if not, None.
    """
    try:
        from SciServer import Authentication, CasJobs
    except ImportError:
        print("It seems that SciScript/SciServer is not installed, or not accessible to this environment. "
              "\nIf you wish to automatically download SDSS data, please install "
              "\nSciScript (https://github.com/sciserver/SciScript-Python); "
              "\notherwise, retrieve the data manually from "
              "\nhttp://skyserver.sdss.org/dr16/en/tools/search/sql.aspx")
        return None

    print(f"Querying SDSS DR16 archive for field centring on RA={ra}, DEC={dec}")
    user = keys['sciserver_user']
    password = keys["sciserver_pwd"]
    Authentication.login(UserName=user, Password=password)
    # Construct an SQL query to send to SciServer
    query = "SELECT objid,ra,dec"
    for f in sdss_filters:
        query += f",psfMag_{f},psfMagErr_{f},fiberMag_{f},fiberMagErr_{f},fiber2Mag_{f},fiber2MagErr_{f},petroMag_{f},petroMagErr_{f} "
    query += "FROM PhotoObj "
    query += f"WHERE ra BETWEEN {ra - 0.1} AND {ra + 0.1} "
    query += f"AND dec BETWEEN {dec - 0.1} AND {dec + 0.1} "
    print(f"Retrieving photometry from SDSS DR16 via SciServer for field at {ra}, {dec}...")
    df = CasJobs.executeQuery(sql=query, context='DR16')
    if len(df.index) == 0:
        df = None
    return df
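
A minimal usage sketch (hypothetical coordinates; assumes the module-level keys and sdss_filters globals used above are defined):

photometry = retrieve_sdss_photometry(ra=180.0, dec=0.5)
if photometry is not None:
    print(photometry[["objid", "ra", "dec"]].head())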
Code example #9
def createData(username, password, dec_low, dec_high, ra_low, ra_high,
               table_name, context, all_data):
    CasJobs_Database = "DR16"
    CasJobs_Query = "Select ra,dec into [myscratch:default]." + table_name + " from dr16.photoobjall "

    if (not all_data):
        CasJobs_Query += "where dec between " + str(dec_low) + " and " + str(
            dec_high) + " and ra between " + str(ra_low) + " and " + str(
                ra_high)

    login(username, password)

    jobId = CasJobs.submitJob(sql=CasJobs_Query, context=context)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    print(
        "Data has been saved on sciserver at context myscratch with tablename "
        + table_name)
Code example #10
 def download_query(self,
                    query,
                    table_name,
                    save_to='data',
                    context='MyDB'):
     """Perform a query based on the context given"""
     if context != 'MyDB':
         jobid = cj.submitJob(query, context=context)
         cj.waitForJob(jobid, verbose=True)
          job_status = cj.getJobStatus(jobid)
          # getJobStatus reports an integer 'Status'; 5 means the job finished successfully
          if job_status['Status'] != 5:
             raise CasJobsError(
                 'Error Performing the query, {}'.format(job_status))
     # Now the results are safely saved in myDB
     # You can go ahead and download by using a session
     self.download_csv(table_name)
     self._download_from_scidrive(table_name, save_to)
Code example #11
def testCasJobsQuery(token):
    sql="""select top 10 galaxyId,snapnum,stellarmass from MRIIscPlanck1"""

    gals= CasJobs.executeQuery(sql,context="Henriques2015a",format="pandas", token=token)
    print("PANDAS")
    print(gals)
    gals= CasJobs.executeQuery(sql,context="Henriques2015a",format="csv", token=token)
    print("CSV")
    print(gals)
    gals= CasJobs.executeQuery(sql,context="Henriques2015a",format="json", token=token)
    print("JSON")
    print(gals)
    gals= CasJobs.executeQuery(sql,context="Henriques2015a",format="readable", token=token)
    print("READABLE")
    gals=pandas.read_csv(gals)
    print(gals)
    gals= CasJobs.executeQuery(sql,context="Henriques2015a",format="blabal", token=token)
    print("ERROR")
    print(gals)
    return gals
Code example #12
 def load_sncoor(self,
                 sncoor,
                 sncoor_tname='sncoor',
                 xmatch_tname='xmatchres'):
     tables = CasJobs.getTables(context="MyDB")
     tablenames = [x['Name'] for x in tables]
     for tname in [sncoor_tname, xmatch_tname]:
         if tname in tablenames:
             self.delete_table(tname)
     self.sncoor_tname = sncoor_tname
     self.xmatch_tname = xmatch_tname
     self.sncoor = sncoor
Code example #13
 def test_CasJobs_writeFitsFileFromQuery(self):
     #CasJobs.getFitsFileFromQuery
     try:
         result = CasJobs.writeFitsFileFromQuery(
             fileName=CasJobs_TestFitsFile,
             queryString=CasJobs_TestQuery,
             context="MyDB")
         self.assertEqual(result, True)
         self.assertEqual(os.path.isfile(CasJobs_TestFitsFile), True)
     finally:
         try:
             os.remove(CasJobs_TestFitsFile)
         except:
             pass
Code example #14
    def test_CasJobs_uploadPandasDataFrameToTable_uploadCSVDataToTable(self):
        try:

            df = pandas.read_csv(StringIO(CasJobs_TestTableCSV),
                                 index_col=None)

            result = CasJobs.uploadPandasDataFrameToTable(
                dataFrame=df, tableName=CasJobs_TestTableName2, context="MyDB")
            table = CasJobs.executeQuery(sql="select * from " +
                                         CasJobs_TestTableName2,
                                         context="MyDB",
                                         format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            self.assertCountEqual(table, df)  # assertItemsEqual was renamed assertCountEqual in Python 3

            result = CasJobs.uploadCSVDataToTable(
                csvData=CasJobs_TestTableCSV,
                tableName=CasJobs_TestTableName2,
                context="MyDB")
            df2 = CasJobs.executeQuery(sql="select * from " +
                                       CasJobs_TestTableName2,
                                       context="MyDB",
                                       format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            self.assertCountEqual(df, df2)

        finally:
            try:
                csv = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            except:
                pass
Code example #15
 def test_CasJobs_cancelJob(self):
     jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase)
     isCanceled = CasJobs.cancelJob(jobId=jobId)
     self.assertEqual(isCanceled, True)
Code example #16
 def test_CasJobs_waitForJob(self):
     jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase)
     jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
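      # waitForJob returns once the job reaches status 3 (cancelled), 4 (failed) or 5 (finished)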
     self.assertGreaterEqual(jobDescription["Status"], 3)
Code example #17
# <li><em>Name:</em> the name of the table</li>
# <li><em>Rows:</em> the number of rows in the table</li>
# <li><em>Size:</em> the size of the table in kilobytes</li>
# </ul>
#
# The code cell gives two options for printing the list of tables: using Python's <code>pprint</code> library or using the <code>tables_formatted(tableList)</code> convenience function defined above. The convenience function sorts the list of tables alphabetically by name, and converts the dates into datetime values. Try commenting and uncommenting those lines in the code cell below to see what both options do.

# In[ ]:

#check_token = SciServer.Authentication.getToken()

my_token = SciServer.Authentication.login('portmanm', '******')  # password redacted
#this_context = "MyDB"    # Your MyDB
this_context = 'DR7'  # SDSS Data Release 7

tables = CasJobs.getTables(context=this_context)
#print('Tables in '+this_context+':\n')

#pprint(tables)   # Standard human-readable printing using Python's pprint module
# tables_formatted(tables)  # Sorting and better printing using a convenience function

# ## Run a quick query and get results directly
#
# Now that you know what contexts (datasets) are available to you, and you know what tables can be found in those contexts, you are ready to write and submit a query to that context. A query is a request for data, written in SQL (Structured Query Language), a programming language designed for efficient database searches.
#
# SkyServer features a <a href="http://skyserver.sdss.org/public/en/help/howto/search/searchhowtohome.aspx" target="_blank">tutorial for learning SQL</a>, as well as <a href="http://skyserver.sdss.org/public/en/help/docs/sql_help.aspx" target="_blank">tips for writing good queries</a> and a long list of <a href="http://skyserver.sdss.org/public/en/help/docs/realquery.aspx" target="_blank">sample queries</a> that you can adapt to create your queries (links open in new windows).
#
# Once you have written a query, you can get results by running (executing) it in CasJobs. To run a query in CasJobs directly from a Code cell in SciServer Compute, use the <strong><code>CasJobs.executeQuery(sql,...)</code></strong> function. The function takes as input a string containing a properly-formatted SQL query (and optional parameters listed below), and returns a table containing query results (in one of several formats with a default of a <a href="https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html" target="_blank">pandas dataframe</a>).
#
# The <em>sql</em> parameter is required. The <em>context</em> parameter is recommended to explicitly state the context to which the query will be submitted; the default value is 'MyDB'. For a full list of parameters taken by the <code>CasJobs.executeQuery(sql,...)</code> function, see <a href="http://www.sciserver.org/docs/sciscript-python/SciServer.html#module-SciServer.CasJobs" target="_blank">its documentation on the SciServer documentation site</a>.
#
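
# In[ ]:

# A minimal sketch of a direct query (hypothetical; assumes the login cell above
# has been run and reuses the DR7 context selected earlier):

quick_query = "SELECT TOP 5 objid, ra, dec FROM PhotoObj"
quick_df = CasJobs.executeQuery(sql=quick_query, context=this_context, format="pandas")
print(quick_df)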
Code example #18
 def test_CasJobs_getJobStatus(self):
     jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase)
     jobDescription = CasJobs.getJobStatus(jobId)
     self.assertEqual(jobDescription["JobID"], jobId)
Code example #19
 def test_CasJobs_getTables(self):
     tables = CasJobs.getTables(context="MyDB")
Code example #20
 def test_CasJobs_executeQuery(self):
     df = CasJobs.executeQuery(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase,
                               format="pandas")
     self.assertEqual(CasJobs_TestTableCSV, df.to_csv(index=False))
Code example #21
    FROM fGetNearbyObjEq(" + ra + ", " + dec + ", " + radius + ") n, Star p \
    LEFT OUTER JOIN specObj s ON s.bestObjID = p.objID \
    LEFT OUTER JOIN photoz pz ON pz.objid = p.objid \
    WHERE n.objID = p.objID AND(s.class IS NULL or s.class='STAR') "

# Query for galaxy with modelMag and cmodelMag and extinctions
q_gal = "SELECT p.ra, p.dec, p.modelMag_u, p.modelMagErr_u, p.modelMag_g, p.modelMagErr_g, p.modelMag_r, p.modelMagErr_r, p.modelMag_i, p.modelMagErr_i, p.modelMag_z, p.modelMagErr_z, \
    p.petroMag_u, p.petroMagErr_u, p.petroMag_g, p.petroMagErr_g, p.petroMag_r, p.petroMagErr_r, p.petroMag_i, p.petroMagErr_i, p.petroMag_z, p.petroMagErr_z, \
    p.cmodelMag_u, p.cmodelMagErr_u, p.cmodelMag_g, p.cmodelMagErr_g, p.cmodelMag_r, p.cmodelMagErr_r, p.cmodelMag_i, p.cmodelMagErr_i, p.cmodelMag_z, p.cmodelMagErr_z, \
    extinction_u, extinction_g, extinction_r, extinction_i, extinction_z, clean, s.z, s.zerr, pz.z as photoz, pz.zerr as photozerr \
    FROM fGetNearbyObjEq(" + ra + ", " + dec + ", " + radius + ") n, Galaxy p \
    LEFT OUTER JOIN specObj s ON s.bestObjID = p.objID \
    LEFT OUTER JOIN photoz pz ON pz.objid = p.objid \
    WHERE n.objID = p.objID AND(s.class IS NULL or s.class='GALAXY') "

star_data = CasJobs.executeQuery(q_star, context=context, format='pandas')
gal_data = CasJobs.executeQuery(q_gal, context=context, format='pandas')

star_tab = Table(star_data.values,
                 names=star_data.columns,
                 meta={
                     'clustername': clustername,
                     'ra': ra,
                     'dec': dec,
                     'radius': radius
                 },
                 dtype=[float for x in range(len(star_data.columns))])
gal_tab = Table(gal_data.values,
                names=gal_data.columns,
                meta={
                    'clustername': clustername,
Code example #22
File: photo.py  Project: ViskaWei/CountSketch2020
def main():
    try:
        os.mkdir(PRETRAIN)
    except FileExistsError:  # the directory already exists
        print('here we go!')
    if PRE_NORM:
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA,
                                                    SPEC_DATA,
                                                    base,
                                                    ftr,
                                                    wpath=PRETRAIN)
    if PRE_HH:
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        # np.savetxt(f'{PRETRAIN}/photo_stream.txt',photo_stream)
        # np.savetxt(f'{PRETRAIN}/spec_stream.txt',spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream,
                             base,
                             ftr_len,
                             dtype,
                             EXACT,
                             topk,
                             r=16,
                             d=1000000,
                             c=None,
                             device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
    print('photo_HH', photo_HH)
    print('spec_HH', spec_HH)

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        photo_uT = joblib.load(f'pretrain/photo_uT_b{base}.sav')

    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <=
                                                  base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
Code example #23
def main():
    try:
        os.mkdir(PRETRAIN)
    except FileExistsError:
        print('lets GO')
    print(PREPRO_CUTOFF, PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
          PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI)
    if PREPRO_NORM:
        print(
            f'=================LOADING N={num} Smoothing {ISSMTH} ================='
        )
        data1Ds, pc = process_dataset_pc(data_dir, num, pca_comp, ISSMTH, SMTH,
                                         TEST)
        intensity, pca_results = process_pca(data1Ds, pc, num)
        # df_pca=pd.DataFrame(pca_results, columns=list(range(pca_comp)))
        df_norm, mask, ftr_len0 = process_intensity(pca_results,
                                                    intensity,
                                                    pca_comp,
                                                    PREPRO_CUTOFF,
                                                    ONPCA,
                                                    ONINT,
                                                    r=0.01,
                                                    wdir=PRETRAIN)
        assert ftr_len0 == ftr_len
        mask2d = mask.reshape((num, 1004 * 1344))
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/mask_all.txt', mask)
        else:
            mask0 = mask2d[pidx]
            idxii = int(mask2d[:pidx].sum())
            idxjj = int(mask2d[:(pidx + 1)].sum())
            assert idxjj - idxii == mask0.sum()
            print(mask0.shape, mask.sum(), 'saving mask')
            np.savetxt(f'{PRETRAIN}/mask{pidx}.txt', mask0)
        # df_norm.to_csv(f'{PRETRAIN}/df_norm.csv',index=False)
        # df_normt=df_norm[idxii:idxjj]


#         df_normt.to_csv(f'{PRETRAIN}/df_norm{pidx}.csv',index=False)
# elif PREPRO_STREAM:
#     print(f'=================LOADING df_norm =================')
#     df_norm=pd.read_csv(f'{PRETRAIN}/df_norm.csv')

    if PREPRO_STREAM:
        print(f'=================ENCODING Base={base} =================')
        stream = process_rebin(df_norm, base, dtype)
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/stream_b{base}.txt', stream)
        else:
            stream0 = stream[idxii:idxjj]
            np.savetxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt', stream0)
    elif PREPRO_HH:
        print(f'=================LOADING STREAM =================')
        stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        if not PREDICT_ALL:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
    if PREPRO_HH:
        assert EXACT == 0
        topk = 20000
        print(f'=================DECODE {ftr_len} DIM =================')
        HH_pd = get_HH_pd(stream,
                          base,
                          ftr_len,
                          dtype,
                          EXACT,
                          topk,
                          r=16,
                          d=1000000,
                          c=None,
                          device=None)
        HH_pd.to_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_UMAP:
        print(f'=================LOADING HH_pd==============')
        HH_pd = pd.read_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv')
        print(HH_pd.head())

    if PREPRO_UMAP:
        print(f'=================GETTING UMAP =================')
        # # lb,ub=int(HH_pd['freq'][0]*lbr),int(HH_pd['freq'][0])
        # HH_pdc=HH_pd[HH_pd['freq']>lb]
        # # print(len(HH_pdc),len(HH_pd),HH_pd['freq'][0],'lb',lb,'HHratio',lbr)
        # if len(HH_pdc)>20000:
        HH_pdc = HH_pd[:20000]
        print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0])
        print(f'=================LOADING HH_pd==============')
        umapT = get_umap_pd(HH_pdc, list(range(ftr_len)))
        # print(HH_pdc.keys())
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_KMEAN:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREPRO_KMEAN:
        print(f'=================KMEAN CLUSTERING =================')
        kmap = get_kmean_lbl(HH_pdc, N_cluster, u1='u1', u2='u2')
        joblib.dump(kmap, f'{PRETRAIN}/kmap_k{N_cluster}e{EXACT}.sav')
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    else:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREDICT_ALL:
        print(f'=================PREDICTING ALL {num} LABEL==============')
        if not PREPRO_NORM: mask = np.loadtxt(f'{PRETRAIN}/mask_all.txt')
        if not PREPRO_HH: stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        pred_k = get_pred_stream(stream,
                                 mask,
                                 HH_pdc,
                                 f'k{N_cluster}',
                                 val='HH',
                                 bg=0,
                                 color=0,
                                 sgn=1)
        pred_k = pred_k.reshape((num, 1004, 1344))
        print(
            f'=================SAVING PREDICTION of ALL {num} LABEL=============='
        )
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}e{EXACT}.txt', pred_k)
    else:
        print(f'=================PREDICTING id{pidx} LABEL==============')
        if not PREPRO_NORM: mask0 = np.loadtxt(f'{PRETRAIN}/mask{pidx}.txt')
        if not PREPRO_HH:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
        pred_k = get_pred_stream(stream0,
                                 mask0,
                                 HH_pdc,
                                 f'k{N_cluster}',
                                 val='HH',
                                 bg=0,
                                 color=0,
                                 sgn=1)
        pred_k = pred_k.reshape((1004, 1344))
        print(
            f'=================SAVING PREDICTION of id{pidx} LABEL=============='
        )
        np.savetxt(
            f'{PRETRAIN}/pred_k{N_cluster}{pidx}_f{name}b{base}sm1c3sige{EXACT}.txt',
            pred_k)

    if UPLOAD_SCI:
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=HH_pdc,
            tableName=f'b{base}sm{SMTH}f{name}sig3e{EXACT}_v1',
            context="MyDB")
Code example #24
File: #downloadSDSS.py  Project: pedfx/bidhu
import matplotlib.pyplot as plt
import numpy as np
import pandas
import skimage.io
import SciServer.CasJobs as CasJobs
import SciServer.SciDrive

query = """
select top 16 g.objId,g.ra,g.dec,g.petror90_r, dbo.fGetUrlFitsCFrame(F.FIELDiD,'r') as fieldURL
from galaxy g
  ,  field f
where f.run=g.run
  and f.camcol=g.camcol
  and f.field=g.field
  and f.rerun=g.rerun
"""
# query CasJobs table. Using DR12 as context
queryResponse = CasJobs.executeQuery(query, "DR12")
# parse results into pandas.DataFrame for further in memory processing
gals = pandas.read_csv(queryResponse, index_col=None)
gals['objId'] = gals['objId'].astype(np.int64)

width = 200
height = 200
pixelsize = 0.396
plt.figure(figsize=(15, 15))
subPlotNum = 1

for index, gal in gals.iterrows():
    scale = 2 * gal['petror90_r'] / pixelsize / width
    url = "http://skyservice.pha.jhu.edu/DR12/ImgCutout/getjpeg.aspx?ra=" + str(
        gal['ra'])
    url += "&dec=" + str(
Code example #25
File: Photo_Z.py  Project: CheerfulUser/YSE_PZ
	def do(self,user='******',password='******',search=1,path_to_model='YSE_App/data_ingest/YSE_DNN_photoZ_model_315.hdf5'):
		"""
		Predicts photometric redshifts from RA and DEC points in SDSS

		An outline of the algorithm is:

		first pull u,g,r,i,z magnitudes from SDSS;
			should be able to handle a list/array of RA and DEC

		place u,g,r,i,z into a vector, append the derived information into the data array

		predict the information from the model

		return the predictions in the same order to the user

		inputs:
			Ra: list or array of len N, right ascensions of target galaxies in decimal degrees
			Dec: list or array of len N, declination of target galaxies in decimal degrees
			search: float, arcmin tolerance to search for the object in SDSS Catalogue
			path_to_model: str, filepath to saved model for prediction
		
		Returns:
			predictions: array of len N, photometric redshift of input galaxy

		"""

		try:
			nowdate = datetime.datetime.utcnow() - datetime.timedelta(1)
			from django.db.models import Q #HAS To Remain Here, I dunno why
			print('Entered the photo_z cron')		
			#save time b/c the other cron jobs print a time for completion

			transients = (Transient.objects.filter(Q(host__photo_z__isnull=True) & Q(host__isnull=False)))

			#print('Number of test transients:', len(transients))
			RA=[] #Needs to be list b/c don't know how many hosts are None
			DEC=[]
			outer_mask = [] #create an integer index mask that we will place values into, because some hosts don't have an RA and DEC assigned
			transients_withhost = []
			
			for i,transient_obj in enumerate(transients):
				if transient_obj.host != None:
					RA.append(transient_obj.host.ra)
					DEC.append(transient_obj.host.dec)
					outer_mask.append(i) #provides integer index mask

			outer_mask = np.array(outer_mask) #make that an array

			N_outer = len(transients) #gives size of returned array

			Ra = np.array(RA)
			Dec = np.array(DEC)

			N = len(Ra)#gives size of query array
			Q = N//1000#decompose the length of transients needing classification

			if N%1000 != 0: 
				Q=Q+1 #catch remainder and start a new batch
			total_job = [] #store all pandas job dataframes
			for j in range(Q): #iterate over batches
				if j == (Q-1):
					Ra_batch = Ra[j*1000:((j+1)*1000 + N%1000)] #grab batch remainder
					Dec_batch = Dec[j*1000:((j+1)*1000 + N%1000)]
				else:
					Ra_batch = Ra[j*1000:(j+1)*1000] #otherwise grab a batch of 1000
					Dec_batch = Dec[j*1000:(j+1)*1000]

				hold=[] #a list for holding the strings that I want to place into an sql query
				for val in range(len(Ra_batch)):
					string = '({},{},{}),|'.format(str(val),str(Ra_batch[val]),str(Dec_batch[val]))
					hold.append(string)

				#Now construct the full query
				sql = "CREATE TABLE #UPLOAD(|id INT PRIMARY KEY,|up_ra FLOAT,|up_dec FLOAT|)|INSERT INTO #UPLOAD|	VALUES|"
				for data in hold:
					sql = sql + data
				#there is a comma that needs to be deleted from the last entry for syntax to work
				sql = sql[0:(len(sql)-2)] + '|'
				#append the rest to it
				sql = sql + "SELECT|p.u,p.g,p.r,p.i,p.z,p.extinction_u,p.extinction_g,p.extinction_r,p.extinction_i,p.extinction_z,p.petroRad_u,p.petroRad_g,p.petroRad_r,p.petroRad_i,p.petroRad_z,p.petroR50_r,p.petroR90_r,zi.e_bv_sfd|FROM #UPLOAD as U|OUTER APPLY dbo.fGetNearestObjEq((U.up_ra),(U.up_dec),{}) as N|LEFT JOIN PhotoObjAll AS p ON N.objid=p.objID|JOIN SpecObjAll za on (p.objID = za.bestObjID)|JOIN galSpecInfo zi ON (zi.SpecObjID = za.specObjID)".format(str(search))
				#change all | to newlines; when we move to a Unix system this will need to change
				sql = sql.replace('|','\n')
				#login, change to some other credentials later
				Authentication.login('awe2','StandardPassword')
				job = CasJobs.executeQuery(sql,'DR12','pandas') #this line sends and retrieves the result
				print('Query {} of {} complete'.format(j+1,Q))
				job['dered_u'] = job['u'].values - job['extinction_u'].values
				job['dered_g'] = job['g'].values - job['extinction_g'].values
				job['dered_r'] = job['r'].values - job['extinction_r'].values
				job['dered_i'] = job['i'].values - job['extinction_i'].values
				job['dered_z'] = job['z'].values - job['extinction_z'].values
				
				job['u-g']= job['dered_u'].values - job['dered_g'].values
				job['g-r']= job['dered_g'].values - job['dered_r'].values
				job['r-i']= job['dered_r'].values - job['dered_i'].values
				job['i-z']= job['dered_i'].values - job['dered_z'].values
				job['u_over_z']= job['dered_u'].values / job['dered_z'].values
				
				job['C'] = job['petroR90_r'].values/job['petroR50_r'].values
				total_job.append(job)

			print('left the query loop')
			query_result = pd.concat(total_job)
			#now feed to a RF model for prediction
			X = query_result[['dered_u','dered_g','dered_r','dered_i','dered_z','u-g','g-r','r-i','i-z','u_over_z','petroRad_u','petroRad_g','petroRad_r','petroRad_i','petroRad_z','petroR50_r','petroR90_r','C','e_bv_sfd']].values
			print(X.shape)
			#define and load in the model
			def create_model(learning_rate):
    
				model = keras.Sequential([])
				model.add(keras.layers.Dense(input_shape=(19,),units=19,activation=keras.activations.linear)) #tried relu
				#model.add(keras.layers.Dropout(rate=0.1))
				model.add(keras.layers.Dense(units=19,activation=tf.nn.relu)) 
				#model.add(keras.layers.Dropout(rate=0.1))
				model.add(keras.layers.Dense(units=19,activation=tf.nn.relu))
				#model.add(keras.layers.Dropout(rate=0.1))
				model.add(keras.layers.Dense(units=19,activation=tf.nn.relu)) #tf.nn.relu
				#model.add(keras.layers.Dropout(rate=0.1))
				model.add(keras.layers.Dense(units=315,activation=keras.activations.softmax))
				
				#RMS = keras.optimizers.RMSprop(learning_rate=learning_rate)
				adam = keras.optimizers.Adam(lr=learning_rate)
				model.compile(optimizer=adam, loss='categorical_crossentropy') 
				return(model)

			keras.backend.clear_session()
			model = create_model(learning_rate = 1e-3)#couldve been anything for this, just gonna predict
			model.load_weights(path_to_model)
			
			#Need to deal with NANs now since many objects are outside the SDSS footprint, later models will learn to deal with this
			#ideas: need to retain a mask of where the nans are in the row
			mask = np.invert((query_result.isna().any(1).values)) #true was inside SDSS footprint
			#also will want this mask in indices so we can insert the predicted data correctly
			indices=[]
			for i,val in enumerate(mask):
				if val == True:
					indices.append(i)
			
			#predict on data that is not NAN
			predictions = model.predict(X[mask,:], verbose=2)			
			
			#make nan array with size of what user asked for
			return_me = np.ones(N)*np.nan
			#now replace nan with the predictions in order
			return_me[indices] = predictions
			return_me_outer = np.ones(N_outer) * np.nan
			return_me_outer[outer_mask] = return_me
			print('time taken:', datetime.datetime.utcnow() - nowdate)
			print('uploading now')
			tz,mpz = [],[]
			for t,pz in zip(transients,return_me):
				if pz != pz: continue
				host = t.host
				#import pdb; pdb.set_trace()
				host.photo_z = pz
				host.save()
				tz += [host.redshift]
				mpz += [pz]
			plt.plot(tz,mpz,'.')
			plt.savefig('test.png')

			print('time taken with upload:', datetime.datetime.utcnow() - nowdate)
		except Exception as e:
			exc_type, exc_obj, exc_tb = sys.exc_info()
			print("""Photo-z cron failed with error %s at line number %s"""%(e,exc_tb.tb_lineno))
Code example #26
 def test_CasJobs_getPandasDataFrameFromQuery(self):
     #CasJobs.getPandasDataFrameFromQuery
     df = CasJobs.getPandasDataFrameFromQuery(queryString=CasJobs_TestQuery,
                                              context=CasJobs_TestDatabase)
     self.assertEqual(df.to_csv(index=False), CasJobs_TestTableCSV)
Code example #27
File: skyserver.py  Project: ViskaWei/CountSketch2020
import getpass
import umap
import os
from collections import Counter
import seaborn as sns
import warnings
warnings.simplefilter("ignore")
from SciServer import Authentication, CasJobs
username = '******'

password = getpass.getpass()
sciserver_token = Authentication.login(username, password)
sciserver_token
# https://github.com/sciserver/SciScript-Python/blob/master/Examples/Examples_SciScript-Python.ipynb

result = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                              tableName=CasJobs_TestTableName2,
                                              context="MyDB")
sql_n, ii = 20, 0
sql0 = """
       select
               {}
              objid, class, subclass, u-g as ug, g-r as gr, r-i as ri, i-z as iz, 
              u-r as ur, g-i as gi, r-z as rz,
              u-i as ui, g-z as gz, u-z as uz,  (z-15)/7 as z 
from (
       SELECT p.objid, s.class AS class, s.SUBCLASS AS subclass,          
            p.psfMag_u AS u, 
            p.psfMag_g AS g, 
            p.psfMag_r AS r, 
            p.psfMag_i AS i, 
            p.psfMag_z AS z
Code example #28
def testTurbulence():
    token=testTurbulenceToken()
    print("turbulence token = ",token)
    schemaName = CasJobs.getSchemaName(token)
    print("schemaname=",schemaName)
Code example #29
 def submit_job(self, querystr, context="DR15"):
     jobId = CasJobs.submitJob(sql=querystr, context=context)
     print("Query Job is submitted. JobID={}".format(jobId))
     return jobId
Code example #30
password = "******"

token = Authentication.login(username, password)

user = Authentication.getKeystoneUserWithToken(token)

#csv = open("./metadata_table.csv", 'r')
#csv_str = csv.read()
#csv.close()

#success = CasJobs.uploadCSVDataToTable(csv_str, "osc_metadata")

#print(success)

query1 = "create table osc_metadata (FileName varchar(255), FileSize varchar(255))"
CasJobs.executeQuery(sql=query1, format='json')

# get file name and size
f = argv[1]
f_name = os.path.basename(f)
f_size = os.path.getsize(f)

print(f_name)

print(f_size)

# upload data to table
query2 = "insert into osc_metadata (FileName, FileSize) values (" + "'" + f_name[
    0:2] + "''" + f_name[2:3] + "''" + f_name[3:12] + "''" + f_name[
        12] + "''" + f_name[13:] + "'" + ", " + "'" + str(f_size) + "'" + ")"
Code example #31
File: #downloadSDSS.py  Project: pedfx/BIDHU
import matplotlib.pyplot as plt
import numpy as np
import pandas
import skimage.io
import SciServer.CasJobs as CasJobs
import SciServer.SciDrive


query = """
select top 16 g.objId,g.ra,g.dec,g.petror90_r, dbo.fGetUrlFitsCFrame(F.FIELDiD,'r') as fieldURL
from galaxy g
  ,  field f
where f.run=g.run
  and f.camcol=g.camcol
  and f.field=g.field
  and f.rerun=g.rerun
"""
# query CasJobs table. Using DR12 as context
queryResponse = CasJobs.executeQuery(query, "DR12")
# parse results into pandas.DataFrame for further in memory processing
gals = pandas.read_csv(queryResponse, index_col=None)
gals['objId'] = gals['objId'].astype(np.int64)

width = 200
height = 200
pixelsize = 0.396
plt.figure(figsize=(15, 15))
subPlotNum = 1

for index, gal in gals.iterrows():
	scale = 2 * gal['petror90_r'] / pixelsize / width
	url = "http://skyservice.pha.jhu.edu/DR12/ImgCutout/getjpeg.aspx?ra=" + str(gal['ra'])
	url += "&dec=" + str(gal['dec']) + "&scale=""" + str(scale) + "&width=" + str(width)
	url += "&height=" + str(height)
Code example #32
def testCasJobsTables():
    schema= CasJobs.getSchemaName()
    print("schema=",schema)
    tables= CasJobs.getTables("MyDB")
    print(tables)
Code example #33
File: split_data.py  Project: tsfraser/HackTheBubble
# Don't show python warnings
warnings.filterwarnings('ignore')

print("SciServer library imported")
print('Supporting libraries imported')

print('Settings applied')

# This query finds all objects from galSpecInfo
# with extra data pulled from PhotoObjAll.
query = """
SELECT g.specObjID, p.type, p.u, p.g, p.r, p.i, p.z, g.spectrotype
FROM galSpecInfo AS g
    JOIN PhotoObjAll as p ON p.specObjID = g.specObjID
ORDER BY g.specObjID
"""

# Then, query the database. The answer is a table that is
# being returned as a dataframe.
q = CasJobs.executeQuery(query, "dr14")

# Check data and drop any row that has NaN.
q = q.dropna()
# Check that all types in columns are as expected.
q.info()

# Write to file.
q.to_csv("result.csv")

print("SQL query finished.")
Code example #34
def testUploadDataFrame(df,token, tablename):
    response= CasJobs.uploadPandasDataFrameToTable(df,tablename,token=token)
Code example #35
#Defining database context and query, and other variables

CasJobs_TestDatabase = "MyDB"
CasJobs_TestQuery = "select 4 as Column1, 5 as Column2 "
CasJobs_TestTableName1 = "MyNewtable1"
CasJobs_TestTableName2 = "MyNewtable2"
CasJobs_TestTableCSV = u"Column1,Column2\n4,5\n"
CasJobs_TestFitsFile = "SciScriptTestFile.fits"
CasJobs_TestCSVFile = "SciScriptTestFile.csv"


# In[ ]:

#get user schema info

casJobsId = CasJobs.getSchemaName()
print(casJobsId)


# In[ ]:

#get info about tables inside MyDB database context:

tables = CasJobs.getTables(context="MyDB")
print(tables)


# In[ ]:

#execute a quick SQL query:
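
# A minimal sketch of what this cell might contain (hypothetical; it reuses the
# test variables defined above):

df = CasJobs.executeQuery(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase, format="pandas")
print(df)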
Code example #36
f.peak,f.integr,f.rms
FROM SpecPhoto AS sp
JOIN wise_xmatch AS xm ON sp.objid = xm.sdss_objid
JOIN FIRST AS f ON sp.objid = f.objid
JOIN wise_allsky AS w ON xm.wise_cntr = w.cntr
WHERE
(sp.psfmag_r-sp.extinction_r > 0)
"""

#quick synchronous job - probably isn't what you want for any meaningful runs:
#queryResponse = CasJobs.executeQuery(query, "dr14", format='pandas')
#gals = queryResponse

#check tables in myDB
tname = 'test'
data = CasJobs.getTables('MyDB')
tablenames = [t['Name'] for t in data]
if tname in tablenames:
    print(data)
    print(
        'There is already a table in your database under that name; removing it before doing the new query...'
    )
    #CLEAN UP: delete table from myDB
    SkyQuery.dropTable(tableName=tname, datasetName='myDB')

#long asynchronous job
print(
    'submitting SQL job to SciServer... could take some time depending on query length and SciServer load...'
)
# T-SQL requires the INTO clause between the SELECT list and FROM, so splice it in before the first FROM
jobID = CasJobs.submitJob(query.replace("FROM", "into myDB." + tname + " FROM", 1), "dr14")
CasJobs.waitForJob(jobID, verbose=True)
Code example #37
 def test_CasJobs_getSchemaName(self):
     casJobsId = CasJobs.getSchemaName()
     self.assertNotEqual(casJobsId, "")