def test_CasJobs_submitJob(self):
    jobId = CasJobs.submitJob(sql=CasJobs_TestQuery + " into MyDB." + CasJobs_TestTableName1,
                              context=CasJobs_TestDatabase)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    df = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName1,
                              context="MyDB", format="csv")
    self.assertNotEqual(jobId, "")
def testCasJobsSubmit(token):
    tbl = "mriiscplanck1_" + str(randint(0, 1000000))
    sql = """select top 10 galaxyId,snapnum,stellarmass into """ + tbl + """ from MRIIscPlanck1"""
    jobId = CasJobs.submitJob(sql, context="Henriques2015a", token=token)
    print("jobId=", jobId)
    jobDesc = CasJobs.waitForJob(jobId)
    print(jobDesc)
def wait_jobdone(self, jobId):
    # CasJobs job status codes: 0/1 = ready/started, 2/3/4 = canceling/cancelled/failed, 5 = finished
    jobStatus = CasJobs.getJobStatus(jobId)['Status']
    while jobStatus < 2:
        time.sleep(10)
        jobStatus = CasJobs.getJobStatus(jobId)['Status']
    if jobStatus in [2, 3, 4]:
        raise RuntimeError("Job is canceled or failed")
    print("Job {} is finished".format(jobId))
    return jobStatus
def createQueries(username, password, amount, table_name, context):
    CasJobs_Query = ("SELECT top " + str(amount) + " statement into [myscratch:default]." + table_name +
                     " FROM sdssweblogs.SqlLog"
                     " WHERE CHARINDEX('FROM PhotoObjAll', statement) > 0"
                     " AND CHARINDEX('p.dec BETWEEN', statement) > 0"
                     " AND CHARINDEX('p.ra BETWEEN', statement) > 0"
                     " AND access='Skyserver.Search.SQL'")
    login(username, password)
    jobId = CasJobs.submitJob(sql=CasJobs_Query, context=context)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    print("Data has been saved on sciserver at context myscratch with tablename " + table_name)
def metadata_worker():
    global consumer
    while True:
        token = consumer.poll(0)
        # file_name = input("Input file name \n")
        if token is None:
            break
        else:
            token = msgpack.unpackb(token, raw=True)
            if token[0] == 'metadata':
                # note: this statement is not valid T-SQL as written
                CasJobs.executeQuery("INSERT Successful INTO TABLE test_table",
                                     context='MyDB', format='json')
    print("all done")
def testCasJobsQuery(token):
    sql = """select top 10 galaxyId,snapnum,stellarmass from MRIIscPlanck1"""
    queryResponse = CasJobs.executeQuery(sql, context="Henriques2015a", token=token)
    gals = pandas.read_csv(queryResponse, index_col=None)
    print(gals)  # Python 3 print; the original used the Python 2 statement form
    return gals
def test_CasJobs_getNumpyArrayFromQuery(self):
    #CasJobs.getNumpyArrayFromQuery
    array = CasJobs.getNumpyArrayFromQuery(queryString=CasJobs_TestQuery,
                                           context=CasJobs_TestDatabase)
    # to_numpy() replaces DataFrame.as_matrix(), which was removed in pandas 1.0
    newArray = pandas.read_csv(StringIO(CasJobs_TestTableCSV), index_col=None).to_numpy()
    self.assertEqual(array.all(), newArray.all())
def retrieve_sdss_photometry(ra: float, dec: float):
    """
    Retrieve SDSS photometry for a given field, in a 0.2 x 0.2 degree box centred on the passed coordinates.
    (Note - the width of the box is in RA degrees, not corrected for spherical distortion)
    :param ra: Right Ascension of the centre of the desired field, in degrees.
    :param dec: Declination of the centre of the desired field, in degrees.
    :return: Retrieved photometry table, as a pandas dataframe, if successful; if not, None.
    """
    try:
        from SciServer import Authentication, CasJobs
    except ImportError:
        print("It seems that SciScript/SciServer is not installed, or not accessible to this environment. "
              "\nIf you wish to automatically download SDSS data, please install "
              "\nSciScript (https://github.com/sciserver/SciScript-Python); "
              "\notherwise, retrieve the data manually from "
              "\nhttp://skyserver.sdss.org/dr16/en/tools/search/sql.aspx")
        return None

    print(f"Querying SDSS DR16 archive for field centred on RA={ra}, DEC={dec}")
    user = keys['sciserver_user']
    password = keys["sciserver_pwd"]
    Authentication.login(UserName=user, Password=password)
    # Construct an SQL query to send to SciServer
    query = "SELECT objid,ra,dec"
    for f in sdss_filters:
        query += f",psfMag_{f},psfMagErr_{f},fiberMag_{f},fiberMagErr_{f},fiber2Mag_{f},fiber2MagErr_{f},petroMag_{f},petroMagErr_{f} "
    query += "FROM PhotoObj "
    query += f"WHERE ra BETWEEN {ra - 0.1} AND {ra + 0.1} "
    query += f"AND dec BETWEEN {dec - 0.1} AND {dec + 0.1} "
    print(f"Retrieving photometry from SDSS DR16 via SciServer for field at {ra}, {dec}...")
    df = CasJobs.executeQuery(sql=query, context='DR16')
    if len(df.index) == 0:
        df = None
    return df
def createData(username, password, dec_low, dec_high, ra_low, ra_high, table_name, context, all_data):
    CasJobs_Database = "DR16"
    CasJobs_Query = "Select ra,dec into [myscratch:default]." + table_name + " from dr16.photoobjall "
    if not all_data:
        CasJobs_Query += ("where dec between " + str(dec_low) + " and " + str(dec_high) +
                          " and ra between " + str(ra_low) + " and " + str(ra_high))
    login(username, password)
    jobId = CasJobs.submitJob(sql=CasJobs_Query, context=context)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    print("Data has been saved on sciserver at context myscratch with tablename " + table_name)
def download_query(self, query, table_name, save_to='data', context='MyDB'):
    """Perform a query based on the context given"""
    if context != 'MyDB':
        jobid = cj.submitJob(query, context=context)
        cj.waitForJob(jobid, verbose=True)
        job_status = cj.getJobStatus(jobid)
        # status code 5 means the job finished successfully; the original checked
        # a 'Success' key, which getJobStatus does not return
        if job_status['Status'] != 5:
            raise CasJobsError('Error Performing the query, {}'.format(job_status))
    # Now the results are safely saved in myDB
    # You can go ahead and download by using a session
    self.download_csv(table_name)
    self._download_from_scidrive(table_name, save_to)
def testCasJobsQuery(token):
    sql = """select top 10 galaxyId,snapnum,stellarmass from MRIIscPlanck1"""
    gals = CasJobs.executeQuery(sql, context="Henriques2015a", format="pandas", token=token)
    print("PANDAS")
    print(gals)
    gals = CasJobs.executeQuery(sql, context="Henriques2015a", format="csv", token=token)
    print("CSV")
    print(gals)
    gals = CasJobs.executeQuery(sql, context="Henriques2015a", format="json", token=token)
    print("JSON")
    print(gals)
    gals = CasJobs.executeQuery(sql, context="Henriques2015a", format="readable", token=token)
    print("READABLE")
    gals = pandas.read_csv(gals)
    print(gals)
    # deliberately unsupported format name, to exercise the error path
    gals = CasJobs.executeQuery(sql, context="Henriques2015a", format="blabal", token=token)
    print("ERROR")
    print(gals)
    return gals
def load_sncoor(self, sncoor, sncoor_tname='sncoor', xmatch_tname='xmatchres'):
    tables = CasJobs.getTables(context="MyDB")
    tablenames = [x['Name'] for x in tables]
    for tname in [sncoor_tname, xmatch_tname]:
        if tname in tablenames:
            self.delete_table(tname)
    self.sncoor_tname = sncoor_tname
    self.xmatch_tname = xmatch_tname
    self.sncoor = sncoor
def test_CasJobs_writeFitsFileFromQuery(self):
    #CasJobs.getFitsFileFromQuery
    try:
        result = CasJobs.writeFitsFileFromQuery(fileName=CasJobs_TestFitsFile,
                                                queryString=CasJobs_TestQuery,
                                                context="MyDB")
        self.assertEqual(result, True)
        self.assertEqual(os.path.isfile(CasJobs_TestFitsFile), True)
    finally:
        try:
            os.remove(CasJobs_TestFitsFile)
        except OSError:
            pass
def test_CasJobs_uploadPandasDataFrameToTable_uploadCSVDataToTable(self):
    try:
        df = pandas.read_csv(StringIO(CasJobs_TestTableCSV), index_col=None)
        result = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                                      tableName=CasJobs_TestTableName2,
                                                      context="MyDB")
        table = CasJobs.executeQuery(sql="select * from " + CasJobs_TestTableName2,
                                     context="MyDB", format="pandas")
        result2 = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName2,
                                       context="MyDB", format="csv")
        self.assertEqual(result, True)
        # assertCountEqual is the Python 3 name for the Python 2 assertItemsEqual
        self.assertCountEqual(table, df)

        result = CasJobs.uploadCSVDataToTable(csvData=CasJobs_TestTableCSV,
                                              tableName=CasJobs_TestTableName2,
                                              context="MyDB")
        df2 = CasJobs.executeQuery(sql="select * from " + CasJobs_TestTableName2,
                                   context="MyDB", format="pandas")
        result2 = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName2,
                                       context="MyDB", format="csv")
        self.assertEqual(result, True)
        self.assertCountEqual(df, df2)
    finally:
        try:
            csv = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName2,
                                       context="MyDB", format="csv")
        except Exception:
            pass
def test_CasJobs_cancelJob(self):
    jobId = CasJobs.submitJob(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase)
    isCanceled = CasJobs.cancelJob(jobId=jobId)
    self.assertEqual(isCanceled, True)
def test_CasJobs_waitForJob(self):
    jobId = CasJobs.submitJob(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase)
    jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
    self.assertGreaterEqual(jobDescription["Status"], 3)
# <li><em>Name:</em> the name of the table</li>
# <li><em>Rows:</em> the number of rows in the table</li>
# <li><em>Size:</em> the size of the table in kilobytes</li>
# </ul>
#
# The code cell gives two options for printing the list of tables: using Python's <code>pprint</code> library or using the <code>tables_formatted(tableList)</code> convenience function defined above. The convenience function sorts the list of tables alphabetically by name, and converts the dates to datetime values. Try uncommenting and commenting those lines in the code cell below to see what both options do.

# In[ ]:

#check_token = SciServer.Authentication.getToken()
my_token = SciServer.Authentication.login('portmanm', '******')

#this_context = "MyDB"  # Your MyDB
this_context = 'DR7'  # SDSS Data Release 7

tables = CasJobs.getTables(context=this_context)

#print('Tables in '+this_context+':\n')
#pprint(tables)  # Standard human-readable printing using Python's pprint module
#tables_formatted(tables)  # Sorting and better printing using a convenience function

# ## Run a quick query and get results directly
#
# Now that you know what contexts (datasets) are available to you, and you know what tables can be found in those contexts, you are ready to write and submit a query to that context. A query is a request for data, written in SQL (Structured Query Language), a programming language designed for efficient database searches.
#
# SkyServer features a <a href="http://skyserver.sdss.org/public/en/help/howto/search/searchhowtohome.aspx" target="_blank">tutorial for learning SQL</a>, as well as <a href="http://skyserver.sdss.org/public/en/help/docs/sql_help.aspx" target="_blank">tips for writing good queries</a> and a long list of <a href="http://skyserver.sdss.org/public/en/help/docs/realquery.aspx" target="_blank">sample queries</a> that you can adapt to create your own queries (links open in new windows).
#
# Once you have written a query, you can get results by running (executing) it in CasJobs. To run a query in CasJobs directly from a Code cell in SciServer Compute, use the <strong><code>CasJobs.executeQuery(sql,...)</code></strong> function. The function takes as input a string containing a properly-formatted SQL query (and optional parameters listed below), and returns a table containing query results (in one of several formats with a default of a <a href="https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html" target="_blank">pandas dataframe</a>).
#
# The <em>sql</em> parameter is required. The <em>context</em> parameter is recommended to explicitly state the context to which the query will be submitted; the default value is 'MyDB'. For a full list of parameters taken by the <code>CasJobs.executeQuery(sql,...)</code> function, see <a href="http://www.sciserver.org/docs/sciscript-python/SciServer.html#module-SciServer.CasJobs" target="_blank">its documentation on the SciServer documentation site</a>.
#
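# As a minimal sketch of such a call (assuming the login cell above succeeded; the
# TOP 5 query against PhotoObj below is illustrative, not part of the original notebook):

# In[ ]:

quick_query = "SELECT TOP 5 objID, ra, dec FROM PhotoObj"
quick_df = CasJobs.executeQuery(sql=quick_query, context=this_context, format="pandas")
print(quick_df)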
def test_CasJobs_getJobStatus(self):
    jobId = CasJobs.submitJob(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase)
    jobDescription = CasJobs.getJobStatus(jobId)
    self.assertEqual(jobDescription["JobID"], jobId)
def test_CasJobs_getTables(self):
    tables = CasJobs.getTables(context="MyDB")
def test_CasJobs_executeQuery(self):
    df = CasJobs.executeQuery(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase,
                              format="pandas")
    self.assertEqual(CasJobs_TestTableCSV, df.to_csv(index=False))
            FROM fGetNearbyObjEq(" + ra + ", " + dec + ", " + radius + ") n, Star p \
            LEFT OUTER JOIN specObj s ON s.bestObjID = p.objID \
            LEFT OUTER JOIN photoz pz ON pz.objid = p.objid \
            WHERE n.objID = p.objID AND (s.class IS NULL or s.class='STAR') "

    # Query for galaxy with modelMag and cmodelMag and extinctions
    q_gal = "SELECT p.ra, p.dec, p.modelMag_u, p.modelMagErr_u, p.modelMag_g, p.modelMagErr_g, p.modelMag_r, p.modelMagErr_r, p.modelMag_i, p.modelMagErr_i, p.modelMag_z, p.modelMagErr_z, \
            p.petroMag_u, p.petroMagErr_u, p.petroMag_g, p.petroMagErr_g, p.petroMag_r, p.petroMagErr_r, p.petroMag_i, p.petroMagErr_i, p.petroMag_z, p.petroMagErr_z, \
            p.cmodelMag_u, p.cmodelMagErr_u, p.cmodelMag_g, p.cmodelMagErr_g, p.cmodelMag_r, p.cmodelMagErr_r, p.cmodelMag_i, p.cmodelMagErr_i, p.cmodelMag_z, p.cmodelMagErr_z, \
            extinction_u, extinction_g, extinction_r, extinction_i, extinction_z, clean, s.z, s.zerr, pz.z as photoz, pz.zerr as photozerr \
            FROM fGetNearbyObjEq(" + ra + ", " + dec + ", " + radius + ") n, Galaxy p \
            LEFT OUTER JOIN specObj s ON s.bestObjID = p.objID \
            LEFT OUTER JOIN photoz pz ON pz.objid = p.objid \
            WHERE n.objID = p.objID AND (s.class IS NULL or s.class='GALAXY') "

    star_data = CasJobs.executeQuery(q_star, context=context, format='pandas')
    gal_data = CasJobs.executeQuery(q_gal, context=context, format='pandas')

    star_tab = Table(star_data.values, names=star_data.columns,
                     meta={'clustername': clustername, 'ra': ra, 'dec': dec, 'radius': radius},
                     dtype=[float for x in range(len(star_data.columns))])
    gal_tab = Table(gal_data.values, names=gal_data.columns,
                    meta={'clustername': clustername,
def main():
    try:
        os.mkdir(PRETRAIN)
    except OSError:  # directory already exists
        print('here we go!')

    if PRE_NORM:
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA, SPEC_DATA, base, ftr, wpath=PRETRAIN)

    if PRE_HH:
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        # np.savetxt(f'{PRETRAIN}/photo_stream.txt', photo_stream)
        # np.savetxt(f'{PRETRAIN}/spec_stream.txt', spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream, base, ftr_len, dtype, EXACT, topk,
                             r=16, d=1000000, c=None, device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
        print('photo_HH', photo_HH)
        print('spec_HH', spec_HH)

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        photo_uT = joblib.load(f'pretrain/photo_uT_b{base}.sav')

    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(dataFrame=photo_HH,
                                             tableName=f'{name}b{base}e{EXACT}std',
                                             context="MyDB")
def main():
    try:
        os.mkdir(PRETRAIN)
    except OSError:  # 'lets GO' - directory already exists
        pass
    print(PREPRO_CUTOFF, PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
          PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI)

    if PREPRO_NORM:
        print(f'=================LOADING N={num} Smoothing {ISSMTH} =================')
        data1Ds, pc = process_dataset_pc(data_dir, num, pca_comp, ISSMTH, SMTH, TEST)
        intensity, pca_results = process_pca(data1Ds, pc, num)
        # df_pca = pd.DataFrame(pca_results, columns=list(range(pca_comp)))
        df_norm, mask, ftr_len0 = process_intensity(pca_results, intensity, pca_comp,
                                                    PREPRO_CUTOFF, ONPCA, ONINT,
                                                    r=0.01, wdir=PRETRAIN)
        assert ftr_len0 == ftr_len
        mask2d = mask.reshape((num, 1004 * 1344))
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/mask_all.txt', mask)
        else:
            mask0 = mask2d[pidx]
            idxii = int(mask2d[:pidx].sum())
            idxjj = int(mask2d[:(pidx + 1)].sum())
            assert idxjj - idxii == mask0.sum()
            print(mask0.shape, mask.sum(), 'saving mask')
            np.savetxt(f'{PRETRAIN}/mask{pidx}.txt', mask0)
        # df_norm.to_csv(f'{PRETRAIN}/df_norm.csv', index=False)
        # df_normt = df_norm[idxii:idxjj]
        # df_normt.to_csv(f'{PRETRAIN}/df_norm{pidx}.csv', index=False)
    # elif PREPRO_STREAM:
    #     print(f'=================LOADING df_norm =================')
    #     df_norm = pd.read_csv(f'{PRETRAIN}/df_norm.csv')

    if PREPRO_STREAM:
        print(f'=================ENCODING Base={base} =================')
        stream = process_rebin(df_norm, base, dtype)
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/stream_b{base}.txt', stream)
        else:
            stream0 = stream[idxii:idxjj]
            np.savetxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt', stream0)
    elif PREPRO_HH:
        print(f'=================LOADING STREAM =================')
        stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        if not PREDICT_ALL:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')

    if PREPRO_HH:
        assert EXACT == 0
        topk = 20000
        print(f'=================DECODE {ftr_len} DIM =================')
        HH_pd = get_HH_pd(stream, base, ftr_len, dtype, EXACT, topk,
                          r=16, d=1000000, c=None, device=None)
        HH_pd.to_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_UMAP:
        print(f'=================LOADING HH_pd==============')
        HH_pd = pd.read_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv')
        print(HH_pd.head())

    if PREPRO_UMAP:
        print(f'=================GETTING UMAP =================')
        # lb, ub = int(HH_pd['freq'][0] * lbr), int(HH_pd['freq'][0])
        # HH_pdc = HH_pd[HH_pd['freq'] > lb]
        # print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0], 'lb', lb, 'HHratio', lbr)
        # if len(HH_pdc) > 20000:
        HH_pdc = HH_pd[:20000]
        print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0])
        print(f'=================LOADING HH_pd==============')
        umapT = get_umap_pd(HH_pdc, list(range(ftr_len)))
        # print(HH_pdc.keys())
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_KMEAN:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREPRO_KMEAN:
        print(f'=================KMEAN CLUSTERING =================')
        kmap = get_kmean_lbl(HH_pdc, N_cluster, u1='u1', u2='u2')
        joblib.dump(kmap, f'{PRETRAIN}/kmap_k{N_cluster}e{EXACT}.sav')
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    else:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREDICT_ALL:
        print(f'=================PREDICTING ALL {num} LABEL==============')
        if not PREPRO_NORM:
            mask = np.loadtxt(f'{PRETRAIN}/mask_all.txt')
        if not PREPRO_HH:
            stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        pred_k = get_pred_stream(stream, mask, HH_pdc, f'k{N_cluster}',
                                 val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((num, 1004, 1344))
        print(f'=================SAVING PREDICTION of ALL {num} LABEL==============')
        # note: pred_k is 3-D here; np.savetxt expects a 1-D or 2-D array
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}e{EXACT}.txt', pred_k)
    else:
        print(f'=================PREDICTING id{pidx} LABEL==============')
        if not PREPRO_NORM:
            mask0 = np.loadtxt(f'{PRETRAIN}/mask{pidx}.txt')
        if not PREPRO_HH:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
        pred_k = get_pred_stream(stream0, mask0, HH_pdc, f'k{N_cluster}',
                                 val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((1004, 1344))
        print(f'=================SAVING PREDICTION of id{pidx} LABEL==============')
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}{pidx}_f{name}b{base}sm1c3sige{EXACT}.txt', pred_k)

    if UPLOAD_SCI:
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(dataFrame=HH_pdc,
                                             tableName=f'b{base}sm{SMTH}f{name}sig3e{EXACT}_v1',
                                             context="MyDB")
import matplotlib.pyplot as plt
import skimage.io
import SciServer.CasJobs as CasJobs
import SciServer.SciDrive

query = """
select top 16 g.objId, g.ra, g.dec, g.petror90_r,
    dbo.fGetUrlFitsCFrame(f.fieldID, 'r') as fieldURL
from galaxy g, field f
where f.run = g.run
  and f.camcol = g.camcol
  and f.field = g.field
  and f.rerun = g.rerun
"""
# query CasJobs table. Using DR12 as context
queryResponse = CasJobs.executeQuery(query, "DR12")
# parse results into pandas.DataFrame for further in memory processing
gals = pandas.read_csv(queryResponse, index_col=None)
gals['objId'] = gals['objId'].astype(np.int64)

width = 200
height = 200
pixelsize = 0.396
plt.figure(figsize=(15, 15))
subPlotNum = 1

for index, gal in gals.iterrows():
    scale = 2 * gal['petror90_r'] / pixelsize / width
    url = "http://skyservice.pha.jhu.edu/DR12/ImgCutout/getjpeg.aspx?ra=" + str(gal['ra'])
    url += "&dec=" + str(
def do(self, user='******', password='******', search=1,
       path_to_model='YSE_App/data_ingest/YSE_DNN_photoZ_model_315.hdf5'):
    """
    Predicts photometric redshifts from RA and DEC points in SDSS

    An outline of the algorithm is:
    first pull from SDSS u,g,r,i,z magnitudes from SDSS; should be able to handle a list/array of RA and DEC
    place u,g,r,i,z into a vector, append the derived information into the data array
    predict the information from the model
    return the predictions in the same order to the user

    inputs:
        Ra: list or array of len N, right ascensions of target galaxies in decimal degrees
        Dec: list or array of len N, declination of target galaxies in decimal degrees
        search: float, arcmin tolerance to search for the object in SDSS Catalogue
        path_to_model: str, filepath to saved model for prediction

    Returns:
        predictions: array of len N, photometric redshift of input galaxy
    """
    try:
        nowdate = datetime.datetime.utcnow() - datetime.timedelta(1)
        from django.db.models import Q  # HAS To Remain Here, I dunno why
        print('Entered the photo_z cron')  # save time b/c the other cron jobs print a time for completion
        transients = (Transient.objects.filter(Q(host__photo_z__isnull=True) & Q(host__isnull=False)))
        # print('Number of test transients:', len(transients))

        RA = []   # Needs to be list b/c don't know how many hosts are None
        DEC = []
        # create an integer index mask that we will place values into,
        # because some hosts dont have a RA and DEC assigned
        outer_mask = []
        transients_withhost = []
        for i, transient_obj in enumerate(transients):
            if transient_obj.host is not None:
                RA.append(transient_obj.host.ra)
                DEC.append(transient_obj.host.dec)
                outer_mask.append(i)  # provides integer index mask

        outer_mask = np.array(outer_mask)  # make that an array
        N_outer = len(transients)  # gives size of returned array

        Ra = np.array(RA)
        Dec = np.array(DEC)
        N = len(Ra)  # gives size of query array
        Q = N // 1000  # decompose the length of transients needing classification
        if N % 1000 != 0:
            Q = Q + 1  # catch remainder and start a new batch

        total_job = []  # store all pandas job dataframes
        for j in range(Q):  # iterate over batches
            if j == (Q - 1):
                Ra_batch = Ra[j * 1000:((j + 1) * 1000 + N % 1000)]  # grab batch remainder
                Dec_batch = Dec[j * 1000:((j + 1) * 1000 + N % 1000)]
            else:
                Ra_batch = Ra[j * 1000:(j + 1) * 1000]  # otherwise grab batch of 1000
                Dec_batch = Dec[j * 1000:(j + 1) * 1000]

            # a list for holding the strings that I want to place into an sql query
            hold = []
            for val in range(len(Ra_batch)):
                # index the batch arrays (the original indexed the full Ra/Dec
                # arrays with the batch-local index val)
                string = '({},{},{}),|'.format(str(val), str(Ra_batch[val]), str(Dec_batch[val]))
                hold.append(string)

            # Now construct the full query
            sql = "CREATE TABLE #UPLOAD(|id INT PRIMARY KEY,|up_ra FLOAT,|up_dec FLOAT|)|INSERT INTO #UPLOAD| VALUES|"
            for data in hold:
                sql = sql + data
            # there is a comma that needs to be deleted from the last entry for syntax to work
            sql = sql[0:(len(sql) - 2)] + '|'
            # append the rest to it
            sql = sql + "SELECT|p.u,p.g,p.r,p.i,p.z,p.extinction_u,p.extinction_g,p.extinction_r,p.extinction_i,p.extinction_z,p.petroRad_u,p.petroRad_g,p.petroRad_r,p.petroRad_i,p.petroRad_z,p.petroR50_r,p.petroR90_r,zi.e_bv_sfd|FROM #UPLOAD as U|OUTER APPLY dbo.fGetNearestObjEq((U.up_ra),(U.up_dec),{}) as N|LEFT JOIN PhotoObjAll AS p ON N.objid=p.objID|JOIN SpecObjAll za on (p.objID = za.bestObjID)|JOIN galSpecInfo zi ON (zi.SpecObjID = za.specObjID)".format(str(search))
            # change all | to new line: when we change to Unix system will need to change this new line
            sql = sql.replace('|', '\n')
            # login, change to some other credentials later
            Authentication.login(user, password)
            job = CasJobs.executeQuery(sql, 'DR12', 'pandas')
            # this line sends and retrieves the result
            print('Query {} of {} complete'.format(j + 1, Q))
            job['dered_u'] = job['u'].values - job['extinction_u'].values
            job['dered_g'] = job['g'].values - job['extinction_g'].values
            job['dered_r'] = job['r'].values - job['extinction_r'].values
            job['dered_i'] = job['i'].values - job['extinction_i'].values
            job['dered_z'] = job['z'].values - job['extinction_z'].values
            job['u-g'] = job['dered_u'].values - job['dered_g'].values
            job['g-r'] = job['dered_g'].values - job['dered_r'].values
            job['r-i'] = job['dered_r'].values - job['dered_i'].values
            job['i-z'] = job['dered_i'].values - job['dered_z'].values
            job['u_over_z'] = job['dered_u'].values / job['dered_z'].values
            job['C'] = job['petroR90_r'].values / job['petroR50_r'].values
            total_job.append(job)

        print('left the query loop')
        query_result = pd.concat(total_job)

        # now feed to a RF model for prediction
        X = query_result[['dered_u', 'dered_g', 'dered_r', 'dered_i', 'dered_z',
                          'u-g', 'g-r', 'r-i', 'i-z', 'u_over_z',
                          'petroRad_u', 'petroRad_g', 'petroRad_r', 'petroRad_i', 'petroRad_z',
                          'petroR50_r', 'petroR90_r', 'C', 'e_bv_sfd']].values
        print(X.shape)

        # define and load in the model
        def create_model(learning_rate):
            model = keras.Sequential([])
            model.add(keras.layers.Dense(input_shape=(19,), units=19,
                                         activation=keras.activations.linear))  # tried relu
            # model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19, activation=tf.nn.relu))
            # model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19, activation=tf.nn.relu))
            # model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19, activation=tf.nn.relu))  # tf.nn.relu
            # model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=315, activation=keras.activations.softmax))
            # RMS = keras.optimizers.RMSprop(learning_rate=learning_rate)
            adam = keras.optimizers.Adam(lr=learning_rate)
            model.compile(optimizer=adam, loss='categorical_crossentropy')
            return model

        keras.backend.clear_session()
        model = create_model(learning_rate=1e-3)  # could've been anything for this, just gonna predict
        model.load_weights(path_to_model)

        # Need to deal with NANs now since many objects are outside the SDSS footprint,
        # later models will learn to deal with this
        # ideas: need to retain a mask of where the nans are in the row
        mask = np.invert(query_result.isna().any(1).values)  # true was inside SDSS footprint
        # also will want this mask in indices so we can insert the predicted data correctly
        indices = []
        for i, val in enumerate(mask):
            if val == True:
                indices.append(i)

        # predict on data that is not NAN
        predictions = model.predict(X[mask, :], verbose=2)
        # make nan array with size of what user asked for
        return_me = np.ones(N) * np.nan
        # now replace nan with the predictions in order
        return_me[indices] = predictions
        return_me_outer = np.ones(N_outer) * np.nan
        return_me_outer[outer_mask] = return_me
        print('time taken:', datetime.datetime.utcnow() - nowdate)
        print('uploading now')

        tz, mpz = [], []
        for t, pz in zip(transients, return_me):
            if pz != pz:
                continue
            host = t.host
            # import pdb; pdb.set_trace()
            host.photo_z = pz
            host.save()
            tz += [host.redshift]
            mpz += [pz]
        plt.plot(tz, mpz, '.')
        plt.savefig('test.png')
        print('time taken with upload:', datetime.datetime.utcnow() - nowdate)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("""Photo-z cron failed with error %s at line number %s""" % (e, exc_tb.tb_lineno))
def test_CasJobs_getPandasDataFrameFromQuery(self):
    #CasJobs.getPandasDataFrameFromQuery
    df = CasJobs.getPandasDataFrameFromQuery(queryString=CasJobs_TestQuery,
                                             context=CasJobs_TestDatabase)
    self.assertEqual(df.to_csv(index=False), CasJobs_TestTableCSV)
import umap
import os
from collections import Counter
import seaborn as sns
import warnings
warnings.simplefilter("ignore")

from SciServer import Authentication, CasJobs

username = '******'
password = getpass.getpass()
sciserver_token = Authentication.login(username, password)
sciserver_token

# https://github.com/sciserver/SciScript-Python/blob/master/Examples/Examples_SciScript-Python.ipynb
result = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                              tableName=CasJobs_TestTableName2,
                                              context="MyDB")

sql_n, ii = 20, 0
sql0 = """
select {}
    objid, class, subclass,
    u-g as ug, g-r as gr, r-i as ri, i-z as iz,
    u-r as ur, g-i as gi, r-z as rz,
    u-i as ui, g-z as gz,
    u-z as uz,
    (z-15)/7 as z
from (
    SELECT p.objid,
        s.class AS class, s.SUBCLASS AS subclass,
        p.psfMag_u AS u, p.psfMag_g AS g, p.psfMag_r AS r, p.psfMag_i AS i, p.psfMag_z AS z
def testTurbulence():
    token = testTurbulenceToken()
    print("turbulence token = ", token)
    schemaName = CasJobs.getSchemaName(token)
    print("schemaname=", schemaName)
def submit_job(self, querystr, context="DR15"):
    jobId = CasJobs.submitJob(sql=querystr, context=context)
    print("Query Job is submitted. JobID={}".format(jobId))
    return jobId
password = "******"
token = Authentication.login(username, password)
user = Authentication.getKeystoneUserWithToken(token)

#csv = open("./metadata_table.csv", 'r')
#csv_str = csv.read()
#csv.close()
#success = CasJobs.uploadCSVDataToTable(csv_str, "osc_metadata")
#print(success)

query1 = "create table osc_metadata (FileName varchar(255), FileSize varchar(255))"
CasJobs.executeQuery(sql=query1, format='json')

# get file name and size
f = argv[1]
f_name = os.path.basename(f)
f_size = os.path.getsize(f)
print(f_name)
print(f_size)

# upload data to table
query2 = ("insert into osc_metadata (FileName, FileSize) values ("
          + "'" + f_name[0:2] + "''" + f_name[2:3] + "''" + f_name[3:12]
          + "''" + f_name[12] + "''" + f_name[13:] + "'"
          + ", " + "'" + str(f_size) + "'" + ")")
import skimage.io
import SciServer.CasJobs as CasJobs
import SciServer.SciDrive

query = """
select top 16 g.objId, g.ra, g.dec, g.petror90_r,
    dbo.fGetUrlFitsCFrame(f.fieldID, 'r') as fieldURL
from galaxy g, field f
where f.run = g.run
  and f.camcol = g.camcol
  and f.field = g.field
  and f.rerun = g.rerun
"""
# query CasJobs table. Using DR12 as context
queryResponse = CasJobs.executeQuery(query, "DR12")
# parse results into pandas.DataFrame for further in memory processing
gals = pandas.read_csv(queryResponse, index_col=None)
gals['objId'] = gals['objId'].astype(np.int64)

width = 200
height = 200
pixelsize = 0.396
plt.figure(figsize=(15, 15))
subPlotNum = 1

for index, gal in gals.iterrows():
    scale = 2 * gal['petror90_r'] / pixelsize / width
    url = "http://skyservice.pha.jhu.edu/DR12/ImgCutout/getjpeg.aspx?ra=" + str(gal['ra'])
    url += "&dec=" + str(gal['dec']) + "&scale=" + str(scale) + "&width=" + str(width)
    url += "&height=" + str(height)
def testCasJobsTables():
    schema = CasJobs.getSchemaName()
    print("schema=", schema)
    tables = CasJobs.getTables("MyDB")
    print(tables)
# Don't show python warnings
warnings.filterwarnings('ignore')
print("SciServer library imported")
print('Supporting libraries imported')
print('Settings applied')

# This query finds all objects from galSpecInfo
# with extra data pulled from PhotoObjAll.
query = """
SELECT g.specObjID, p.type,
    p.u, p.g, p.r, p.i, p.z,
    g.spectrotype
FROM galSpecInfo AS g
    JOIN PhotoObjAll as p ON p.specObjID = g.specObjID
ORDER BY g.specObjID
"""
# Then, query the database. The answer is a table that is
# being returned as a dataframe.
q = CasJobs.executeQuery(query, "dr14")

# Drop any row that has NaN. (dropna() returns a new dataframe,
# so the result must be reassigned to take effect.)
q = q.dropna()

# Check that all types in columns are as expected.
q.info()

# Write to file.
q.to_csv("result.csv")
print("SQL query finished.")
def testUploadDataFrame(df, token, tablename):
    response = CasJobs.uploadPandasDataFrameToTable(df, tablename, token=token)
#Defining database context and query, and other variables
CasJobs_TestDatabase = "MyDB"
CasJobs_TestQuery = "select 4 as Column1, 5 as Column2 "
CasJobs_TestTableName1 = "MyNewtable1"
CasJobs_TestTableName2 = "MyNewtable2"
CasJobs_TestTableCSV = u"Column1,Column2\n4,5\n"
CasJobs_TestFitsFile = "SciScriptTestFile.fits"
CasJobs_TestCSVFile = "SciScriptTestFile.csv"

# In[ ]:

#get user schema info
casJobsId = CasJobs.getSchemaName()
print(casJobsId)

# In[ ]:

#get info about tables inside MyDB database context:
tables = CasJobs.getTables(context="MyDB")
print(tables)

# In[ ]:

#execute a quick SQL query:
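# A minimal sketch of that quick query, using the test variables defined above
# (the constant-valued test query returns a one-row, two-column pandas dataframe):
df = CasJobs.executeQuery(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase, format="pandas")
print(df)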
    f.peak, f.integr, f.rms
FROM SpecPhoto AS sp
    JOIN wise_xmatch AS xm ON sp.objid = xm.sdss_objid
    JOIN FIRST AS f ON sp.objid = f.objid
    JOIN wise_allsky AS w ON xm.wise_cntr = w.cntr
WHERE (sp.psfmag_r - sp.extinction_r > 0)
"""

#quick synchronous job - probably isn't what you want for any meaningful runs:
#queryResponse = CasJobs.executeQuery(query, "dr14", format='pandas')
#gals = queryResponse

#check tables in myDB
tname = 'test'
data = CasJobs.getTables('MyDB')
if data:
    print(data)
    print('There are already tables in your database under that name, '
          'removing them before doing new query...')
    #CLEAN UP: delete table from myDB
    SkyQuery.dropTable(tableName=tname, datasetName='myDB')

#long asynchronous job
print('submitting SQL job to SciServer... could take some time '
      'depending on query length and SciServer load...')
jobID = CasJobs.submitJob(query + "into myDB." + tname, "dr14")
CasJobs.waitForJob(jobID, verbose=True)
def test_CasJobs_getSchemaName(self):
    casJobsId = CasJobs.getSchemaName()
    self.assertNotEqual(casJobsId, "")