def test_CasJobs_submitJob(self):
    """Submit a select-into job, wait for it, and verify a non-empty job id."""
    insert_sql = CasJobs_TestQuery + " into MyDB." + CasJobs_TestTableName1
    job_id = CasJobs.submitJob(sql=insert_sql, context=CasJobs_TestDatabase)
    # Block until the job reaches a terminal state (verbose prints progress).
    CasJobs.waitForJob(jobId=job_id, verbose=True)
    # Clean up the table the job created in MyDB.
    CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName1,
                         context="MyDB", format="csv")
    self.assertNotEqual(job_id, "")
def testCasJobsSubmit(token):
    """Submit a small select-into job against Henriques2015a and wait for it."""
    # Randomized table name avoids collisions with earlier runs.
    target_table = "mriiscplanck1_" + str(randint(0, 1000000))
    statement = ("""select top 10 galaxyId,snapnum,stellarmass into """
                 + target_table
                 + """ from MRIIscPlanck1""")
    submitted = CasJobs.submitJob(statement, context="Henriques2015a", token=token)
    print("jobId=", submitted)
    description = CasJobs.waitForJob(submitted)
    print(description)
def createQueries(username, password, amount, table_name, context):
    """Copy SDSS web-log SQL statements into a [myscratch:default] table.

    Submits a CasJobs job that selects the top *amount* logged statements
    that query PhotoObjAll with ra/dec BETWEEN filters, and waits for it.
    """
    # Built as one literal in the original; split here for readability only --
    # the concatenated SQL text is byte-identical.
    sql_text = (
        "SELECT top " + str(amount) + " statement into [myscratch:default]."
        + table_name
        + " FROM sdssweblogs.SqlLog WHERE CHARINDEX('FROM PhotoObjAll', statement) > 0"
        + " AND CHARINDEX('p.dec BETWEEN', statement) > 0"
        + " AND CHARINDEX('p.ra BETWEEN', statement) > 0"
        + " AND access='Skyserver.Search.SQL'"
    )
    login(username, password)
    job_id = CasJobs.submitJob(sql=sql_text, context=context)
    CasJobs.waitForJob(jobId=job_id, verbose=True)
    print("Data has been saved on sciserver at context myscratch with tablename "
          + table_name)
def createData(username, password, dec_low, dec_high, ra_low, ra_high,
               table_name, context, all_data):
    """Copy ra/dec columns from dr16.photoobjall into a scratch table via CasJobs.

    Parameters
    ----------
    username, password :
        SciServer credentials passed to ``login``.
    dec_low, dec_high, ra_low, ra_high :
        Sky bounding box applied when ``all_data`` is falsy.
    table_name :
        Destination table name inside [myscratch:default].
    context :
        CasJobs context the job is submitted to.
    all_data :
        When truthy, skip the WHERE clause and copy every row.
    """
    # Fix: removed the unused local `CasJobs_Database = "DR16"` (dead code --
    # the database is hard-coded in the FROM clause below).
    query = ("Select ra,dec into [myscratch:default]." + table_name
             + " from dr16.photoobjall ")
    if not all_data:
        # Restrict to the requested sky region.
        query += ("where dec between " + str(dec_low) + " and " + str(dec_high)
                  + " and ra between " + str(ra_low) + " and " + str(ra_high))
    login(username, password)
    job_id = CasJobs.submitJob(sql=query, context=context)
    # Block until the server reports the job finished.
    CasJobs.waitForJob(jobId=job_id, verbose=True)
    print("Data has been saved on sciserver at context myscratch with tablename "
          + table_name)
def download_query(self, query, table_name, save_to='data', context='MyDB'):
    """Run *query* in the given context, then download the resulting table."""
    if context != 'MyDB':
        # Asynchronous path: execute in the remote context; results land in MyDB.
        job_id = cj.submitJob(query, context=context)
        cj.waitForJob(job_id, verbose=True)
        status = cj.getJobStatus(job_id)
        # NOTE(review): code 5 is treated as success here; the key 'Success'
        # vs 'Status' should be confirmed against the CasJobs client docs.
        if status['Success'] != 5:
            raise CasJobsError(
                'Error Performing the query, {}'.format(status))
    # Now the results are safely saved in myDB.
    # You can go ahead and download by using a session.
    self.download_csv(table_name)
    self._download_from_scidrive(table_name, save_to)
# Check tables in myDB before running the query.
tname = 'test'
data = CasJobs.getTables('MyDB')
if data:
    print(data)
    print(
        'There are already tables in your database under that name, removing them before doing new query...'
    )
    # CLEAN UP: delete the old table from myDB so the select-into can recreate it.
    SkyQuery.dropTable(tableName=tname, datasetName='myDB')

# Long asynchronous job.
print(
    'submitting SQL job to SciServer... could take some time depending on query length and SciServer load...'
)
# NOTE(review): "into" is appended with no leading space -- this relies on
# `query` ending in whitespace/newline; confirm upstream, otherwise the
# generated SQL is malformed (e.g. "...WHERE x=1into myDB.test").
jobID = CasJobs.submitJob(query + "into myDB." + tname, "dr14")
CasJobs.waitForJob(jobID, verbose=True)

try:
    # Download table from MYDB into Pandas dataframe. additional parms: , top=10
    print('Attempting to download table...')
    data_table = SkyQuery.getTable(tableName=tname, datasetName='MyDB')
    print('Done! Table shape is: ' + str(data_table.shape))
    # Save df to disk in case of super long queries.
    filename = 'test_query_table_1000'
    print('Saving tables to disk as: ' + filename)
    save_obj(data_table, filename)
except Exception:
    # Fix: was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
    # still propagate instead of being swallowed.
    print(
        'ERROR, No data found in your SciServer database, query may not have completed, check your SQL syntax?'
    )
def test_CasJobs_waitForJob(self):
    """waitForJob should block until the job description shows completion."""
    submitted_id = CasJobs.submitJob(sql=CasJobs_TestQuery,
                                     context=CasJobs_TestDatabase)
    description = CasJobs.waitForJob(jobId=submitted_id, verbose=True)
    # The test only requires the reported status code to have reached >= 3.
    self.assertGreaterEqual(description["Status"], 3)
def test_CasJobs_cancelJob(self):
    """A freshly submitted job should be cancellable."""
    job_id = CasJobs.submitJob(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase)
    was_canceled = CasJobs.cancelJob(jobId=job_id)
    # Deliberately an equality check against True (not assertTrue) to match
    # the original's strictness about the returned value.
    self.assertEqual(was_canceled, True)
def test_CasJobs_getJobStatus(self):
    """getJobStatus should echo back the id of the job it describes."""
    job_id = CasJobs.submitJob(sql=CasJobs_TestQuery,
                               context=CasJobs_TestDatabase)
    status = CasJobs.getJobStatus(job_id)
    self.assertEqual(status["JobID"], job_id)
# In[ ]: #execute a quick SQL query: df = CasJobs.executeQuery(sql=CasJobs_TestQuery, context=CasJobs_TestDatabase, format="pandas") print(df) # In[ ]: #submit a job, which inserts the query results into a table in the MyDB database context. #Wait until the job is done and get its status. jobId = CasJobs.submitJob(sql=CasJobs_TestQuery + " into MyDB." + CasJobs_TestTableName1, context="MyDB") jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=False) print(jobId) print(jobDescription) # In[ ]: # drop or delete table in MyDB database context df = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName1, context="MyDB", format="pandas") print(df) # In[ ]:
# In[ ]:
bigtablename = 'hugetable'

# Example of a longer query: get magnitudes and sizes (Petrosian radii)
# of one million galaxies.
# Fix: the original first line ended "dec \n" with no trailing comma, so in
# the assembled SQL "dec" aliased the next column name (u) instead of both
# columns being selected.
verylongquery = 'select top 10 objid, ra, dec, \n'
verylongquery += 'u, g, r, i, z, err_u, err_g, err_r, err_i, err_z, petror90_r \n'
verylongquery += 'into mydb.' + bigtablename + '\n'
verylongquery += 'from galaxy\n'
verylongquery += 'where clean = 1'

print('Submitting query:\n', verylongquery)
print('\n')

thisjobid = CasJobs.submitJob(sql=verylongquery, context=this_context)
print('Job submitted with jobId = ', thisjobid)
print('\n')

# waited is a dummy variable; just print wait msg.
waited = CasJobs.waitForJob(jobId=thisjobid)
jobDescription = CasJobs.getJobStatus(thisjobid)
print('\n')
print('Information about the job:')
#pprint(jobDescription)
jobDescriber(jobDescription)

# ## Thank you!
order by 1''' sql_drop = """IF OBJECT_ID('spectrain') IS NOT NULL DROP TABLE spectrain""" CasJobs.executeQuery(sql=sql_drop, context='MYDB', format="pandas") sql_get = """ select * from MYDB.NIPSspecphoto order by objid {} OFFSET {} ROWS FETCH NEXT 1407000 ROWS ONLY;""".format('', 1000000 * ii) # ii+=1 df = CasJobs.executeQuery(sql='SELECT * FROM N15', context='MYDB', format="pandas") jobid = CasJobs.submitJob(sql=sql_spec, context='DR13') CasJobs.waitForJob(jobid) df = CasJobs.executeQuery(sql=sql_spec, context='DR13', format="pandas") df = CasJobs.executeQuery(sql='SELECT * FROM MYDB.spectrain', context='MYDB', format="pandas") df.shape df.to_csv('../data/sdss_stars/DR13/544k_spec_objid.csv', index=False) ''' select class, subclass, u-g as ug, g-r as gr, r-i as ri, i-z as iz, u-r as ur, g-i as gi, r-z as rz, u-i as ui, g-z as gz, u-z as uz, (u-15)/7 as u, (g-15)/7 as g, (i-15)/7 as i, (z-15)/7 as z, (r-15)/5 as r, u-0.5*ext_u as u05, g-0.5*ext_g as g05, r-0.5*ext_r as r05, i-0.5*ext_i as i05, z-0.5*ext_z as z05,
def submit_job(self, querystr, context="DR15"):
    """Submit *querystr* to CasJobs in *context* and return the job id."""
    submitted_id = CasJobs.submitJob(sql=querystr, context=context)
    print("Query Job is submitted. JobID={}".format(submitted_id))
    return submitted_id
def do(self, user='******', password='******', search=1,
       path_to_model='YSE_DNN_photoZ_model_315.hdf5', debug=True):
    """Predicts photometric redshifts from RA and DEC points in SDSS.

    An outline of the algorithm is:
    first pull from SDSS u,g,r,i,z magnitudes from SDSS; should be able to
    handle a list/array of RA and DEC; place u,g,r,i,z into a vector, append
    the derived information into the data array; predict the information from
    the model; return the predictions in the same order to the user.

    inputs:
        Ra: list or array of len N, right ascensions of target galaxies in
            decimal degrees
        Dec: list or array of len N, declination of target galaxies in
            decimal degrees
        search: float, arcmin tolerance to search for the object in SDSS
            Catalogue
        path_to_model: str, filepath to saved model for prediction

    Returns:
        predictions: array of len N, photometric redshift of input galaxy
    """
    try:
        # Used only to report total elapsed time at the end.
        nowdate = datetime.datetime.utcnow() - datetime.timedelta(1)
        from django.db.models import Q  # HAS To Remain Here, I dunno why
        print('Entered the photo_z cron')  # save time b/c the other cron jobs print a time for completion

        # Transients with a host but no photo-z yet.
        transients = Transient.objects.filter(
            Q(host__isnull=False) & Q(host__photo_z__isnull=True))
        # Dummy index used in DF, then used to create a mapping from matched
        # galaxies back to these hosts.
        my_index = np.array(range(0, len(transients)))
        transient_dictionary = dict(zip(my_index, transients))

        RA = []
        DEC = []
        for i, T in enumerate(transients):  # get rid of fake entries
            if T.host.ra and T.host.dec:
                RA.append(T.host.ra)
                DEC.append(T.host.dec)
            else:
                # Host lacks coordinates; drop it from the mapping too.
                transient_dictionary.pop(i)

        # Build the upload frame: one row per host with a usable position.
        DF_pre = pd.DataFrame()
        DF_pre['myindex'] = list(transient_dictionary.keys())
        DF_pre['RA'] = RA
        DF_pre['DEC'] = DEC

        SciServer.Authentication.login(user, password)
        # Best-effort cleanup of any leftover tables from a previous run.
        try:
            SciServer.SkyQuery.dropTable('SDSSTable1', datasetName='MyDB')
            SciServer.SkyQuery.dropTable('SDSSTable2', datasetName='MyDB')
        except Exception as e:
            print('tables are not in CAS MyDB, continuing')
            # NOTE(review): this string was split across lines by extraction;
            # reconstructed as a single literal -- confirm against the original.
            print('if system says table is already in DB, contact Andrew Engel')

        SciServer.CasJobs.uploadPandasDataFrameToTable(DF_pre, 'SDSSTable1',
                                                       context='MyDB')
        # Positional cross-match (1 arcmin radius) joined to the Photoz table.
        myquery = 'SELECT s.myindex, p.z, p.zErr, p.nnAvgZ '
        myquery += 'INTO MyDB.SDSSTable2 '
        myquery += 'FROM MyDB.SDSSTable1 s '
        myquery += 'CROSS APPLY dbo.fGetNearestObjEq(s.RA,s.DEC,1.0/60.0) AS nb '
        myquery += 'INNER JOIN Photoz p ON p.objID = nb.objID'
        jobID = CasJobs.submitJob(sql=myquery, context="DR16")
        waited = SciServer.CasJobs.waitForJob(jobId=jobID)
        jobDescription = CasJobs.getJobStatus(jobID)
        jobDescriber(jobDescription)

        PhotoDF = SciServer.SkyQuery.getTable('SDSSTable2', datasetName='MyDB',
                                              top=10)  # change this
        PhotoDF.drop_duplicates(subset='#myindex',
                                inplace=True)  # neccessary to patch some bugs
        PhotoDF.dropna(inplace=True)

        whats_left = PhotoDF['#myindex'].values
        point_estimates = PhotoDF['z'].values
        error = PhotoDF['zErr'].values
        other_z = PhotoDF['nnAvgZ']
        # If there is a bad error, then authors write this is more effective.
        point_estimates[error < -9998] = other_z[error < -9998]

        # Write results back onto the matched hosts.
        for i, value in enumerate(whats_left):
            T = transient_dictionary[value]
            T.host.photo_z = point_estimates[i]
            T.host.photo_z_err = error[i]
            #T.host.photo_z_posterior = posterior[i] #Gautham suggested we add it to the host model
            T.host.photo_z_source = 'SDSS'
            T.host.save()  # takes a long time and then my query needs to be reset which is also a long time

        print('time taken with upload:',
              datetime.datetime.utcnow() - nowdate)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("""Photo-z cron failed with error %s at line number %s""" %
              (e, exc_tb.tb_lineno))
        #html_msg = """Photo-z cron failed with error %s at line number %s"""%(e,exc_tb.tb_lineno)
        #sendemail(from_addr, user.email, subject, html_msg,
        #          djangoSettings.SMTP_LOGIN, djangoSettings.SMTP_PASSWORD, smtpserver)

#IDK = YSE() #why is this suddenly neccessary???
#IDK.do()