# Example: project muon branches out of the 'aod' dataset, first from a
# single ROOT file and then from every file in the dataset.
from skyhookdmclient import SkyhookDM

# Connect to the Skyhook driver running on this host.
client = SkyhookDM()
client.connect('localhost', 'hepdatapool')

# Fetch the dataset handle and pick its first file.
dataset = client.getDataset('aod')
first_file = dataset.getFiles()[0]

# One file -> one result table.
table = client.runQuery(first_file, 'select *, project Events;75.Muon_phi')

# Whole dataset -> one result table per file.
tables = client.runQuery(
    dataset,
    'select *, project Events;75.Muon_eta,Events;75.Muon_phi,Events;75.Muon_mass'
)
# Example: run one projection query against a single file and against the
# whole 'mm' dataset.
from skyhookdmclient import SkyhookDM

client = SkyhookDM()
client.connect('localhost', 'hepdatapool')

dataset = client.getDataset('mm')
first_file = dataset.getFiles()[0]

# Keep all rows ('select *') and project five branches of the first
# 'Events' tree cycle.
query = (
    'select *, project Events;1.Muon_dzErr,Events;1.SV_x,Events;1.Jet_puId,'
    'Events;1.HLT_AK8PFHT900_TrimMass50,Events;1.FatJet_n3b1'
)

# Single file -> one table; dataset -> one table per file.
table = client.runQuery(first_file, query)
tables = client.runQuery(dataset, query)
# This is an example which simulates the use case here: https://github.com/CoffeaTeam/coffea/blob/master/binder/muonspectrum_v1.ipynb import time import uproot import uproot_methods import awkward from skyhookdmclient import SkyhookDM import numpy from coffea import hist sk = SkyhookDM() sk.connect('192.170.236.173', 'hepdatapool') dst = sk.getDataset('nanoexample') tstart = time.time() masshist = hist.Hist("Counts", hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300)) tables = sk.runQuery( dst, 'select *, project Events;1.nMuon,Events;1.Muon_pt,Events;1.Muon_eta,Events;1.Muon_phi,Events;1.Muon_mass,Events;1.Muon_charge' ) table = tables[0] table.set_entrysteps(2) for chunk in table: p4 = uproot_methods.TLorentzVectorArray.from_ptetaphim( chunk.pop('MUON_PT'), chunk.pop('MUON_ETA'), chunk.pop('MUON_PHI'),
# Example: query an existing 'nanodst' dataset and print the first result
# table as a pandas DataFrame.
from skyhookdmclient import SkyhookDM

sk = SkyhookDM()
# Replace 'ipaddr' with the real address of the Skyhook driver.
sk.connect('ipaddr', 'hepdatapool')

dataset = sk.getDataset('nanodst')

# runQuery() on a dataset returns one table per file it contains.
result_tables = sk.runQuery(
    dataset,
    'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge'
)

# Show the first file's result as a pandas DataFrame.
print(result_tables[0].to_pandas())
# Example: ingest a ROOT file from CERN Open Data, browse its metadata
# tree, then run a projection query against the single file.
from skyhookdmclient import SkyhookDM

sk = SkyhookDM()
sk.connect('localhost', 'hepdatapool')

# Write the remote ROOT file into Ceph under the dataset name 'nanodst'.
sk.writeDataset(
    ['http://opendata.cern.ch/record/12352/files/VBF_HToTauTau.root'],
    'nanodst'
)
dataset = sk.getDataset('nanodst')

# Metadata walk: dataset -> files -> root node -> trees -> branches.
root_file = dataset.getFiles()[0]  # renamed from `file` to avoid shadowing a builtin name
tree = root_file.getRoot().getChildren()[0]
branches = tree.getChildren()      # branch metadata; shown for the API, unused below

# Project five muon branches from the file and print them via pandas.
result = sk.runQuery(
    root_file,
    'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge'
)
print(result.to_pandas())
# Example: write a small NanoAOD file into SkyhookDM, read its metadata,
# then query both the single file and the whole dataset.
from skyhookdmclient import SkyhookDM

sk = SkyhookDM()
# NOTE: change 'ip_address' to the correct address of the Skyhook driver
# before running this example.
sk.connect('ip_address', 'hepdatapool')

# --- write data -------------------------------------------------------
sample_urls = [
    'https://github.com/uccross/skyhookdm-pythonclient/raw/master/client/skyhookdmclient/rsc/nano_aod.root'
]
sk.writeDataset(sample_urls, 'nanoexample')
dst = sk.getDataset('nanoexample')

# --- read metadata ----------------------------------------------------
first = dst.getFiles()[0]
schema = first.getSchema()                # file-level schema
children = first.getRoot().getChildren()  # nodes under the ROOT node
first.getAttributes()                     # file attributes (return value unused here)

# --- read data --------------------------------------------------------
query = (
    'select *, project Events;1.Muon_dzErr,Events;1.SV_x,Events;1.Jet_puId,'
    'Events;1.HLT_AK8PFHT900_TrimMass50,Events;1.FatJet_n3b1'
)
table = sk.runQuery(first, query)  # one table for a single file
tables = sk.runQuery(dst, query)   # one table per file in the dataset
print(table)
print(tables)
# Example: end-to-end demo — connect, (optionally) ingest two
# DoubleMuParked runs, query the dataset, and print one result table as a
# pandas DataFrame.
from skyhookdmclient import SkyhookDM

sk = SkyhookDM()
# Connect to the Skyhook driver; substitute the driver's real address for
# 'ip_address'.
sk.connect('ip_address', 'hepdatapool')

# The files below are already loaded into Ceph, so ingestion stays
# commented out:
# urls = ['./Run2012B_DoubleMuParked.root','./Run2012C_DoubleMuParked.root']
# sk.writeDataset(urls,'demodst')

dst = sk.getDataset('demodst')

# Querying a dataset yields one table per file it contains. A wider
# projection would look like:
# tables = sk.runQuery(dst,'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge')
tables = sk.runQuery(dst, 'select *, project Events;1.nMuon, Events;1.Muon_pt')

# Convert the second table to pandas (may take a few seconds) and print it.
print(tables[1].to_pandas())
# Example: write an in-memory Arrow table to the Ceph cluster through
# SkyhookDM's writeArrowTable().
import pyarrow as pa
import pandas as pd

from skyhookdmclient import SkyhookDM

sk = SkyhookDM()
# Connect to the Skyhook driver at the given address.
sk.connect('ip_address', 'hepdatapool')

# Build a one-column pandas DataFrame named 'a' and convert it to Arrow.
frame = pd.DataFrame({"a": [1, 2, 3]})
arrow_table = pa.Table.from_pandas(frame)

# Persist the Arrow table in Ceph. This function is still under
# development: it should eventually accept more arguments (e.g. table
# metadata), and SkyhookDM's query functions are not yet compatible with
# data written this way.
sk.writeArrowTable(arrow_table, 'tname')