Ejemplo n.º 1
0
from skyhookdmclient import SkyhookDM

# Open a client session against the driver on localhost, pool 'hepdatapool'.
sk = SkyhookDM()
sk.connect('localhost', 'hepdatapool')

# Fetch the 'aod' dataset and pick the first file it lists.
dst = sk.getDataset('aod')
f = dst.getFiles()[0]

# Query a single file, projecting one branch.
single_branch_query = 'select *, project Events;75.Muon_phi'
table = sk.runQuery(f, single_branch_query)

# Query the dataset object itself, projecting three branches.
multi_branch_query = 'select *, project Events;75.Muon_eta,Events;75.Muon_phi,Events;75.Muon_mass'
tables = sk.runQuery(dst, multi_branch_query)
Ejemplo n.º 2
0
from skyhookdmclient import SkyhookDM
# Connect to the driver on localhost (pool 'hepdatapool') and fetch the 'mm' dataset.
client = SkyhookDM()
client.connect('localhost', 'hepdatapool')
dataset = client.getDataset('mm')

# The same projection is issued twice below, so the query text is kept in one place.
query = 'select *, project Events;1.Muon_dzErr,Events;1.SV_x,Events;1.Jet_puId,Events;1.HLT_AK8PFHT900_TrimMass50,Events;1.FatJet_n3b1'

# First against the first file of the dataset ...
first_file = dataset.getFiles()[0]
table = client.runQuery(first_file, query)

# ... then against the dataset object itself.
tables = client.runQuery(dataset, query)
Ejemplo n.º 3
0
# This is an example which simulates the use case here: https://github.com/CoffeaTeam/coffea/blob/master/binder/muonspectrum_v1.ipynb

import time
import uproot
import uproot_methods
import awkward
from skyhookdmclient import SkyhookDM
import numpy
from coffea import hist

# Connect to the Skyhook driver at the given address (pool 'hepdatapool')
# and fetch the 'nanoexample' dataset.
sk = SkyhookDM()
sk.connect('192.170.236.173', 'hepdatapool')
dst = sk.getDataset('nanoexample')

# Record the wall-clock start time.
# NOTE(review): presumably used to time the run; the code that reads it is not visible here.
tstart = time.time()
# Histogram labelled "Counts" with a single "mass" axis.
# NOTE(review): 30000, 0.25, 300 look like (number of bins, low edge, high edge) in GeV
# per the axis label — confirm against the coffea hist.Bin signature.
masshist = hist.Hist("Counts",
                     hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 30000, 0.25, 300))

# Query the dataset, projecting the muon branches named in the query string.
tables = sk.runQuery(
    dst,
    'select *, project Events;1.nMuon,Events;1.Muon_pt,Events;1.Muon_eta,Events;1.Muon_phi,Events;1.Muon_mass,Events;1.Muon_charge'
)
# Only the first element of the query result is used from here on.
table = tables[0]

# NOTE(review): presumably controls how many entries each iteration chunk holds —
# confirm against the skyhookdmclient API.
table.set_entrysteps(2)

for chunk in table:
    p4 = uproot_methods.TLorentzVectorArray.from_ptetaphim(
        chunk.pop('MUON_PT'),
        chunk.pop('MUON_ETA'),
        chunk.pop('MUON_PHI'),
Ejemplo n.º 4
0
# Suppose a Skyhook driver is reachable at 'ipaddr' and the 'nanodst' dataset has already been written.
from skyhookdmclient import SkyhookDM
# Connect to the Skyhook driver, pool 'hepdatapool'.
# NOTE: 'ipaddr' looks like a placeholder — replace it with the driver's real address.
client = SkyhookDM()
client.connect('ipaddr', 'hepdatapool')

# Query the 'nanodst' dataset, projecting the muon branches named in the query string.
muon_query = 'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge'
tables = client.runQuery(client.getDataset('nanodst'), muon_query)

# Convert the first returned table to a pandas DataFrame and print it.
first_table = tables[0]
dataframe = first_table.to_pandas()
print(dataframe)
Ejemplo n.º 5
0
from skyhookdmclient import SkyhookDM
# Connect to the driver on localhost, pool 'hepdatapool'.
sk = SkyhookDM()
sk.connect('localhost', 'hepdatapool')

# Write the file at this URL as dataset 'nanodst', then fetch the dataset back.
sk.writeDataset(
    ['http://opendata.cern.ch/record/12352/files/VBF_HToTauTau.root'],
    'nanodst',
)
dataset = sk.getDataset('nanodst')

# Walk the metadata: first file -> root node -> first child -> that child's children.
first_file = dataset.getFiles()[0]
root_node = first_file.getRoot()
first_tree = root_node.getChildren()[0]
branches = first_tree.getChildren()

# Query the single file, projecting the muon branches named in the query string.
muon_query = 'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge'
table = sk.runQuery(first_file, muon_query)

# Convert the result to a pandas DataFrame and print it.
dataframe = table.to_pandas()
print(dataframe)
Ejemplo n.º 6
0
from skyhookdmclient import SkyhookDM
sk = SkyhookDM()

# Before running this example, replace 'ip_address' with the actual address of the Skyhook driver.
sk.connect('ip_address', 'hepdatapool')

# --- write data ---
sk.writeDataset(
    ['https://github.com/uccross/skyhookdm-pythonclient/raw/master/client/skyhookdmclient/rsc/nano_aod.root'],
    'nanoexample',
)
dataset = sk.getDataset('nanoexample')

# --- read metadata (from the first file of the dataset) ---
first_file = dataset.getFiles()[0]
schema = first_file.getSchema()
root_node = first_file.getRoot()
children = root_node.getChildren()
first_file.getAttributes()  # the return value is not kept

# --- read data ---
# The same projection is run against the single file and against the dataset.
query = 'select *, project Events;1.Muon_dzErr,Events;1.SV_x,Events;1.Jet_puId,Events;1.HLT_AK8PFHT900_TrimMass50,Events;1.FatJet_n3b1'
table = sk.runQuery(first_file, query)
tables = sk.runQuery(dataset, query)

print(table)
print(tables)
Ejemplo n.º 7
0
# Import the SkyhookDM library
from skyhookdmclient import SkyhookDM

# Instantiate the client.
client = SkyhookDM()

# Connect to the Skyhook driver; replace 'ip_address' with the driver's address.
client.connect('ip_address', 'hepdatapool')

# The data below is already loaded into Ceph, so the write step is left disabled:
# urls = ['./Run2012B_DoubleMuParked.root','./Run2012C_DoubleMuParked.root']
# client.writeDataset(urls,'demodst')

# Fetch the dataset.
dataset = client.getDataset('demodst')

# A dataset query returns a number of tables according to the number of files in the dataset.
# A wider projection can be used instead:
# 'select *, project Events;1.nMuon, Events;1.Muon_pt, Events;1.Muon_eta, Events;1.Muon_mass, Events;1.Muon_charge'
muon_query = 'select *, project Events;1.nMuon, Events;1.Muon_pt'
tables = client.runQuery(dataset, muon_query)

# Convert the second table to a pandas DataFrame. This may take a few seconds.
second_table = tables[1]
dataframe = second_table.to_pandas()

# Show the DataFrame.
print(dataframe)
Ejemplo n.º 8
0
import pyarrow as pa
import pandas as pd

# Setup SkyhookDM
from skyhookdmclient import SkyhookDM

# Instantiate the client and connect to the Skyhook driver;
# replace 'ip_address' with the driver's IP address.
client = SkyhookDM()
client.connect('ip_address', 'hepdatapool')

# Build a pandas DataFrame with a single column named 'a',
# then convert it to an Arrow table.
columns = {"a": [1, 2, 3]}
arrow_table = pa.Table.from_pandas(pd.DataFrame(columns))

# Write the Arrow table to the Ceph cluster.
# NOTE: writeArrowTable() is still under development; it should eventually accept
# more arguments, such as metadata for the table.
client.writeArrowTable(arrow_table, 'tname')

# NOTE: for now, the SkyhookDM query functions are not compatible with data
# written through writeArrowTable().