Example #1
    def convert(
        self,
        data,
        sort_field="panda_queue",
        should_be_sorted_by="panda_queue",
        *args,
        **kwargs
    ):
        """Convert the AGIS data to the desired format of being ordered by Panda queues

        :param data: data to be converted in the desired format"""

        json_data = {}

        if isinstance(data, dict):
            for key, d in data.items():
                if sort_field in d:
                    json_data[d[sort_field]] = d
        elif isinstance(data, list):
            for d in data:
                if sort_field in d:
                    json_data[d[sort_field]] = d
        else:
            logger.error("Data is not type dict or list but: {}".format(type(data)))

        return json_data
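A minimal standalone sketch of the re-keying this method performs, using plain dicts only; the queue names and the corecount field are invented for illustration and the converter class itself is not shown here:

# Illustration only: re-key a list of queue dicts by their "panda_queue" field.
raw = [
    {"panda_queue": "CERN-PROD", "corecount": 8},
    {"panda_queue": "BNL_PROD", "corecount": 16},
]
by_queue = {d["panda_queue"]: d for d in raw if "panda_queue" in d}
# by_queue == {"CERN-PROD": {...}, "BNL_PROD": {...}}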
Example #2
def getRegionFromExpression(expr):
    #this is where you implement custom mappings to region names
    if args.analysis == '1Lbb':
        if expr == 'SRLM' or expr == 'SRMM' or expr == 'SRHM':
            return 0
        if expr == 'SRLMincl' or expr == 'SRMMincl' or expr == 'SRHMincl':
            return expr
        if 'SRLM' in expr:
            return 'SRLM'
        if 'SRMM' in expr:
            return 'SRMM'
        if 'SRHM' in expr:
            return 'SRHM'
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        if args.background == 'zjets':
            #return without e.g. '_bin0' at the end
            return re.sub(r'_bin\d*', '', expr)
        else:
            return expr
    if expr in args.regions:
        return expr
    logger.error('Region not found: {}'.format(expr))
    return 0
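A standalone re-implementation of just the 1Lbb mapping rules above, as a sketch; the real function additionally consults args.analysis, args.regions and the logger, and the example expressions here are invented:

def map_1lbb(expr):
    # exact exclusive SRs are skipped by returning the 0 sentinel
    if expr in ("SRLM", "SRMM", "SRHM"):
        return 0
    # inclusive SRs are kept as-is
    if expr in ("SRLMincl", "SRMMincl", "SRHMincl"):
        return expr
    # anything containing an SR name is collapsed onto that SR
    for sr in ("SRLM", "SRMM", "SRHM"):
        if sr in expr:
            return sr
    return expr

assert map_1lbb("SRLM_cuts") == "SRLM"
assert map_1lbb("SRHM") == 0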
Example #3
    def convert(self,
                data,
                append_mode=False,
                sort_field="panda_queue",
                should_be_ordered_by="panda_queue",
                *args,
                **kwargs):
        """Convert the REBUS data to the desired format of being ordered by Panda queues

        :param data: data to be converted in the desired format"""

        json_data = RebusDict()

        if isinstance(data, dict):
            for key, d in data.items():
                if key == "NULL":
                    # CRIC has this huge NULL entry?!
                    continue
                if isinstance(d.get(sort_field, []), list):
                    for site in d.get(sort_field, []):
                        logger.debug("Adding {}".format(site))
                        logger.debug(d)
                        json_data.update(object={site: d},
                                         append_mode=append_mode)
                elif isinstance(d.get(sort_field, {}), collections.abc.Hashable):
                    logger.debug("Adding {}".format(d.get(sort_field, {})))
                    json_data.update(object={d[sort_field]: d},
                                     append_mode=append_mode)
        else:
            logger.error("Data is not type dict or list but: {}".format(
                type(data)))

        return json_data
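A standalone illustration of the fan-out above: when the sort field holds a list (as for the REBUS "rcsites" field), the same entry is filed once per site. Plain dicts are used, the pledge value is invented, and RebusDict's append_mode merging is not reproduced here:

entry = {"rcsites": ["CERN-PROD", "CERN-EXT"], "pledge": 42}
fanned_out = {site: entry for site in entry["rcsites"]}
# fanned_out == {"CERN-PROD": {...}, "CERN-EXT": {...}}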
Example #4
def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()  # skip the header line
        for line in f:
            fields = line.rstrip().split()
            # DSID is the first field
            if int(my_dsid) == int(fields[0]):
                xsec = float(fields[2])
                filter_eff = float(fields[3])
                kfactor = float(fields[4])

                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor

    logger.error("Didn't find an xsec ... sorry!")
    return None
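A hedged sketch of one data line in the cross-section file as parsed above; the column names and numbers are assumptions, only the positions (0: DSID, 2: xsec, 3: filter efficiency, 4: k-factor) are taken from get_xsec itself:

line = "364100  Znunu_example  1234.5  0.82  1.02"
fields = line.rstrip().split()
effective_xsec = float(fields[2]) * float(fields[3]) * float(fields[4])
# get_xsec would return this product for my_dsid=364100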
Example #5
        def getxsec(filename):
            if "oneStep" in filename:
                mass_string = filename.replace('GG_oneStep_',
                                               '').replace('.root',
                                                           '').split('_')[0]
            elif "Wh_hbb" in filename:
                mass_string = filename.replace('C1N2_Wh_hbb_',
                                               '').replace('.root',
                                                           '').split('_')[0]
            else:
                logger.error("Cannot figure out signal model.")
                raise ValueError("Sorry, need to exit here.")

            mass_string = mass_string.replace("p0", "").replace("p5", ".5")

            if mass_string not in dict:
                # mass not on the 5 GeV grid: average the two neighbouring grid points
                lowerMass = int(float(mass_string)) // 5 * 5
                upperMass = lowerMass + 5

                xsec = (dict[str(lowerMass)] + dict[str(upperMass)]) / 2.0
                return xsec
            else:
                return dict[mass_string]
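A standalone sketch of the grid lookup above: masses on the 5 GeV grid are read directly, anything in between is averaged from its two neighbours (the cross-section values are invented):

xsec_db = {"700": 3.5, "705": 3.3}

def lookup(mass_string):
    if mass_string in xsec_db:
        return xsec_db[mass_string]
    lower = int(float(mass_string)) // 5 * 5
    return (xsec_db[str(lower)] + xsec_db[str(lower + 5)]) / 2.0

print(lookup("702.5"))  # -> 3.4, the midpoint of the two neighbouring grid points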
Example #6
        return json.load(f)


panda_queues = getJSON("data/scraped_cric_pandaqueue.json")
panda_resources = getJSON("data/map_PQ_names.json")
site_resources = getJSON("data/scraped_cric_sites.json")
ddm_resources = getJSON("data/scraped_cric_ddm.json")
pledges_resources = getJSON("data/scraped_rebus_pledges.json")
federations_resources = getJSON("data/scraped_rebus_federations.json")
benchmarks_resources = getJSON("data/scraped_elasticsearch_benchmark.json")

# get the actual job numbers from panda
err, siteResourceStats = Client.get_job_statistics_per_site_label_resource(10)

if err:
    logger.error("Panda error: " + str(err))
    msg = "Panda server returned and error.\n\nError:\n" + str(e)
    subj = "[QMonit error] PandaServer"
    notifications.send_email(
        message=msg,
        subject=subj,
        **{"password": config.get("credentials_adcmon", "password")}
    )


# InfluxDB client instance for uploading data later on
db_name = "monit_jobs" if not args.testDB else "test_monit_jobs"

try:
    client = InfluxDBClient(
        "dbod-eschanet.cern.ch", 8080, username, password, db_name, True, False
Example #7
def run():

    # Each time the scrapers are run, we update the PQ map
    pqs = pq_map.PQ_names_map(file="data/map_PQ_names.json")
    if not pqs.update(
        ifile="data/scraped_cric_pandaqueue.json",
        ofile="data/map_PQ_names.json",
        key="panda_resource",
    ):
        logger.warning("PQ map is not available")

    if argparse.interval == "10m":
        # Now run all the scrapers that should run in 10min intervals
        # First the PQ CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/pandaqueue/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="panda_resource")
        if cric.save(file="data/scraped_cric_pandaqueue.json", data=json_data):
            logger.info("Scraped PQ CRIC")
        else:
            logger.error("Problem scraping PQ CRIC")

    elif argparse.interval == "1h":
        # Run all the scrapers that only need to be run once per hour (because they don't change too often)

        # Next the ATLAS sites CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/site/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="name")
        if cric.save(file="data/scraped_cric_sites.json", data=json_data):
            logger.info("Scraped sites CRIC")
        else:
            logger.error("Problem scraping sites CRIC")

        # Now the DDM info from CRIC
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/ddmendpoint/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="site")
        if cric.save(file="data/scraped_cric_ddm.json", data=json_data):
            logger.info("Scraped DDM CRIC")
        else:
            logger.error("Problem scraping DDM CRIC")

        # Next up is REBUS, start with the actual federation map
        rebus = REBUS()
        raw_data = rebus.download(
            url="https://wlcg-cric.cern.ch/api/core/federation/query/?json"
        )
        json_data = rebus.convert(data=raw_data, sort_field="rcsites")
        if rebus.save(file="data/scraped_rebus_federations.json", data=json_data):
            logger.info("Scraped federations CRIC")
        else:
            logger.error("Problem scraping federations CRIC")

        # then the pledges
        # can actually use same JSON raw data as before
        json_data = rebus.convert(
            data=raw_data, sort_field="accounting_name", append_mode=True
        )
        if rebus.save(file="data/scraped_rebus_pledges.json", data=json_data):
            logger.info("Scraped pledges CRIC")
        else:
            logger.error("Problem scraping pledges CRIC")

        # we also get datadisk information from monit Grafana
        url = config.get("credentials_monit_grafana", "url")
        token = config.get("credentials_monit_grafana", "token")

        now = int(round(time.time() * 1000))
        date_to = now - 12 * 60 * 60 * 1000
        date_from = date_to - 24 * 60 * 60 * 1000

        period = """"gte":{0},"lte":{1}""".format(date_from, date_to)

        data = (
            """{"search_type":"query_then_fetch","ignore_unavailable":true,"index":["monit_prod_rucioacc_enr_site*"]}\n{"size":0,"query":{"bool":{"filter":[{"range":{"metadata.timestamp":{"""
            + period
            + ""","format":"epoch_millis"}}},{"query_string":{"analyze_wildcard":true,"query":"data.account:* AND data.campaign:* AND data.country:* AND data.cloud:* AND data.datatype:* AND data.datatype_grouped:* AND data.prod_step:* AND data.provenance:* AND data.rse:* AND data.scope:* AND data.experiment_site:* AND data.stream_name:* AND data.tier:* AND data.token:(\\\"ATLASDATADISK\\\" OR \\\"ATLASSCRATCHDISK\\\") AND data.tombstone:(\\\"primary\\\" OR \\\"secondary\\\") AND NOT(data.tombstone:UNKNOWN) AND data.rse:/.*().*/ AND NOT data.rse:/.*(none).*/"}}]}},"aggs":{"4":{"terms":{"field":"data.rse","size":500,"order":{"_term":"desc"},"min_doc_count":1},"aggs":{"1":{"sum":{"field":"data.files"}},"3":{"sum":{"field":"data.bytes"}}}}}}\n"""
        )

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": "Bearer %s" % token,
        }

        grafana = Grafana(url=url, request=data, headers=headers)
        raw_data = grafana.download()
        pprint.pprint(raw_data)
        json_data = grafana.convert(data=raw_data.json())
        if grafana.save(file="data/scraped_grafana_datadisk.json", data=json_data):
            logger.info("Scraped datadisks from monit grafana")
        else:
            logger.error("Problem scraping datadisks from monit grafana")

        # TODO: not running ES scraper for now since the benchmark jobs are no longer being run
        # #get credentials
        # password = config.get("credentials_elasticsearch", "password")
        # username = config.get("credentials_elasticsearch", "username")
        # host = config.get("credentials_elasticsearch", "host")
        # arg = ([{'host': host, 'port': 9200}])
        # elasticsearch = ElasticSearch(arg,**{'http_auth':(username, password)})
        # kwargs = {
        #     'index' : "benchmarks-*",
        #     'body' : {
        #         "size" : 10000,"query" : {"match_all" : {},},
        #         "collapse": {"field": "metadata.PanDAQueue","inner_hits": {"name": "most_recent","size": 50,"sort": [{"timestamp": "desc"}]}
        #         }
        #     },
        #     'filter_path' : [""]
        # }
        # raw_data = elasticsearch.download(**kwargs)
        # json_data = elasticsearch.convert(data=raw_data)
        #
        # if elasticsearch.save(file='data/scraped_elasticsearch_benchmark.json', data=json_data):
        #     logger.info('Scraped benchmark results from ES')
        # else:
        #     logger.error('Problem scraping benchmark results from ES')

    else:
        # Nothing to do otherwise
        print("Dropping out")
Example #8
        #         "collapse": {"field": "metadata.PanDAQueue","inner_hits": {"name": "most_recent","size": 50,"sort": [{"timestamp": "desc"}]}
        #         }
        #     },
        #     'filter_path' : [""]
        # }
        # raw_data = elasticsearch.download(**kwargs)
        # json_data = elasticsearch.convert(data=raw_data)
        #
        # if elasticsearch.save(file='data/scraped_elasticsearch_benchmark.json', data=json_data):
        #     logger.info('Scraped benchmark results from ES')
        # else:
        #     logger.error('Problem scraping benchmark results from ES')

    else:
        # Nothing to do otherwise
        print("Dropping out")


if __name__ == "__main__":
    try:
        run()
    except Exception as e:
        logger.error("Got error while running scrapers. " + str(e))
        msg = "QMonit failed to run a scraper job.\n\nError:\n" + str(e)
        subj = "[QMonit error] InfluxDB"
        notifications.send_email(
            message=msg,
            subject=subj,
            **{"password": config.get("credentials_adcmon", "password")}
        )
Example #9
def main():

    for f in args.files:
        logger.info('Got file: {}'.format(os.path.basename(f.name)))
        if not os.path.basename(f.name).endswith(".tex"):
            logger.error(
                'This is not a tex file. Do not try to fool me again! Skipping...'
            )
            continue

        #check if we can get a background matched!
        if args.background.lower() in os.path.basename(f.name).lower():
            logger.info('Found process: {}'.format(args.background))
        else:
            logger.error('No process found! Dropping out.')
            sys.exit()

        #now check if we can get the systematic variation name matched
        sys_matches = [
            s for s in args.systematics
            if s.lower() in os.path.basename(f.name).lower()
        ]
        if len(sys_matches) > 1:
            logger.warning(
                'Found more than one systematic variation matching filename: {}'
                .format(sys_matches))
            logger.warning('Will only take first one.')
        elif len(sys_matches) == 1:
            logger.info('Found systematic variation: {}'.format(
                sys_matches[0]))
        elif len(sys_matches) == 0:
            logger.error('No systematic variation found! Dropping out.')
            sys.exit()
        systematic = sys_matches[0]

        ##let's check if we are using an up or a down variation (or symmetric...)
        is_up = False
        is_down = False
        if "up" in os.path.basename(f.name).lower():
            is_up = True
            logger.info('This should be an UP variation.')
        elif "down" in os.path.basename(f.name).lower():
            is_down = True
            logger.info('This should be a DOWN variation.')
        else:
            logger.warning(
                'Probably neither up nor down, but a symmetrised table. Sure?')
        ##now comes the ugly parsing part
        ##can we do this at least not too ugly?

        lines = []
        #first, get the relevant part from the tex file. If the user has made it easy and tagged the respective parts with %tex2hf, we can simply use what's between it
        keywords = False
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as s:
            if s.find(b'tex2hf') != -1:
                logger.info(
                    'Found keywords in file, so now we can just use what is between them'
                )
                keywords = True
        if keywords:
            copy = False
            for line in f:
                if "tex2hf" in line.strip():
                    copy = not copy
                    continue
                elif copy:
                    lines.append(line.strip())
        else:
            #otherwise just drop out, I don't want to think about this any further ...
            logger.error(
                'You need to provide keywords. I am too lazy to think about something else. Put "tex2hf" before the first and after the last line (as a comment of course, you do not want this to show up in the table, do you?).'
            )
            sys.exit()

        for line in lines:
            #get rid of any symbols we don't need
            line = line.strip().replace("$", "").replace("\\", "")
            #latex columns, get the region first. Need to strip all whitespace
            region = "".join(line.split("&")[0].split())
            region = getRegionFromExpression(region)
            if region == 0:
                continue
            #then the uncertainty, usually in the last column

            # print("{} : {}".format(region, line.split("&")[-1]))
            uncertainty = round(
                float(
                    line.split("&")[-1].replace("pm", "").replace(
                        "%", "").strip()) / 100, 4)
            # print(uncertainty)
            if is_up:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["up"].append(uncertainty)
            elif is_down:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["down"].append(uncertainty)
            else:
                up_unc = abs(uncertainty)
                down_unc = -up_unc
                if abs(uncertainty) > 1.0:
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                    down_unc = -1
                values[systematic][region]["up"].append(up_unc)
                values[systematic][region]["down"].append(down_unc)
Example #10
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        if args.background == 'zjets':
            #return without e.g. '_bin0' at the end
            return re.sub(r'_bin\d*', '', expr)
        else:
            return expr
    if expr in args.regions:
        return expr
    logger.error('Region not found: {}'.format(expr))
    return 0


if not (args.analysis or (args.background and args.regions)):
    logger.error('No analysis nor processes/regions given! Dropping out.')
    sys.exit()
elif not args.analysis and (args.background and args.regions):
    logger.info(
        "Did not provide analysis, but provided background and regions, so let's guess."
    )
if args.analysis:
    logger.info('Considering analysis: %s' % args.analysis)
    if args.analysis == '1Lbb':
        args.regions = [
            'SRLMincl', 'SRMMincl', 'SRHMincl', 'SRLM', 'SRMM', 'SRHM', 'WR',
            'STCR', 'TRLM', 'TRMM', 'TRHM', 'VRtt1on', 'VRtt2on', 'VRtt3on',
            'VRtt1off', 'VRtt2off', 'VRtt3off'
        ]
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        regions = [
Example #11
    '/cvmfs/atlas.cern.ch/repo/sw/database/GroupData/dev/PMGTools/PMGxsecDB_mc16.txt',
    default=None)
parser.add_argument('--xsec',
                    '-x',
                    help='Actual xsection (in case no xsec file) in pb',
                    default=None)
parser.add_argument('--fb',
                    help='Normalise to 1/fb instead of 1/pb',
                    action='store_true')
parser.add_argument('--applyGenWeight',
                    help='Apply generator weight to the normalisation branch',
                    action='store_true')
args = parser.parse_args()

if not os.path.isfile(args.inputfile):
    logger.error("Provided ROOT file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

if args.xsec_file:
    if not os.path.isfile(args.xsec_file):
        logger.error("Provided xsec file does not exist or is not a file!")
        raise ValueError("Sorry, need to exit here.")
    else:
        if not args.dsid:
            logger.warning(
                "Provided an xsec file, but not a DSID, will try to guess ...")
        else:
            logger.info("Provided xsec file and DSID, thanks mate!")


def get_xsec(xsec_file, my_dsid=None):
Example #12
parser.add_argument('--branch-name',
                    '-n',
                    help='the branch name to be smeared',
                    default='mbb')
parser.add_argument('--tag', '-t', help='tag after name', default='smeared')
parser.add_argument('--loc', '-l', help='loc of the skewnorm', type=float, default=1.0)
parser.add_argument('--scale', '-a', help='scale of the skewnorm', type=float, default=0.5)
parser.add_argument('--skew', '-k', help='skew of the skewnorm', type=float, default=0)
parser.add_argument('--mu', '-m', help='mu of the gaussian', type=float, default=1.0)
parser.add_argument('--sigma', '-s', help='sigma of the gaussian', type=float, default=0.5)
parser.add_argument('--method', '-e', help='norm or skewnorm', default='norm')
parser.add_argument('--seed', help='seed for random numbers', type=int, default=1234)
args = parser.parse_args()

if args.method not in ['norm', 'skewnorm']:
    logger.error("Provided smearing method not implemented!")
    raise ValueError("Sorry, need to exit here.")

# if not os.path.isdir(args.inputdir):
#     logger.error("Provided path does not exist or is not a directory!")
#     raise ValueError("Sorry, need to exit here.")

np.random.seed(args.seed)

for indx, f in enumerate(args.inputfiles):

    if not f.endswith("update.root"):
        continue

    logger.info("Updating " + f)
    treename = f.replace("_update.root", "_NoSys")
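A hedged sketch of the two random-number modes selected by --method above, using the documented defaults; how the draws are applied to the smeared branch is not part of this snippet:

import numpy as np
from scipy.stats import skewnorm

np.random.seed(1234)
gauss_draws = np.random.normal(loc=1.0, scale=0.5, size=5)    # --method norm
skewed_draws = skewnorm.rvs(a=0, loc=1.0, scale=0.5, size=5)  # --method skewnorm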
Example #13
    '-s',
    '--signal',
    action="store_true",
    help="Use signal tree naming convention (default is background trees)")

args = parser.parse_args()

cmd = "rootls {}".format(args.file)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()

trees = output.decode().split()  # Popen returns bytes, decode before string handling

if args.signal:
    naming = 5
else:
    naming = 1
raw_names = [
    "_".join(fullname.split("_")[:naming]) + "_" for fullname in trees
]

unique_names = list(set(raw_names))

split_trees = [[t for t in trees if b in t] for b in unique_names]

length = len(split_trees[0])
for (l, bkg) in zip(split_trees, unique_names):
    logger.info("For process {}: {} trees".format(bkg, len(l)))
    if len(l) != length:
        logger.error("Not the right length!")
Example #14
    if processname not in processes:
        processes.append(processname)

    mc16a = 0
    mc16d = 0
    mc16e = 0

    mc16a = tree.GetEntries(
        "(RandomRunNumber >= 276262 && RandomRunNumber <= 320000)")
    mc16d = tree.GetEntries(
        "(RandomRunNumber >= 324320 && RandomRunNumber <= 337833)")
    mc16e = tree.GetEntries("(RandomRunNumber >= 348885)")

    if not mc16a > 0:
        logger.error("No MC16A in {}".format(tree))
    if not mc16d > 0:
        logger.error("No MC16D in {}".format(tree))
    if not mc16e > 0:
        logger.error("No MC16E in {}".format(tree))

logger.info("Checking for same amount of trees per process now.")

for process in processes:
    i = 0
    for treename in trees_list:
        if process in treename:
            i += 1
    logger.info("{} has {} trees".format(process, i))

tf.Close()
Example #15
import logging
from commonHelpers.logger import logger
logger = logger.getChild("mephisto")

parser = argparse.ArgumentParser(
    description=
    'This script helps in creating a proper normalization for trees that have been processed through e.g. SimpleAnalysis and lack the "traditional" genWeight branch.',
    epilog="You beautiful person, you.")
parser.add_argument('inputdir',
                    help='The directory containing all the ROOT trees')
parser.add_argument('xsecsfile', help='Text file containing the xsecs')
args = parser.parse_args()

if not os.path.isdir(args.inputdir):
    logger.error("Provided path does not exist or is not a directory!")
    raise ValueError("Sorry, need to exit here.")
if not os.path.isfile(args.xsecsfile):
    logger.error("Provided file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

with open(args.xsecsfile, 'r') as document:
    dict = {}  # note: shadows the builtin dict; maps mass string -> xsec
    for line in document:
        line = line.split()
        if not line:  # empty line?
            continue
        dict[line[0]] = float(line[1])

        print('{} : {}'.format(int(line[0]), float(line[1])))
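A hedged sketch of the two-column text file the loop above expects (first token becomes the key, second is parsed as a float; the values are invented and match the lookup done in getxsec earlier):

# xsecs.txt, hypothetical content:
#   700   3.5
#   705   3.3
line = "700   3.5".split()
entry = {line[0]: float(line[1])}   # -> {"700": 3.5}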
Example #16
        print(test)
    sys.exit(0)

suite = unittest.TestSuite()

# based on code snippet from http://stackoverflow.com/questions/1732438/how-do-i-run-all-python-unit-tests-in-a-directory#15630454
for postfix in tests:
    t = "test.test_" + postfix
    if "." in postfix:
        # i don't have a better solution yet, so hack for now
        importTest = ".".join(t.split(".")[:-2])
    else:
        importTest = t
    try:
        logger.info("Trying to import {}".format(importTest))
        mod = __import__(importTest, globals(), locals(), ['suite'])
    except ImportError:
        logger.error("Test {} not found - try {}".format(t, testmodules))
        raise
    try:
        # If the module defines a suite() function, call it to get the suite.
        suitefn = getattr(mod, 'suite')
        suite.addTest(suitefn())
    except (ImportError, AttributeError):
        # else, just load all the test cases from the module.
        logger.info("Loading test {}".format(t))
        suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))

result = unittest.TextTestRunner(verbosity=verbosity).run(suite)
sys.exit(not result.wasSuccessful())