Ejemplo n.º 1
0
def get_data(q):
   api = RhApi(DEFAULT_URL, debug = False)

   p = {"class": "Collisions18"}
   qid = api.qid(q)

   return api.csv(q, p)
Ejemplo n.º 2
0
def retrieve(max_run=320008,
             min_run=316766,
             folder="runreg_csc",
             table="datasets",
             ref_runs=[]):

    api = RhApi(DEFAULT_URL, debug=False)

    # Get column names and name of run column
    col_table = api.table(folder=folder, table=table)["columns"]
    cols = []
    r_num = ""
    r_num_i = 0
    for col in col_table:
        col_name = str(col["name"])
        if _get_data_col(col_name, table): cols.append(col_name)
        if not r_num and _get_run_col(col_name):
            cols.append(col_name)
            r_num = col_name
            r_num_i = cols.index(col_name)

    if not r_num:
        r_num = cols[0]

    # Form query
    c = ",".join("r." + x for x in cols)
    q = "select {0} from {1}.{2} r where r.{3}>=:minrun and r.{3}<=:maxrun order by r.{3}".format(
        c, folder, table, r_num)
    if ref_runs:
        p = {"maxrun": max(ref_runs), "minrun": min(ref_runs)}
    else:
        p = {"maxrun": str(max_run), "minrun": str(min_run)}
    qid = api.qid(q)

    # Allow for fetch() to update a pre-existing dict
    data = {}
    if type(ref_runs) == dict:
        dqm = ref_runs
        ref_runs = ref_runs.keys()
    skipped = 0
    it = 0
    while True:
        runs = []
        raw_data = api.json(q, p)["data"]

        for i in range(0, len(raw_data)):
            run = str(raw_data[i][r_num_i])
            # Only fetch runs relevant to AutoDQM
            if run not in ref_runs: continue
            # Get source of data (Global, Express, or PromptReco)
            if "RDA_NAME" in cols:
                rda_name = raw_data[i][cols.index("RDA_NAME")].lower()
            runs.append(run)
            # Only make a new entry for new runs
            if run not in data:
                data[run] = {}
            for j in range(0, len(raw_data[i])):
                if j == r_num_i: continue
                # Handle <folder>.datasets
                if table != "runs":
                    if "is_good" not in data[run]: pass
                    elif not data[run]["is_good"]: continue
                    else:
                        # The "BAD" tag gets priority
                        if raw_data[i][j] == "BAD":
                            data[run]["is_good"] = False
                        # Skip "NONSET" tags if marked as "GOOD" elsewhere
                        continue
                    if "RDA_NAME" in data[run]:
                        # Update source
                        data[run]["RDA_NAME"] = rda_name
                    # Update status
                    if raw_data[i][j] == "GOOD": data[run]["is_good"] = True
                    elif raw_data[i][j] == "BAD": data[run]["is_good"] = False
                    else: continue
                # Handle <folder>.runs
                else:
                    data[run][cols[j]] = raw_data[i][j]

            if table != "runs":
                # handle cases where all statuses are NOTSET
                if "is_good" not in data[run]: data[run]["is_good"] = False

        if len(raw_data) < 1 or max(
                runs) == p["minrun"] or p["minrun"] >= p["maxrun"]:
            break
        p["minrun"] = max(runs)
        it += 1

    if table == "runs" and ref_runs:
        refs = {"ref_data": [], "ref_cands": []}
        for run in data:
            if run == max(ref_runs): continue
            refs["ref_data"].append(
                dict(ref.get_wbm_data(max(ref_runs), run, data), **dqm[run]))
        refs["ref_cands"] = ref.get_ref_cands(refs["ref_data"])
        return refs
    elif data:
        return data
    else:
        return None