Esempio n. 1
0
    def __init__(self, info_path):
        """
        Plate object returned by Meadiator.

        Reads the plate metadata through ``parse_meta`` and derives the
        element list, deposition method and anneal details from the free-text
        ``description`` entry of that metadata.

        :param info_path: Path handed to ``parse_meta``; kept as ``self.path``.
        :return:
        """
        # defaultdict(str) makes every missing metadata key read as "".
        tmpd = defaultdict(str, parse_meta(info_path))
        self.path = info_path
        self.plate_id = tmpd["plate_id"]
        self.date = tmpd["created_at"]
        self.lineage = tmpd["lineage"]
        desc = tmpd["description"]

        # The element formula is the first whitespace-delimited token left
        # once the fixed "Material library with " prefix is removed.  An empty
        # or whitespace-only description simply yields no formula.
        formula_tokens = desc.replace("Material library with ", "").split()
        formula = formula_tokens[0] if formula_tokens else ""
        # Insert a comma at every letter -> capital boundary.  The match is
        # zero-width so adjacent one-letter symbols are split as well
        # ("VOCu" -> "V,O,Cu"); a consuming pattern would skip every other
        # boundary in that case.
        formula = re.sub(r"(?<=[A-Za-z])(?=[A-Z])", ",", formula)
        # De-duplicate while keeping first-seen order; drop empty fragments so
        # a plate without a description has no elements instead of [""].
        self.elements = list(
            dict.fromkeys(el for el in formula.split(",") if el))

        # Only report a deposition method when the description names one;
        # otherwise the first word of the description would be picked up.
        if "deposited by " in desc:
            method_tokens = desc.split("deposited by ")[-1].split()
            self.deposition_method = method_tokens[0] if method_tokens else ""
        else:
            self.deposition_method = ""
        self.substrate = tmpd["substrate"]
        self.map = tmpd["screening_map_id"]

        if "annealed at" in desc:
            # First token after the phrase with any "C" removed, e.g. "400C".
            self.anneal_temp = float(
                desc.split("annealed at ")[-1].split()[0].replace("C",
                                                                  "").strip())
        else:
            self.anneal_temp = 0
        if "to add" in desc:
            self.anneal_type = desc.split("to add ")[-1].split()[0]
        else:
            self.anneal_type = ""
        if " on " in desc:
            self.anneal_date = desc.split(" on ")[-1].strip()
        else:
            self.anneal_date = ""

        # Filled in later by the loader with the objects that belong to this
        # plate.
        self.run_dict = {}
        self.exp_dict = {}
        self.ana_dict = {}

        # Each of these metadata blocks, when present as a dict, contributes
        # the "path" of every entry; anything else leaves the list empty.
        for block_name in ("runs", "experiments", "analyses"):
            block = tmpd[block_name] if block_name in tmpd else None
            if isinstance(block, dict):
                paths = [entry["path"] for entry in block.values()]
            else:
                paths = []
            setattr(self, block_name, paths)
        del tmpd
Esempio n. 2
0
    def __init__(self, run_path):
        """
        Run object returned by Meadiator.

        Parses the metadata at ``run_path`` with ``parse_meta`` and sorts its
        top-level keys into file listings, technique parameters and remaining
        root parameters.

        :param run_path: Path handed to ``parse_meta``; kept as ``self.path``
            and recorded as ``source_zip`` on every file entry.
        :return:
        """
        # Missing keys read as "" because of the defaultdict(str) wrapper.
        meta = defaultdict(str, parse_meta(run_path))
        self.path = run_path
        self.type = meta["experiment_type"]
        self.plate_id = meta["plate_id"]

        # Placeholders; nothing in this constructor fills them in.
        self.date = ""
        self.machine = ""
        self.elements = []
        self.anneal_temp = 0
        self.anneal_type = ""
        self.file_count = 0

        # --- file listings, keyed by technique then by file group ---------
        file_keys = [name for name in meta.keys()
                     if "files_technique__" in name]
        self.files = {}
        for file_key in file_keys:
            technique_files = {}
            self.files[file_key.split("__")[-1]] = technique_files
            for group_name, group_meta in meta[file_key].items():
                if not isinstance(group_meta, dict):
                    # Non-dict groups are ignored.
                    continue
                entries = make_file_dict(group_meta)
                for entry in entries.values():
                    entry.update(source_zip=self.path)
                technique_files[group_name] = entries
        self.techs = list(self.files.keys())

        # --- technique parameters ------------------------------------------
        param_keys = [name for name in meta.keys()
                      if "params__" in name or name == "parameters"]
        self.tech_params = {}
        seen_param_names = []
        for param_key in param_keys:
            group_params = meta[param_key]
            self.tech_params[param_key.split("__")[-1]] = group_params
            seen_param_names.extend(group_params.keys())
        self.tech_param_groups = list(self.tech_params.keys())
        self.tech_param_keys = list(set(seen_param_names))

        # --- everything not classified above -------------------------------
        classified = {"plate_id", "experiment_type", *file_keys, *param_keys}
        leftover_keys = [name for name in meta.keys()
                         if name not in classified]
        self.root_params = {name: meta[name] for name in leftover_keys}
        self.root_keys = leftover_keys
        del meta
Esempio n. 3
0
    def get_info(self, plate_id, return_dict=False):
        """
        Locate the metadata file of a plate, optionally parsing it.

        Looks in ``<plate_dir>/<plate_id>/`` for ``<plate_id>.zip`` first and
        ``<plate_id>.info`` second; the first one that exists wins.

        :param plate_id: Plate identifier used for the folder and file names.
        :param return_dict: When true, return ``parse_meta`` applied to the
            located path instead of the path itself.
        :return: The located path as a string ("" when neither file exists),
            or the result of ``parse_meta`` on it when ``return_dict`` is set.
        """
        zip_candidate = pjoin(self.plate_dir, str(plate_id),
                              f"{plate_id}.zip")
        info_candidate = pjoin(dirname(zip_candidate), f"{plate_id}.info")
        # Lazily probe the candidates in priority order; "" means not found.
        located = next(
            (path for path in (zip_candidate, info_candidate) if exists(path)),
            "",
        )
        if return_dict:
            # NOTE(review): parse_meta also receives "" when nothing was found.
            return parse_meta(located)
        return located
Esempio n. 4
0
    def __init__(self, exp_path):
        """
        Experiment object returned by Meadiator.

        Parses the metadata at ``exp_path`` with ``parse_meta`` and merges the
        file listings of all of its ``run__*`` blocks, tagging each file entry
        with the run archive it comes from.

        :param exp_path: Path handed to ``parse_meta``; kept as ``self.path``.
        :return:
        """
        # Missing keys read as "" because of the defaultdict(str) wrapper.
        meta = defaultdict(str, parse_meta(exp_path))
        # Portion of the path that precedes the first "experiment" substring;
        # run archives are addressed relative to it below.
        release_root = exp_path.strip("\\/").split("experiment")[0]

        self.path = exp_path
        self.date = ""
        self.type = basename(dirname(self.path))

        plate_ids = meta["plate_ids"]
        if isinstance(plate_ids, str):
            # Comma-separated string -> list of ints.
            plate_ids = [int(token.strip()) for token in plate_ids.split(",")]
        self.plate_id = plate_ids

        # Placeholders; nothing in this constructor fills them in.
        self.elements = []
        self.anneal_temp = 0
        self.anneal_type = ""

        run_keys = [name for name in meta.keys() if name.startswith("run__")]
        self.run_count = len(run_keys)
        self.run_paths = [meta[name]["run_path"] for name in run_keys]

        self.files = {}
        for run_key in run_keys:
            run_meta = meta[run_key]
            technique_keys = [
                name for name in run_meta.keys() if "files_technique__" in name
            ]
            for technique_key in technique_keys:
                technique_files = self.files.setdefault(
                    technique_key.split("__")[-1], {})
                for group_name, group_meta in run_meta[technique_key].items():
                    # The group slot is created even for non-dict groups.
                    group_files = technique_files.setdefault(group_name, {})
                    if not isinstance(group_meta, dict):
                        continue
                    entries = make_file_dict(group_meta)
                    for entry in entries.values():
                        entry.update(source_zip=pjoin(
                            release_root, "run",
                            *run_meta["run_path"].split("/")))
                    group_files.update(entries)
        self.techs = list(self.files.keys())
        del meta
        # Not populated here.
        self.runs = []
Esempio n. 5
0
    def load_objects(self, overwrite_cache=False):
        """
        Load MEAD objects from file paths.

        When a cache file named ``<basename(base_dir)>_objects.bz2.pck`` exists
        in the current working directory (and ``overwrite_cache`` is false) the
        object dictionary is unpickled from it.  Otherwise plates, then every
        object kind listed in ``self.object_tups``, are built from
        ``self.files``, plate metadata is copied onto the loaded objects,
        ``self.gen_links()`` is called and the result is written to the cache.

        :param overwrite_cache: Rebuild and rewrite the cache even if the cache
            file already exists.
        :return:
        """
        objects_pck = f"{basename(self.base_dir)}_objects.bz2.pck"
        if exists(objects_pck) and not overwrite_cache:
            # NOTE(review): unpickling can execute arbitrary code; only load
            # cache files that this program wrote itself.
            with bz2.BZ2File(objects_pck, "r") as cache_file:
                self.meadia = pickle.load(cache_file)
            print(
                f"found existing objects dictionary in {pjoin(getcwd(), objects_pck)}"
            )
            return

        self.log_entry("loading plate objects")
        self.meadia["plate"] = {}
        for plate_path in self.files["plate"]:
            # Plate key is the integer in front of the first "." / "-" of the
            # file name.
            plate_key = int(basename(plate_path).split(".")[0].split("-")[0])
            self.meadia["plate"][plate_key] = Plate(plate_path)

        self._load_release_objects()
        num_errors = len(self.meadia["load_errors"])
        self.log_entry(
            f"{num_errors} files were not loaded due to read errors")

        in_info_no_release = self._propagate_plate_meta()
        if in_info_no_release > 0:
            self.log_entry(
                f"{in_info_no_release} runs/exps/anas are present in plate info files but were not included in the release"
            )

        self.gen_links()
        # Close the archive explicitly so the compressed stream is flushed
        # before its location is logged.
        with bz2.BZ2File(objects_pck, "w") as cache_file:
            pickle.dump(self.meadia, cache_file)
        self.log_entry(
            f"wrote object dictionary to {pjoin(getcwd(), objects_pck)}")

    @staticmethod
    def _coerce_plate_ids(pid):
        """Return the plate ids carried by an object's ``plate_id`` as a list."""
        if isinstance(pid, str):
            # Handles both "123" and "123, 456"; blank fragments are ignored.
            fragments = (token.strip() for token in pid.split(","))
            return [int(token) for token in fragments if token]
        if isinstance(pid, list):
            return [int(x) for x in pid]
        return [pid]

    def _load_release_objects(self):
        """Build every object in ``self.object_tups`` and attach it to its plates."""
        # Only these kinds have a matching ``<kind>_dict`` on a plate.
        plate_linked_kinds = ("run", "exp", "ana")
        for key, key_dir in self.object_tups:
            self.log_entry(f"loading {key} objects")
            for file_path in self.files[key]:
                # Object type is the first path component below key_dir.
                obj_type = file_path.replace(key_dir,
                                             "").strip("\\/").split(sep)[0]
                typed_objects = self.meadia[key].setdefault(obj_type, {})
                obj_key = basename(file_path)
                try:
                    meadia_obj = self.object_dict[key](file_path)
                    typed_objects[obj_key] = meadia_obj
                    pids = self._coerce_plate_ids(meadia_obj.plate_id)
                    if not pids:
                        self.meadia["load_errors"].append(
                            (file_path, "no plate_id found"))
                    for p in pids:
                        if p not in self.meadia["plate"].keys():
                            self.meadia["load_errors"].append(
                                (file_path, f"plate {p} not in release"))
                            continue
                        if key in plate_linked_kinds:
                            plate = self.meadia["plate"][p]
                            per_type = getattr(plate, f"{key}_dict")
                            per_type.setdefault(obj_type,
                                                {})[obj_key] = meadia_obj
                except Exception as e:
                    # Best effort: a file that cannot be read is recorded and
                    # loading continues with the next one.
                    self.meadia["load_errors"].append((file_path, str(e)))

    def _propagate_plate_meta(self):
        """Copy plate metadata onto loaded objects; return the count of entries missing from the release."""
        in_info_no_release = 0
        for plate_path in self.files["plate"]:  # propagate meta data
            plate_meta = parse_meta(plate_path)
            if "plate_id" not in plate_meta.keys():
                self.log_entry(
                    f"{plate_path} does not contain a 'plate_id' key")
                continue
            plate_id = plate_meta["plate_id"]
            plate = self.meadia["plate"][plate_id]
            for block in ["runs", "experiments", "analyses"]:
                blk = block[:3]  # "run" / "exp" / "ana"
                if block not in plate_meta.keys():
                    continue
                if not isinstance(plate_meta[block], dict):
                    continue
                for blkd in plate_meta[block].values():
                    path_parts = blkd["path"].split("/")
                    otype = path_parts[1]
                    okey = path_parts[-1]
                    if otype not in self.meadia[blk].keys():
                        print(
                            f"{otype} not found in {blk} info for plate {plate_id}"
                        )
                        continue
                    if okey not in self.meadia[blk][otype].keys():
                        in_info_no_release += 1
                        # NOTE(review): this entry is a plain string while the
                        # loader records (file_path, message) tuples.
                        self.meadia["load_errors"].append(
                            f"{otype} {blk} {okey} in plate {plate_id} info but not in release"
                        )
                        continue
                    # update date, elements, anneal_temp, anneal_type
                    target = self.meadia[blk][otype][okey]
                    target.elements = plate.elements
                    target.anneal_temp = plate.anneal_temp
                    target.anneal_type = plate.anneal_type
                    target.date = blkd["created_at"]
                    if blk == "run":
                        # update machine, file_count
                        if "machine" in blkd.keys():
                            target.machine = blkd["machine"]
                        if "description" in blkd.keys():
                            # Text between "containing " and " files"; it is
                            # stored as found, i.e. as a string.
                            target.file_count = (
                                blkd["description"].split("containing ")
                                [1].split(" files")[0])
                    elif blk == "exp":
                        # update runs
                        if "run_paths" in blkd.keys():
                            target.runs = [
                                self.meadia["run"][otype][basename(p)]
                                for p in blkd["run_paths"]
                            ]
                    elif blk == "ana":
                        # update experiments
                        if "experiment_path" in blkd.keys():
                            target.experiment = self.meadia["exp"][otype][
                                basename(blkd["experiment_path"])]
        return in_info_no_release
Esempio n. 6
0
    def __init__(self, ana_path):
        """
        Analysis object returned by Meadiator.

        Parses the metadata at ``ana_path`` with ``parse_meta``, summarises
        each ``ana__*`` block and groups the files they list by technique.

        :param ana_path: Path handed to ``parse_meta``; kept as ``self.path``
            and recorded as ``source_zip`` on every file entry.
        :return:
        """
        # Missing keys read as "" because of the defaultdict(str) wrapper.
        meta = defaultdict(str, parse_meta(ana_path))
        self.path = ana_path
        self.date = ""
        self.type = basename(dirname(self.path))

        plate_ids = meta["plate_ids"]
        if isinstance(plate_ids, str):
            # Comma-separated string -> list of ints.
            plate_ids = [int(token.strip()) for token in plate_ids.split(",")]
        self.plate_id = plate_ids

        # Placeholders; nothing in this constructor fills them in.
        self.elements = []
        self.anneal_temp = 0
        self.anneal_type = ""

        self.experiment_path = meta["experiment_path"]
        self.analyses = {}
        self.analysis_names = []
        self.files = {}

        ana_keys = [name for name in meta.keys() if name.startswith("ana__")]
        self.ana_count = len(ana_keys)
        for ana_key in ana_keys:
            ana_meta = meta[ana_key]
            self.analysis_names.append(ana_meta["name"])
            summary = {"name": ana_meta["name"]}
            self.analyses[ana_key] = summary
            if "description" in ana_meta.keys():
                summary["description"] = ana_meta["description"]
            if "analysis_fcn_version" in ana_meta.keys():
                summary["version"] = ana_meta["analysis_fcn_version"]
            summary["parameters"] = {}
            if "parameters" in ana_meta.keys():
                summary["parameters"].update(ana_meta["parameters"])

            file_keys = [name for name in ana_meta.keys() if "files_" in name]

            # Resolve the technique this block's files are filed under.
            file_tech = "no_technique"
            if "technique" in ana_meta.keys():
                file_tech = ana_meta["technique"]
            elif ("analysis_general_type" in ana_meta.keys()
                  and ana_meta["analysis_general_type"] == "process_fom"):
                # Follow the "select_ana" references until a block that
                # names a technique is reached.
                upstream = meta[ana_meta["parameters"]["select_ana"]]
                while "technique" not in upstream.keys():
                    upstream = meta[upstream["parameters"]["select_ana"]]
                file_tech = upstream["technique"]
            summary["technique"] = file_tech

            for file_key in file_keys:
                technique_files = self.files.setdefault(file_tech, {})
                for group_name, group_meta in ana_meta[file_key].items():
                    # The group slot is created even for non-dict groups.
                    group_files = technique_files.setdefault(group_name, {})
                    if not isinstance(group_meta, dict):
                        continue
                    entries = make_file_dict(group_meta)
                    for entry in entries.values():
                        entry.update(source_zip=self.path)
                    group_files.update(entries)
        self.techs = list(self.files.keys())
        del meta
        # Not populated here.
        self.experiment = None