Example #1
    def test_from_structures(self):
        s1 = Structure([[5, 0, 0], [0, 5, 0], [0, 0, 5]], ["Fe"], [[0, 0, 0]])
        s2 = Structure([[5, 0, 0], [0, 5, 0], [0, 0, 5]], ["Mn"], [[0, 0, 0]])
        remarks = ["unittest"]
        authors="Test User <*****@*****.**>"
        snl_list = StructureNL.from_structures([s1, s2], authors, remarks=remarks)

        self.assertEqual(len(snl_list), 2)
        snl1 = snl_list[0]
        snl2 = snl_list[1]
        self.assertEqual(snl1.remarks, remarks)
        self.assertEqual(snl2.remarks, remarks)
        self.assertEqual(snl1.authors, [Author.parse_author(authors)])
        self.assertEqual(snl2.authors, [Author.parse_author(authors)])
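
For context, a minimal sketch of the author-string parsing the assertions above rely on, assuming only pymatgen's documented Author namedtuple (the name and email are placeholders):

from pymatgen.util.provenance import Author

# "Name <email>" strings parse into an Author namedtuple with .name and
# .email fields; {"name": ..., "email": ...} dicts are also accepted.
author = Author.parse_author("Jane Doe <jane@example.com>")
print(author.name)   # Jane Doe
print(author.email)  # jane@example.com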
Example #2
    def test_to_from_dict(self):
        # no complicated objects in the 'data' or 'nodes' field
        a = StructureNL(self.s, self.hulk, ['test_project'], self.pmg,
                        ['remark1'], {"_my_data": "string"},
                        [self.valid_node, self.valid_node2])
        b = StructureNL.from_dict(a.as_dict())
        self.assertEqual(a, b)
        # complicated objects in the 'data' and 'nodes' field
        complicated_node = {"name": "complicated node",
                            "url": "www.complicatednodegoeshere.com",
                            "description": {"structure": self.s2}}
        a = StructureNL(self.s, self.hulk, ['test_project'], self.pmg,
                        ['remark1'], {"_my_data": {"structure": self.s2}},
                        [complicated_node, self.valid_node])
        b = StructureNL.from_dict(a.as_dict())
        self.assertEqual(a, b,
                         'to/from dict is broken when object embedding is '
                         'used! Apparently MontyEncoding is broken...')

        # Test molecule
        molnl = StructureNL(self.mol, self.hulk, references=self.pmg)
        b = StructureNL.from_dict(molnl.as_dict())
        self.assertEqual(molnl, b)
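
The assertion message above alludes to MontyEncoder; a minimal sketch of the same round trip through a JSON string, assuming only monty's documented encoder/decoder pair (the structure and author are placeholders):

import json

from monty.json import MontyDecoder, MontyEncoder
from pymatgen.core import Lattice, Structure
from pymatgen.util.provenance import StructureNL

snl = StructureNL(Structure(Lattice.cubic(5.0), ["Fe"], [[0, 0, 0]]),
                  "Jane Doe <jane@example.com>")

# as_dict()/from_dict() are the hooks MontyEncoder/MontyDecoder use to
# embed pymatgen objects in plain JSON, which is what the test exercises.
serialized = json.dumps(snl, cls=MontyEncoder)
roundtrip = json.loads(serialized, cls=MontyDecoder)
assert roundtrip == snl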
Example #3
    def submit_structures(self, structures, authors, projects=None,
                          references='', remarks=None, data=None,
                          histories=None, created_at=None):
        """
        Submits a list of structures to the Materials Project as SNL files.
        The argument list mirrors the arguments for the StructureNL object,
        except that a list of structures with the same metadata is used as an
        input.

        .. note::

            As of now, this MP REST feature is open only to a select group of
            users. Opening up submissions to all users is being planned for
            the future.

        Args:
            structures: A list of Structure objects
            authors (list): List of {"name":'', "email":''} dicts,
                *list* of Strings as 'John Doe <*****@*****.**>',
                or a single String with commas separating authors
            projects ([str]): List of Strings ['Project A', 'Project B'].
                This applies to all structures.
            references (str): A String in BibTeX format. Again, this applies to
                all structures.
            remarks ([str]): List of Strings ['Remark A', 'Remark B']
            data ([dict]): A list of free form dict. Namespaced at the root
                level with an underscore, e.g. {"_materialsproject":<custom
                data>}. The length of data should be the same as the list of
                structures if not None.
            histories: List of list of dicts - [[{'name':'', 'url':'',
                'description':{}}], ...] The length of histories should be the
                same as the list of structures if not None.
            created_at (datetime): A datetime object

        Returns:
            A list of inserted submission ids.
        """
        from pymatgen.util.provenance import StructureNL
        snl_list = StructureNL.from_structures(structures, authors, projects,
                                               references, remarks, data,
                                               histories, created_at)
        return self.submit_snl(snl_list)
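
A minimal sketch of calling this endpoint, assuming it lives on pymatgen's legacy MPRester as shown (the API key, author, and structures are placeholders, and submission access is restricted as noted in the docstring):

from pymatgen.core import Lattice, Structure
from pymatgen.ext.matproj import MPRester

s1 = Structure(Lattice.cubic(5.0), ["Fe"], [[0, 0, 0]])
s2 = Structure(Lattice.cubic(5.0), ["Mn"], [[0, 0, 0]])

with MPRester("YOUR_API_KEY") as mpr:  # hypothetical key
    # The same authors/remarks metadata is applied to every structure.
    submission_ids = mpr.submit_structures(
        [s1, s2],
        authors="Jane Doe <jane@example.com>",
        remarks=["example submission"],
    )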
Example #4
    def test_snl(self):
        self.trans.set_parameter("author", "will")
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            snl = self.trans.to_snl([("will", "*****@*****.**")])
            self.assertEqual(
                len(w),
                1,
                "Warning not raised on type conversion with other_parameters",
            )
        ts = TransformedStructure.from_snl(snl)
        self.assertEqual(ts.history[-1]["@class"],
                         "SubstitutionTransformation")

        h = ("testname", "testURL", {"test": "testing"})
        snl = StructureNL(ts.final_structure, [("will", "*****@*****.**")],
                          history=[h])
        snl = TransformedStructure.from_snl(snl).to_snl([("notwill",
                                                          "*****@*****.**")])
        self.assertEqual(snl.history, [h])
        self.assertEqual(snl.authors, [("notwill", "*****@*****.**")])
Example #5
def add_snl(mat, new_style_mat):
    snl = new_style_mat.get("snl", None)
    mat["snl"] = copy.deepcopy(mat["structure"])
    if snl:
        mat["snl"].update(snl)
    else:
        mat["snl"] = StructureNL(Structure.from_dict(mat["structure"]), []).as_dict()
        mat["snl"]["about"].update(mp_default_snl_fields)

    mat["snl_final"] = mat["snl"]
    mat["icsd_ids"] = [int(i) for i in get(mat["snl"], "about._db_ids.icsd_ids", [])]
    mat["pf_ids"] = get(mat["snl"], "about._db_ids.pf_ids", [])

    # Extract tags from remarks by looking for just nouns and adjectives
    mat["exp"] = {"tags": []}
    for remark in mat["snl"]["about"].get("_tags", []):
        tokens = set(
            tok[1]
            for tok in nltk.pos_tag(nltk.word_tokenize(remark), tagset="universal")
        )
        if len(tokens.intersection({"ADV", "ADP", "VERB"})) == 0:
            mat["exp"]["tags"].append(remark)
Example #6
    def to_snl(self, authors, **kwargs) -> StructureNL:
        """
        Generate SNL from TransformedStructure.

        :param authors: List of authors
        :param **kwargs: All kwargs supported by StructureNL.
        :return: StructureNL
        """
        if self.other_parameters:
            warn("Data in TransformedStructure.other_parameters discarded during type conversion to SNL")
        hist = []
        for h in self.history:
            snl_metadata = h.pop("_snl", {})
            hist.append(
                {
                    "name": snl_metadata.pop("name", "pymatgen"),
                    "url": snl_metadata.pop("url", "http://pypi.python.org/pypi/pymatgen"),
                    "description": h,
                }
            )

        return StructureNL(self.final_structure, authors, history=hist, **kwargs)
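
A minimal sketch of the conversion, assuming a TransformedStructure built with pymatgen's standard SubstitutionTransformation (the composition and author are placeholders):

from pymatgen.alchemy.materials import TransformedStructure
from pymatgen.core import Lattice, Structure
from pymatgen.transformations.standard_transformations import (
    SubstitutionTransformation,
)

ts = TransformedStructure(Structure(Lattice.cubic(5.0), ["Fe"], [[0, 0, 0]]))
ts.append_transformation(SubstitutionTransformation({"Fe": "Mn"}))

# Each history entry becomes one SNL history node, with the transformation
# record stored under the node's "description" key.
snl = ts.to_snl(["Jane Doe <jane@example.com>"])
print(snl.history[-1]["name"])  # "pymatgen" unless overridden via "_snl"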
Example #7
    def match(self, snls, mat):
        """
        Finds a material doc that matches with the given snl

        Args:
            snl ([dict]): the snls list
            mat (dict): a materials doc

        Returns:
            generator of materials doc keys
        """
        sm = StructureMatcher(ltol=self.ltol,
                              stol=self.stol,
                              angle_tol=self.angle_tol,
                              primitive_cell=True,
                              scale=True,
                              attempt_supercell=False,
                              allow_subset=False,
                              comparator=ElementComparator())

        m_strucs = [Structure.from_dict(mat["structure"])] + [
            Structure.from_dict(init_struc)
            for init_struc in mat["initial_structures"]
        ]
        for snl in snls:
            snl_struc = StructureNL.from_dict(snl).structure
            try:
                snl_spacegroup = snl_struc.get_space_group_info()[0]
            except Exception:
                snl_spacegroup = -1
            for struc in m_strucs:
                try:
                    struc_sg = struc.get_space_group_info()[0]
                except Exception:
                    struc_sg = -1
                # The try-excepts are a temp fix to a spglib bug
                if struc_sg == snl_spacegroup and sm.fit(struc, snl_struc):
                    yield snl
                    break
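
The duplicate check reduces to StructureMatcher.fit with an element-only comparator; a standalone sketch using the same configuration (the tolerance values mirror pymatgen's usual defaults):

from pymatgen.analysis.structure_matcher import (
    ElementComparator,
    StructureMatcher,
)
from pymatgen.core import Lattice, Structure

sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                      primitive_cell=True, scale=True,
                      attempt_supercell=False, allow_subset=False,
                      comparator=ElementComparator())

s1 = Structure(Lattice.cubic(5.0), ["Fe"], [[0, 0, 0]])
s2 = Structure(Lattice.cubic(5.05), ["Fe"], [[0, 0, 0]])
print(sm.fit(s1, s2))  # True: scale=True makes the volume mismatch moot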
Example #8
    def match(self, snl, mats):
        """
        Finds a materials doc that matches the given SNL.

        Args:
            snl (dict): the snl doc
            mats ([dict]): the materials docs to match against

        Returns:
            the key of the matching materials doc, or None if no match is found
        """
        sm = StructureMatcher(ltol=self.ltol, stol=self.stol, angle_tol=self.angle_tol,
                              primitive_cell=True, scale=True,
                              attempt_supercell=False, allow_subset=False,
                              comparator=ElementComparator())
        snl_struc = StructureNL.from_dict(snl).structure

        for m in mats:
            m_struct = Structure.from_dict(m["structure"])
            init_m_struct = Structure.from_dict(m["initial_structure"])
            if sm.fit(m_struct, snl_struc) or sm.fit(init_m_struct, snl_struc):
                return m[self.materials.key]

        return None
Example #9
File: calc.py Project: utf/emmet
def prep(ctx, archive, authors):
    """prep structures from an archive for submission"""
    run = ctx.obj["RUN"]
    collections = ctx.obj["COLLECTIONS"]
    snl_collection = ctx.obj["CLIENT"].db.snls
    handler = ctx.obj["MONGO_HANDLER"]
    nmax = ctx.obj["NMAX"]
    skip = ctx.obj["SKIP"]
    # TODO no_dupe_check flag

    fname, ext = os.path.splitext(os.path.basename(archive))
    tag, sec_ext = fname.rsplit(".", 1) if "." in fname else [fname, ""]
    logger.info(click.style(f"tag: {tag}", fg="cyan"))
    if sec_ext:
        ext = "".join([sec_ext, ext])
    exts = ["tar.gz", ".tgz", "bson.gz", ".zip"]
    if ext not in exts:
        raise EmmetCliError(
            f"{ext} not supported (yet)! Please use one of {exts}.")

    meta = {"authors": [Author.parse_author(a) for a in authors]}
    references = meta.get("references", "").strip()
    source_ids_scanned = handler.collection.distinct("source_id",
                                                     {"tags": tag})

    # TODO add archive of StructureNL files
    input_structures, source_total = [], None
    if ext == "bson.gz":
        input_bson = gzip.open(archive)
        source_total = count_file_documents(input_bson)
        for doc in bson.decode_file_iter(input_bson):
            if len(input_structures) >= nmax:
                break
            if skip and doc["db_id"] in source_ids_scanned:
                continue
            elements = set([
                specie["element"] for site in doc["structure"]["sites"]
                for specie in site["species"]
            ])
            for label in SETTINGS.skip_labels:
                if label in elements:
                    logger.log(
                        logging.ERROR if run else logging.INFO,
                        f'Skip structure {doc["db_id"]}: unsupported element {label}!',
                        extra={
                            "tags": [tag],
                            "source_id": doc["db_id"]
                        },
                    )
                    break
            else:
                s = TransformedStructure.from_dict(doc["structure"])
                s.source_id = doc["db_id"]
                input_structures.append(s)
    elif ext == ".zip":
        input_zip = ZipFile(archive)
        namelist = input_zip.namelist()
        source_total = len(namelist)
        for fname in namelist:
            if len(input_structures) >= nmax:
                break
            if skip and fname in source_ids_scanned:
                continue
            contents = input_zip.read(fname)
            fmt = get_format(fname)
            s = Structure.from_str(contents, fmt=fmt)
            s.source_id = fname
            input_structures.append(s)
    else:
        tar = tarfile.open(archive, "r:gz")
        members = tar.getmembers()
        source_total = len(members)
        for member in members:
            if os.path.basename(member.name).startswith("."):
                continue
            if len(input_structures) >= nmax:
                break
            fname = member.name.lower()
            if skip and fname in source_ids_scanned:
                continue
            f = tar.extractfile(member)
            if f:
                contents = f.read().decode("utf-8")
                fmt = get_format(fname)
                s = Structure.from_str(contents, fmt=fmt)
                s.source_id = fname
                input_structures.append(s)

    total = len(input_structures)
    logger.info(
        f"{total} of {source_total} structure(s) loaded "
        f"({len(source_ids_scanned)} unique structures already scanned).")

    save_logs(ctx)
    snls, index = [], None
    for istruct in input_structures:
        # number of log messages equals number of structures processed if --run
        # only logger.warning goes to DB if --run
        if run and len(handler.buffer) >= handler.buffer_size:
            insert_snls(ctx, snls)

        struct = (istruct.final_structure if isinstance(
            istruct, TransformedStructure) else istruct)
        struct.remove_oxidation_states()
        struct = struct.get_primitive_structure()
        formula = struct.composition.reduced_formula
        sg = get_sg(struct)

        if not (struct.is_ordered and struct.is_valid()):
            logger.log(
                logging.WARNING if run else logging.INFO,
                f"Skip structure {istruct.source_id}: disordered or invalid!",
                extra={
                    "formula": formula,
                    "spacegroup": sg,
                    "tags": [tag],
                    "source_id": istruct.source_id,
                },
            )
            continue

        for full_name, coll in collections.items():
            # load canonical structures in collection for current formula and
            # duplicate-check them against current structure
            load_canonical_structures(ctx, full_name, formula)
            for canonical_structure in canonical_structures[full_name][
                    formula].get(sg, []):
                if structures_match(struct, canonical_structure):
                    logger.log(
                        logging.WARNING if run else logging.INFO,
                        f"Duplicate for {istruct.source_id} ({formula}/{sg}): {canonical_structure.id}",
                        extra={
                            "formula": formula,
                            "spacegroup": sg,
                            "tags": [tag],
                            "source_id": istruct.source_id,
                            "duplicate_dbname": full_name,
                            "duplicate_id": canonical_structure.id,
                        },
                    )
                    break
            else:
                continue  # no duplicate found -> continue to next collection

            break  # duplicate found
        else:
            # no duplicates in any collection
            prefix = snl_collection.database.name
            if index is None:
                # get start index for SNL id
                snl_ids = snl_collection.distinct("snl_id")
                index = max(
                    [int(snl_id[len(prefix) + 1:]) for snl_id in snl_ids])

            index += 1
            snl_id = "{}-{}".format(prefix, index)
            kwargs = {"references": references, "projects": [tag]}
            if isinstance(istruct, TransformedStructure):
                snl = istruct.to_snl(meta["authors"], **kwargs)
            else:
                snl = StructureNL(istruct, meta["authors"], **kwargs)

            snl_dct = snl.as_dict()
            snl_dct.update(get_meta_from_structure(struct))
            snl_dct["snl_id"] = snl_id
            snls.append(snl_dct)
            logger.log(
                logging.WARNING if run else logging.INFO,
                f"SNL {snl_id} created for {istruct.source_id} ({formula}/{sg})",
                extra={
                    "formula": formula,
                    "spacegroup": sg,
                    "tags": [tag],
                    "source_id": istruct.source_id,
                },
            )

    # final save
    if run:
        insert_snls(ctx, snls)
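
All three archive branches converge on Structure.from_str with a format derived from the file name; a minimal sketch with a hard-coded format (get_format is this project's helper, so it is not used here):

from pymatgen.core import Structure

POSCAR = """Fe
1.0
5.0 0.0 0.0
0.0 5.0 0.0
0.0 0.0 5.0
Fe
1
direct
0.0 0.0 0.0 Fe
"""

# fmt stands in for what get_format() would derive from the extension.
s = Structure.from_str(POSCAR, fmt="poscar")
print(s.composition.reduced_formula)  # Fe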
Example #10
    def _get_snls_from_resource(json, url, identifier) -> Dict[str, StructureNL]:

        snls = {}

        exceptions = set()

        def _sanitize_symbol(symbol):
            if symbol == "vacancy":
                symbol = DummySpecies("X_vacancy", oxidation_state=None)
            elif symbol == "X":
                symbol = DummySpecies("X", oxidation_state=None)
            return symbol

        def _get_comp(sp_dict):
            return {
                _sanitize_symbol(symbol): conc
                for symbol, conc in zip(sp_dict["chemical_symbols"], sp_dict["concentration"])
            }

        for data in json["data"]:

            # TODO: check the spec! and remove this try/except (are all providers following spec?)
            # e.g. can check data["type"] == "structures"

            try:
                # e.g. COD
                structure = Structure(
                    lattice=data["attributes"]["lattice_vectors"],
                    species=[_get_comp(d) for d in data["attributes"]["species"]],
                    coords=data["attributes"]["cartesian_site_positions"],
                    coords_are_cartesian=True,
                )
                # Grab any custom fields or non-mandatory fields if they were requested
                namespaced_data = {
                    k: v
                    for k, v in data["attributes"].items()
                    if k.startswith("_") or k not in {"lattice_vectors", "species", "cartesian_site_positions"}
                }

                # TODO: follow `references` to add reference information here
                snl = StructureNL(
                    structure,
                    authors={},
                    history=[{"name": identifier, "url": url, "description": {"id": data["id"]}}],
                    data={"_optimade": namespaced_data},
                )

                snls[data["id"]] = snl

            # TODO: bare exception, remove...
            except Exception:

                try:
                    # e.g. MP (all ordered, no vacancies)
                    structure = Structure(
                        lattice=data["attributes"]["lattice_vectors"],
                        species=data["attributes"]["species_at_sites"],
                        coords=data["attributes"]["cartesian_site_positions"],
                        coords_are_cartesian=True,
                    )
                    # Grab any custom fields or non-mandatory fields if they were requested
                    namespaced_data = {
                        k: v
                        for k, v in data["attributes"].items()
                        if k.startswith("_") or k not in {"lattice_vectors", "species", "cartesian_site_positions"}
                    }

                    # TODO: follow `references` to add reference information here
                    snl = StructureNL(
                        structure,
                        authors={},
                        history=[{"name": identifier, "url": url, "description": {"id": data["id"]}}],
                        data={"_optimade": namespaced_data},
                    )

                    snls[data["id"]] = snl

                except Exception as exc:
                    if str(exc) not in exceptions:
                        exceptions.add(str(exc))

        if exceptions:
            _logger.error(f'Failed to parse returned data for {url}: {", ".join(exceptions)}')

        return snls
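
A minimal sketch of the two site encodings this parser handles: plain element strings for the ordered branch, and per-site {species: concentration} dicts (with a vacancy DummySpecies) for the disordered branch:

from pymatgen.core import DummySpecies, Lattice, Structure

lattice = Lattice.cubic(5.0)

# Ordered branch ("species_at_sites"): one element string per site.
ordered = Structure(lattice, ["Fe"], [[0, 0, 0]])

# Disordered branch ("species"): a {species: concentration} dict per site,
# with DummySpecies("X_vacancy") standing in for an OPTIMADE vacancy.
disordered = Structure(lattice,
                       [{"Fe": 0.5, DummySpecies("X_vacancy"): 0.5}],
                       [[0, 0, 0]])
print(ordered.is_ordered, disordered.is_ordered)  # True False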
Example #11
        def submit_snl(n_clicks, structure, comments, url):

            if not n_clicks:
                raise PreventUpdate

            token = parse_token(url)
            if not token:
                raise PreventUpdate

            structure = self.from_data(structure)
            if type(structure) != Structure:
                message = (
                    f"Can only submit structures to Materials Project, "
                    f"not {type(structure)}"
                )
                return MessageContainer(message, kind="warning")

            if not MP_CLIENT_KEY:
                message = (
                    f"Submission to MPComplete is currently disabled, "
                    f"please check back soon or contact @mkhorton."
                )
                return MessageContainer(message, kind="warning")

            # check if structure already exists on MP

            with MPRester() as mpr:
                mpids = mpr.find_structure(structure)

            if mpids:
                message = (
                    f"Similar structures are already available on "
                    f"the Materials Project, see: {', '.join(mpids)}"
                )
                return MessageContainer(message, kind="warning")

            remarks = [
                f"Generated by Crystal Toolkit {ct_version} and "
                f"submitted with MPComplete"
            ]
            if comments:
                remarks.append(comments)

            contents = get_token_response(token)

            user_name = f"{contents['first_name']} {contents['last_name']}"
            user_email = contents["email"]
            user_api_key = contents["api_key"]

            snl = StructureNL(
                structure, [{"name": user_name, "email": user_email}], remarks=remarks
            )

            with MPRester(
                user_api_key, endpoint="https://www.materialsproject.org/rest/v1"
            ) as mpr:
                try:
                    submission_response = mpr.submit_snl(snl)
                except Exception as exc:
                    return MessageContainer(str(exc), kind="warning")

            header = f"Structure submission status: {submission_response[0]['status']}"
            message = submission_response[0]["details"]

            return MessageContainer(
                [MessageHeader(header), MessageBody(message)], kind="info"
            )
Example #12
    filename = stoich + "_" + energy_order_prefix + "_" + unique_id
    filename += ".json"

    
    atoms = row_i.atoms
    struct = AseAtomsAdaptor().get_structure(atoms)

    extra_data = {
        "_MPContribs_Internal_ID": unique_id,
        }

    struct_NL = StructureNL(
        struct,
        authors,
        projects=None,
        references="",
        remarks=remarks,
        data=extra_data,
        # history=extra_data,
        created_at=date,
    )

    path_i = os.path.join("out_data", filename)
    with open(path_i, "w") as file:
        json.dump(struct_NL.as_dict(), file, indent=2)
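
A minimal sketch of reading such a dump back in, assuming only the as_dict/from_dict pair used above (the path follows this snippet's naming pattern and is a placeholder):

import json

from pymatgen.util.provenance import StructureNL

with open("out_data/example.json") as file:  # hypothetical dump from above
    snl = StructureNL.from_dict(json.load(file))

print(snl.structure.composition.reduced_formula)
print(snl.data["_MPContribs_Internal_ID"])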
Example #13
def parse_vasp_dirs(vaspdirs, tag, task_ids, snl_metas):  # noqa: C901
    process = multiprocessing.current_process()
    name = process.name
    chunk_idx = int(name.rsplit("-")[1]) - 1
    logger.info(f"{name} starting.")
    tags = [tag, SETTINGS.year_tags[-1]]
    ctx = click.get_current_context()
    spec_or_dbfile = ctx.parent.parent.params["spec_or_dbfile"]
    target = calcdb_from_mgrant(spec_or_dbfile)
    snl_collection = target.db.snls_user
    sbxn = list(filter(None, target.collection.distinct("sbxn")))
    logger.info(f"Using sandboxes {sbxn}.")
    no_dupe_check = ctx.parent.parent.params["no_dupe_check"]
    run = ctx.parent.parent.params["run"]
    projection = {"tags": 1, "task_id": 1}
    count = 0
    drone = VaspDrone(
        additional_fields={"tags": tags},
        store_volumetric_data=ctx.params["store_volumetric_data"],
    )

    for vaspdir in vaspdirs:
        logger.info(f"{name} VaspDir: {vaspdir}")
        launcher = get_subdir(vaspdir)
        query = {"dir_name": {"$regex": launcher}}
        docs = list(
            target.collection.find(query,
                                   projection).sort([("_id", -1)]).limit(1))

        if docs:
            if no_dupe_check:
                logger.warning(f"FORCING re-parse of {launcher}!")
            else:
                if run:
                    shutil.rmtree(vaspdir)
                    logger.warning(
                        f"{name} {launcher} already parsed -> removed.")
                else:
                    logger.warning(
                        f"{name} {launcher} already parsed -> would remove.")
                continue

        try:
            task_doc = drone.assimilate(vaspdir)
        except Exception as ex:
            logger.error(f"Failed to assimilate {vaspdir}: {ex}")
            continue

        task_doc["sbxn"] = sbxn
        manual_taskid = isinstance(task_ids, dict)
        snl_metas_avail = isinstance(snl_metas, dict)
        task_id = (task_ids[launcher] if manual_taskid
                   else task_ids[chunk_idx][count])
        task_doc["task_id"] = task_id
        logger.info(f"Using {task_id} for {launcher}.")

        if docs:
            # make sure that task gets the same tags as the previously parsed task
            # (run through set to implicitly remove duplicate tags)
            if docs[0]["tags"]:
                existing_tags = list(set(docs[0]["tags"]))
                task_doc["tags"] += existing_tags
                logger.info(f"Adding existing tags {existing_tags} to {tags}.")

        snl_dct = None
        if snl_metas_avail:
            snl_meta = snl_metas.get(launcher)
            if snl_meta:
                references = snl_meta.get("references")
                authors = snl_meta.get(
                    "authors",
                    ["Materials Project <*****@*****.**>"])
                kwargs = {"projects": [tag]}
                if references:
                    kwargs["references"] = references

                struct = Structure.from_dict(task_doc["input"]["structure"])
                snl = StructureNL(struct, authors, **kwargs)
                snl_dct = snl.as_dict()
                snl_dct.update(get_meta_from_structure(struct))
                snl_id = snl_meta["snl_id"]
                snl_dct["snl_id"] = snl_id
                logger.info(f"Created SNL object for {snl_id}.")

        if run:
            if task_doc["state"] == "successful":
                if docs and no_dupe_check:
                    target.collection.remove({"task_id": task_id})
                    logger.warning(
                        f"Removed previously parsed task {task_id}!")

                try:
                    target.insert_task(task_doc, use_gridfs=True)
                except DocumentTooLarge:
                    output = dotty(task_doc["calcs_reversed"][0]["output"])
                    pop_keys = [
                        "normalmode_eigenvecs",
                        "force_constants",
                        "outcar.onsite_density_matrices",
                    ]

                    for k in pop_keys:
                        if k not in output:
                            continue

                        logger.warning(f"{name} Remove {k} and retry ...")
                        output.pop(k)
                        try:
                            target.insert_task(task_doc, use_gridfs=True)
                            break
                        except DocumentTooLarge:
                            continue
                    else:
                        logger.warning(
                            f"{name} failed to reduce document size")
                        continue

                if target.collection.count(query):
                    if snl_dct:
                        result = snl_collection.insert_one(snl_dct)
                        logger.info(
                            f"SNL {result.inserted_id} inserted into {snl_collection.full_name}."
                        )

                    shutil.rmtree(vaspdir)
                    logger.info(
                        f"{name} Successfully parsed and removed {launcher}.")
                    count += 1
        else:
            count += 1

    return count
Example #14
    def assimilate(self,
                   path,
                   dbhost='localhost',
                   dbport=27017,
                   dbname='ICSD',
                   collection_name='ICSD_files',
                   store_mongo=True):
        """
        Assimilate data in a directory path into a pymatgen object. Because of
        the quirky nature of Python"s multiprocessing, the object must support
        pymatgen's as_dict() for parallel processing.
        Args:
            path: directory path
        Returns:
            An assimilated object
        """
        if store_mongo:
            client = MongoClient(dbhost, dbport)
            db = client[dbname]
            col = db[collection_name]

        data = {}

        files = os.listdir(path)
        file_ID = path.split('/')[-1]
        print(file_ID)
        data['icsd_id'] = int(file_ID)

        cif_path = os.path.join(path, file_ID + '.cif')

        # capture any warnings generated by parsing cif file
        with warnings.catch_warnings(record=True) as w:
            cif_parser = CifParser(cif_path)
            for warn in w:
                if 'cifwarnings' in data:
                    data['cifwarnings'].append(str(warn.message))
                else:
                    data['cifwarnings'] = [str(warn.message)]
                logger.warning('{}: {}'.format(file_ID, warn.message))

        cif_dict = cif_parser.as_dict()
        orig_id = list(cif_dict.keys())[0]
        easy_dict = cif_dict[orig_id]

        if '_chemical_name_mineral' in easy_dict:
            data['min_name'] = easy_dict['_chemical_name_mineral']
        if '_chemical_name_systematic' in easy_dict:
            data['chem_name'] = easy_dict['_chemical_name_systematic']
        if '_cell_measurement_pressure' in easy_dict:
            data['pressure'] = float(
                easy_dict['_cell_measurement_pressure']) / 1000
        else:
            data['pressure'] = .101325

        with warnings.catch_warnings(record=True) as w:
            try:
                struc = cif_parser.get_structures()[0]
            except ValueError as err:
                # if cif parsing raises error, write icsd_id to Error_Record and do NOT add structure to mongo database
                logger.error(
                    file_ID + ': {}'.format(err) +
                    "\nDid not insert structure into Mongo Collection")
                with open('Error_Record', 'a') as err_rec:
                    err_rec.write(str(file_ID) + ': {}\n'.format(err))
            else:
                authors = 'Donny Winston<*****@*****.**>, Joseph Palakapilly<*****@*****.**>'
                references = self.bibtex_from_cif(cif_path)
                history = [{
                    'name': 'ICSD',
                    'url': 'https://icsd.fiz-karlsruhe.de/',
                    'description': {
                        'icsd_id': file_ID
                    }
                }]
                snl = StructureNL(struc,
                                  authors=authors,
                                  references=references,
                                  history=history)
                data['snl'] = snl.as_dict()

                meta = get_meta_from_structure(struc)
                data['nsites'] = meta['nsites']
                data['elements'] = meta['elements']
                data['nelements'] = meta['nelements']
                data['formula'] = meta['formula']
                data['formula_reduced'] = meta['formula_pretty']
                data['formula_reduced_abc'] = meta['formula_reduced_abc']
                data['formula_anonymous'] = meta['formula_anonymous']
                data['chemsys'] = meta['chemsys']
                data['is_valid'] = meta['is_valid']
                data['is_ordered'] = meta['is_ordered']

            # Unfortunately, any warnings are logged after any errors; not too big of an issue.
            for warn in w:
                if 'cifwarnings' in data:
                    data['cifwarnings'].append(str(warn.message))
                else:
                    data['cifwarnings'] = [str(warn.message)]
                logger.warning('{}: {}'.format(file_ID, warn.message))

        if 'snl' in data and store_mongo:
            col.update_one({'icsd_id': int(file_ID)}, {'$set': data},
                           upsert=True)

        return data
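
A standalone sketch of the warning-capture pattern used twice above, assuming only pymatgen's CifParser API (the CIF path is a placeholder):

import warnings

from pymatgen.io.cif import CifParser

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    parser = CifParser("structure.cif")   # hypothetical path
    structure = parser.get_structures()[0]

# Parser warnings are collected here instead of going to stderr, matching
# how assimilate() builds its 'cifwarnings' list.
cif_warnings = [str(w.message) for w in caught]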
Example #15
    def test_remarks(self):
        a = StructureNL(self.s, self.hulk, remarks="string format")
        self.assertEqual(a.remarks[0], "string format")
        self.assertRaises(ValueError, StructureNL, self.s, self.hulk,
                          remarks=self.remark_fail)