Example #1
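Every converter below relies on the same surrounding imports, which these scraped listings omit. A minimal reconstruction is sketched here; the import paths for the MDF helpers (Validator, find_files, parse_ase) are placeholders, not confirmed by the listings:

import json
import os
import sys

from tqdm import tqdm

# Placeholder paths -- point these at wherever the MDF helper utilities
# (Validator, find_files, parse_ase) actually live in your checkout.
from validator import Validator
from file_utils import find_files
from ase_parser import parse_ase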
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/671",
            "acl": ["public"],
            "mdf_source_name": "trinkle_elastic_fe_bcc",
            "mdf-publish.publication.collection": "Elastic Fe BCC",
            "mdf_data_class": "vasp",
            "cite_as": [
                "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Comp. Mat. Sci. 126, 503 (2017).",
                "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Data in Brief 10, 147 (2017)."
            ],
            "license": "http://creativecommons.org/publicdomain/zero/1.0/",
            "dc.title": "Ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes",
            "dc.creator": "University of Illinois, General Motors",
            "dc.identifier": "http://hdl.handle.net/11256/671",
            "dc.contributor.author": ["M. R. Fellinger", "L. G. Hector Jr.", "D. R. Trinkle"],
#            "dc.subject": ,
#            "dc.description": ,
            "dc.relatedidentifier": [
                "http://dx.doi.org/10.1016/j.commatsci.2016.09.040",
                "http://dx.doi.org/10.1016/j.dib.2016.11.092"
            ],
            "dc.year": 2017
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")
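    # (So callers may pass nothing to use the built-in metadata above, a path
    # to a JSON file, or an already-parsed dict --
    # e.g., convert(input_path, metadata="dataset_metadata.json").)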

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = ("https://data.materialsdatafacility.org/collections/"
               + data_file["no_root_path"] + "/" + data_file["filename"])
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Elastic Fe BCC",
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,
            "dc.title": "Elastic BCC - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,
            "data": {
#                "raw": ,
                "files": {"outcar": uri}
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
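A minimal invocation sketch for converters of this shape (hypothetical path; assumes the imports noted above):

convert("/path/to/raw/trinkle_elastic_fe_bcc_data", metadata=None, verbose=True)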
Example #2
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/701",
            "acl": ["public"],
            "mdf_source_name": "strain_effects_oxygen",
            "mdf-publish.publication.collection": "Strain Effects on Oxygen Migration",
            "mdf_data_class": "vasp",

            "cite_as": ["Mayeshiba, T. & Morgan, D. Strain effects on oxygen migration in perovskites. Physical chemistry chemical physics : PCCP 17, 2715-2721, doi:10.1039/c4cp05554c (2015).", "Mayeshiba, T. & Morgan, D. Correction: Strain effects on oxygen migration in perovskites. Physical chemistry chemical physics : PCCP, doi:10.1039/c6cp90050j (2016)."],
#            "license": ,

            "dc.title": "Strain effects on oxygen migration in perovskites: La[Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Ga]O3",
            "dc.creator": "University of Wisconsin-Madison",
            "dc.identifier": "http://hdl.handle.net/11256/701",
            "dc.contributor.author": ["Mayeshiba, Tam", "Morgan, Dane"],
#            "dc.subject": ,
#            "dc.description": ,
            "dc.relatedidentifier": ["https://dx.doi.org/10.1039/c4cp05554c", "https://dx.doi.org/10.1039/c6cp90050j"],
            "dc.year": 2016
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "^OUTCAR$"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject": uri,
                "acl": ["public"],
    #            "mdf-publish.publication.collection": ,
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

    #            "cite_as": ,
    #            "license": ,

                "dc.title": "Oxygen Migration - " + data["frames"][0]["chemical_formula"],
    #            "dc.creator": ,
                "dc.identifier": uri,
    #            "dc.contributor.author": ,
    #            "dc.subject": ,
    #            "dc.description": ,
    #            "dc.relatedidentifier": ,
    #            "dc.year": ,

                "data": {
    #                "raw": ,
                    "files": {"outcar": uri}
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            if verbose:
                print("Error on:", os.path.join(data_file["path"], data_file["filename"]))

    if verbose:
        print("Finished converting")
Example #3
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://quantum-machine.org/datasets/#qm9",
        "acl": ["public"],
        "mdf_source_name": "qm9",
        "mdf-publish.publication.collection": "Quantum Machine",
        "mdf_data_class": "xyz",
        "cite_as": [
            "L. Ruddigkeit, R. van Deursen, L. C. Blum, J.-L. Reymond, Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17, J. Chem. Inf. Model. 52, 2864–2875, 2012.",
            "R. Ramakrishnan, P. O. Dral, M. Rupp, O. A. von Lilienfeld, Quantum chemistry structures and properties of 134 kilo molecules, Scientific Data 1, 140022, 2014."
        ],
        "dc.title": "Quantum Machine - QM9",
        "dc.creator": "Quantum Machine",
        "dc.identifier": "http://quantum-machine.org/datasets/#qm9",
        "dc.contributor.author": [
            "L. Ruddigkeit", "R. van Deursen", "L. C. Blum", "J.-L. Reymond",
            "R. Ramakrishnan", "P. O. Dral", "M. Rupp", "O. A. von Lilienfeld"
        ],
        "dc.subject": ["gdb-17"],
        "dc.description": (
            "Computational de novo design of new drugs and materials requires rigorous and unbiased exploration of chemical compound space. "
            "However, large uncharted territories persist due to its size scaling combinatorially with molecular size. We report computed geometric, "
            "energetic, electronic, and thermodynamic properties for 134k stable small organic molecules made up of CHONF. These molecules correspond "
            "to the subset of all 133,885 species with up to nine heavy atoms (CONF) out of the GDB-17 chemical universe of 166 billion organic "
            "molecules. We report geometries minimal in energy, corresponding harmonic frequencies, dipole moments, polarizabilities, along with "
            "energies, enthalpies, and free energies of atomization. All properties were calculated at the B3LYP/6-31G(2df,p) level of quantum "
            "chemistry. Furthermore, for the predominant stoichiometry, C7H10O2, there are 6,095 constitutional isomers among the 134k molecules. We "
            "report energies, enthalpies, and free energies of atomization at the more accurate G4MP2 level of theory for all of them. As such, this "
            "data set provides quantum chemical properties for a relevant, consistent, and comprehensive chemical space of small organic molecules. "
            "This database may serve the benchmarking of existing methods, development of new methods, such as hybrid quantum mechanics/machine "
            "learning, and systematic identification of structure-property relationships."
        ),
        "dc.relatedidentifier": ["https://doi.org/10.6084/m9.figshare.978904"],
        "dc.year": 2014
    }

    # Make a Validator to help write the feedstock
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, "xyz"),
                          desc="Processing QM9",
                          disable=not verbose):
        file_path = os.path.join(file_data["path"], file_data["filename"])
        record = parse_ase(file_path, "xyz")
        record_metadata = {
            "globus_subject": "https://data.materialsdatafacility.org/collections/test/qm9/"
                              + file_data["no_root_path"] + "/" + file_data["filename"],
            "acl": ["public"],
            "mdf-publish.publication.collection": "Quantum Machine",
            "mdf-base.material_composition": record.get("chemical_formula", ""),
            "dc.title": "QM9 - " + record.get("chemical_formula", "") + " - " + file_data["filename"],
            "dc.creator": "Quantum Machine",
            "dc.identifier": "http://quantum-machine.org/datasets/#qm9",
            #"dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
            #"dc.subject": ,                          # OPT list of strings: Keywords about record
            #"dc.description": ,                      # OPT string: Description of record
            #"dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
            #"dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)
            "data": {
                #"raw": ,                             # RCM string: Original data record text, if feasible
                "files": {
                    "xyz": "https://data.materialsdatafacility.org/collections/test/qm9/"
                           + file_data["no_root_path"] + "/" + file_data["filename"]
                },
                # A bare set literal here would not be JSON-serializable, so
                # the level of theory is stored as a plain string:
                "quantum chemistry level": "B3LYP/6-31G(2df,p)"
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #4
def convert(input_path, metadata, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "^OUTCAR$"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = ("https://data.materialsdatafacility.org/collections/"
               + data_file["no_root_path"] + "/" + data_file["filename"])
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
#            "mdf_data_class": ,
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,
            "dc.title": dataset_metadata["dc.title"] + " - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,
            "data": {
#                "raw": ,
                "files": {"outcar": uri}
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #5
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://qmml.org/datasets.html#gdb7-13",
            "acl": ["public"],
            "mdf_source_name": "gdb7-13",
            "mdf-publish.publication.collection": "gdb7-13",
            "mdf_data_class": "xyz",
            "cite_as": [
                "Grégoire Montavon, Matthias Rupp, Vivekanand Gobre, Alvaro Vazquez-Mayagoitia, Katja Hansen, Alexandre Tkatchenko, Klaus-Robert Müller, O. Anatole von Lilienfeld: Machine learning of molecular electronic properties in chemical compound space, New Journal of Physics, 15(9): 095003, IOP Publishing, 2013. DOI: 10.1088/1367-2630/15/9/095003"
            ],
            "license": "https://creativecommons.org/licenses/by/3.0/",
            "mdf_version": "0.1.0",
            "dc.title": "Machine learning of molecular electronic properties in chemical compound space",
            "dc.creator": "Argonne National Laboratory, Einstein Foundation, National Research Foundation of Korea",
            "dc.identifier": "http://qmml.org/datasets.html#gdb7-13",
            "dc.contributor.author": [
                "Grégoire Montavon", "Matthias Rupp", "Vivekanand Gobre",
                "Alvaro Vazquez-Mayagoitia", "Katja Hansen",
                "Alexandre Tkatchenko", "Klaus-Robert Müller",
                "O. Anatole von Lilienfeld"
            ],
#            "dc.subject": ,
            # RCM string: Description of dataset contents
            "dc.description": "7k small organic molecules, in their ground state, 14 combinations of properties and theory levels. 7,211 small organic molecules composed of H, C, N, O, S, Cl, saturated with H, and up to 7 non-H atoms. Molecules relaxed using DFT with PBE functional. Properties are atomization energy (DFT/PBE0), averaged polarizability (DFT/PBE0, SCS), HOMO and LUMO eigenvalues (GW, DFT/PBE0, ZINDO), and ionization potential, electron affinity, first excitation energy, frequency of maximal absorption (all ZINDO).",
            "dc.relatedidentifier": ["https://doi.org/10.1088/1367-2630/15/9/095003"],
            "dc.year": 2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)
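    # (With strict=False, problems that are only warnings are returned in
    # result["warnings"] instead of failing the record -- see the check after
    # write_record below.)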

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        uri = ("https://data.materialsdatafacility.org/collections/gdb7-13/gdb7-13_data/"
               + data_file["filename"])
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition": record["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "gdb7-13 " + data_file["filename"],
            #            "dc.creator": ,
            #            "dc.identifier": ,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "xyz": uri
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    #if result["success"] is not True:
    #print("Error:", result["message"])
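    # A sketch of that batch path, under the same assumptions as above (every
    # entry must already carry its per-record metadata):
    # all_records = []
    # for data_file in find_files(input_path, "xyz"):
    #     record_metadata = {...}   # built exactly as in the loop above
    #     all_records.append(record_metadata)
    # result = dataset_validator.write_dataset(all_records)
    # if result["success"] is not True:
    #     print("Error:", result["message"])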

    # You're done!
    if verbose:
        print("Finished converting")
Example #6
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            #            "globus_subject": ,                      # REQ string: Unique value (should be URI if possible)
            #            "acl": ,                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            #            "mdf_source_name": ,                     # REQ string: Unique name for dataset
            #            "mdf-publish.publication.collection": ,  # RCM string: Collection the dataset belongs to

            #            "cite_as": ,                             # REQ list of strings: Complete citation(s) for this dataset.
            #            "license": ,                             # RCM string: License to use the dataset (preferrably a link to the actual license).

            #            "dc.title": ,                            # REQ string: Title of dataset
            #            "dc.creator": ,                          # REQ string: Owner of dataset
            #            "dc.identifier": ,                       # REQ string: Link to dataset (dataset DOI if available)
            #            "dc.contributor.author": ,               # RCM list of strings: Author(s) of dataset
            #            "dc.subject": ,                          # RCM list of strings: Keywords about dataset
            #            "dc.description": ,                      # RCM string: Description of dataset contents
            #            "dc.relatedidentifier": ,                # RCM list of strings: Link(s) to related materials (such as an article)
            #            "dc.year":                               # RCM integer: Year of dataset creation
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for file_data in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(file_data["path"], file_data["filename"]), "vasp")
            if not record:
                raise ValueError("No data returned")
        except Exception:
            # Skip files that parse_ase cannot read
            continue
        record_metadata = {
            # REQ string: Unique value (should be URI to record if possible)
            "globus_subject": "https://data.materialsdatafacility.org/collections/"
                              + file_data["no_root_path"] + "/" + file_data["filename"],
            # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            "acl": ["public"],
            # RCM string: Collection the record belongs to
            "mdf-publish.publication.collection": "Ti-O MEAM Model",
#            "mdf_data_class": "vasp",                # RCM string: Type of data in record
            # RCM string: Chemical composition of material in record
            "mdf-base.material_composition": record["frames"][0]["chemical_formula"],

#            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
#            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferably a link to the actual license).
            # REQ string: Title of record
            "dc.title": "Ti-O MEAM Model - " + record["frames"][0]["chemical_formula"],
#            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
            # RCM string: Link to record (record webpage, if available)
            "dc.identifier": "https://data.materialsdatafacility.org/collections/"
                             + file_data["no_root_path"] + "/" + file_data["filename"],
#            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
#            "dc.subject": ,                          # OPT list of strings: Keywords about record
#            "dc.description": ,                      # OPT string: Description of record
#            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
#            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)
            # REQ dictionary: Other record data (described below)
            "data": {
#                "raw": ,                             # RCM string: Original data record text, if feasible
                "files": {
                    "outcar": "https://data.materialsdatafacility.org/collections/"
                              + file_data["no_root_path"] + "/" + file_data["filename"]
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #7
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://blogs.brown.edu/qhong/?page_id=102",
        "acl": ["public"],
        "mdf_source_name": "sluschi",
        "mdf-publish.publication.collection": "SLUSCHI",
        "mdf_data_class": "vasp",
        "cite_as": [
            "Qi-Jun Hong, Axel van de Walle, A user guide for SLUSCHI: Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces, Calphad, Volume 52, March 2016, Pages 88-97, ISSN 0364-5916, http://doi.org/10.1016/j.calphad.2015.12.003."
        ],
        "dc.title": "Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces",
        "dc.creator": "Brown University",
        "dc.identifier": "http://doi.org/10.1016/j.calphad.2015.12.003",
        "dc.contributor.author": ["Qi-Jun Hong", "Axel van de Walle"],
        "dc.subject": [
            "Melting temperature calculation", "Density functional theory",
            "Automated code"
        ],
        "dc.description": "Although various approaches for melting point calculations from first principles have been proposed and employed for years, their practical implementation has hitherto remained a complex and time-consuming process. The SLUSCHI code (Solid and Liquid in Ultra Small Coexistence with Hovering Interfaces) drastically simplifies this procedure into an automated package, by implementing the recently-developed small-size coexistence method and putting together a series of steps that lead to final melting point evaluation. Based on density functional theory, SLUSCHI employs Born–Oppenheimer molecular dynamics techniques under the isobaric–isothermal (NPT) ensemble, with interface to the first-principles code VASP.",
        "dc.relatedidentifier": ["http://blogs.brown.edu/qhong/?page_id=102"],
        "dc.year": 2015
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path, file_pattern="^OUTCAR$"),
                         desc="Processing data files",
                         disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="vasp",
                              verbose=False)

        # If no data, skip record
        if not file_data or not file_data["frames"]:
            continue

        uri = "globus:sluschi/" + dir_data["no_root_path"] + "/" + dir_data[
            "filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "SLUSCHI",
            "mdf-base.material_composition": file_data["frames"][0]["chemical_formula"],
            "dc.title": "SLUSCHI - " + file_data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
#            "dc.identifier": ,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,
            "data": {
#                "raw": str(file_data),
                "files": {"outcar": uri}
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #8
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://dx.doi.org/doi:10.18126/M24S3J",
            "acl": ["public"],
            "mdf_source_name": "monoborides_dft",
            "mdf-publish.publication.collection": "Monoborides DFT",
            "mdf_data_class": "vasp",

            "cite_as": ["Kim, Hyojung; Trinkle, Dallas R., \"Mechanical Properties and Phase Stability of Monoborides using Density Functional Theory Calculations,\" 2017, http://dx.doi.org/doi:10.18126/M24S3J"],
#            "license": "",

            "dc.title": "Mechanical Properties and Phase Stability of Monoborides using Density Functional Theory Calculations",
            "dc.creator": "University of Illinois at Urbana-Champaign",
            "dc.identifier": "http://dx.doi.org/doi:10.18126/M24S3J",
            "dc.contributor.author": ["Kim, Hyojung", "Trinkle, Dallas R."],
            "dc.subject": ["ab-initio", "special quasirandom structure", "DFT", "polycrystalline mechanical properties", "stacking fault energy", "solubility limit", "monoboride", "B27 structure", "Bf structure", "Vegard's law"],
            "dc.description": "This data demonstrates the Ti-monoborides with improved polycrystalline elastic properties such as Young's modulus and Pugh's ratio, and stacking fault energies. The lattice parameters, total energies and elastic constants of monoborides are computed using density functional theory",
#            "dc.relatedidentifier": [],
            "dc.year": 2017
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/published/publication_232/" + data_file["no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "Monoborides DFT - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": ,
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #9
def convert(input_path, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://hdl.handle.net/11256/782",                      # REQ string: Unique value (should be URI if possible)
        "acl": ["public"],                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
        "mdf_source_name": "ti_o_fitting_db",                     # REQ string: Unique name for dataset
        "mdf-publish.publication.collection": "Ti-O Fitting Database",  # RCM string: Collection the dataset belongs to
        "mdf_data_class": "vasp",                      # RCM string: Type of data in record

        "cite_as": ["Trinkle, Dallas R.; Zhang, Pinchao Fitting database entries for a modified embedded atom method potential for interstitial oxygen in titanium (2016-07-25) http://hdl.handle.net/11256/782"],
        "license": "http://creativecommons.org/licenses/by/3.0/us/",                             # RCM string: License to use the dataset (preferrably a link to the actual license).

        "dc.title": "Fitting database entries for a modified embedded atom method potential for interstitial oxygen in titanium",                            # REQ string: Title of dataset
        "dc.creator": "University of Illinois, Urbana-Champaign",                          # REQ string: Owner of dataset
        "dc.identifier": "http://hdl.handle.net/11256/782",                       # REQ string: Link to dataset (dataset DOI if available)
        "dc.contributor.author": ["Trinkle, Dallas R", "Zhang, Pinchao"],               # RCM list of strings: Author(s) of dataset
#        "dc.subject": ,                          # RCM list of strings: Keywords about dataset
#        "dc.description": ,                      # RCM string: Description of dataset contents
#        "dc.relatedidentifier": ,                # RCM list of strings: Link(s) to related materials (such as an article)
        "dc.year": 2016                              # RCM integer: Year of dataset creation
        }


    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable= not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]

        try:
            record_metadata = {
                "globus_subject": uri,                      # REQ string: Unique value (should be URI to record if possible)
                "acl": ["public"],                                 # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
                "mdf-publish.publication.collection": "Ti-O Fitting Database",  # RCM string: Collection the record belongs to
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],       # RCM string: Chemical composition of material in record

    #            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
    #            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferrably a link to the actual license).

                "dc.title": "Ti-O Fitting Database - " + data["frames"][0]["chemical_formula"],                            # REQ string: Title of record
    #            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
                 "dc.identifier": uri,                       # RCM string: Link to record (record webpage, if available)
    #            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
    #            "dc.subject": ,                          # OPT list of strings: Keywords about record
    #            "dc.description": ,                      # OPT string: Description of record
    #            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
    #            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)

                "data": {                                # REQ dictionary: Other record data (described below)
    #                "raw": ,                             # RCM string: Original data record text, if feasible
                    "files": {"outcar": uri}                            # RCM dictionary: {file_type : uri_to_file} pairs, data files (Example: {"cif" : "https://example.org/cifs/data_file.cif"})
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"] + "/" + data_file["filename"])


    # TODO: Save your converter as [mdf_source_name]_converter.py
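    # (For this dataset, that would be ti_o_fitting_db_converter.py.)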
    # You're done!
    if verbose:
        print("Finished converting")
Example #10
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/272",
            "acl": ["public"],
            "mdf_source_name": "oxygen_interstitials_deformation",
            "mdf-publish.publication.collection": "Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "mdf_data_class": "vasp",

            "cite_as": ["Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium, Acta Materialia v. 105 (2016), pp. 44 - 51 http://dx.doi.org/10.1016/j.actamat.2015.12.019"],
            "license": "http://creativecommons.org/licenses/by/3.0/us/",

            "dc.title": "Interaction Between Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "dc.creator": "University of Maryland",
            "dc.identifier": "http://hdl.handle.net/11256/272",
            "dc.contributor.author": ["Joost, William J.", "Ankem, Sreeramamurthy", "Kuklja, Maija M."],
#            "dc.subject": ,
#            "dc.description": ,
            "dc.relatedidentifier": ["http://dx.doi.org/10.1016/j.actamat.2015.12.019"],
            "dc.year": 2016
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable= not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Oxygen Interstitials and Deformation Twins in alpha-Titanium",
            "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#            "cite_as": ,
#            "license": ,

            "dc.title": "Oxygen Interstitials and Deformation Twins - " + data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": ,
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example #11
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject": "http://khazana.uconn.edu/module_search/search.php?m=2",
        "acl": ["public"],
        "mdf_source_name": "khazana_vasp",
        "mdf-publish.publication.collection": "Khazana",
        "mdf_data_class": "vasp",

        "cite_as": ["http://khazana.uconn.edu/module_search/search.php?m=2"],
        "dc.title": "Khazana (VASP)",
        "dc.creator": "University of Connecticut",
        "dc.identifier": "http://khazana.uconn.edu",
#        "dc.contributor.author": ,
        "dc.subject": ["DFT", "VASP"]
#        "dc.description": ,
#        "dc.relatedidentifier": ,
#        "dc.year": 
        }


    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path, file_pattern="^OUTCAR"), desc="Processing data files", disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"], dir_data["filename"]), data_format="vasp", verbose=False)

        uri = "https://data.materialsdatafacility.org/collections/khazana/OUTCARS/" + dir_data["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            "mdf-publish.publication.collection": "Khazana",
            "mdf-base.material_composition": file_data["frames"][0]["chemical_formula"],

            "dc.title": "Khazana VASP - " + file_data["frames"][0]["chemical_formula"],
#            "dc.creator": ,
            "dc.identifier": uri,
#            "dc.contributor.author": ,
#            "dc.subject": ,
#            "dc.description": ,
#            "dc.relatedidentifier": ,
#            "dc.year": ,

            "data": {
#                "raw": str(file_data),
                "files": {"outcar": uri}
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":", result.get("invalid_metadata"))

    if verbose:
        print("Finished converting")
Example #12
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # TODO: Make sure the metadata is present in some form.
    # Fields can be:
    #    REQ (Required, must be present)
    #    RCM (Recommended, should be present if possible)
    #    OPT (Optional, can be present if useful)
    if not metadata:
        dataset_metadata = {
            # REQ string: Unique value (should be URI if possible)
            "globus_subject": "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483",
            # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            "acl": ["public"],
            # REQ string: Unique name for dataset
            "mdf_source_name": "cp_complexes",
            # RCM string: Collection the dataset belongs to
            "mdf-publish.publication.collection": "Cp*W(NO)(H)(η3‑allyl) Complexes",
            # RCM string: Type of data in all records in the dataset (do not provide for multi-type datasets)
            "mdf_data_class": "CIF",
            # REQ list of strings: Complete citation(s) for this dataset.
            "cite_as": [
                "Baillie, Rhett A.; Holmes, Aaron S.; Lefèvre, Guillaume P.; Patrick, Brian O.; Shree, Monica V.; Wakeham, Russell J.; Legzdins, Peter; Rosenfeld, Devon C. (2015): Synthesis, Characterization, and Some Properties of Cp*W(NO)(H)(η3‑allyl) Complexes. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b00747.s002"
            ],
            # RCM string: License to use the dataset (preferably a link to the actual license).
            "license": "https://creativecommons.org/licenses/by-nc/4.0/",
            # REQ string: The metadata version in use (see VERSION above).
            "mdf_version": "0.1.0",
            # REQ string: Title of dataset
            "dc.title": "Synthesis, Characterization, and Some Properties of Cp*W(NO)(H)(η3‑allyl) Complexes",
            # REQ string: Owner of dataset
            "dc.creator": "The University of British Columbia, The Dow Chemical Company",
            # REQ string: Link to dataset (dataset DOI if available)
            "dc.identifier": "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483",
            # RCM list of strings: Author(s) of dataset
            "dc.contributor.author": [
                "Baillie, Rhett A.", "Holmes, Aaron S.",
                "Lefèvre, Guillaume P.", "Patrick, Brian O.",
                "Shree, Monica V.", "Wakeham, Russell J.", "Legzdins, Peter",
                "Rosenfeld, Devon C."
            ],
            # RCM list of strings: Keywords about dataset
            "dc.subject": [
                "THF", "DFT", "18 e PMe 3 adducts", "complex",
                "coordination isomers", "magnesium allyl reagent"
            ],
            # RCM string: Description of dataset contents
            "dc.description": "Sequential treatment at low temperatures of Cp*W(NO)Cl2 in THF with 1 equiv of a binary magnesium allyl reagent, followed by an excess of LiBH4, affords three new Cp*W(NO)(H)(η3-allyl) complexes, namely, Cp*W(NO)(H)(η3-CH2CHCMe2) (1), Cp*W(NO)(H)(η3-CH2CHCHPh) (2), and Cp*W(NO)(H)(η3-CH2CHCHMe) (3).",
            # RCM list of strings: Link(s) to related materials (such as an article)
            "dc.relatedidentifier": ["https://doi.org/10.1021/acs.inorgchem.5b00747"],
            # RCM integer: Year of dataset creation
            "dc.year": 2015
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    # TODO: Write the code to convert your dataset's records into JSON-serializable Python dictionaries
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype
    for file_data in find_files(input_path, ".cif"):
        record = parse_ase(os.path.join(file_data["path"],
                                        file_data["filename"]),
                           data_format="cif")

        # TODO: Fill in these dictionary fields for each record
        # Fields can be:
        #    REQ (Required, must be present)
        #    RCM (Recommended, should be present if possible)
        #    OPT (Optional, can be present if useful)
        record_metadata = {
            # REQ string: Unique value (should be URI to record if possible)
            "globus_subject": "https://figshare.com/articles/Synthesis_Characterization_and_Some_Properties_of_Cp_W_NO_H_sup_3_sup_allyl_Complexes/2158483#"
                              + record["chemical_formula"],
            # REQ list of strings: UUID(s) of users/groups allowed to access data, or ["public"]
            "acl": ["public"],
#            "mdf-publish.publication.collection": ,  # OPT string: Collection the record belongs to (if different from dataset)
#            "mdf_data_class": ,                      # OPT string: Type of data in record (if not set in dataset metadata)
            # RCM string: Chemical composition of material in record
            "mdf-base.material_composition": record["chemical_formula"],

#            "cite_as": ,                             # OPT list of strings: Complete citation(s) for this record (if different from dataset)
#            "license": ,                             # OPT string: License to use the record (if different from dataset) (preferably a link to the actual license).
            # REQ string: Title of record
            "dc.title": "Cp Complexes - " + record["chemical_formula"],
#            "dc.creator": ,                          # OPT string: Owner of record (if different from dataset)
#            "dc.identifier": ,                       # RCM string: Link to record (record webpage, if available)
#            "dc.contributor.author": ,               # OPT list of strings: Author(s) of record (if different from dataset)
#            "dc.subject": ,                          # OPT list of strings: Keywords about record
#            "dc.description": ,                      # OPT string: Description of record
#            "dc.relatedidentifier": ,                # OPT list of strings: Link(s) to related materials (if different from dataset)
#            "dc.year": ,                             # OPT integer: Year of record creation (if different from dataset)

#            "data": {                                # RCM dictionary: Other record data (described below)
#                "raw": json.dumps(record),           # RCM string: Original data record text, if feasible
#                "files": ,                           # RCM dictionary: {file_type : uri_to_file} pairs, data files (Example: {"cif" : "https://example.org/cifs/data_file.cif"})
#                # Any other fields you would like to include go in the "data" dictionary. Keys will be prepended with 'mdf_source_name:'
#            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])
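    # A fuller sketch of that batch approach (hedged; "make_record_metadata" is a
    # hypothetical helper standing in for the record-building code above):
    # all_records = []
    # for file_data in find_files(input_path, ".cif"):
    #     record = parse_ase(os.path.join(file_data["path"],
    #                                     file_data["filename"]),
    #                        data_format="cif")
    #     all_records.append(make_record_metadata(record))
    # result = dataset_validator.write_dataset(all_records)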

    # TODO: Save your converter as [mdf_source_name]_converter.py
    # You're done!
    if verbose:
        print("Finished converting")
Example No. 13
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://qmml.org/datasets.html#bfcc-13",
            "acl": ["public"],
            "mdf_source_name":
            "bfcc13",
            "mdf-publish.publication.collection":
            "bfcc13",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Lance J. Nelson, Vidvuds Ozoliņš, C. Shane Reese, Fei Zhou, Gus L.W. Hart: Cluster expansion made easy with Bayesian compressive sensing, Physical Review B 88(15): 155105, 2013."
            ],
            #            "license": ,
            "mdf_version":
            "0.1.0",
            "dc.title":
            "Cluster expansion made easy with Bayesian compressive sensing",
            "dc.creator":
            "Brigham Young University, University of California Los Angeles, Lawrence Livermore National Laboratory",
            "dc.identifier":
            "http://qmml.org/datasets.html",
            "dc.contributor.author": [
                "Lance J. Nelson", "Vidvuds Ozoliņš", "C. Shane Reese",
                "Fei Zhou", "Gus L.W. Hart"
            ],
            #            "dc.subject": ,
            "dc.description":
            "4k DFT calculations for solid AgPd, CuPt and AgPt FCC superstructures. DFT/PBE energy, forces and stresses for cell sizes 1-16 across all compositions including primitive cells.",
            "dc.relatedidentifier": [
                "https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.155105"
            ],
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)
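    # Note: with strict=False the Validator may instead return problems as
    # warnings in result["warnings"], which are checked below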

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
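        # parse_ase may return a falsy value when an OUTCAR cannot be read,
        # so only build a record when the parse succeeded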
        if data:
            uri = "https://data.materialsdatafacility.org/collections/" + "bfcc-13/bfcc-13/" + data_file[
                "no_root_path"] + "/" + data_file["filename"]
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                #            "mdf-publish.publication.collection": ,
                #            "mdf_data_class": ,
                "mdf-base.material_composition":
                data["frames"][0]["chemical_formula"],

                #            "cite_as": ,
                #            "license": ,
                "dc.title":
                "bfcc13 - " + data["frames"][0]["chemical_formula"],
                #            "dc.creator": ,
                "dc.identifier":
                uri,
                #            "dc.contributor.author": ,
                #            "dc.subject": ,
                #            "dc.description": ,
                #            "dc.relatedidentifier": ,
                #            "dc.year": ,
                "data": {
                    #                "raw": ,
                    "files": {
                        "outcar": uri
                    },
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))
            # The Validator may return warnings if strict=False, which should be noted
            if result.get("warnings", None):
                print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example No. 14
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://hdl.handle.net/11256/85",
            "acl": ["public"],
            "mdf_source_name":
            "doak_strain_energies",
            "mdf-publish.publication.collection":
            "Doak Strain Energies",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Doak JW, Wolverton C (2012) Coherent and incoherent phase stabilities of thermoelectric rocksalt IV-VI semiconductor alloys. Phys. Rev. B 86: 144202 http://dx.doi.org/10.1103/PhysRevB.86.144202"
            ],
            "license":
            "http://creativecommons.org/licenses/by-sa/3.0/us/",
            "dc.title":
            "GeTe-PbTe PbS-PbTe PbSe-PbS PbTe-PbSe PbTe-SnTe SnTe-GeTe mixing and coherency strain energies",
            "dc.creator":
            "Northwestern University",
            "dc.identifier":
            "http://hdl.handle.net/11256/85",
            "dc.contributor.author": ["Doak, JW", "Wolverton, C"],
            #            "dc.subject": ,
            #            "dc.description": ,
            "dc.relatedidentifier":
            ["http://dx.doi.org/10.1103/PhysRevB.86.144202"],
            "dc.year":
            2012
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                "mdf-publish.publication.collection":
                "Doak Strain Energies",
                "mdf-base.material_composition":
                data["frames"][0]["chemical_formula"],

                #            "cite_as": ,
                #            "license": ,
                "dc.title":
                "Strain Energy - " + data["frames"][0]["chemical_formula"],
                #            "dc.creator": ,
                "dc.identifier":
                uri,
                #            "dc.contributor.author": ,
                #            "dc.subject": ,
                #            "dc.description": ,
                #            "dc.relatedidentifier": ,
                #            "dc.year": ,
                "data": {
                    #                    "raw": ,
                    "files": {
                        "outcar": uri
                    }
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"])

    if verbose:
        print("Finished converting")
Example No. 15
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject": "http://hdl.handle.net/11256/88",
            "acl": ["public"],
            "mdf_source_name": "ta_melting",
            "mdf-publish.publication.collection": "Ta Melting",
            "mdf_data_class": "vasp",

            "cite_as": ["Qi-Jun Hong and Axel van de Walle, Solid-liquid coexistence in small systems: A statistical method to calculate melting temperatures, Journal of chemical physics, 139, 094114 (2013). http://dx.doi.org/10.1063/1.4819792"],
            "license": "http://creativecommons.org/licenses/by/3.0/us/",

            "dc.title": "Ta Melting Point Calculation by Small-cell Coexistence Method",
            "dc.creator": "Brown University, Caltech",
            "dc.identifier": "http://hdl.handle.net/11256/88",
            "dc.contributor.author": ["Qi-Jun Hong", "Axel van de Walle"],
#            "dc.subject": ,
            "dc.description": "We calculate the melting temperature of Tantalum, by employing the small-size coexistence solid-liquid coexistence method.",
            "dc.relatedidentifier": ["http://dx.doi.org/10.1063/1.4819792"],
            "dc.year": 2013
            }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        data = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + data_file["no_root_path"] + "/" + data_file["filename"]
        try:
            record_metadata = {
                "globus_subject": uri,
                "acl": ["public"],
                "mdf-publish.publication.collection": "Ta Melting",
                "mdf-base.material_composition": data["frames"][0]["chemical_formula"],

#                "cite_as": ,
#                "license": ,

                "dc.title": "Ta Melting - " + data["frames"][0]["chemical_formula"],
#                "dc.creator": ,
                "dc.identifier": uri,
#                "dc.contributor.author": ,
#                "dc.subject": ,
#                "dc.description": ,
#                "dc.relatedidentifier": ,
#                "dc.year": ,

                "data": {
#                    "raw": ,
                    "files": {"outcar": uri}
                    }
                }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":", result.get("invalid_metadata", ""))
        except Exception:
            print("Error on:", data_file["path"])

    if verbose:
        print("Finished converting")
Example No. 16
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "https://publish.globus.org/jspui/handle/ITEM/164",
        "acl": ["public"],
        "mdf_source_name":
        "ab_initio_solute_database",
        "mdf-publish.publication.collection":
        "High-Throughput ab-initio Dilute Solute Diffusion Database",
        "mdf_data_class":
        "vasp",
        "cite_as": [
            'Wu, Henry; Mayeshiba, Tam; Morgan, Dane, "Dataset for High-throughput Ab-initio Dilute Solute Diffusion Database," 2016, http://dx.doi.org/doi:10.18126/M2X59R'
        ],
        "dc.title":
        "High-throughput Ab-initio Dilute Solute Diffusion Database",
        "dc.creator":
        "Materials Data Facility",
        "dc.identifier":
        "http://dx.doi.org/doi:10.18126/M2X59R",
        "dc.contributor.author":
        ["Wu, Henry", "Mayeshiba, Tam", "Morgan, Dane"],
        "dc.subject": ["dilute", "solute", "DFT", "diffusion"],
        "dc.description":
        "We demonstrate automated generation of diffusion databases from high-throughput density functional theory (DFT) calculations. A total of more than 230 dilute solute diffusion systems in Mg, Al, Cu, Ni, Pd, and Pt host lattices have been determined using multi-frequency diffusion models. We apply a correction method for solute diffusion in alloys using experimental and simulated values of host self-diffusivity.",
        "dc.relatedidentifier": [
            "http://dx.doi.org/10.1038/sdata.2016.54",
            "http://dx.doi.org/10.6084/m9.figshare.1546772"
        ],
        "dc.year":
        2016
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(root=input_path,
                                    file_pattern="^OUTCAR$",
                                    verbose=verbose),
                         desc="Processing data files",
                         disable=not verbose):
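        # note: the anchored pattern "^OUTCAR$" matches only files named exactly
        # OUTCAR, unlike the looser "OUTCAR" substring patterns used above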
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="vasp",
                              verbose=False)
        if file_data:
            uri = "https://data.materialsdatafacility.org/published/publication_164/data/" + dir_data[
                "no_root_path"] + "/" + dir_data["filename"]
            record_metadata = {
                "globus_subject":
                uri,
                "acl": ["public"],
                "mdf-publish.publication.collection":
                "High-Throughput ab-initio Dilute Solute Diffusion Database",
                "mdf-base.material_composition":
                file_data["frames"][0]["chemical_formula"],
                "dc.title":
                "High-throughput Ab-initio Dilute Solute Diffusion Database - "
                + file_data["frames"][0]["chemical_formula"],
                #"dc.creator": ,
                "dc.identifier":
                uri,
                #"dc.contributor.author": ,
                #"dc.subject": ,
                #"dc.description": ,
                #"dc.relatedidentifier": ,
                #"dc.year": ,
                "data": {
                    #                   "raw": str(file_data),
                    "files": {
                        "outcar": uri
                    }
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"], ":",
                      result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 17
def convert(input_path, verbose=False):

    # Collect the metadata
    dataset_metadata = {
        "globus_subject":
        "http://khazana.uconn.edu/polymer_genome/index.php",
        "acl": ["public"],
        "mdf_source_name":
        "khazana_polymer",
        "mdf-publish.publication.collection":
        "Khazana",
        "mdf_data_class":
        "cif",
        "cite_as": [
            "T. D. Huan, A. Mannodi-Kanakkithodi, C. Kim, V. Sharma, G. Pilania, R. Ramprasad\nA polymer dataset for accelerated property prediction and design Sci. Data, 3, 160012 (2016).",
            "A. Mannodi-Kanakkithodi, G. M. Treich, T. D. Huan, R. Ma, M. Tefferi, Y. Cao, G A. Sotzing, R. Ramprasad\nRational Co-Design of Polymer Dielectrics for Energy Storage Adv. Mater., 28, 6277 (2016).",
            "T. D. Huan, A. Mannodi-Kanakkithodi, R. Ramprasad\nAccelerated materials property predictions and design using motif-based fingerprints Phys. Rev. B, 92, 014106 (2015).",
            "A. Mannodi-Kanakkithodi, G. Pilania, T. D. Huan, T. Lookman, R. Ramprasad\nMachine learning strategy for accelerated design of polymer dielectrics Sci. Rep., 6, 20952 (2016)."
        ],
        "dc.title":
        "Khazana (Polymer)",
        "dc.creator":
        "University of Connecticut",
        "dc.identifier":
        "http://khazana.uconn.edu",
        #        "dc.contributor.author": ,
        "dc.subject": ["polymer"]
        #        "dc.description": ,
        #        "dc.relatedidentifier": ,
        #        "dc.year":
    }

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for dir_data in tqdm(find_files(input_path, "\.cif$"),
                         desc="Processing data files",
                         disable=not verbose):
        file_data = parse_ase(file_path=os.path.join(dir_data["path"],
                                                     dir_data["filename"]),
                              data_format="cif",
                              verbose=False)

        uri = "http://khazana.uconn.edu/module_search/material_detail.php?id=" + dir_data[
            "filename"].replace(".cif", "")
        record_metadata = {
            "globus_subject": uri,
            "acl": ["pubilc"],
            "mdf-publish.publication.collection": "Khazana",
            "mdf-base.material_composition": file_data["chemical_formula"],
            "dc.title": "Khazana Polymer - " + file_data["chemical_formula"],
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": str(file_data),
                "files": {
                    "cif": uri
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")
Example No. 18
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "http://qmml.org/datasets.html#h2o-13",
            "acl": ["public"],
            "mdf_source_name":
            "h2o-13",
            "mdf-publish.publication.collection":
            "h2o-13",
            "mdf_data_class":
            "xyz",
            "cite_as": [
                "Albert P. Bartók, Michael J. Gillan, Frederick R. Manby, Gábor Csányi: Machine-learning approach for one- and two-body corrections to density functional theory: Applications to molecular and condensed water, Physical Review B 88(5): 054104, 2013. http://dx.doi.org/10.1103/PhysRevB.88.054104"
            ],
            #            "license": ,
            "mdf_version":
            "0.1.0",
            "dc.title":
            "Machine-learning approach for one- and two-body corrections to density functional theory: Applications to molecular and condensed water",
            "dc.creator":
            "University of Cambridge, University College London, University of Bristol",
            "dc.identifier":
            "http://qmml.org/datasets.html#h2o-13",
            "dc.contributor.author": [
                "Albert P. Bartók", "Michael J. Gillan", "Frederick R. Manby",
                "Gábor Csányi"
            ],
            #            "dc.subject": ,
            "dc.description":
            "Water monomer and dimer geometries, with calculations at DFT, MP2 and CCSD(T) level of theory. 7k water monomer geometries corresponding to a grid, with energies and forces at DFT / BLYP, PBE, PBE0 with AV5Z basis set",
            "dc.relatedidentifier":
            ["https://doi.org/10.1103/PhysRevB.88.054104"],
            "dc.year":
            2013
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    #dataset_validator = Validator(dataset_metadata, strict=False)
    # You can also force the Validator to treat warnings as errors with strict=True
    dataset_validator = Validator(dataset_metadata, strict=True)

    # Get the data
    #    Each record should be exactly one dictionary
    #    It is recommended that you convert your records one at a time, but it is possible to put them all into one big list (see below)
    #    It is also recommended that you use a parser to help with this process if one is available for your datatype

    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]))
        uri = "https://data.materialsdatafacility.org/collections/" + "h2o-13/split_xyz_files/" + data_file[
            "no_root_path"] + '/' + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf_data_class": ,
            "mdf-base.material_composition": record["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "H2o-13 - " + data_file["filename"],
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "xyz": uri
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))
        # The Validator may return warnings if strict=False, which should be noted
        if result.get("warnings", None):
            print("Warnings:", result["warnings"])

    # Alternatively, if the only way you can process your data is in one large list, you can pass the list to the Validator
    # You still must add the required metadata to your records
    # It is recommended to use the previous method if possible
    # result = dataset_validator.write_dataset(your_records_with_metadata)
    # if result["success"] is not True:
    #     print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example No. 19
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "globus_subject":
            "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
            "acl": ["public"],
            "mdf_source_name":
            "trinkle_mg_x_diffusion",
            "mdf-publish.publication.collection":
            "Mg-X Diffusion Dataset",
            "mdf_data_class":
            "vasp",
            "cite_as": [
                "Citation for dataset Mg-X-Diffusion with author(s): Dallas Trinkle, Ravi Agarwal"
            ],
            #            "license": "",
            "dc.title":
            "Mg-X-Diffusion",
            "dc.creator":
            "University of Illinois at Urbana-Champaign",
            "dc.identifier":
            "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
            "dc.contributor.author": ["Trinkle, Dallas", "Agarwal, Ravi"],
            #"dc.subject": [],
            #"dc.description": "",
            #            "dc.relatedidentifier": [],
            "dc.year":
            2017
        }
    elif type(metadata) is str:
        try:
            with open(metadata, 'r') as metadata_file:
                dataset_metadata = json.load(metadata_file)
        except Exception as e:
            sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    # Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp")
        uri = "https://data.materialsdatafacility.org/collections/" + "mg-x/" + data_file[
            "no_root_path"] + "/" + data_file["filename"]
        record_metadata = {
            "globus_subject": uri,
            "acl": ["public"],
            #            "mdf-publish.publication.collection": ,
            #            "mdf-base.material_composition": record["frames"][0]["chemical_formula"],

            #            "cite_as": ,
            #            "license": ,
            "dc.title": "Mg-X Diffusions - ",
            #            "dc.creator": ,
            "dc.identifier": uri,
            #            "dc.contributor.author": ,
            #            "dc.subject": ,
            #            "dc.description": ,
            #            "dc.relatedidentifier": ,
            #            "dc.year": ,
            "data": {
                #                "raw": ,
                "files": {
                    "outcar": uri
                }
            }
        }
        try:
            record_metadata["mdf-base.material_composition"] = record[
                "frames"][0]["chemical_formula"]
            record_metadata[
                "dc.title"] += record["frames"][0]["chemical_formula"]
        except (KeyError, IndexError, TypeError):
            # parse_ase was unable to read the composition of record 1386:
            # https://data.materialsdatafacility.org/collections/mg-x/Elements/Eu/Mg-X_Eu/OUTCAR
            # Fall back to the known material composition for that record.
            record_metadata["mdf-base.material_composition"] = "EuMg149"
            record_metadata["dc.title"] += "EuMg149"

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"], ":",
                  result.get("invalid_metadata", ""))

    if verbose:
        print("Finished converting")