# Shared imports assumed by all of the converter examples below. The Validator,
# find_files, parse_ase, and parse_tab helpers come from the MDF converter
# toolkit; their exact import paths depend on your checkout of that repo, so
# they are left here as clearly-labeled placeholders.
import json
import os
import sys

import pandas as pd
from tqdm import tqdm

# from mdf_refinery.validator import Validator           # assumed location
# from mdf_refinery.parsers.ase_parser import parse_ase  # assumed location
# from mdf_refinery.parsers.tab_parser import parse_tab  # assumed location
# from mdf_refinery.utils.file_utils import find_files   # assumed location


def convert(metadata, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Validate the dataset metadata; the Validator constructor raises and exits
    # if the metadata is malformed.
    Validator(dataset_metadata)

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "The Open Access Malaria Box: A Drug Discovery Catalyst for Neglected Diseases",
                "acl": ['public'],
                "source_name":
                "malaria_drug_discovery",
                "citation": [
                    "Spangenberg T, Burrows JN, Kowalczyk P, McDonald S, Wells TNC, Willis P (2013) The Open Access Malaria Box: A Drug Discovery Catalyst for Neglected Diseases. PLoS ONE 8(6): e62906. https://doi.org/10.1371/journal.pone.0062906"
                ],
                "data_contact": {
                    "given_name": "Thomas",
                    "family_name": "Spangenberg",
                    "email": "*****@*****.**",
                    "instituition": "Medicines for Malaria Venture"
                },
                "author": [{
                    "given_name": "Thomas",
                    "family_name": "Spangenberg",
                    "email": "*****@*****.**",
                    "instituition": "Medicines for Malaria Venture"
                }, {
                    "given_name": "Jeremy N.",
                    "family_name": "Burrows",
                    "instituition": "Medicines for Malaria Venture"
                }, {
                    "given_name": "Paul",
                    "family_name": "Kowalczyk",
                    "instituition": "SCYNEXIS Inc."
                }, {
                    "given_name": "Simon",
                    "family_name": "McDonald",
                    "instituition": "Medicines for Malaria Venture"
                }, {
                    "given_name": "Timothy N. C.",
                    "family_name": "Wells",
                    "instituition": "Medicines for Malaria Venture"
                }, {
                    "given_name": "Paul",
                    "family_name": "Willis",
                    "email": "*****@*****.**",
                    "instituition": "Medicines for Malaria Venture"
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Open Access Malaria Box",
                "tags": [
                    "Malaria", "Malarial parasites", "Antimalarials",
                    "Plasmodium", "Parasitic diseases", "Drug discovery",
                    "Plasmodium falciparum"
                ],
                "description":
                "In most cases it is a prerequisite to be able to obtain physical samples of the chemical compounds for further study, and the groups responsible for screening did not originally plan to provide these molecules. In addition, many of the biological systems in which these compounds would be tested are not suitable for testing such large numbers of compounds. There needs to therefore be some simplification of the collection. To overcome these barriers, a diverse collection of anti-malarial compounds has been designed and assembled.",
                "year":
                2013,
                "links": {
                    "landing_page":
                    "https://doi.org/10.1371/journal.pone.0062906",

                    #  "publication": ,
                    "data_doi":
                    "https://ndownloader.figshare.com/files/1090667",

                    #   "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    with open(os.path.join(input_path, "Table_S1.csv"), 'r') as raw_in:
        data_records = raw_in.read()
    for record in tqdm(parse_tab(data_records),
                       desc="Processing Data",
                       disable=not verbose):
        record_metadata = {
            "mdf": {
                "title": "Malaria Drug Discovery - " + record["Smiles"],
                "acl": ['public'],

                #            "tags": ,
                #            "description": ,
                "composition": record["Smiles"],
                #            "raw": ,
                "links": {
                    #                "landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "csv": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/malaria_drug_discovery/Table_S1.csv",
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Halogen-Substituted Ionic Liquids",
                "acl": ["public"],
                "source_name": "halogen_ionic_liquids",

                "data_contact": {
                    
                    "given_name": "Vitaly V.",
                    "family_name": "Chaban",
                    "email": "*****@*****.**",
                    "institution": "University of Rochester",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Chaban, Vitaly V. (2016). Halogen-Substituted Ionic Liquids [Data set]. Zenodo. http://doi.org/10.5281/zenodo.165493"],

                "author": [{

                    "given_name": "Vitaly V.",
                    "family_name": "Chaban",
                    "email": "*****@*****.**",
                    "institution": "Universidade Federal de São Paulo",

                }],

                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Halogen Substituted Ionic Liquids",
                #"tags": [""],
                "description": "Pre-equilibrated systems for different size for AIMD for Halogen-Substituted Ionic Liquids.",
                "year": 2016,

                "links": {

                    "landing_page": "https://doi.org/10.5281/zenodo.165493",
                    #"publication": [""],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Halogen Ionic Liquids - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "xyz": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/halogen_ionic_liquids/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Elemental vacancy diffusion database from high-throughput first-principles calculations for fcc and hcp structures",
                "acl": ["public"],
                "source_name": "nist_fcc_hcp_structures",

                "data_contact": {
                    
                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Angsten, Thomas; Mayeshiba, Tam; Wu, Henry; Morgan, Dane Elemental vacancy diffusion for fcc and hcp structures (2014-08-08) http://hdl.handle.net/11256/76"],

                "author": [{

                    "given_name": "Thomas",
                    "family_name": "Angsten",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Tam",
                    "family_name": "Mayeshiba",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Henry",
                    "family_name": "Wu",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                }],

                #"license": "",  NO LICENSE ON SITE... CONTACT AUTHOR
                "collection": "NIST fcc hcp Structures",
                #"tags": [""],
                "description": "This work demonstrates how databases of diffusion-related properties can be developed from high-throughput ab initio calculations. The formation and migration energies for vacancies of all adequately stable pure elements in both the face-centered cubic (fcc) and hexagonal close packing (hcp) crystal structures were determined using ab initio calculations. For hcp migration, both the basal plane and z-direction nearest-neighbor vacancy hops were considered. Energy barriers were successfully calculated for 49 elements in the fcc structure and 44 elements in the hcp structure.",
                "year": 2014,

                "links": {

                    "landing_page": "http://hdl.handle.net/11256/76",
                    "publication": ["http://dx.doi.org/10.1088/1367-2630/16/1/015018"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp-out")
        except Exception:
            # Skip unparseable OUTCARs; otherwise `record` would be unbound or stale below.
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "NIST fcc hcp structures - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": ,

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "outcar": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/nist_fcc_hcp_structures/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Quantum Chemistry Structures and Properties of 134 kilo Molecules",
                "acl": ['public'],
                "source_name":
                "gdb9_14",
                "citation": [
                    "Raghunathan Ramakrishnan, Pavlo Dral, Matthias Rupp, O. Anatole von Lilienfeld: Quantum Chemistry Structures and Properties of 134 kilo Molecules, Scientific Data 1: 140022, 2014."
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                "author": [{
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "instituition": "Argonne National Laboratory"
                }, {
                    "given_name": "Raghunathan",
                    "family_name": "Ramakrishnan",
                    "institution": "University of Basel"
                }, {
                    "given_name":
                    "Pavlo O.",
                    "family_name":
                    "Dral",
                    "instituition":
                    "Max-Planck-Institut für Kohlenforschung, University of Erlangen-Nuremberg",
                }, {
                    "given_name": "Matthias",
                    "family_name": "Rupp",
                    "instituition": "University of Basel",
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "collection":
                "gdb9_14",
                "tags": [
                    "Computational chemistry", "Density functional theory",
                    "Quantum chemistry"
                ],
                "description":
                "133,885 small organic molecules with up to 9 C, O, N, F atoms, saturated with H. Geometries, harmonic frequencies, dipole moments, polarizabilities, energies, enthalpies, and free energies of atomization at the DFT/B3LYP/6-31G(2df,p) level of theory. For a subset of 6,095 constitutional isomers of C7H10O2, energies, enthalpies, and free energies of atomization are provided at the G4MP2 level of theory.",
                "year":
                2014,
                "links": {
                    "landing_page": "http://qmml.org/datasets.html#gdb9-14",
                    "publication": ["http://dx.doi.org/10.1038/sdata.2014.22"],
                    # "data_doi": "",

                    #                "related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb9-14.zip",
                    }
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        index = ""
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "xyz")
        except Exception:
            # Some files store floats in Mathematica-style scientific notation
            # (e.g. 6.2198*^-6), which the parser cannot convert; skip them.
            errors += 1
            continue
        comp = record["chemical_formula"]
        if data_file["no_root_path"] == "dsgdb9nsd.xyz":
            # The index sits between the underscore and the ".xyz" extension
            start = data_file["filename"].find('_')
            index = int(data_file["filename"][start + 1:-4])

        record_metadata = {
            "mdf": {
                "title": "gdb9_14 - " + comp,
                "acl": ['public'],

                #            "tags": ,
                #            "description": ,
                "composition": comp,
                #            "raw": ,
                "links": {
                    # "landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/gdb9_14/" + data_file["no_root_path"] +
                        "/" + data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            },
            "gdb9_14": {
                "index": index,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("TOTAL ERRORS: " + str(errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "KLH Dataset I",
                "acl": ["public"],
                "source_name": "klh_1",

                "data_contact": {

                    "given_name": "Clinton S",
                    "family_name": "Potter",
                    "email": "*****@*****.**",
                    "institution": "The Scripps Research Institute",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                #"citation": [""],

                "author": [{

                    "given_name": "Yuanxin",
                    "family_name": "Zhu",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Bridget",
                    "family_name": "Carragher",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Robert M",
                    "family_name": "Glaeser",
                    "institution": "University of California, Berkeley",

                },
                {

                    "given_name": "Denis",
                    "family_name": "Fellmann",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Chandrajit",
                    "family_name": "Bajaj",
                    "institution": "University of Texas at Austin,",

                },
                {

                    "given_name": "Marshall",
                    "family_name": "Bern",
                    "institution": "Palo Alto Research Center",

                },
                {

                    "given_name": "Fabrice",
                    "family_name": "Mouche",
                    "institution": "The Scripps Research Institute",

                },
                {

                    "given_name": "Felix",
                    "family_name": "de Haas",
                    "institution": "FEI Company, Eindhoven",

                },
                {

                    "given_name": "Richard J",
                    "family_name": "Hall",
                    "institution": "Imperial College London",

                },
                {

                    "given_name": "David J",
                    "family_name": "Kriegman",
                    "institution": "University of California, San Diego",

                },
                {

                    "given_name": "Steven J",
                    "family_name": "Ludtke",
                    "institution": "Baylor College of Medicine",

                },
                {

                    "given_name": "Satya P",
                    "family_name": "Mallick",
                    "institution": "University of California, San Diego",

                },
                {

                    "given_name": "Pawel A",
                    "family_name": "Penczek",
                    "institution": "University of Texas-Houston Medical School",

                },
                {

                    "given_name": "Alan M",
                    "family_name": "Roseman",
                    "institution": "MRC Laboratory of Molecular Biology",

                },
                {

                    "given_name": "Fred J",
                    "family_name": "Sigworth",
                    "institution": "Yale University School of Medicine",

                },
                {

                    "given_name": "Niels",
                    "family_name": "Volkmann",
                    "institution": "The Burnham Institute",

                },
                {

                    "given_name": "Clinton S",
                    "family_name": "Potter",
                    "email": "*****@*****.**",
                    "institution": "The Scripps Research Institute",

                }],

                #"license": "",
                "collection": "Keyhole Limpet Hemocyanin",
                "tags": ["Electron microscopy", "Single-particle reconstruction", "Automatic particle selection", "Image processing", "Pattern recognition"],
                "description": "Manual selection of single particles in images acquired using cryo-electron microscopy (cryoEM) will become a significant bottleneck when datasets of a hundred thousand or even a million particles are required for structure determination at near atomic resolution. Algorithm development of fully automated particle selection is thus an important research objective in the cryoEM field. A number of research groups are making promising new advances in this area. Evaluation of algorithms using a standard set of cryoEM images is an essential aspect of this algorithm development. With this goal in mind, a particle selection \"bakeoff\" was included in the program of the Multidisciplinary Workshop on Automatic Particle Selection for cryoEM. Twelve groups participated by submitting the results of testing their own algorithms on a common dataset. The dataset consisted of 82 defocus pairs of high-magnification micrographs, containing keyhole limpet hemocyanin particles, acquired using cryoEM.",
                "year": 2004,

                "links": {

                    "landing_page": "http://emg.nysbc.org/redmine/projects/public-datasets/wiki/KLH_dataset_I",
                    "publication": ["http://www.sciencedirect.com/science/article/pii/S1047847703002004#!"],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": "",

                        #"path": "",

                    #},

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "map$"), desc="Processing Files", disable=not verbose):
        with open(os.path.join(data_file["path"], data_file["filename"]), 'r') as raw_in:
            map_data = raw_in.read()
        headers = ["index", "image", "coordinate"]
        for line in parse_tab(map_data, headers=headers, sep=" "):
            ifile_1 = line["image"].replace(".002", ".001")
            ifile_2 = line["image"]
            cfile = line["coordinate"]
            df = pd.read_csv(os.path.join(data_file["path"], cfile), delim_whitespace=True)
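            # cfile appears to hold the picked-particle coordinates for this
            # micrograph pair; df is loaded here but not used further in this snippet.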
            ## Metadata:record
            record_metadata = {
                "mdf": {
    
                    "title": "Keyhole Limpet Hemocyanin 1 - " + cfile,
                    "acl": ["public"],
                    #"composition": ,
    
                    #"tags": ,
                    "description": "Images under exposure1 are near-to-focus (NTF). Images under exposure2 are far-from-focus (FFF).",
                    #"raw": ,
    
                    "links": {
    
                        #"landing_page": ,
                        #"publication": ,
                        #"data_doi": ,
                        #"related_id": ,
    
                        "klh": {
    
                            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                            "http_host": "https://data.materialsdatafacility.org",
    
                            "path": "/collections/klh_1/" + data_file["no_root_path"] + "/" + cfile,
    
                            },
    
                        "jpg": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure1_jpeg/" + ifile_1.replace(".mrc", ".jpg"),
        
                            },
    
    
                        "mrc": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure1_mrc/" + ifile_1,
        
                            },
                        
                        "jpg2": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure2_jpeg/" + ifile_2.replace(".mrc", ".jpg"),
        
                            },
    
    
                        "mrc2": {
        
                                "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                                "http_host": "https://data.materialsdatafacility.org",
        
                                "path": "/collections/klh_1/exposure2_mrc/" + ifile_2,
        
                            },
                    },
    
                    #"citation": ,
    
                    #"data_contact": {
    
                        #"given_name": ,
                        #"family_name": ,
                        #"email": ,
                        #"institution": ,
    
                    #},
    
                    #"author": [{
    
                        #"given_name": ,
                        #"family_name": ,
                        #"email": ,
                        #"institution": ,
    
                    #}],
    
                    #"year": ,
    
                },
    
                #"dc": {
    
                #},
    
    
            }
            ## End metadata
    
            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)
    
            # Check if the Validator accepted the record, and stop processing if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if not result["success"]:
                if not dataset_validator.cancel_validation()["success"]:
                    print("Error cancelling validation. The partial feedstock may not be removed.")
                raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Uncertainty quantification for quantum chemical models of complex reaction networks	",
                "acl": ["public"],
                "source_name": "reiher_quantum_chemical_models",

                "data_contact": {
                    
                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["(2016). Uncertainty quantification for quantum chemical models of complex reaction networks. , 195, 497-520. 10.1039/C6FD00144K"],

                "author": [{

                    "given_name": "Jonny",
                    "family_name": "Proppe",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Tamara",
                    "family_name": "Husch",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Gregor N.",
                    "family_name": "Simma",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                },
                {

                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": "Laboratory of Physical Chemistry, ETH Zürich",

                }],

                "license": "http://creativecommons.org/licenses/by/3.0/",
                "collection": "Reiher Quantum Chemical Models",
                #"tags": [""],
                "description": "For the quantitative understanding of complex chemical reaction mechanisms, it is, in general, necessary to accurately determine the corresponding free energy surface and to solve the resulting continuous-time reaction rate equations for a continuous state space. For a general (complex) reaction network, it is computationally hard to fulfill these two requirements. However, it is possible to approximately address these challenges in a physically consistent way. On the one hand, it may be sufficient to consider approximate free energies if a reliable uncertainty measure can be provided. On the other hand, a highly resolved time evolution may not be necessary to still determine quantitative fluxes in a reaction network if one is interested in specific time scales. In this paper, we present discrete-time kinetic simulations in discrete state space taking free energy uncertainties into account.",
                "year": 2016,

                "links": {

                    "landing_page": "http://pubs.rsc.org/en/content/articlelanding/fd/2016/c6fd00144k#!divAbstract",
                    "publication": ["http://pubs.rsc.org/doi/c6fd90075e"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz$"), desc="Processing files", disable=not verbose):
        if "PaxHeaders" in data_file["path"]:
            continue
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Reiher Quantum Chemical Models - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "xyz": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/reiher_quantum_chemical_models/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes",
                "acl": ["public"],
                "source_name": "cytotoxic_pt_complexes",

                "data_contact": {
                    
                    "given_name": "Markus",
                    "family_name": "Galanski",
                    "email": "*****@*****.**",
                    "institution": "University of Vienna",

                },

                "data_contributor": [{
                    
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Keppler, Bernhard K. (2013/01/10). Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes. Journal of Medicinal Chemistry, 56, 330-344. doi: 10.1021/jm3016427"],

                "author": [{

                    "given_name": "Hristo P.",
                    "family_name": "Varbanov",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Michael A.",
                    "family_name": "Jakupec",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Alexander",
                    "family_name": "Roller",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Frank",
                    "family_name": "Jensen",
                    "email": "*****@*****.**",
                    "institution": "University of Aarhus",

                },
                {

                    "given_name": "Markus",
                    "family_name": "Galanski",
                    "email": "*****@*****.**",
                    "institution": "University of Vienna",

                },
                {

                    "given_name": "Bernhard K.",
                    "family_name": "Keppler",
                    "institution": "University of Vienna",

                }],

                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cytotoxic Platinum Complexes",
                "tags": ["structure geometry", "series", "resistance", "Herein", "laboratory", "tetraki", "tris", "Relationship", "wb 97x", "mechanism", "cisplatin", "complex", "SW", "Cytotoxic", "calculation", "relationship", "Density Functional Theory", "DFT", "Reliable", "ComplexesOctahedral", "bi", "compound", "Quantitative", "Model", "QSAR investigations", "cytotoxicity", "candidate", "cell line CH 1", "descriptor", "optimization", "QSAR models", "toxicity", "Theoretical Investigations"],
                "description": "Octahedral platinum(IV) complexes are promising candidates in the fight against cancer. In order to rationalize the further development of this class of compounds, detailed studies on their mechanisms of action, toxicity, and resistance must be provided and structure–activity relationships must be drawn. Herein, we report on theoretical and QSAR investigations of a series of 53 novel bis-, tris-, and tetrakis(carboxylato)platinum(IV) complexes, synthesized and tested for cytotoxicity in our laboratories. ",
                "year": 2012,

                "links": {

                    "landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3557934/",
                    "publication": ["https://dx.doi.org/10.1021%2Fjm3016427"],
                    #"data_doi": "",
                    #"related_id": ,

                    "cif": {

                        #"globus_endpoint": ,
                        "http_host": "https://ndownloader.figshare.com",

                        "path": "/files/3593325",
                        },
                    },
                },

            #"mrr": {

                #},

            #"dc": {

                #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "temp_file.cif"), desc="Processing files", disable=not verbose):
        # temp_file.cif is the same as the real file, but with the authors and
        # addresses removed so that ase can read the composition; it should only
        # be used for conversion purposes.
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "cif")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Cytotoxic Platinum Complexes - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

#                "tags": ,
#                "description": ,
                #"raw": json.dumps(record),

                "links": {

#                    "landing_page": ,
#                    "publication": ,
#                    "data_doi": ,
#                    "related_id": ,

                    "cif": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/cytotoxic_pt_complexes/" + "jm3016427_si_002.cif",
                        },
                    },

#                "citation": ,

#                "data_contact": {

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    },

#                "author": [{

#                    "given_name": ,
#                    "family_name": ,
#                    "email": ,
#                    "institution": ,

#                    }],

#                "year": ,

                },

           # "dc": {

           # },


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Using Machine Learning To Identify Factors That Govern Amorphization of Irradiated Pyrochlores",
                "acl": ['public'],
                "source_name":
                "irradiated_pyrochlores_ml",
                "citation": [
                    "Using Machine Learning To Identify Factors That Govern Amorphization of Irradiated Pyrochlores Ghanshyam Pilania, Karl R. Whittle, Chao Jiang, Robin W. Grimes, Christopher R. Stanek, Kurt E. Sickafus, and Blas Pedro Uberuaga Chemistry of Materials 2017 29 (6), 2574-2583 DOI: 10.1021/acs.chemmater.6b04666"
                ],
                "data_contact": {
                    "given_name": "Blas Pedro",
                    "family_name": "Uberuaga",
                    "email": "*****@*****.**",
                    "instituition": "Los Alamos National Laboratory"
                },
                "author": [{
                    "given_name": "Blas Pedro",
                    "family_name": "Uberuaga",
                    "email": "*****@*****.**",
                    "instituition": "Los Alamos National Laboratory"
                }, {
                    "given_name": "Ghanshyam",
                    "family_name": "Pilania",
                    "instituition": "Los Alamos National Laboratory"
                }, {
                    "given_name": "Karl R.",
                    "family_name": "Whittle",
                    "instituition": "University of Liverpool"
                }, {
                    "given_name": "Chao",
                    "family_name": "Jiang",
                    "instituition": "Idaho National Laboratory"
                }, {
                    "given_name": "Robin W.",
                    "family_name": "Grimes",
                    "instituition": "Imperial College London"
                }, {
                    "given_name": "Christopher R.",
                    "family_name": "Stanek",
                    "instituition": "Los Alamos National Laboratory"
                }, {
                    "given_name": "Kurt E.",
                    "family_name": "Sickafus",
                    "instituition": "University of Tennessee"
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection":
                "ML for Amorphization of Irradiated Pyrochlores",
                "tags": ["ML"],
                "description":
                "Here, we use a machine learning model to examine the factors that govern amorphization resistance in the complex oxide pyrochlore (A2B2O7) in a regime in which amorphization occurs as a consequence of defect accumulation. We examine the fidelity of predictions based on cation radii and electronegativities, the oxygen positional parameter, and the energetics of disordering and amorphizing the material.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "http://pubs.acs.org/doi/full/10.1021/acs.chemmater.6b04666#showFigures",

                    #   "publication": ,
                    #"data_doi": "",

                    #    "related_id": ,
                    "pdf": {

                        #"globus_endpoint": ,
                        "http_host": "https://ndownloader.figshare.com",
                        "path": "/files/7712131",
                    }
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
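    # Input format assumption: the first line of the data file is a "; "-delimited
    # header row and each following line is a space-separated data row; parse_tab
    # is assumed to yield one dict per row keyed by header, e.g.
    # {"Compound": "Gd2Ti2O7", ...}.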
    with open(os.path.join(input_path, "irradiated_pyrochlores_ml_data.txt"),
              'r') as raw_in:
        headers = raw_in.readline().split("; ")
        for record in parse_tab(raw_in.read(), headers=headers, sep=" "):
            record_metadata = {
                "mdf": {
                    "title":
                    "ML for Amorphization of Irradiated Pyrochlores - " +
                    record["Compound"],
                    "acl": ['public'],

                    #            "tags": ,
                    #            "description": ,
                    "composition":
                    record["Compound"],
                    #            "raw": ,
                    "links": {
                        #                "landing_page": ,

                        #                "publication": ,
                        #                "data_doi": ,

                        #                "related_id": ,
                        "txt": {
                            "globus_endpoint":
                            "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                            "http_host":
                            "https://data.materialsdatafacility.org",
                            "path":
                            "/collections/irradiated_pyrochlores_ml/irradiated_pyrochlores_ml_data.txt",
                        },
                    },

                    #            "citation": ,
                    #            "data_contact": {

                    #                "given_name": ,
                    #                "family_name": ,

                    #                "email": ,
                    #                "institution":,

                    #                },

                    #            "author": ,

                    #            "license": ,
                    #            "collection": ,
                    #            "year": ,

                    #            "mrr":

                    #            "processing": ,
                    #            "structure":,
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
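    # For example, a hypothetical extra block for this dataset would sit alongside "mdf":
    #     dataset_metadata = {
    #         "mdf": {...},
    #         "yeast_mediator_complex": {"some_extra_field": "value"},
    #     }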
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Molecular architecture of the yeast Mediator complex",
                "acl": ["public"],
                "source_name": "yeast_mediator_complex",

                "data_contact": {

                    "given_name": "Benjamin",
                    "family_name": "Webb",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Robinson, Philip J, Trnka, Michael J, Pellarin, Riccardo, Greenberg, Charles H, Bushnell, David A, Davis, Ralph, … Kornberg, Roger D. (2015). Molecular architecture of the yeast Mediator complex [Data set]. eLife. Zenodo. http://doi.org/10.5281/zenodo.802915"],

                "author": [{

                    "given_name": "Philip J",
                    "family_name": "Robinson",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Michael J",
                    "family_name": "Trnka",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Riccardo",
                    "family_name": "Pellarin",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Charles H",
                    "family_name": "Greenberg",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "David A",
                    "family_name": "Bushnell",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Ralph",
                    "family_name": "Davis",
                    "institution": "Stanford University",

                },
                {

                    "given_name": "Alma L",
                    "family_name": "Burlingame",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "institution": "University of California San Francisco",

                },
                {

                    "given_name": "Roger D",
                    "family_name": "Kornberg",
                    "institution": "Stanford University",

                }],

                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "Yeast Mediator Complex",
                "tags": ["Integrative Modeling Platform (IMP)", "Chemical crosslinks", "PMI", "X-ray crystallography"],
                "description": "The 21-subunit Mediator complex transduces regulatory information from enhancers to promoters, and performs an essential role in the initiation of transcription in all eukaryotes. This repository contains files used in the 3-D modeling of the entire Mediator complex, using an integrative modeling approach that combines information from chemical cross-linking and mass spectrometry; X-ray crystallography; homology modeling; and cryo-electron microscopy.",
                "year": 2015,

                "links": {

                    "landing_page": "https://zenodo.org/record/802915",
                    "publication": ["https://doi.org/10.7554/eLife.08719", "https://github.com/integrativemodeling/mediator/tree/v1.0.3"],
                    "data_doi": "https://doi.org/10.5281/zenodo.802915",
                    #"related_id": "",

                    #"data_link": {

                        #"globus_endpoint": ,
                        #"http_host": "",

                        #"path": "",

                    #},

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
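        # "proteindatabank" is the ASE I/O format name for .pdb files; as in the
        # other converters, the parsed record is assumed to expose "chemical_formula".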
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "Yeast Mediator Complex - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    "pdb": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/yeast_mediator_complex/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Neighborhood Behavior:  A Useful Concept for Validation of “Molecular Diversity” Descriptors",
                "acl": ['public'],
                "source_name": "qsar_molecular_diversity",
                "citation": ["David E Patterson, Richard D Cramer, Allan M Ferguson, Robert D Clark, Laurence W Weinberger. Neighbourhood Behaviour: A Useful Concept for Validation of \"Molecular Diversity\" Descriptors. J. Med. Chem. 1996 (39) 3049 - 3059."],
                "data_contact": {
    
                    "given_name": "Richard D.",
                    "family_name": "Cramer",
                    
                    "email": "*****@*****.**",
    
                    },
    
                "author": [{
                    
                    "given_name": "David E.",
                    "family_name": "Patterson",
                    
                    },
                    {
                    
                    "given_name": "Richard D.",
                    "family_name": "Cramer",
                    
                    "email": "*****@*****.**",
                    
                    },
                    {
                    
                    "given_name": "Allan M.",
                    "family_name": "Ferguson",
                    
                    },
                    {
                    
                    "given_name": "Robert D.",
                    "family_name": "Clark",
                    
                    },
                    {
                    
                    "given_name": "Laurence E.",
                    "family_name": "Weinberger",
                    
                    }],
    
              #  "license": "",
    
                "collection": "QSAR Molecular Diversity",
                #"tags": ,
    
                "description": "If a molecular descriptor is to be a valid and useful measure of “similarity” in drug discovery, a plot of differences in its values vs differences in biological activities for a set of related molecules will exhibit a characteristic trapezoidal distribution enhancement, revealing a “neighborhood behavior” for the descriptor. Applying this finding to 20 datasets allows 11 molecular diversity descriptors to be ranked by their validity for compound library design",
                "year": 1996,
    
                "links": {
    
                    "landing_page": "ftp://ftp.ics.uci.edu/pub/baldig/learning/Patterson/",
    
                    "publication": ["http://pubs.acs.org/doi/abs/10.1021/jm960290n"],
                  #  "data_doi": ,
    
                   # "related_id": ,
    
                    #"data_link": {
                    
                        #"globus_endpoint": ,
                        #"http_host": ,
    
                        #"path": ,
                        #}
                    },
    
    #            "mrr": ,
    
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                    }]
                }
            }
        
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "sdf")
        record_metadata = {
            "mdf": {
                "title": "QSAR Molecular Diversity - " + record["chemical_formula"],
                "acl": ['public'],
    
    #            "tags": ,
    #            "description": ,
                
                "composition": record["chemical_formula"],
               # "raw": json.dumps(record),
    
                "links": {
    #                "landing_page": ,
    
    #                "publication": ,
    #                "data_doi": ,
    
    #                "related_id": ,
    
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
    
                        "path": "/collections/qsar_molecular_diversity/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },
    
    #            "citation": ,
    #            "data_contact": {
    
    #                "given_name": ,
    #                "family_name": ,
    
    #                "email": ,
    #                "institution":,
    
    #                },
    
    #            "author": ,
    
    #            "license": ,
    #            "collection": ,
    #            "year": ,
    
    #            "mrr":
    
    #            "processing": ,
    #            "structure":,
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example #12
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Machine learning of molecular electronic properties in chemical compound space",
                "acl": ['public'],
                "source_name":
                "gdb7_13",
                "citation": [
                    "Gr\'egoire Montavon, Matthias Rupp, Vivekanand Gobre, Alvaro Vazquez-Mayagoitia, Katja Hansen, Alexandre Tkatchenko, Klaus-Robert M\"uller, O. Anatole von Lilienfeld: Machine learning of molecular electronic properties in chemical compound space, New Journal of Physics, 15(9): 095003, IOP Publishing, 2013.DOI: 10.1088/1367-2630/15/9/095003"
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                "author": [{
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "instituition": "Argonne National Laboratory"
                }, {
                    "given_name": "Grégoire",
                    "family_name": "Montavon",
                    "institution": "Technical University of Berlin"
                }, {
                    "given_name":
                    "Matthias",
                    "family_name":
                    "Rupp",
                    "instituition":
                    "Institute of Pharmaceutical Sciences, ETH Zurich",
                }, {
                    "given_name":
                    "Vivekanand",
                    "family_name":
                    "Gobre",
                    "instituition":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                }, {
                    "given_name": "Alvaro",
                    "family_name": "Vazquez-Mayagoitia",
                    "instituition": "Argonne National Laboratory",
                }, {
                    "given_name":
                    "Katja",
                    "family_name":
                    "Hansen",
                    "instituition":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                }, {
                    "given_name":
                    "Alexandre",
                    "family_name":
                    "Tkatchenko",
                    "email":
                    "*****@*****.**",
                    "instituition":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft, Pohang University of Science and Technology",
                }, {
                    "given_name":
                    "Klaus-Robert",
                    "family_name":
                    "Müller",
                    "email":
                    "*****@*****.**",
                    "instituition":
                    "Technical University of Berlin, Korea University",
                }],
                "license":
                "https://creativecommons.org/licenses/by/3.0/",
                "collection":
                "gdb7_13",
                #            "tags": ,
                "description":
                "7k small organic molecules, in their ground state, 14 combinations of properties and theory levels. 7,211 small organic molecules composed of H, C, N, O, S, Cl, saturated with H, and up to 7 non-H atoms. Molecules relaxed using DFT with PBE functional. Properties are atomization energy (DFT/PBE0), averaged polarizability (DFT/PBE0, SCS), H**O and LUMO eigenvalues (GW, DFT/PBE0, ZINDO), and, ionization potential, electron affinity, first excitation energy, frequency of maximal absorption (all ZINDO).",
                "year":
                2013,
                "links": {
                    "landing_page":
                    "http://qmml.org/datasets.html#gdb7-13",
                    "publication":
                    ["http://dx.doi.org/10.1088/1367-2630/15/9/095003"],
                    #"data_doi": "",

                    #                "related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb7-13.zip",
                    }
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "xyz")
        record_metadata = {
            "mdf": {
                "title": "gdb7_13 " + data_file["filename"],
                "acl": ['public'],

                #            "tags": ,
                #            "description": ,
                "composition": record["chemical_formula"],
                #            "raw": ,
                "links": {
                    #"landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/gdb7_13/gdb7_13_data/" +
                        data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC",
                "acl": ["public"],
                "source_name":
                "ohmic_si_c_contacts",
                "data_contact": {
                    "given_name":
                    "Per",
                    "family_name":
                    "Eklund",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Fashandi, Hossein, Dahlqvist, Martin, Lu, Jun, Palisaitis, Justinas, Simak, Sergei I, Abrikosov, Igor A, … Eklund, Per. (2017). Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC [Data set]. Zenodo. http://doi.org/10.5281/zenodo.376969"
                ],
                "author": [{
                    "given_name":
                    "Hossein",
                    "family_name":
                    "Fashandi",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Martin",
                    "family_name":
                    "Dahlqvist",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Jun",
                    "family_name":
                    "Lu",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Justinas",
                    "family_name":
                    "Palisaitis",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Sergei I",
                    "family_name":
                    "Simak",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Igor A",
                    "family_name":
                    "Abrikosov",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Johanna",
                    "family_name":
                    "Rosen",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Lars",
                    "family_name":
                    "Hultman",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Mike",
                    "family_name":
                    "Andersson",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Anita Lloyd",
                    "family_name":
                    "Spetz",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }, {
                    "given_name":
                    "Per",
                    "family_name":
                    "Eklund",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Department of Physics, Chemistry, and Biology (IFM), Linköping University",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Ohmic Contact to SiC",
                "tags": [
                    "electronic structure calculations", "MAX phase", "XRD",
                    "I/V measurement", "spin-orbit coupling",
                    "density of states", "Nanoscale materials",
                    "Structure of solids and liquids",
                    "Surfaces, interfaces and thin films",
                    "Two-dimensional materials"
                ],
                "description":
                "The large class of layered ceramics encompasses both van der Waals (vdW) and non-vdW solids. While intercalation of noble metals in vdW solids is known, formation of compounds by incorporation of noble-metal layers in non-vdW layered solids is largely unexplored. Here, we show formation of Ti3AuC2 and Ti3Au2C2 phases with up to 31% lattice swelling by a substitutional solid-state reaction of Au into Ti3SiC2 single-crystal thin films with simultaneous out-diffusion of Si. Ti3IrC2 is subsequently produced by a substitution reaction of Ir for Au in Ti3Au2C2. These phases form Ohmic electrical contacts to SiC and remain stable after 1,000 h of ageing at 600 °C in air. The present results, by combined analytical electron microscopy and ab initio calculations, open avenues for processing of noble-metal-containing layered ceramics that have not been synthesized from elemental sources, along with tunable properties such as stable electrical contacts for high-temperature power electronics or gas sensors.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.376969",
                    "publication": [
                        "http://www.nature.com/nmat/journal/v16/n8/full/nmat4896.html"
                    ],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(OUTCAR|cif$)"),
                          desc="Processing files",
                          disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        if dtype == "cif":
            ftype = "cif"
        else:
            ftype = "vasp-out"
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), ftype)
        except Exception:
            errors += 1
            continue  # skip unparseable files; `record` would otherwise be stale or undefined here
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Ohmic Contact to SiC - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ohmic_si_c_contacts/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
Example #14
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation",
                "acl": ["public"],
                "source_name":
                "cyclopropenes",
                "data_contact": {
                    "given_name": "Robert S.",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Paton, R., & Jackson, K. (2016). Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation [Data set]. Zenodo. http://doi.org/10.5281/zenodo.53736"
                ],
                "author": [{
                    "given_name": "Tomislav",
                    "family_name": "Rovis",
                    "institution": "Colorado State University",
                }, {
                    "given_name": "Robert S.",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Kelvin E.",
                    "family_name": "Jackson",
                    "institution": "University of Oxford",
                }, {
                    "given_name": "Natthawat",
                    "family_name": "Semakul",
                    "institution": "Colorado State University",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "Diastereoselective Benzamidation of Cyclopropenes",
                "tags":
                ["DFT", "Gaussian", "Transition State", "Stereoselectivity"],
                "description":
                "The diastereoselective coupling of O-substituted arylhydroxamates and cyclopropenes mediated by Rh(III) catalysis was successfully developed. Through ligand development, the diastereoselectivity of this reaction was improved using a heptamethylindenyl (Ind*) ligand, which has been rationalized using quantum chemical calculations. In addition, the nature of the O-substituted ester of benzhydroxamic acid proved important for high diastereoselectivity. This transformation tolerates a variety of benzamides and cyclopropenes that furnish cyclopropa[c]dihydroisoquinolones with high diastereocontrol, which could then be easily transformed into synthetically useful building blocks for pharmaceuticals and bio-active molecules.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://zenodo.org/record/53736#.WWWmjMaZPFQ",
                    "publication": [
                        "http://pubs.rsc.org/en/content/articlelanding/2016/sc/c6sc02587k#!divAbstract"
                    ],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, ".out$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "gaussian-out")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Diastereoselective Benzamidation of Cyclopropenes - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "out": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cyclopropenes/" + data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Synthesis and structural data of a Fe-base sodium metaphosphate compound, NaFe(PO3)3",
                "acl": ['public'],
                "source_name":
                "fe_na_metaphosphate_structure",
                "citation": [
                    "Lin, Xinghao et al. “Synthesis and Structural Data of a Fe-Base Sodium Metaphosphate Compound, NaFe(PO3)3.” Data in Brief 4 (2015): 217–221. PMC. Web. 30 June 2017."
                ],
                "data_contact": {
                    "given_name": "Yanming",
                    "family_name": "Zhao",
                    "email": "nc.ude.tucs@myoahz",
                    "instituition": "South China University of Technology"
                },
                "author": [{
                    "given_name":
                    "Xinghao",
                    "family_name":
                    "Lin",
                    "instituition":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Yanming",
                    "family_name":
                    "Zhao",
                    "email":
                    "nc.ude.tucs@myoahz",
                    "instituition":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Youzhong",
                    "family_name":
                    "Dong",
                    "instituition":
                    "South China University of Technology"
                }, {
                    "given_name":
                    "Quan",
                    "family_name":
                    "Kuang",
                    "instituition":
                    "South China University of Technology"
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Fe-base sodium metaphosphate Synthesis and Structure",
                # "mdf-tags": ,
                "description":
                "In this data article, the synthesized process of this metaphosphate compound and the morphology of the obtained sample will be provided. The high-power XRD Rietveld refinement is applied to determine the crystal structure of this metaphosphate compound and the refinement result including the main refinement parameters, atomic coordinate and some important lattace parameters are stored in the cif file.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4510540/",

                    #  "publication": ,
                    # "data_doi": "",

                    #  "related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://www.ncbi.nlm.nih.gov",
                        "path": "/pmc/articles/PMC4510540/bin/mmc2.zip",
                    }
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        record_metadata = {
            "mdf": {
                "title":
                "Synthesis and Structure of - " + record["chemical_formula"],
                "acl": ['public'],

                #            "tags": ,
                #            "mdescription": ,
                "composition":
                record["chemical_formula"],
                #            "raw": ,
                "links": {
                    #                "landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/fe_na_metaphosphate_structure/" +
                        data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example #16
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Assigned formula of complex mixture FT-ICR MS datasets",
                "acl": ['public'],
                "source_name": "ft_icr_ms",
                "citation": ["Blackburn, John; Uhrin, Dusan. (2017). Assigned formula of complex mixture FT-ICR MS datasets, [dataset]. University of Edinburgh. School of Chemistry. http://dx.doi.org/10.7488/ds/1984"],
                "data_contact": {
    
                    "given_name": "Dusan",
                    "family_name": "Uhrin",
                    
                    "email": "*****@*****.**",
                    "instituition": "University of Edinburgh"
    
                    },
    
                "author": [{
    
                    "given_name": "John",
                    "family_name": "Blackburn",
                    
                    "instituition": "University of Edinburgh"
                    
                    },
                    {
                    
                    "given_name": "Dusan",
                    "family_name": "Uhrin",
                    
                    "email": "*****@*****.**",
                    "instituition": "University of Edinburgh"
                    
                    }],
    
                "license": "http://creativecommons.org/licenses/by/4.0/legalcode",
    
                "collection": "FT-ICR MS Complex Mixtures",
                "tags": ["ESI", "MALDI", "LDI"],
    
                "description": "The dataset included is of formula assigned from FT-ICR MS data for samples of Suwannee River fulvic acid (SRFA) and Suwannee River natural organic matter (SRNOM) (both are standards from the International Humic Substances Society) using a variety of ionisation sources. This includes electrospray ionisation (ESI), matrix assisted laser desorption/ionisation (MALDI) and matrix free laser desorption/ionisation (LDI).",
                "year": 2017,
    
                "links": {
    
                    "landing_page": "http://datashare.is.ed.ac.uk/handle/10283/2640",
    
                    "publication": ["http://dx.doi.org/10.1021/acs.analchem.6b04817"],
                   # "data_doi": "",
    
              #      "related_id": ,
    
                    "zip": {
                    
                        #"globus_endpoint": ,
                        "http_host": "http://datashare.is.ed.ac.uk",
    
                        "path": "/download/10283/2640/Assigned_formula_of_complex_mixture_FT-ICR_MS_datasets.zip",
                        }
                    },
    
    #            "mrr": ,
    
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                    }]
                }
            }
        
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    with open(os.path.join(input_path, "ft_icr_ms_data.txt")) as raw_in:
        all_data = raw_in.read()
    for record in tqdm(parse_tab(all_data, sep=";"), desc="Processing files", disable=not verbose):
        record_metadata = {
            "mdf": {
                "title": "FT_ICR_MS " + record["Molecular Formula"],
                "acl": ['public'],
    
    #            "tags": ,
    #            "description": ,
                
                "composition": record["Molecular Formula"],
                "raw": json.dumps(record),
    
                "links": {
    #                "landing_page": ,
    
    #                "publication": ,
    #                "data_doi": ,
    
    #                "related_id": ,
    
                    "txt": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
    
                        "path": "/collections/ft_icr_ms/ft_icr_ms_data.txt",
                        },
                    },
    
    #            "citation": ,
    #            "data_contact": {
    
    #                "given_name": ,
    #                "family_name": ,
    
    #                "email": ,
    #                "institution":,
    
    #                },
    
    #            "author": ,
    
    #            "license": ,
    #            "collection": ,
    #            "year": ,
    
    #            "mrr":
    
    #            "processing": ,
    #            "structure":,
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Pore Shape Modification of a Microporous Metal–Organic Framework Using High Pressure: Accessing a New Phase with Oversized Guest Molecules",
                "acl": ['public'],
                "source_name":
                "porous_mof",
                "citation": [
                    "The University of Edinburgh School of Chemistry. (2016). Pore Shape Modification of a Microporous Metal-Organic Frame-work Using High Pressure: Accessing a New Phase with Oversized Guest Molecules, [dataset]. http://dx.doi.org/10.7488/ds/371."
                ],
                "data_contact": {
                    "given_name": "Stephen A.",
                    "family_name": "Moggach",
                    "email": "*****@*****.**",
                    "instituition": "University of Edinburgh"
                },
                "author": [{
                    "given_name": "Stephen A.",
                    "family_name": "Moggach",
                    "email": "*****@*****.**",
                    "instituition": "University of Edinburgh"
                }, {
                    "given_name": "Scott C.",
                    "family_name": "McKellar",
                    "instituition": "University of Edinburgh"
                }, {
                    "given_name": "Jorge",
                    "family_name": "Sotelo",
                    "instituition": "University of Edinburgh"
                }, {
                    "given_name": "Alex",
                    "family_name": "Greenaway",
                    "instituition": "University of St Andrews"
                }, {
                    "given_name": "John P. S.",
                    "family_name": "Mowat",
                    "instituition": "University of St Andrews"
                }, {
                    "given_name": "Odin",
                    "family_name": "Kvam",
                    "instituition": "University of Edinburgh"
                }, {
                    "given_name": "Carole A.",
                    "family_name": "Morrison",
                    "instituition": "University of Edinburgh"
                }, {
                    "given_name": "Paul A.",
                    "family_name": "Wright",
                    "instituition": "University of St Andrews"
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/legalcode",
                "collection":
                "Porous Metal-Organic-Framework",
                #"tags": ,
                "description":
                "Pressures up to 0.8 GPa have been used to squeeze a range of sterically “oversized” C5–C8 alkane guest molecules into the cavities of a small-pore Sc-based metal–organic framework. Guest inclusion causes a pronounced reorientation of the aromatic rings of one-third of the terephthalate linkers, which act as “torsion springs”, resulting in a fully reversible change in the local pore structure. The study demonstrates how pressure-induced guest uptake can be used to investigate framework flexibility relevant to “breathing” behavior and to understand the uptake of guest molecules in MOFs relevant to hydrocarbon separation.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "http://datashare.is.ed.ac.uk/handle/10283/942",
                    "publication":
                    ["http://dx.doi.org/10.1021/acs.chemmater.5b02891"],
                    "data_doi":
                    "http://dx.doi.org//10.7488/ds/371",

                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host":
                        "http://datashare.is.ed.ac.uk",
                        "path":
                        "/download/10283/942/Pore_Shape_Modification_of_a_Microporous_Metal-Organic_Frame-work_Using_High_Pressure:_Accessing_a_New_Phase_with_Oversized_Guest_Molecules.zip",
                    }
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        record_metadata = {
            "mdf": {
                "title":
                "Metal-Organic-Frame-Work - " + record["chemical_formula"],
                "acl": ['public'],

                #            "tags": ,
                #            "description": ,
                "composition": record["chemical_formula"],
                #            "raw": ,
                "links": {
                    #                "landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/porous_mof/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Example #18
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                'Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys"',
                "acl": ["public"],
                "source_name":
                "quinary_alloys",
                "citation": [
                    'Christofidou, K. A., Jones, N. G., Pickering, E. J., Flacau, R., Hardy, M. C., & Stone, H. J. Research Data Supporting "The microstructure and hardness of Ni-Co-Al-Ti-Cr quinary alloys" [Dataset]. https://doi.org/10.17863/CAM.705'
                ],
                "data_contact": {
                    "given_name": "Howard",
                    "family_name": "Stone",
                    "email": "*****@*****.**",
                    "institution": "University of Cambridge"
                },
                "author": [{
                    "given_name": "Howard",
                    "family_name": "Stone",
                    "email": "*****@*****.**",
                    "institution": "University of Cambridge"
                }, {
                    "given_name": "Katerina",
                    "family_name": "Christofidou",
                    "institution": "University of Cambridge",
                    "orcid": "https://orcid.org/0000-0002-8064-5874"
                }, {
                    "given_name": "Nicholas",
                    "family_name": "Jones",
                    "institution": "University of Cambridge"
                }, {
                    "given_name": "Edward",
                    "family_name": "Pickering",
                    "institution": "University of Cambridge"
                }, {
                    "given_name": "Roxana",
                    "family_name": "Flacau",
                    "institution": "University of Cambridge"
                }, {
                    "given_name": "Mark",
                    "family_name": "Hardy",
                    "institution": "University of Cambridge"
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Ni-Co-Al-Ti-Cr Quinary Alloys",
                #            "data_format": ,
                #            "data_type": ,
                "tags": ["alloys"],
                "description":
                "DSC files, neutron diffraction data, hardness measurements, SEM and TEM images and thermodynamic simulations are provided for all alloy compositions studied and presented in this manuscript.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://www.repository.cam.ac.uk/handle/1810/256771",
                    "publication":
                    "https://doi.org/10.1016/j.jallcom.2016.07.159",
                    "data_doi": "https://doi.org/10.17863/CAM.705",

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    with open(os.path.join(input_path, "alloy_data.csv"), 'r') as adata:
        raw_data = adata.read()
    for record in tqdm(parse_tab(raw_data),
                       desc="Processing records",
                       disable=not verbose):
        links = {}
        for ln in find_files(input_path, record["Alloy"]):
            key = "_".join(ln["no_root_path"].split("/")).replace(" ", "_")
            links[key] = {
                "globus_endpoint":
                "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                "http_host":
                "https://data.materialsdatafacility.org",
                "path":
                os.path.join("/collections/quinary_alloys", ln["no_root_path"],
                             ln["filename"])
            }
        links["csv"] = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/quinary_alloys/alloy_data.csv"
        }
        record_metadata = {
            "mdf": {
                "title": "Ni-Co-Al-Ti-Cr Quinary Alloys " + record["Alloy"],
                "acl": ["public"],

                #            "tags": ,
                #            "description": ,
                "composition": "NiCoAlTiCr",
                "raw": json.dumps(record),
                "links": links,  #{
                #                "landing_page": ,

                #                "publication": ,
                #                "dataset_doi": ,

                #                "related_id": ,

                #                 "csv": {

                #                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                #                    "http_host": "https://data.materialsdatafacility.org",

                #                    "path": "/collections/quinary_alloys/alloy_data.csv",
                #                    },
                #                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #               "institution":,

                # IDs
                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "data_format": ,
                #            "data_type": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            },
            "quinary_alloys": {
                "atomic_composition_percent": {
                    "Ni": record["Ni"],
                    "Co": record["Co"],
                    "Al": record["Al"],
                    "Ti": record["Ti"],
                    "Cr": record["Cr"]
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Example #19
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Evaluation and comparison of classical interatomic potentials through a user-friendly interactive web-interface",
                "acl": ["public"],
                "source_name":
                "cip",
                "citation": [
                    "Choudhary K, Congo FYP, Liang T, Becker C, Hennig RG, Tavazza F (2017) Evaluation and comparison of classical interatomic potentials through a user-friendly interactive web-interface. Scientific Data 4: 160125. http://dx.doi.org/10.1038/sdata.2016.125",
                    "Choudhary K, Congo FYP, Liang T, Becker C, Hennig RG, Tavazza F (2017) Data from: Evaluation and comparison of classical interatomic potentials through a user-friendly interactive web-interface. Dryad Digital Repository. http://dx.doi.org/10.5061/dryad.dd56c"
                ],
                "data_contact": {
                    "given_name": "Kamal",
                    "family_name": "Choudhary",
                    "email": "*****@*****.**",
                    "institution":
                    "National Institute of Standards and Technology"

                    # IDs
                },
                "author": [
                    {
                        "given_name":
                        "Kamal",
                        "family_name":
                        "Choudhary",
                        "email":
                        "*****@*****.**",
                        "institution":
                        "National Institute of Standards and Technology"

                        # IDs
                    },
                    {
                        "given_name":
                        "Faical",
                        "family_name":
                        "Congo",
                        "institution":
                        "National Institute of Standards and Technology"

                        # IDs
                    },
                    {
                        "given_name": "Tao",
                        "family_name": "Liang",
                        "institution": "The Pennsylvania State University"

                        # IDs
                    },
                    {
                        "given_name":
                        "Chandler",
                        "family_name":
                        "Becker",
                        "institution":
                        "National Institute of Standards and Technology"

                        # IDs
                    },
                    {
                        "given_name": "Richard",
                        "family_name": "Hennig",
                        "institution": "University of Florida"

                        # IDs
                    },
                    {
                        "given_name":
                        "Francesca",
                        "family_name":
                        "Tavazza",
                        "institution":
                        "National Institute of Standards and Technology"

                        # IDs
                    }
                ],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "NIST Classical Interatomic Potentials",
                "tags": [
                    "interatomic potentials", "force-fields", "total energy",
                    "energy", "elastic matrix", "structure", "elastic modulus",
                    "JARVIS"
                ],
                "description":
                "We computed energetics and elastic properties of variety of materials such as metals and ceramics using a wide range of empirical potentials and compared them to density functional theory (DFT) as well as to experimental data, where available.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://www.ctcms.nist.gov/~knc6/periodic.html",
                    "publication": "http://dx.doi.org/10.1038/sdata.2016.125",
                    "data_doi": "http://dx.doi.org/10.5061/dryad.dd56c",

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    with open(os.path.join(
            input_path, "classical_interatomic_potentials.json")) as in_file:
        for record in tqdm(json.load(in_file),
                           desc="Converting data",
                           disable=not verbose):
            record_metadata = {
                "mdf": {
                    "title":
                    "NIST Classical Interatomic Potential - " +
                    record["forcefield"] + ", " + record["composition"],
                    "acl": ["public"],

                    #                "tags": ,
                    #                "description": ,
                    "composition":
                    record["composition"],
                    "raw":
                    json.dumps(record),
                    "links": {
                        #                    "landing_page": ,

                        #                    "publication": ,
                        #                    "dataset_doi": ,

                        #                    "related_id": ,

                        # data links: {

                        #"globus_endpoint": ,
                        #"http_host": ,

                        #"path": ,
                        #},
                    },

                    #                "citation": ,
                    #                "data_contact": {

                    #                    "given_name": ,
                    #                    "family_name": ,

                    #                    "email": ,
                    #                    "institution":,

                    # IDs
                    #                    },

                    #                "author": ,

                    #                "license": ,
                    #                "collection": ,
                    #                "data_format": ,
                    #                "data_type": ,
                    #                "year": ,

                    #                "mrr":

                    #            "processing": ,
                    #            "structure":,
                }
            }

            # Pass each individual record to the Validator
            result = dataset_validator.write_record(record_metadata)

            # Check if the Validator accepted the record, and print a message if it didn't
            # If the Validator returns "success" == True, the record was written successfully
            if result["success"] is not True:
                print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Example #20
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit",
                "acl": ["public"],
                "source_name":
                "cnmultifit_groel",
                "data_contact": {
                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Lasker, K., Velázquez-Muriel, J. A., Webb, B. M., Yang, Z., Ferrin, T. E., & Sali, A. (2012). Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit [Data set]. Methods Mol Biol. Zenodo. http://doi.org/10.5281/zenodo.46596"
                ],
                "author": [{
                    "given_name":
                    "Keren",
                    "family_name":
                    "Lasker",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Javier A.",
                    "family_name":
                    "Velázquez-Muriel",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Benjamin M.",
                    "family_name":
                    "Webb",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Zheng",
                    "family_name":
                    "Yang",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Thomas E.",
                    "family_name":
                    "Ferrin",
                    "institution":
                    "University of California San Francisco",
                }, {
                    "given_name":
                    "Andrej",
                    "family_name":
                    "Sali",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of California San Francisco",
                }],
                "license":
                "http://www.opensource.org/licenses/LGPL-2.1",
                "collection":
                "GroEL cnmultifit",
                "tags": [
                    "Integrative Modeling Platform (IMP)",
                    "Electron microscopy density map", "MODELLER", "MultiFit"
                ],
                "description":
                "These scripts demonstrate the use of IMP, MODELLER and Chimera in the modeling of the bacterial molecular chaperone GroEL. First, MODELLER is used to generate structures for the individual components in the GroEL complex. Then, IMP is used to fit these components together into the electron microscopy density map of the entire complex.",
                "year":
                2012,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.46596",
                    "publication": [
                        "https://doi.org/10.1007/978-1-61779-588-6_15",
                        "https://github.com/integrativemodeling/multifit_groel/tree/v1.0"
                    ],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path": "/record/46596/files/multifit_groel-v1.0.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "GroEL cnmultifit - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "pdb": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/cnmultifit_groel/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #21
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "XAFS Spectra Library",
                "acl": ["public"],
                "source_name":
                "xafs_sl",
                "citation": ["http://cars.uchicago.edu/xaslib"],
                "data_contact": {
                    "given_name": "Matthew",
                    "family_name": "Newville",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago"
                },
                "author": {
                    "given_name": "Matthew",
                    "family_name": "Newville",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago"
                },

                #            "license": ,
                "collection":
                "XAFS SL",
                "tags": ["XAFS", "Spectra"],
                "description":
                "This is a collection of X-ray Absorption Spectra. The data here are intended to be of good quality, and on well-characterized samples, but no guarantees are made about either of these intentions.",
                #            "year": ,
                "links": {
                    "landing_page": "http://cars.uchicago.edu/xaslib",

                    #                "publication": ,
                    #                "dataset_doi": ,

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }]
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    query = "select s.id, s.name, s.rating_summary, s.collection_date, s.submission_date, s.d_spacing, s.comments, b.name, f.fullname, s1.name, s1.preparation, s1.formula, s2.name, ed.name, en.units, p.name from spectrum s, beamline b, facility f, sample s1, sample s2, edge ed, energy_units en, person p where b.id=s.beamline_id and f.id=b.facility_id and s1.id=s.sample_id and s2.id=s.reference_id and ed.id=s.edge_id and en.id=s.energy_units_id and p.id=s.person_id;"
    #Translation vars for convenience and code readability
    s_id = 0
    s_name = 1
    s_rating_summary = 2
    s_collection_date = 3
    s_submission_date = 4
    s_d_spacing = 5
    s_comments = 6
    b_name = 7
    f_fullname = 8
    s1_name = 9
    s1_preparation = 10
    s1_formula = 11
    s2_name = 12
    ed_name = 13
    en_units = 14
    p_name = 15
    #Make connection to Postgres
    with psycopg2.connect("dbname=xafs") as conn:
        with conn.cursor() as cursor:
            cursor.execute(query)  #Run query
            #Process each row
            for record in tqdm(cursor,
                               desc="Processing database",
                               disable=not verbose):
                record_metadata = {
                    "mdf": {
                        "title": record[s_name],
                        "acl": ["public"],

                        #                    "tags": ,
                        "description": record[s_comments],
                        "composition": record[s1_formula],
                        #                    "raw": ,
                        "links": {
                            "landing_page":
                            quote("http://cars.uchicago.edu/xaslib/spectrum/" +
                                  str(record[s_id]),
                                  safe="/:"),

                            #                        "publication": ,
                            #                        "dataset_doi": ,

                            #                        "related_id": ,
                            "xdi": {
                                #"globus_endpoint": ,
                                "http_host":
                                "http://cars.uchicago.edu",
                                "path":
                                "/" + quote(str(record[s_id]) + "/" +
                                            record[s_name] + ".xdi",
                                            safe="/:"),
                            },
                        },

                        #                    "citation": ,
                        #                    "data_contact": {

                        #                        "given_name": ,
                        #                        "family_name": ,

                        #                        "email": ,
                        #                        "institution":,

                        # IDs
                        #                        },

                        #                    "author": ,

                        #                    "license": ,
                        #                    "collection": ,
                        #                    "data_format": ,
                        #                    "data_type": ,
                        #                    "year": ,

                        #                    "mrr":

                        #            "processing": ,
                        #            "structure":,
                    },
                    "xafs_sl": {
                        "ratings": record[s_rating_summary],
                        "absorption_edge": record[ed_name],
                        "sample_name": record[s1_name],
                        "sample_prep": record[s1_preparation],
                        "reference_sample": record[s2_name],
                        "beamline":
                        record[f_fullname] + " - " + record[b_name],
                        "energy_units": record[en_units],
                        "d_spacing": record[s_d_spacing],
                        "date_measured": str(record[s_collection_date]),
                        #                    "date_uploaded" : str(record[s_submission_date]),
                        "user_comments": record[s_comments]
                    }
                }

                # Pass each individual record to the Validator
                result = dataset_validator.write_record(record_metadata)

                # Check if the Validator accepted the record, and print a message if it didn't
                # If the Validator returns "success" == True, the record was written successfully
                if result["success"] is not True:
                    print("Error:", result["message"])

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Dataset for \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\"",
                "acl": ["public"],
                "source_name":
                "carbonyl_sulfide_fluxes",
                "data_contact": {
                    "given_name":
                    "Huilin",
                    "family_name":
                    "Chen",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of Groningen, University of Colorado"
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Linda M.J. Kooijmans, Kadmiel Maseyk, Ulli Seibt, Wu Sun, Timo Vesala, Ivan Mammarella, … Huilin Chen. (2017). Dataset for \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\" [Data set]. Zenodo. http://doi.org/10.5281/zenodo.580303"
                ],
                "author": [{
                    "given_name": "Linda M.J.",
                    "family_name": "Kooijmans",
                    "institution": "University of Groningen",
                }, {
                    "given_name": "Kadmiel",
                    "family_name": "Maseyk",
                    "institution": "The Open University",
                }, {
                    "given_name": "Ulli",
                    "family_name": "Seibt",
                    "institution": "University of California",
                }, {
                    "given_name": "Wu",
                    "family_name": "Sun",
                    "institution": "University of California",
                }, {
                    "given_name": "Timo",
                    "family_name": "Vesala",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Ivan",
                    "family_name": "Mammarella",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Pasi",
                    "family_name": "Kolari",
                    "institution": "University of Helsinki",
                }, {
                    "given_name": "Juho",
                    "family_name": "Aalto",
                    "institution": "University of Helsinki",
                }, {
                    "given_name":
                    "Alessandro",
                    "family_name":
                    "Franchin",
                    "institution":
                    "University of Helsinki, University of Colorado",
                }, {
                    "given_name": "Roberta",
                    "family_name": "Vecchi",
                    "institution": "University of Milan",
                }, {
                    "given_name": "Gianluigi",
                    "family_name": "Valli",
                    "institution": "University of Milan",
                }, {
                    "given_name":
                    "Huilin",
                    "family_name":
                    "Chen",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "University of Groningen, University of Colorado",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Carbonyl Sulfide Fluxes",
                #"tags": [""],
                "description":
                "Nighttime averaged ecosystem fluxes of COS and CO2 obtained through the radon-tracer and eddy-covariance method as presented in \"Canopy uptake dominates nighttime carbonyl sulfide fluxes in a boreal forest\" submitted to Atmospheric Chemistry and Physics.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.580303",
                    "publication":
                    ["https://www.atmos-chem-phys-discuss.net/acp-2017-407/"],
                    #"data_doi": "",
                    #"related_id": "",
                    "txt": {

                        #"globus_endpoint": ,
                        "http_host":
                        "https://zenodo.org",
                        "path":
                        "/record/580303/files/Kooijmans_et_al_2017_ACPD_20170516.txt",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    with open(
            os.path.join(input_path, "Kooijmans_et_al_2017_ACPD_20170516.txt"),
            "r") as raw_in:
        data = raw_in.read()
    description = "".join(data.split("\n\n")[1:2])
    start = "##########################################\n"
    for line in tqdm(parse_tab(data.split(start)[-1], sep=","),
                     desc="Processing Data",
                     disable=not verbose):
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Carbonyl Sulfide Fluxes doy: " + line["doy"],
                "acl": ["public"],
                #"composition": ,

                #"tags": ,
                "description": description,
                "raw": json.dumps(line),
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    "txt": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/carbonyl_sulfide_fluxes/Kooijmans_et_al_2017_ACPD_20170516.txt",
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #23
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
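    # A hedged illustration (not part of the original metadata): a person block
    # may carry extra IDs, and a custom block named after the source_name can
    # sit beside "mdf", e.g.:
    #
    #     "data_contact": {
    #         "given_name": "Jane",
    #         "family_name": "Doe",
    #         "github": "jdoe",        # extra ID, as the note above suggests
    #     },
    #     ...
    #     "ru_pt_complexes": {         # custom block named after source_name
    #         "custom_field": "value",
    #     },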
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption",
                "acl": ["public"],
                "source_name":
                "ru_pt_complexes",
                "data_contact": {
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Shi, Pengfei; Coe, Benjamin J.; Sánchez, Sergio; Wang, Daqi; Tian, Yupeng; Nyk, Marcin; Samoc, Marek (2015): Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b02089 Retrieved: 15:54, Jul 27, 2017 (GMT)"
                ],
                "author": [{
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                }, {
                    "given_name": "Benjamin J.",
                    "family_name": "Coe",
                    "email": "*****@*****.**",
                    "institution": "The University of Manchester",
                }, {
                    "given_name": "Sergio",
                    "family_name": "Sánchez",
                    "institution": "The University of Manchester",
                }, {
                    "given_name": "Daqi",
                    "family_name": "Wang",
                    "institution": "Liaocheng University",
                }, {
                    "given_name": "Yupeng",
                    "family_name": "Tian",
                    "institution": "Anhui University",
                }, {
                    "given_name": "Marcin",
                    "family_name": "Nyk",
                    "institution": "Wrocław University of Technology",
                }, {
                    "given_name": "Marek",
                    "family_name": "Samoc",
                    "institution": "Wrocław University of Technology",
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection":
                "Ru Pt Heteropolymetallic Complexes",
                "tags": [
                    "Heteropolymetallic Complexes", "850 nm", "834 nm",
                    "polymetallic species", "Pt coordination",
                    "spectra change", "moietie", "qpy", "MLCT",
                    "2 PA activities", "complex", "301 GM", "PtII", "RuII",
                    "523 GM", "heptanuclear RuPt 6", "absorption bands"
                ],
                "description":
                "New trinuclear RuPt2 and heptanuclear RuPt6 complex salts are prepared by attaching PtII 2,2′:6′,2″-terpyridine (tpy) moieties to RuII 4,4′:2′,2″:4″,4‴-quaterpyridine (qpy) complexes. Characterization includes single crystal X-ray structures for both polymetallic species. The visible absorption bands are primarily due to RuII → qpy metal-to-ligand charge-transfer (MLCT) transitions, according to time-dependent density functional theory (TD-DFT) calculations. These spectra change only slightly on Pt coordination, while the orange-red emission from the complexes shows corresponding small red-shifts, accompanied by decreases in intensity. Cubic molecular nonlinear optical behavior has been assessed by using Z-scan measurements. These reveal relatively high two-photon absorption (2PA) cross sections σ2, with maximal values of 301 GM at 834 nm (RuPt2) and 523 GM at 850 nm (RuPt6) when dissolved in methanol or acetone, respectively. Attaching PtII(tpy) moieties triples or quadruples the 2PA activities when compared with the RuII-based cores.",
                "year":
                2015,
                "links": {
                    "landing_page":
                    "https://figshare.com/collections/Uniting_Ruthenium_II_and_Platinum_II_Polypyridine_Centers_in_Heteropolymetallic_Complexes_Giving_Strong_Two_Photon_Absorption/2204182",
                    "publication":
                    ["https://doi.org/10.1021/acs.inorgchem.5b02089"],
                    #"data_doi": "",
                    #"related_id": ,

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "(xyz|cif)"),
                          desc="Processing files",
                          disable=not verbose):
        dtype = data_file["filename"].split(".")[-1]
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), dtype)
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "Ru Pt Heteropolymetallic Complexes - " +
                record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ru_pt_complexes/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Example #24
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
        "mdf": {
            "title": "Quantum Machine - MD Trajectories of C7O2H10",
            "acl": ["public"],
            "source_name": "qm_mdt_c",
            "citation": ["S. Chmiela, A. Tkatchenko, H. E. Sauceda, I. Poltavsky, K. T. Schütt, K.-R. Müller Machine Learning of Accurate Energy-Conserving Molecular Force Fields, 2017.", "K. T. Schütt, F. Arbabzadah, S. Chmiela, K.-R. Müller, A. Tkatchenko Quantum-Chemical Insights from Deep Tensor Neural Networks, Nat. Commun. 8, 13890, 2017."],

            "data_contact": {

                "given_name": "Alexandre",
                "family_name": "Tkatchenko",

                "email": "*****@*****.**",
                "institution": "University of Luxembourg"

                # IDs
                },

            "author": [{

                "given_name": "Alexandre",
                "family_name": "Tkatchenko",

                "email": "*****@*****.**",
                "institution": "University of Luxembourg"

                # IDs
                },
                {

                "given_name": "Kristof",
                "family_name": "Schütt",

                "institution": "Technical University of Berlin"

                # IDs
                },
                {

                "given_name": "Farhad",
                "family_name": "Arbabzadah",

                "institution": "Technical University of Berlin"

                # IDs
                },
                {

                "given_name": "Stefan",
                "family_name": "Chmiela",

                "institution": "Technical University of Berlin"

                # IDs
                },
                {
                "given_name": "Klaus",
                "family_name": "Müller",

                "institution": "Technical University of Berlin"
                }],

#            "license": ,

            "collection": "Quantum Machine",
            "tags": ["molecular", "dynamics", "trajectories", "DFT", "density functional theory", "PBE", "exchange", "simulation"],

            "description": "This data set consists of molecular dynamics trajectories of 113 randomly selected C7O2H10 isomers calculated at a temperature of 500 K and resolution of 1fs using density functional theory with the PBE exchange-correlation potential.",
            "year": 2016,

            "links": {

                "landing_page": "http://quantum-machine.org/datasets/#C7O2H10",

                "publication": ["https://dx.doi.org/10.1038/ncomms13890"],
#                "dataset_doi": ,

#                "related_id": ,

                "tar_gz": {

                    #"globus_endpoint": ,
                    "http_host": "http://quantum-machine.org",

                    "path": "/data/c7o2h10_md.tar.gz",
                    }
                },

#            "mrr": ,

            "data_contributor": [{
                "given_name": "Jonathon",
                "family_name": "Gaff",
                "email": "*****@*****.**",
                "institution": "The University of Chicago",
                "github": "jgaff"
                }]
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")


    dataset_validator = Validator(dataset_metadata)


    # Get the data
    for file_data in tqdm(find_files(os.path.join(input_path, "c7o2h10_md"), "xyz"), desc="Processing QM_MDT_C", disable= not verbose):
        file_path = os.path.join(file_data["path"], file_data["filename"])
        record = parse_ase(file_path, "xyz")
        record_metadata = {
        "mdf": {
            "title": "MD Trajectories of C7O2H10 - " + record.get("chemical_formula", "") + " - " + file_data["filename"],
            "acl": ["public"],

#            "tags": ,
#            "description": ,
            
            "composition": record.get("chemical_formula", ""),
#            "raw": ,

            "links": {
                "landing_page": "https://data.materialsdatafacility.org/collections/test/md_trajectories_of_c7o2h10/c7o2h10_md/" + file_data["no_root_path"] + "/" if file_data["no_root_path"] else "" + file_data["filename"],

#                "publication": ,
#                "dataset_doi": ,

#                "related_id": ,

                "xyz": {
 
                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                    "http_host": "https://data.materialsdatafacility.org",

                    "path": "/collections/test/md_trajectories_of_c7o2h10/c7o2h10_md/" + file_data["filename"],
                    },
                "energy": {
                    "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                    "http_host": "https://data.materialsdatafacility.org",

                    "path": "/collections/test/md_trajectories_of_c7o2h10/c7o2h10_md/" + file_data["filename"].replace(".xyz", "") + ".energy.dat"
                    }
                },

#            "citation": ,
#            "data_contact": {

#                "given_name": ,
#                "family_name": ,

#                "email": ,
#                "institution":,

                # IDs
#                },

#            "author": ,

#            "license": ,
#            "collection": ,
#            "data_format": ,
#            "data_type": ,
#            "year": ,

#            "mrr":

#            "processing": ,
#            "structure":,
            },
            "qm_mdt_c": {
            "temperature" : {
                "value": 500,
                "unit": "kelvin"
                },
            "resolution" : {
                "value" : 1,
                "unit" : "femtosecond"
                }
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])


    if verbose:
        print("Finished converting")
Example #25
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
        "mdf": {
            "title": "Materials Commons Data",
            "acl": ["public"],
            "source_name": "materials_commons",
            "citation": ["Puchala, B., Tarcea, G., Marquis, E.A. et al. JOM (2016) 68: 2035. doi:10.1007/s11837-016-1998-7"],
            "data_contact": {

                "given_name": "Brian",
                "family_name": "Puchala",

                "email": "*****@*****.**",
                "institution": "University of Michigan",
                "orcid": "https://orcid.org/0000-0002-2461-6614"

                },

            "author": [{

                "given_name": "Brian",
                "family_name": "Puchala",

                "email": "*****@*****.**",
                "institution": "University of Michigan",
                "orcid": "https://orcid.org/0000-0002-2461-6614"

                },
                {

                "given_name": "Glenn",
                "family_name": "Tarcea",

                "institution": "University of Michigan",

                },
                {

                "given_name": "Emmanuelle",
                "family_name": "Marquis",

                "institution": "University of Michigan",

                },
                {

                "given_name": "Margaret",
                "family_name": "Hedstrom",

                "institution": "University of Michigan",

                },
                {

                "given_name": "Hosagrahar",
                "family_name": "Jagadish",

                "institution": "University of Michigan",

                },
                {

                "given_name": "John",
                "family_name": "Allison",

                "institution": "University of Michigan",

                }],

#            "license": ,

            "collection": "Materials Commons",
            "tags": ["materials"],

            "description": "A platform for sharing research data.",
            "year": 2016,

            "links": {

                "landing_page": "https://materialscommons.org/mcpub/",

                "publication": "https://dx.doi.org/10.1007/s11837-016-1998-7",
#                "dataset_doi": ,

#                "related_id": ,

                # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

#            "mrr": ,

            "data_contributor": {
                "given_name": "Jonathon",
                "family_name": "Gaff",
                "email": "*****@*****.**",
                "institution": "The University of Chicago",
                "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")


    dataset_validator = Validator(dataset_metadata)


    # Get the data
    for dir_data in tqdm(find_files(input_path, file_pattern="json", verbose=verbose), desc="Processing metadata", disable= not verbose):
        with open(os.path.join(dir_data["path"], dir_data["filename"])) as file_data:
            mc_data = json.load(file_data)
        record_metadata = {
        "mdf": {
            "title": mc_data["title"],
            "acl": ["public"],

            "tags": mc_data["keywords"],
            "description": mc_data["description"],
            
#            "composition": ,
#            "raw": ,

            "links": {
                "landing_page": "https://materialscommons.org/mcpub/#/details/" + mc_data["id"],

                "publication": mc_data["doi"],
#                "dataset_doi": ,

#                "related_id": ,

                # data links: {
 
                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },

#            "citation": ,
#            "data_contact": {

#                "given_name": ,
#                "family_name": ,

#                "email": ,
#                "institution":,

                # IDs
#                },

#            "author": ,

#            "license": mc_data["license"]["link"],
#            "collection": ,
#            "data_format": ,
#            "data_type": ,
            "year": int(mc_data.get("published_date", "0000")[:4]),

#            "mrr":

#            "processing": ,
#            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])


    if verbose:
        print("Finished converting")
Example #26
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Mg-X-Diffusion",
                "acl": ['public'],
                "source_name":
                "trinkle_mg_x_diffusion",
                "citation": [
                    "Citation for dataset Mg-X-Diffusion with author(s): Dallas Trinkle, Ravi Agarwal"
                ],
                "data_contact": {
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "email": "*****@*****.**",
                    "institution":
                    "University of Illinois at Urbana-Champaign",
                },
                "author": [{
                    "given_name":
                    "Dallas",
                    "family_name":
                    "Trinkle",
                    "email":
                    "*****@*****.**",
                    "instituition":
                    "University of Illinois at Urbana-Champaign"
                }, {
                    "given_name":
                    "Ravi",
                    "family_name":
                    "Agarwal",
                    "institution":
                    "University of Illinois at Urbana-Champaign"
                }],

                #"license": "",
                "collection":
                "Mg-X Diffusion Dataset",
                #            "tags": ,

                #"description": ,
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",

                    # "publication": [""],
                    #"data_doi": "",

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    total_errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]),
                "vasp-out")
        except Exception as e:
            #print("Error on: " + data_file["path"] + "/" + data_file["filename"] + "\n" + repr(e))
            total_errors += 1
            # Clear the record so a failed parse cannot silently reuse the
            # previous file's composition below
            record = None
        record_metadata = {
            "mdf": {
                "title": "Mg-X Diffusion - ",
                "acl": ['public'],

                #            "tags": ,
                #            "description": ,

                #"composition": ,
                #            "raw": ,
                "links": {
                    #"landing_page": ,

                    #                "publication": ,
                    #                "data_doi": ,

                    #                "related_id": ,
                    "outcar": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/mg-x/" + data_file["no_root_path"] +
                        "/" + data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }
        try:
            record_metadata["mdf"]["composition"] = record["mdf"][
                "chemical_formula"]
            record_metadata["mdf"]["title"] += record["mdf"][
                "chemical_formula"]
        except (TypeError, KeyError):
            # parse_ase was unable to read the composition of record 1386:
            # https://data.materialsdatafacility.org/collections/mg-x/Elements/Eu/Mg-X_Eu/OUTCAR
            # Place the correct material composition manually
            record_metadata["mdf"]["composition"] = "EuMg149"
            record_metadata["mdf"]["title"] += "EuMg149"

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Total errors: " + str(total_errors))
        print("Finished converting")
Example #27
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes",
                "acl": ["public"],
                "source_name":
                "trinkle_elastic_fe_bcc",
                "citation": [
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Comp. Mat. Sci. 126, 503 (2017).",
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Data in Brief 10, 147 (2017)."
                ],
                "data_contact": {
                    "given_name": "Michael",
                    "family_name": "Fellinger",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois",
                },
                "author": [{
                    "given_name": "Michael",
                    "family_name": "Fellinger",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois",
                }, {
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "institution": "University of Illinois",
                }, {
                    "given_name": "Louis",
                    "family_name": "Hector Jr.",
                    "institution": "General Motors",
                }],
                "license":
                "http://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "Elastic Fe BCC",
                "tags": ["dft"],
                "description":
                "We introduce a solute strain misfit tensor that quantifies how solutes change the lattice parameter.",
                "year":
                2017,
                "links": {
                    "landing_page":
                    "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/671",
                    "publication": [
                        "http://dx.doi.org/10.1016/j.commatsci.2016.09.040",
                        "http://dx.doi.org/10.1016/j.dib.2016.11.092"
                    ],
                    "data_doi":
                    "http://hdl.handle.net/11256/671",

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        data = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "vasp-out")
        record_metadata = {
            "mdf": {
                "title": "Elastic BCC - " + data["chemical_formula"],
                "acl": ["public"],

                #            "tags": ,
                #            "description": ,
                "composition": data["chemical_formula"],
                #            "raw": ,
                "links": {
                    #                "landing_page": ,

                    #                "publication": ,
                    #                "dataset_doi": ,

                    #                "related_id": ,
                    "outcar": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/" + data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                # IDs
                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "data_format": ,
                #            "data_type": ,
                #            "year": ,

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Example #28
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "JCAP XPS Spectral Database",
                "acl": ["public"],
                "source_name": "jcap_xps_spectral_db",
                "citation": ["http://solarfuelshub.org/xps-spectral-database"],
                "data_contact": {
                    "given_name": "Harry",
                    "family_name": "Atwater",
                    "email": "*****@*****.**",
                    "institution":
                    "Joint Center for Artificial Photosynthesis",
                },

                #            "author": ,

                #            "license": ,
                "collection": "JCAP XPS Spectral DB",
                "tags": ["xps", "spectra"],
                "description":
                "The JCAP High Throughput Experimentation research team uses combinatorial methods to quickly identify promising light absorbers and catalysts for solar-fuel devices. Pure-phase materials — including metal oxides, nitrides, sulfides, oxinitrides, and other single- and mixed-metal materials — are prepared using multiple deposition techniques (e.g., physical vapor deposition, inkjet printing, and micro-fabrication) on various substrates. High-resolution X-ray photoelectron spectroscopy (XPS) spectra for materials that have been characterized to date are made available here as part of JCAP's Materials Characterization Standards (MatChS) database.",
                #            "year": ,
                "links": {
                    "landing_page":
                    "http://solarfuelshub.org/xps-spectral-database",

                    #                "publication": ,
                    #                "dataset_doi": ,

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #}
                },

                #            "mrr": ,
                "data_contributor": {
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff"
                }
            }
        }
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    for data_file in tqdm(find_files(input_path, ".json"),
                          desc="Processing files",
                          disable=not verbose):
        with open(os.path.join(data_file["path"],
                               data_file["filename"])) as in_file:
            data = json.load(in_file)
        record_metadata = {
            "mdf": {
                "title": "JCAP Spectra - " + data["xps_region"],
                "acl": ["public"],

                #            "tags": ,
                #            "description": ,
                "composition": data.pop("material"),
                #            "raw": ,
                "links": {
                    "landing_page": data.pop("link"),

                    #                "publication": ,
                    #                "dataset_doi": ,

                    #                "related_id": ,

                    # data links: {

                    #"globus_endpoint": ,
                    #"http_host": ,

                    #"path": ,
                    #},
                },

                #            "citation": ,
                #            "data_contact": {

                #                "given_name": ,
                #                "family_name": ,

                #                "email": ,
                #                "institution":,

                # IDs
                #                },

                #            "author": ,

                #            "license": ,
                #            "collection": ,
                #            "data_format": ,
                #            "data_type": ,
                "year": data.pop("year"),

                #            "mrr":

                #            "processing": ,
                #            "structure":,
            }
        }
        data.pop("data")
        record_metadata["jcap_xps_spectral_db"] = data

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
Example #29
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Benchmark of the FRETR Bayesian restraint",
                "acl": ["public"],
                "source_name": "fretr_bayesian_restraint",

                "data_contact": {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Bonomi, M., Pellarin, R., Kim, S. J., Russel, D., Sundin, B. A., Riffle, M., … Sali, A. (2014). Benchmark of the FRETR Bayesian restraint [Data set]. Mol Cell Proteomics. Zenodo. http://doi.org/10.5281/zenodo.46558"],

                "author": [{

                    "given_name": "Massimiliano",
                    "family_name": "Bonomi",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco, University of Cambridge",

                },
                {

                    "given_name": "Riccardo",
                    "family_name": "Pellarin",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Seung Joong",
                    "family_name": "Kim",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Russel",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Bryan A.",
                    "family_name": "Sundin",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Michael",
                    "family_name": "Riffle",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Jaschob",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Richard",
                    "family_name": "Ramsden",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Trisha N.",
                    "family_name": "Davis",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Eric G. D.",
                    "family_name": "Muller",
                    "email": "*****@*****.**",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                }],

                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "FRETR Bayesian Restraint",
                "tags": ["Integrative Modeling Platform (IMP)", "Benchmark", "Förster resonance energy transfer (FRET)"],
                "description": "The use of in vivo Förster resonance energy transfer (FRET) data to determine the molecular architecture of a protein complex in living cells is challenging due to data sparseness, sample heterogeneity, signal contributions from multiple donors and acceptors, unequal fluorophore brightness, photobleaching, flexibility of the linker connecting the fluorophore to the tagged protein, and spectral cross-talk. We addressed these challenges by using a Bayesian approach that produces the posterior probability of a model, given the input data. The posterior probability is defined as a function of the dependence of our FRET metric FRETR on a structure (forward model), a model of noise in the data, as well as prior information about the structure, relative populations of distinct states in the sample, forward model parameters, and data noise.",
                "year": 2014,

                "links": {

                    "landing_page": "https://zenodo.org/record/46558",
                    "publication": ["https://doi.org/10.1074/mcp.M114.040824", "https://github.com/integrativemodeling/fret_benchmark/tree/v1.0"],
                    "data_doi": "https://doi.org/10.5281/zenodo.46558",
                    #"related_id": "",

                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",

                        "path": "/record/46558/files/fret_benchmark-v1.0.zip",

                    },

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "FRETR Bayesian Restraint - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    "pdb": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/fretr_bayesian_restraint/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
Example #30
def convert(input_path, metadata=None, verbose=False):
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Prediction of Compounds in Different Local SAR Environments using ECP",
                "acl": ["public"],
                "source_name":
                "ecp_sar_environments",
                "data_contact": {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Namasivayam, V., Gupta-Ostermann, D., Balfer, J., Heikamp, K., & Bajorath, J. (2014). Prediction of Compounds in Different Local SAR Environments using ECP [Data set]. Zenodo. http://doi.org/10.5281/zenodo.8626"
                ],
                "author": [{
                    "given_name":
                    "Vigneshwaran",
                    "family_name":
                    "Namasivayam",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Disha",
                    "family_name":
                    "Gupta-Ostermann",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jenny",
                    "family_name":
                    "Balfer",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Kathrin",
                    "family_name":
                    "Heikamp",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name":
                    "Jürgen",
                    "family_name":
                    "Bajorath",
                    "email":
                    "*****@*****.**",
                    "institution":
                    "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }],
                "license":
                "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection":
                "SAR Environments using ECP",
                #"tags": [""],
                "description":
                "Active compounds can participate in different local structure–activity relationship (SAR) environments and introduce different degrees of local SAR discontinuity, depending on their structural and potency relationships in data sets. Such SAR features have thus far mostly been analyzed using descriptive approaches, in particular, on the basis of activity landscape modeling. However, compounds in different local SAR environments have not yet been predicted. Herein, we adapt the emerging chemical patterns (ECP) method, a machine learning approach for compound classification, to systematically predict compounds with different local SAR characteristics. ECP analysis is shown to accurately assign many compounds to different local SAR environments across a variety of activity classes covering the entire range of observed local SARs.",
                "year":
                2014,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.8626",
                    "publication":
                    ["http://pubs.acs.org/doi/abs/10.1021/ci500147b"],
                    #"data_doi": "",
                    #"related_id": ,
                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",
                        "path": "/record/8626/files/Data_sets.zip",
                    },
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)
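    # For illustration only (hypothetical metadata): a construction like
    #     Validator({"mdf": {"title": "Example"}})
    # would raise here, since required fields such as "source_name" and
    # "acl" would be missing from the block.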

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
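    # Based on its use below, each parsed record is assumed to be a dict with
    # at least a "chemical_formula" key, e.g. {"chemical_formula": "C6H6", ...}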
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            # Count the failure and skip this file; without the continue,
            # record would be undefined (or stale from a prior iteration) below
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title":
                "SAR Environments using ECP - " + record["chemical_formula"],
                "acl": ["public"],
                "composition":
                record["chemical_formula"],

                #                "tags": ,
                #                "description": ,
                #"raw": json.dumps(record),
                "links": {

                    #                    "landing_page": ,
                    #                    "publication": ,
                    #                    "data_doi": ,
                    #                    "related_id": ,
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ecp_sar_environments/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #                "citation": ,

                #                "data_contact": {

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    },

                #                "author": [{

                #                    "given_name": ,
                #                    "family_name": ,
                #                    "email": ,
                #                    "institution": ,

                #                    }],

                #                "year": ,
            },

            # "dc": {

            # },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")