def convert(input_path, metadata=None, verbose=False):
    """Convert the 'SAR Matrix' chemogenomics dataset into validated feedstock.

    Every file returned by ``find_files(input_path, "sdf")`` is parsed with
    ``parse_ase`` and written as one record through a ``Validator``. Files
    that fail to parse are counted and skipped.

    Arguments:
    input_path (str): Location handed to ``find_files`` to locate the data.
    metadata (str or dict): Dataset metadata. A dict is used as-is; a string
        is first tried as JSON text and, failing that, as the path of a JSON
        file. When falsy, the built-in metadata below is used.
    verbose (bool): When True, print status messages (including the number of
        unparsable files) and enable the progress bar.

    Raises:
    ValueError: If the Validator rejects a record.
    SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "The ‘SAR Matrix’ method and its extensions for applications in medicinal chemistry and chemogenomics",
                "acl": ["public"],
                "source_name": "sar_chemogenomics",

                "data_contact": {

                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Gupta-Ostermann, D., & Bajorath, J. (2014). The ‘SAR Matrix’ method and its extensions for applications in medicinal chemistry and chemogenomics [Data set]. F1000Research. Zenodo. http://doi.org/10.5281/zenodo.10457"],

                "author": [{

                    "given_name": "Disha",
                    "family_name": "Gupta-Ostermann",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                },
                {

                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität",

                }],

                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "SAR Chemogenomics",
                "description": "We describe the ‘Structure-Activity Relationship (SAR) Matrix’ (SARM) methodology that is based upon a special two-step application of the matched molecular pair (MMP) formalism. The SARM method has originally been designed for the extraction, organization, and visualization of compound series and associated SAR information from compound data sets.",
                "year": 2014,

                "links": {

                    "landing_page": "https://doi.org/10.5281/zenodo.10457",
                    "publication": ["https://f1000research.com/articles/3-113/v2"],

                    "zip": {

                        "http_host": "https://zenodo.org",

                        "path": "/record/10457/files/Cpd_data_sets.zip",
                        },
                    },
                },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is either JSON text or the path of a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            # Skip files that cannot be parsed. Falling through would reuse
            # the previous file's record (or raise NameError on the first
            # file), because `record` is not rebound when parsing fails.
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "SAR Chemogenomics - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                "links": {

                    "sdf": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        # NOTE(review): "sar_chemogenoomics" differs from the
                        # source_name "sar_chemogenomics" -- confirm against
                        # the actual collection directory before changing.
                        "path": "/collections/sar_chemogenoomics/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },
                },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")
# Example #2
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the NIST fcc/hcp vacancy diffusion dataset into validated feedstock.

    Every file returned by ``find_files(input_path, "OUTCAR")`` is parsed with
    ``parse_ase`` (format ``"vasp-out"``) and written as one record through a
    ``Validator``. Files that fail to parse are counted and skipped.

    Arguments:
    input_path (str): Location handed to ``find_files`` to locate the data.
    metadata (str or dict): Dataset metadata. A dict is used as-is; a string
        is first tried as JSON text and, failing that, as the path of a JSON
        file. When falsy, the built-in metadata below is used.
    verbose (bool): When True, print status messages (including the number of
        unparsable files) and enable the progress bar.

    Raises:
    ValueError: If the Validator rejects a record.
    SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Elemental vacancy diffusion database from high-throughput first-principles calculations for fcc and hcp structures",
                "acl": ["public"],
                "source_name": "nist_fcc_hcp_structures",

                "data_contact": {

                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Angsten, Thomas; Mayeshiba, Tam; Wu, Henry; Morgan, Dane Elemental vacancy diffusion for fcc and hcp structures (2014-08-08) http://hdl.handle.net/11256/76"],

                "author": [{

                    "given_name": "Thomas",
                    "family_name": "Angsten",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Tam",
                    "family_name": "Mayeshiba",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Henry",
                    "family_name": "Wu",
                    "institution": "University of Wisconsin-Madison",

                },
                {

                    "given_name": "Dane",
                    "family_name": "Morgan",
                    "email": "*****@*****.**",
                    "institution": "University of Wisconsin-Madison",

                }],

                #"license": "",  NO LICENSE ON SITE... CONTACT AUTHOR
                "collection": "NIST fcc hcp Structures",
                "description": "This work demonstrates how databases of diffusion-related properties can be developed from high-throughput ab initio calculations. The formation and migration energies for vacancies of all adequately stable pure elements in both the face-centered cubic (fcc) and hexagonal close packing (hcp) crystal structures were determined using ab initio calculations. For hcp migration, both the basal plane and z-direction nearest-neighbor vacancy hops were considered. Energy barriers were successfully calculated for 49 elements in the fcc structure and 44 elements in the hcp structure.",
                "year": 2014,

                "links": {

                    "landing_page": "http://hdl.handle.net/11256/76",
                    "publication": ["http://dx.doi.org/10.1088/1367-2630/16/1/015018"],
                    },
                },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is either JSON text or the path of a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"), desc="Processing files", disable=not verbose):
        try:
            record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "vasp-out")
        except Exception:
            # Skip files that cannot be parsed. Falling through would reuse
            # the previous file's record (or raise NameError on the first
            # file), because `record` is not rebound when parsing fails.
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "NIST fcc hcp structures - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                "links": {

                    "outcar": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/nist_fcc_hcp_structures/" + data_file["no_root_path"] + "/" + data_file["filename"],
                        },
                    },
                },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Build feedstock for the "Halogen-Substituted Ionic Liquids" dataset.

    Each file returned by ``find_files(input_path, "xyz")`` is parsed with
    ``parse_ase`` and written as a single record through a ``Validator``.

    Arguments:
    input_path (str): Location handed to ``find_files`` to locate the data.
    metadata (str or dict): Dataset metadata. A dict is used directly; a
        string is tried as JSON text first and then as a JSON file path.
        A falsy value selects the metadata defined in this function.
    verbose (bool): When True, print status messages and enable the
        progress bar.

    Raises:
    ValueError: If the Validator does not accept a record.
    SystemExit: If ``metadata`` is unreadable or of an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # People entries may carry extra IDs (e.g. "github"); contributors are
    # expected to list a github username.
    if not metadata:
        contact = {
            "given_name": "Vitaly V.",
            "family_name": "Chaban",
            "email": "*****@*****.**",
            "institution": "University of Rochester",
        }
        contributor = {
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }
        author = {
            "given_name": "Vitaly V.",
            "family_name": "Chaban",
            "email": "*****@*****.**",
            "institution": "Universidade Federal de São Paulo",
        }
        dataset_links = {
            "landing_page": "https://doi.org/10.5281/zenodo.165493",
        }
        dataset_metadata = {
            "mdf": {
                "title": "Halogen-Substituted Ionic Liquids",
                "acl": ["public"],
                "source_name": "halogen_ionic_liquids",
                "data_contact": contact,
                "data_contributor": [contributor],
                "citation": ["Chaban, Vitaly V. (2016). Halogen-Substituted Ionic Liquids [Data set]. Zenodo. http://doi.org/10.5281/zenodo.165493"],
                "author": [author],
                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Halogen Substituted Ionic Liquids",
                "description": "Pre-equilibrated systems for different size for AIMD for Halogen-Substituted Ionic Liquids.",
                "year": 2016,
                "links": dataset_links,
            },
        }
    elif type(metadata) is str:
        # Try the string as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as read_error:
                sys.exit("Error: Unable to read metadata: " + repr(read_error))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; its constructor checks the metadata.
    dataset_validator = Validator(dataset_metadata)

    # Write one record per data file.
    xyz_files = tqdm(find_files(input_path, "xyz"), desc="Processing files", disable=not verbose)
    for data_file in xyz_files:
        full_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(full_path, "xyz")
        formula = record["chemical_formula"]

        file_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/halogen_ionic_liquids/" + data_file["filename"],
        }
        record_metadata = {
            "mdf": {
                "title": "Halogen Ionic Liquids - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {"xyz": file_link},
            },
        }

        result = dataset_validator.write_record(record_metadata)
        if result["success"]:
            continue

        # The record was rejected: cancel the validation, then abort.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(result["message"] + "\n" + result.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Build feedstock for the cytotoxic platinum(IV) complexes dataset.

    Each file returned by ``find_files(input_path, "temp_file.cif")`` is
    parsed with ``parse_ase`` (format ``"cif"``) and written as a single
    record through a ``Validator``.

    Arguments:
    input_path (str): Location handed to ``find_files`` to locate the data.
    metadata (str or dict): Dataset metadata. A dict is used directly; a
        string is tried as JSON text first and then as a JSON file path.
        A falsy value selects the metadata defined in this function.
    verbose (bool): When True, print status messages and enable the
        progress bar.

    Raises:
    ValueError: If the Validator does not accept a record.
    SystemExit: If ``metadata`` is unreadable or of an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # People entries may carry extra IDs (e.g. "github"); contributors are
    # expected to list a github username.
    if not metadata:
        contact = {
            "given_name": "Markus",
            "family_name": "Galanski",
            "email": "*****@*****.**",
            "institution": "University of Vienna",
        }
        contributor = {
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }
        authors = [
            {
                "given_name": "Hristo P.",
                "family_name": "Varbanov",
                "institution": "University of Vienna",
            },
            {
                "given_name": "Michael A.",
                "family_name": "Jakupec",
                "institution": "University of Vienna",
            },
            {
                "given_name": "Alexander",
                "family_name": "Roller",
                "institution": "University of Vienna",
            },
            {
                "given_name": "Frank",
                "family_name": "Jensen",
                "email": "*****@*****.**",
                "institution": "University of Aarhus",
            },
            {
                "given_name": "Markus",
                "family_name": "Galanski",
                "email": "*****@*****.**",
                "institution": "University of Vienna",
            },
            {
                "given_name": "Bernhard K.",
                "family_name": "Keppler",
                "institution": "University of Vienna",
            },
        ]
        keywords = ["structure geometry", "series", "resistance", "Herein", "laboratory", "tetraki", "tris", "Relationship", "wb 97x", "mechanism", "cisplatin", "complex", "SW", "Cytotoxic", "calculation", "relationship", "Density Functional Theory", "DFT", "Reliable", "ComplexesOctahedral", "bi", "compound", "Quantitative", "Model", "QSAR investigations", "cytotoxicity", "candidate", "cell line CH 1", "descriptor", "optimization", "QSAR models", "toxicity", "Theoretical Investigations"]
        dataset_links = {
            "landing_page": "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3557934/",
            "publication": ["https://dx.doi.org/10.1021%2Fjm3016427"],
            "cif": {
                "http_host": "https://ndownloader.figshare.com",
                "path": "/files/3593325",
            },
        }
        dataset_metadata = {
            "mdf": {
                "title": "Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes",
                "acl": ["public"],
                "source_name": "cytotoxic_pt_complexes",
                "data_contact": contact,
                "data_contributor": [contributor],
                "citation": ["Keppler, Bernhard K. (2013/01/10). Theoretical Investigations and Density Functional Theory Based Quantitative Structure–Activity Relationships Model for Novel Cytotoxic Platinum(IV) Complexes. Journal of Medicinal Chemistry, 56, 330-344. doi: 10.1021/jm3016427"],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cytotoxic Platinum Complexes",
                "tags": keywords,
                "description": "Octahedral platinum(IV) complexes are promising candidates in the fight against cancer. In order to rationalize the further development of this class of compounds, detailed studies on their mechanisms of action, toxicity, and resistance must be provided and structure–activity relationships must be drawn. Herein, we report on theoretical and QSAR investigations of a series of 53 novel bis-, tris-, and tetrakis(carboxylato)platinum(IV) complexes, synthesized and tested for cytotoxicity in our laboratories. ",
                "year": 2012,
                "links": dataset_links,
            },
        }
    elif type(metadata) is str:
        # Try the string as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as read_error:
                sys.exit("Error: Unable to read metadata: " + repr(read_error))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; its constructor checks the metadata.
    dataset_validator = Validator(dataset_metadata)

    # Write one record per data file.
    # temp_file.cif is the real file with authors and addresses removed so
    # that ase can read the composition; it is for conversion only, which is
    # why the record link below names the original CIF file instead.
    cif_files = tqdm(find_files(input_path, "temp_file.cif"), desc="Processing files", disable=not verbose)
    for data_file in cif_files:
        full_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(full_path, "cif")
        formula = record["chemical_formula"]

        file_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/cytotoxic_pt_complexes/" + "jm3016427_si_002.cif",
        }
        record_metadata = {
            "mdf": {
                "title": "Cytotoxic Platinum Complexes - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {"cif": file_link},
            },
        }

        result = dataset_validator.write_record(record_metadata)
        if result["success"]:
            continue

        # The record was rejected: cancel the validation, then abort.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(result["message"] + "\n" + result.get("details", ""))

    if verbose:
        print("Finished converting")
# Example #5
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the Ru/Pt heteropolymetallic complexes dataset into feedstock.

    Every file that ``find_files`` reports under ``input_path`` for the
    pattern ``"(xyz|cif)"`` is parsed with ``parse_ase`` and written as one
    record through a ``Validator`` built from the dataset metadata.

    Arguments:
        input_path: Directory handed to ``find_files``.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in metadata below.
        verbose: When true, print status messages and show the progress bar.

    Raises:
        SystemExit: If ``metadata`` is truthy but neither a str nor a dict, or
            if a str value can be read neither as JSON nor as a JSON file.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # People entries (data_contact, data_contributor, author) may carry extra
    # IDs such as "github". Additional blocks next to "mdf" must be named after
    # the dataset's source_name.
    if not metadata:
        ## Metadata:dataset
        authors = [
            {"given_name": "Pengfei", "family_name": "Shi", "email": "*****@*****.**", "institution": "Huaihai Institute of Technology"},
            {"given_name": "Benjamin J.", "family_name": "Coe", "email": "*****@*****.**", "institution": "The University of Manchester"},
            {"given_name": "Sergio", "family_name": "Sánchez", "institution": "The University of Manchester"},
            {"given_name": "Daqi", "family_name": "Wang", "institution": "Liaocheng University"},
            {"given_name": "Yupeng", "family_name": "Tian", "institution": "Anhui University"},
            {"given_name": "Marcin", "family_name": "Nyk", "institution": "Wrocław University of Technology"},
            {"given_name": "Marek", "family_name": "Samoc", "institution": "Wrocław University of Technology"},
        ]
        keywords = [
            "Heteropolymetallic Complexes",
            "850 nm",
            "834 nm",
            "polymetallic species",
            "Pt coordination",
            "spectra change",
            "moietie",
            "qpy",
            "MLCT",
            "2 PA activities",
            "complex",
            "301 GM",
            "PtII",
            "RuII",
            "523 GM",
            "heptanuclear RuPt 6",
            "absorption bands",
        ]
        abstract = "New trinuclear RuPt2 and heptanuclear RuPt6 complex salts are prepared by attaching PtII 2,2′:6′,2″-terpyridine (tpy) moieties to RuII 4,4′:2′,2″:4″,4‴-quaterpyridine (qpy) complexes. Characterization includes single crystal X-ray structures for both polymetallic species. The visible absorption bands are primarily due to RuII → qpy metal-to-ligand charge-transfer (MLCT) transitions, according to time-dependent density functional theory (TD-DFT) calculations. These spectra change only slightly on Pt coordination, while the orange-red emission from the complexes shows corresponding small red-shifts, accompanied by decreases in intensity. Cubic molecular nonlinear optical behavior has been assessed by using Z-scan measurements. These reveal relatively high two-photon absorption (2PA) cross sections σ2, with maximal values of 301 GM at 834 nm (RuPt2) and 523 GM at 850 nm (RuPt6) when dissolved in methanol or acetone, respectively. Attaching PtII(tpy) moieties triples or quadruples the 2PA activities when compared with the RuII-based cores."
        dataset_metadata = {
            "mdf": {
                "title": "Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption",
                "acl": ["public"],
                "source_name": "ru_pt_complexes",
                "data_contact": {
                    "given_name": "Pengfei",
                    "family_name": "Shi",
                    "email": "*****@*****.**",
                    "institution": "Huaihai Institute of Technology",
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": [
                    "Shi, Pengfei; Coe, Benjamin J.; Sánchez, Sergio; Wang, Daqi; Tian, Yupeng; Nyk, Marcin; Samoc, Marek (2015): Uniting Ruthenium(II) and Platinum(II) Polypyridine Centers in Heteropolymetallic Complexes Giving Strong Two-Photon Absorption. ACS Publications. https://doi.org/10.1021/acs.inorgchem.5b02089 Retrieved: 15:54, Jul 27, 2017 (GMT)",
                ],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Ru Pt Heteropolymetallic Complexes",
                "tags": keywords,
                "description": abstract,
                "year": 2015,
                "links": {
                    "landing_page": "https://figshare.com/collections/Uniting_Ruthenium_II_and_Platinum_II_Polypyridine_Centers_in_Heteropolymetallic_Complexes_Giving_Strong_Two_Photon_Absorption/2204182",
                    "publication": ["https://doi.org/10.1021/acs.inorgchem.5b02089"],
                },
            },
        }
        ## End metadata
    elif type(metadata) is str:
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as read_error:
                sys.exit("Error: Unable to read metadata: " + repr(read_error))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front and
    # then every record, one at a time.
    dataset_validator = Validator(dataset_metadata)

    source_files = find_files(input_path, "(xyz|cif)")
    for entry in tqdm(source_files, desc="Processing files", disable=not verbose):
        filename = entry["filename"]
        # The text after the last dot is used both as the parser format and as
        # the key of the file link.
        extension = filename.rsplit(".", 1)[-1]
        parsed = parse_ase(os.path.join(entry["path"], filename), extension)
        formula = parsed["chemical_formula"]

        ## Metadata:record
        record_title = "Ru Pt Heteropolymetallic Complexes - " + formula
        file_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/ru_pt_complexes/" + filename,
        }
        record_metadata = {
            "mdf": {
                "title": record_title,
                "acl": ["public"],
                "composition": formula,
                "links": {extension: file_link},
            },
        }
        ## End metadata

        outcome = dataset_validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was rejected: try to cancel the validation, then abort.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    if verbose:
        print("Finished converting")
# Exemple #6
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the Quantum Machine C7O2H10 MD-trajectory dataset into feedstock.

    Files reported by ``find_files`` for the pattern ``"xyz"`` under
    ``<input_path>/c7o2h10_md`` are parsed with ``parse_ase`` and written as
    one record each through a ``Validator`` built from the dataset metadata.

    Arguments:
        input_path: Directory that contains the ``c7o2h10_md`` folder.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in metadata below.
        verbose: When true, print status messages and show the progress bar.

    Raises:
        SystemExit: If ``metadata`` is truthy but neither a str nor a dict, or
            if a str value can be read neither as JSON nor as a JSON file.

    A record the Validator does not accept is reported with ``print`` and
    processing continues with the next file.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Quantum Machine - MD Trajectories of C7O2H10",
                "acl": ["public"],
                "source_name": "qm_mdt_c",
                "citation": [
                    "S. Chmiela, A. Tkatchenko, H. E. Sauceda, I. Poltavsky, K. T. Schütt, K.-R. Müller Machine Learning of Accurate Energy-Conserving Molecular Force Fields, 2017.",
                    "K. T. Schütt, F. Arbabzadah, S. Chmiela, K.-R. Müller, A. Tkatchenko Quantum-Chemical Insights from Deep Tensor Neural Networks, Nat. Commun. 8, 13890, 2017.",
                ],
                "data_contact": {
                    "given_name": "Alexandre",
                    "family_name": "Tkatchenko",
                    "email": "*****@*****.**",
                    "institution": "University of Luxembourg",
                },
                "author": [
                    {
                        "given_name": "Alexandre",
                        "family_name": "Tkatchenko",
                        "email": "*****@*****.**",
                        "institution": "University of Luxembourg",
                    },
                    {
                        "given_name": "Kristof",
                        "family_name": "Schütt",
                        "institution": "Technical University of Berlin",
                    },
                    {
                        "given_name": "Farhad",
                        "family_name": "Arbabzadah",
                        "institution": "Technical University of Berlin",
                    },
                    {
                        "given_name": "Stefan",
                        "family_name": "Chmiela",
                        "institution": "Technical University of Berlin",
                    },
                    {
                        "given_name": "Klaus",
                        "family_name": "Müller",
                        "institution": "Technical University of Berlin",
                    },
                ],
                "collection": "Quantum Machine",
                "tags": ["molecular", "dynamics", "trajectories", "DFT", "density functional theory", "PBE", "exchange", "simulation"],
                "description": "This data set consists of molecular dynamics trajectories of 113 randomly selected C7O2H10 isomers calculated at a temperature of 500 K and resolution of 1fs using density functional theory with the PBE exchange-correlation potential.",
                "year": 2016,
                "links": {
                    "landing_page": "http://quantum-machine.org/datasets/#C7O2H10",
                    "publication": ["https://dx.doi.org/10.1038/ncomms13890"],
                    "tar_gz": {
                        "http_host": "http://quantum-machine.org",
                        "path": "/data/c7o2h10_md.tar.gz",
                    },
                },
                "data_contributor": [
                    {
                        "given_name": "Jonathon",
                        "family_name": "Gaff",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "jgaff",
                    },
                ],
            },
        }
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    host = "https://data.materialsdatafacility.org"
    endpoint = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    collection_path = "/collections/test/md_trajectories_of_c7o2h10/c7o2h10_md/"

    # Get the data
    data_files = find_files(os.path.join(input_path, "c7o2h10_md"), "xyz")
    for file_data in tqdm(data_files, desc="Processing QM_MDT_C", disable=not verbose):
        filename = file_data["filename"]
        record = parse_ase(os.path.join(file_data["path"], filename), "xyz")
        formula = record.get("chemical_formula", "")

        # The sub-directory segment is optional. It is computed on its own
        # because a conditional expression binds more loosely than "+": written
        # inline without parentheses, the URL came out either without the
        # filename or as the bare filename without host and collection path.
        sub_dir = file_data["no_root_path"]
        sub_dir_segment = sub_dir + "/" if sub_dir else ""
        landing_page = host + collection_path + sub_dir_segment + filename

        record_metadata = {
            "mdf": {
                "title": "MD Trajectories of C7O2H10 - " + formula + " - " + filename,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "landing_page": landing_page,
                    # NOTE(review): unlike landing_page, these paths do not
                    # include no_root_path -- confirm against the data layout.
                    "xyz": {
                        "globus_endpoint": endpoint,
                        "http_host": host,
                        "path": collection_path + filename,
                    },
                    "energy": {
                        "globus_endpoint": endpoint,
                        "http_host": host,
                        "path": collection_path + filename.replace(".xyz", "") + ".energy.dat",
                    },
                },
            },
            # Dataset-specific block, named after the dataset's source_name.
            "qm_mdt_c": {
                "temperature": {
                    "value": 500,
                    "unit": "kelvin",
                },
                "resolution": {
                    "value": 1,
                    "unit": "femtosecond",
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported but does not stop the conversion.
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
# Exemple #7
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "SAR Environments using ECP" dataset into feedstock.

    Files reported by ``find_files`` for the pattern ``"sdf"`` under
    ``input_path`` are parsed with ``parse_ase`` and written as one record
    each through a ``Validator`` built from the dataset metadata. Files that
    ``parse_ase`` cannot read are counted and skipped.

    Arguments:
        input_path: Directory handed to ``find_files``.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in metadata below.
        verbose: When true, print status messages (including the number of
            unparseable files) and show the progress bar.

    Raises:
        SystemExit: If ``metadata`` is truthy but neither a str nor a dict, or
            if a str value can be read neither as JSON nor as a JSON file.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Prediction of Compounds in Different Local SAR Environments using ECP",
                "acl": ["public"],
                "source_name": "ecp_sar_environments",
                "data_contact": {
                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Namasivayam, V., Gupta-Ostermann, D., Balfer, J., Heikamp, K., & Bajorath, J. (2014). Prediction of Compounds in Different Local SAR Environments using ECP [Data set]. Zenodo. http://doi.org/10.5281/zenodo.8626"
                ],
                "author": [{
                    "given_name": "Vigneshwaran",
                    "family_name": "Namasivayam",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name": "Disha",
                    "family_name": "Gupta-Ostermann",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name": "Jenny",
                    "family_name": "Balfer",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name": "Kathrin",
                    "family_name": "Heikamp",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }, {
                    "given_name": "Jürgen",
                    "family_name": "Bajorath",
                    "email": "*****@*****.**",
                    "institution": "Rheinische Friedrich-Wilhelms-Universität Bonn",
                }],
                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "SAR Environments using ECP",
                "description": "Active compounds can participate in different local structure–activity relationship (SAR) environments and introduce different degrees of local SAR discontinuity, depending on their structural and potency relationships in data sets. Such SAR features have thus far mostly been analyzed using descriptive approaches, in particular, on the basis of activity landscape modeling. However, compounds in different local SAR environments have not yet been predicted. Herein, we adapt the emerging chemical patterns (ECP) method, a machine learning approach for compound classification, to systematically predict compounds with different local SAR characteristics. ECP analysis is shown to accurately assign many compounds to different local SAR environments across a variety of activity classes covering the entire range of observed local SARs.",
                "year": 2014,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.8626",
                    "publication": ["http://pubs.acs.org/doi/abs/10.1021/ci500147b"],
                    "zip": {
                        "http_host": "https://zenodo.org",
                        "path": "/record/8626/files/Data_sets.zip",
                    },
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), "sdf")
        except Exception:
            # Count the unparseable file and move on. Without skipping, the
            # code below would use an unbound `record` (first file) or the
            # previous file's `record`, writing a wrong entry for this file.
            errors += 1
            continue

        ## Metadata:record
        formula = record["chemical_formula"]
        record_metadata = {
            "mdf": {
                "title": "SAR Environments using ECP - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/ecp_sar_environments/" +
                                data_file["no_root_path"] + "/" +
                                data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the "SCAN of Ionic Liquids" dataset into feedstock.

    Files reported by ``find_files`` for the pattern ``"xyz"`` under
    ``input_path`` are parsed with ``parse_ase`` and written as one record
    each through a ``Validator`` built from the dataset metadata.

    Arguments:
        input_path: Directory handed to ``find_files``.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in metadata below.
        verbose: When true, print status messages and show the progress bar.

    Raises:
        SystemExit: If ``metadata`` is truthy but neither a str nor a dict, or
            if a str value can be read neither as JSON nor as a JSON file.
        ValueError: If the Validator reports that a record was not written.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # People entries (data_contact, data_contributor, author) may carry extra
    # IDs such as "github". Additional blocks next to "mdf" must be named after
    # the dataset's source_name.
    if not metadata:
        ## Metadata:dataset
        tartu = "University of Tartu"
        authors = [
            {"given_name": "Karl", "family_name": "Karu", "institution": tartu},
            {"given_name": "Heigo", "family_name": "Ers", "institution": tartu},
            {"given_name": "Maksim", "family_name": "Mišin", "institution": tartu},
            {"given_name": "Jianwei", "family_name": "Sun", "institution": "The University of Texas at El Paso"},
            {"given_name": "Vladislav", "family_name": "Ivaništšev", "email": "*****@*****.**", "institution": tartu},
        ]
        dataset_metadata = {
            "mdf": {
                "title": 'Data for the article "Performance of SCAN density functional method for a set of ionic liquids"',
                "acl": ["public"],
                "source_name": "scan_ionic_liquids",
                "data_contact": {
                    "given_name": "Vladislav",
                    "family_name": "Ivaništšev",
                    "email": "*****@*****.**",
                    "institution": tartu,
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": [
                    'Karu, Karl, Ers, Heigo, Mišin, Maksim, Sun, Jianwei, & Ivaništšev, Vladislav. (2017). Data for the article "Performance of SCAN density functional method for a set of ionic liquids" [Data set]. Zenodo. http://doi.org/10.5281/zenodo.495089',
                ],
                "author": authors,
                "collection": "SCAN of Ionic Liquids",
                "description": 'The repository (https://github.com/vilab-tartu/SCAN) contains the database, geometries and an illustrative ipython notebook supporting the article "Performance of SCAN density functional method for a set of ionic liquids". ',
                "year": 2017,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.495089",
                    "publication": ["https://github.com/vilab-tartu/SCAN/tree/v.05"],
                },
            },
        }
        ## End metadata
    elif type(metadata) is str:
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as read_error:
                sys.exit("Error: Unable to read metadata: " + repr(read_error))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front and
    # then every record, one at a time.
    dataset_validator = Validator(dataset_metadata)

    source_files = find_files(input_path, "xyz")
    for entry in tqdm(source_files, desc="Processing files", disable=not verbose):
        parsed = parse_ase(os.path.join(entry["path"], entry["filename"]), "xyz")
        formula = parsed["chemical_formula"]

        ## Metadata:record
        record_title = "SCAN of Ionic Liquids - " + formula
        geometry_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/scan_ionic_liquids/" + entry["no_root_path"] + "/" + entry["filename"],
        }
        # Every record also links the same database.json file.
        database_link = {
            "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
            "http_host": "https://data.materialsdatafacility.org",
            "path": "/collections/scan_ionic_liquids/database.json",
        }
        record_metadata = {
            "mdf": {
                "title": record_title,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "xyz": geometry_link,
                    "json": database_link,
                },
            },
        }
        ## End metadata

        outcome = dataset_validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was rejected: try to cancel the validation, then abort.
        cancellation = dataset_validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    if verbose:
        print("Finished converting")
# Exemple #9
# 0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the "pt_cyanometallates" dataset.

    Arguments:
    input_path (str): Location handed to find_files() to locate the xyz/cif files.
    metadata (dict or str): Dataset metadata as a dict, a JSON string, or the path
        of a JSON file. Any falsy value selects the built-in metadata below.
    verbose (bool): Print start/finish messages and show the progress bar.

    Raises:
    ValueError: If the Validator reports that a record could not be written.
    SystemExit: If `metadata` is unreadable or is neither a str nor a dict.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: People entries (e.g. data_contact) may carry extra IDs such as "github".
    if not metadata:
        contact = {
            "given_name": "Igor O.",
            "family_name": "Koshevoy",
            "email": "*****@*****.**",
            "institution": "University of Eastern Finland",
        }
        contributors = [{
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }]
        authors = [
            {"given_name": "Leon", "family_name": "Schneider", "institution": "Julius-Maximilians-Universität"},
            {"given_name": "Vasily", "family_name": "Sivchik", "institution": "University of Eastern Finland"},
            {"given_name": "Kun-you", "family_name": "Chung", "institution": "National Taiwan University"},
            {"given_name": "Yi-Ting", "family_name": "Chen", "institution": "National Taiwan University"},
            {"given_name": "Antti J.", "family_name": "Karttunen", "email": "*****@*****.**", "institution": "Aalto University"},
            {"given_name": "Pi-Tai", "family_name": "Chou", "email": "*****@*****.**", "institution": "National Taiwan University", "orcid": "orcid.org/0000-0002-8925-7747"},
            {"given_name": "Igor O.", "family_name": "Koshevoy", "email": "*****@*****.**", "institution": "University of Eastern Finland", "orcid": "orcid.org/0000-0003-4380-1302"},
        ]
        keywords = ["coordination geometries", "compound", "luminescence studies", "Coordination Self-Assembly", "Luminescent Blocks", "emission performance", "cyclometalated fragment", "chromophoric cycloplatinated metalloligands", "frontier orbitals", "complexes exhibit", "tetranuclear complexes", "time-dependent density", "η 1", "photophysical behavior", "cyanide-bridged heterometallic aggregates", "squarelike arrangement", "F 2 ppy", "phosphine motifs", "Cu", "M Pt LCT contribution", "alternative cluster topology", "metal ions", "tolpy", "10 fragments", "Ag", "room-temperature phosphorescence", "HF 2 ppy"]
        dataset_links = {
            "landing_page": "https://figshare.com/collections/Cyclometalated_Platinum_II_Cyanometallates_Luminescent_Blocks_for_Coordination_Self-Assembly/3730237",
            "publication": ["https://doi.org/10.1021/acs.inorgchem.7b00006"],
        }
        dataset_metadata = {
            "mdf": {
                "title": "Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly",
                "acl": ["public"],
                "source_name": "pt_cyanometallates",
                "data_contact": contact,
                "data_contributor": contributors,
                "citation": ["Schneider, Leon; Sivchik, Vasily; Chung, Kun-you; Chen, Yi-Ting; Karttunen, Antti J.; Chou, Pi-Tai; Koshevoy, Igor O. (2017): Cyclometalated Platinum(II) Cyanometallates: Luminescent Blocks for Coordination Self-Assembly. ACS Publications. https://doi.org/10.1021/acs.inorgchem.7b00006"],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by-nc/4.0/",
                "collection": "Cyclometalated Platinum(II) Cyanometallates",
                "tags": keywords,
                "description": "A family of cyanide-bridged heterometallic aggregates has been constructed of the chromophoric cycloplatinated metalloligands and coordinatively unsaturated d10 fragments {M(PPh3)n}. The tetranuclear complexes of general composition [Pt(C^N)(CN)2M(PPh3)2]2 [C^N = ppy, M = Cu (1), Ag (2); C^N = tolpy (Htolpy = 2-(4-tolyl)-pyridine), M = Cu (4), Ag (5); C^N = F2ppy (HF2ppy = 2-(4, 6-difluorophenyl)-pyridine), M = Cu (7), Ag (8)] demonstrate a squarelike arrangement of the molecular frameworks, which is achieved due to favorable coordination geometries of the bridging ligands and the metal ions. Variation of the amount of the ancillary phosphine (for M = Ag) afforded compounds [Pt(C^N)(CN)2Ag(PPh3)]2 (C^N = ppy, 3; C^N = tolpy, 6); for the latter one an alternative cluster topology, stabilized by the Pt–Ag metallophilic and η1-Cipso(C^N)–Ag bonding, was observed.",
                "year": 2017,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not parseable as JSON text, so treat the string as a path to a JSON file
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata
    validator = Validator(dataset_metadata)

    # Values shared by every record's data link
    endpoint = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    host = "https://data.materialsdatafacility.org"

    # Each matching file becomes exactly one record, written one at a time
    matches = find_files(input_path, "(xyz|cif)")
    for entry in tqdm(matches, desc="Processing Files", disable=not verbose):
        name = entry["filename"]
        # The file extension doubles as the parser format and the link key
        extension = name.split(".")[-1]
        parsed = parse_ase(os.path.join(entry["path"], name), extension)
        formula = parsed["chemical_formula"]
        title = "Cyclometalated Platinum(II) Cyanometallates - " + formula
        location = "/collections/pt_cyanometallates/" + entry["no_root_path"] + "/" + name

        record_metadata = {
            "mdf": {
                "title": title,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    extension: {
                        "globus_endpoint": endpoint,
                        "http_host": host,
                        "path": location,
                    },
                },
            },
        }

        outcome = validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was rejected: try to discard the partial feedstock, then abort
        cancellation = validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #10
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the "bursi_toxicophores" dataset.

    Arguments:
    input_path (str): Location handed to find_files() to locate the sdf files.
    metadata (dict or str): Dataset metadata as a dict, a JSON string, or the path
        of a JSON file. Any falsy value selects the built-in metadata below.
    verbose (bool): Print start/finish messages and show the progress bar.

    Raises:
    SystemExit: If `metadata` is unreadable or is neither a str nor a dict.

    A record rejected by the Validator is reported with print() and processing
    continues with the next file.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Derivation and Validation of Toxicophores for Mutagenicity Prediction",
                "acl": ['public'],
                "source_name": "bursi_toxicophores",
                "citation": ["Bursi, Roberta (2005/01/01). Derivation and Validation of Toxicophores for Mutagenicity Prediction. Journal of Medicinal Chemistry, 48, 312-320. doi: 10.1021/jm040835a"],
                # People entries use the key "institution" (previously misspelled
                # "instituition" here, so the value never reached that field).
                "data_contact": {
                    "given_name": "Roberta",
                    "family_name": "Bursi",
                    "email": "*****@*****.**",
                    "institution": "Molecular Design & Informatics Department, N.V. Organon"
                    },
                "author": [{
                    "given_name": "Roberta",
                    "family_name": "Bursi",
                    "email": "*****@*****.**",
                    "institution": "Molecular Design & Informatics Department, N.V. Organon"
                    },
                    {
                    "given_name": "Ross",
                    "family_name": "McGuire",
                    "institution": "Molecular Design & Informatics Department, N.V. Organon"
                    },
                    {
                    "given_name": "Jeroen",
                    "family_name": "Kazius",
                    "institution": "Universiteit Leiden"
                    }],
                "collection": "Toxicophores for Mutagenicity Prediction",
                "description": "Mutagenicity is one of the numerous adverse properties of a compound that hampers its potential to become a marketable drug. Toxic properties can often be related to chemical structure, more specifically, to particular substructures, which are generally identified as toxicophores. A number of toxicophores have already been identified in the literature. This study aims at increasing the current degree of reliability and accuracy of mutagenicity predictions by identifying novel toxicophores from the application of new criteria for toxicophore rule derivation and validation to a considerably sized mutagenicity dataset.",
                "year": 2005,
                "links": {
                    "landing_page": "http://pubs.acs.org/doi/full/10.1021/jm040835a",
                    "sdf": {
                        "http_host": "ftp://ftp.ics.uci.edu",
                        "path": "/pub/baldig/learning/Bursi/source/cas_4337.sdf",
                        }
                    },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                    }]
                }
            }
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not parseable as JSON text, so treat the string as a path to a JSON file
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # Each Validator instance can only be used for a single dataset
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is exactly one dictionary, written through the Validator one at a time
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "sdf")
        record_metadata = {
            "mdf": {
                "title": "Bursi Toxicophores - " + record["chemical_formula"],
                "acl": ['public'],
                "composition": record["chemical_formula"],
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/bursi_toxicophores/" + data_file["filename"],
                        },
                    },
                }
            }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported but does not stop the conversion
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #11
0
def process_chembl_db(in_q, out_q, err_counter, killswitch):
    """Worker loop: turn queued SDF file paths into ChEMBL record metadata.

    Arguments:
    in_q: Queue of file paths; must support get(timeout=...) and task_done().
        (presumably a multiprocessing.JoinableQueue - confirm against the caller)
    out_q: Queue receiving one record-metadata dict per parsed file via put().
    err_counter: Shared counter exposing get_lock() and .value; incremented once
        for every file that fails to parse.
    killswitch: Object whose .value stays 0 while the worker should keep running.

    Every path taken from in_q is acknowledged with task_done(), whether the file
    parsed or not.
    """
    while killswitch.value == 0:
        try:
            full_path = in_q.get(timeout=10)
        except Empty:
            # Nothing queued yet; loop around to re-check the killswitch
            continue
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                sdf = parse_ase(full_path, data_format="sdf", verbose=False)
        except Exception:
            with err_counter.get_lock():
                err_counter.value += 1
            # Skip the unparseable file. Without this, `sdf` would be unbound
            # or left over from the previous file when building the record below.
            in_q.task_done()
            continue
        ## Metadata:record
        # Path of the file relative to the dataset root
        end_path = full_path.split("datasets/chembl_db/")[-1]
        record_metadata = {
            "mdf": {
                "title": "ChEMBL db - " + sdf["chemical_formula"],
                "acl": ["public"],
                "composition": sdf["chemical_formula"],
                "links": {
                    "sdf": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/chembl_db/" + end_path,
                    },
                },
            },
        }
        ## End metadata

        out_q.put(record_metadata)
        in_q.task_done()
Exemple #12
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the "pandda_zenobia_fragment" dataset.

    Arguments:
    input_path (str): Location handed to find_files() to locate the pdb files.
    metadata (dict or str): Dataset metadata as a dict, a JSON string, or the path
        of a JSON file. Any falsy value selects the built-in metadata below.
    verbose (bool): Print start/finish messages and show the progress bar.

    Raises:
    ValueError: If the Validator reports that a record could not be written.
    SystemExit: If `metadata` is unreadable or is neither a str nor a dict.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: People entries (e.g. data_contact) may carry extra IDs such as "github".
    if not metadata:
        contact = {
            "given_name": "Frank",
            "family_name": "von Delft",
            "email": "*****@*****.**",
            "institution": "University of Oxford",
        }
        contributors = [{
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }]
        authors = [
            {"given_name": "Nicholas", "family_name": "Pearce", "institution": "University of Oxford"},
            {"given_name": "Anthony", "family_name": "Bradley", "institution": "University of Oxford"},
            {"given_name": "Brian D", "family_name": "Marsden", "institution": "University of Oxford"},
            {"given_name": "Frank", "family_name": "von Delft", "email": "*****@*****.**", "institution": "University of Oxford"},
        ]
        keywords = [
            "PANDDA",
            "Fragment Screening by X-ray Crystallography",
            "Structural Genomics Consortium (SGC)",
            "Diamond Light Source I04-1",
        ]
        dataset_links = {
            "landing_page": "https://doi.org/10.5281/zenodo.48770",
            "publication": ["https://www.nature.com/articles/ncomms15123#ref33"],
        }
        dataset_metadata = {
            "mdf": {
                "title": "PanDDA analysis of JMJD2D screened against Zenobia Fragment Library",
                "acl": ["public"],
                "source_name": "pandda_zenobia_fragment",
                "data_contact": contact,
                "data_contributor": contributors,
                "citation": ["Pearce, N., Bradley, A., Marsden, B. D., & von Delft, F. (2016). PanDDA analysis of JMJD2D screened against Zenobia Fragment Library [Data set]. Zenodo. http://doi.org/10.5281/zenodo.48770"],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by-sa/4.0/",
                "collection": "PanDDA Zenobia Fragment",
                "tags": keywords,
                "description": "De-methylase JMJD2D screened against the Zenobia Fragment Library by X-ray Crystallography.",
                "year": 2016,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not parseable as JSON text, so treat the string as a path to a JSON file
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata
    validator = Validator(dataset_metadata)

    # Values shared by every record's data links
    endpoint = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    host = "https://data.materialsdatafacility.org"

    # Each pdb file becomes exactly one record, written one at a time
    matches = find_files(input_path, "pdb")
    for entry in tqdm(matches, desc="Processing Files", disable=not verbose):
        name = entry["filename"]
        parsed = parse_ase(os.path.join(entry["path"], name), "proteindatabank")
        formula = parsed["chemical_formula"]
        title = "PanDDA Zenobia Fragment Library - " + formula
        folder = entry["no_root_path"]

        links = {
            "pdb": {
                "globus_endpoint": endpoint,
                "http_host": host,
                "path": "/collections/pandda_zenobia_fragment/" + folder + "/" + name,
            },
        }
        # Files with "ligand" in their name also get a link to ligand.cif in the same folder
        if "ligand" in name:
            links["cif"] = {
                "globus_endpoint": endpoint,
                "http_host": host,
                "path": "/collections/pandda_zenobia_fragment/" + folder + "/ligand.cif",
            }

        record_metadata = {
            "mdf": {
                "title": title,
                "acl": ["public"],
                "composition": formula,
                "links": links,
            },
        }

        outcome = validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was rejected: try to discard the partial feedstock, then abort
        cancellation = validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # TODO: Save your converter as [mdf-source_name]_converter.py
    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the "cys_scanning_phoq" dataset.

    Arguments:
    input_path (str): Location handed to find_files() to locate the pdb files.
    metadata (dict or str): Dataset metadata as a dict, a JSON string, or the path
        of a JSON file. Any falsy value selects the built-in metadata below.
    verbose (bool): Print start/finish messages and show the progress bar.

    Raises:
    ValueError: If the Validator reports that a record could not be written.
    SystemExit: If `metadata` is unreadable or is neither a str nor a dict.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: People entries (e.g. data_contact) may carry extra IDs such as "github".
    if not metadata:
        contact = {
            "given_name": "William F",
            "family_name": "DeGrado",
            "email": "*****@*****.**",
            "institution": "University of California, San Francisco",
        }
        contributors = [{
            "given_name": "Evan",
            "family_name": "Pike",
            "email": "*****@*****.**",
            "institution": "The University of Chicago",
            "github": "dep78",
        }]
        authors = [
            {"given_name": "Kathleen S", "family_name": "Molnar", "institution": "University of Pennsylvania"},
            {"given_name": "Massimiliano", "family_name": "Bonomi", "institution": "University of California, San Francisco"},
            {"given_name": "Riccardo", "family_name": "Pellarin", "institution": "University of California, San Francisco"},
            {"given_name": "Graham D", "family_name": "Clinthorne", "institution": "University of Pennsylvania"},
            {"given_name": "Gabriel", "family_name": "Gonzalez", "institution": "University of Pennsylvania"},
            {"given_name": "Shalom D", "family_name": "Goldberg", "institution": "University of Pennsylvania"},
            {"given_name": "Mark", "family_name": "Goulian", "institution": "University of Pennsylvania"},
            {"given_name": "Andrej", "family_name": "Sali", "institution": "University of California, San Francisco"},
            {"given_name": "William F", "family_name": "DeGrado", "email": "*****@*****.**", "institution": "University of California, San Francisco"},
        ]
        keywords = [
            "Integrative Modeling Platform (IMP)",
            "Cysteine crosslinks",
            "Multi-state",
        ]
        dataset_links = {
            "landing_page": "https://doi.org/10.5281/zenodo.46600",
            "publication": ["https://doi.org/10.1016/j.str.2014.04.019"],
            "zip": {
                "http_host": "https://zenodo.org",
                "path": "/record/46600/files/phoq-v1.0.zip",
            },
        }
        dataset_metadata = {
            "mdf": {
                "title": "Cys-Scanning Disulfide Crosslinking and Bayesian Modeling Probe the Transmembrane Signaling Mechanism of the Histidine Kinase, PhoQ",
                "acl": ["public"],
                "source_name": "cys_scanning_phoq",
                "data_contact": contact,
                "data_contributor": contributors,
                "citation": ["Molnar, K. S., Bonomi, M., Pellarin, R., Clinthorne, G. D., Gonzalez, G., Goldberg, S. D., … DeGrado, W. F. (2014). Multi-state modeling of the PhoQ two-component system [Data set]. Structure. Zenodo. http://doi.org/10.5281/zenodo.46600"],
                "author": authors,
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "Cys-Scanning PhoQ",
                "tags": keywords,
                "description": "Bacteria transduce signals across the membrane using two-component systems (TCSs), consisting of a membrane-spanning sensor histidine kinase and a cytoplasmic response regulator. In gram-negative bacteria, the PhoPQ TCS senses cations and antimicrobial peptides, yet little is known about the structural changes involved in transmembrane signaling. We construct a model of PhoQ signal transduction using Bayesian inference, based on disulfide crosslinking data and homologous crystal structures.",
                "year": 2014,
                "links": dataset_links,
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not parseable as JSON text, so treat the string as a path to a JSON file
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata
    validator = Validator(dataset_metadata)

    # Values shared by every record's data link
    endpoint = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    host = "https://data.materialsdatafacility.org"

    # Each accepted pdb file becomes exactly one record, written one at a time
    matches = find_files(input_path, "pdb")
    for entry in tqdm(matches, desc="Processing Files", disable=not verbose):
        folder = entry["no_root_path"]
        # Only files whose relative path contains "data" are converted; the frame
        # files are in pqr format, for which there is currently no file reader
        if "data" not in folder:
            continue

        name = entry["filename"]
        parsed = parse_ase(os.path.join(entry["path"], name), "proteindatabank")
        formula = parsed["chemical_formula"]
        title = "Cys-Scanning PhoQ - " + formula

        record_metadata = {
            "mdf": {
                "title": title,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "pdb": {
                        "globus_endpoint": endpoint,
                        "http_host": host,
                        "path": "/collections/cys_scanning_phoq/" + folder + "/" + name,
                    },
                },
            },
        }

        outcome = validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was rejected: try to discard the partial feedstock, then abort
        cancellation = validator.cancel_validation()
        if not cancellation["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #14
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Silica Water EDL" dataset into feedstock records.

    Every file under ``input_path`` matched by ``find_files`` with the pattern
    ``"record-0.xyz"`` is parsed with ``parse_ase`` and written, one record at
    a time, through a ``Validator`` built from the dataset metadata.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset-level metadata. If falsy, the built-in defaults
            below are used. A string is first tried as inline JSON and then
            as the path of a JSON file. A dict is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it is of an unsupported type.

    Note:
        A record rejected by the Validator is reported with ``print`` and
        processing continues with the next file.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Dynamic behaviour of the silica-water-bio electrical double layer in the presence of a divalent electrolyte",
                "acl": ["public"],
                "source_name": "silica_water_edl",
                "citation": ["Lowe, B.M., Maekawa, Y., Shibuta, Y., Sakata, T., Skylaris, C.-K. and Green, N.G. (2016) Dynamic Behaviour of the Silica-Water-Bio Electrical Double Layer in the Presence of a Divalent Electrolyte. Physical Chemistry Chemical Physics, https://doi.org/10.1039/C6CP04101A"],
                "data_contact": {
                    "given_name": "Benjamin",
                    "family_name": "Lowe",
                    "email": "*****@*****.**",
                    "institution": "University of Southampton",
                },
                "author": [
                    {
                        "given_name": "Benjamin",
                        "family_name": "Lowe",
                        "email": "*****@*****.**",
                        "institution": "University of Southampton",
                    },
                    {
                        "given_name": "Toshiya",
                        "family_name": "Sakata",
                        "email": "*****@*****.**",
                        "institution": "The University of Tokyo",
                    },
                    {
                        "given_name": "Nicolas",
                        "family_name": "Green",
                        "email": "*****@*****.**",
                        "institution": "University of Southampton",
                    },
                    {
                        "given_name": "Yuki",
                        "family_name": "Maekawa",
                        "institution": "The University of Tokyo",
                    },
                    {
                        "given_name": "Yasushi",
                        "family_name": "Shibuta",
                        "institution": "The University of Tokyo",
                    },
                    {
                        "given_name": "Chris",
                        "family_name": "Skylaris",
                        "institution": "University of Southampton",
                    },
                ],
                "license": "http://creativecommons.org/licenses/by/4.0/",
                "collection": "Silica Water EDL",
                "tags": ["BioFET", "BioFETs", "BioFED", "molecular dynamics", "MD"],
                "description": "Explicit-solvent atomistic calculations of this electric field are presented and the structure and dynamics of the interface are investigated in different ionic strengths using molecular dynamics simulations. Novel results from simulation of the addition of DNA molecules and divalent ions are also presented, the latter of particular importance in both physiological solutions and biosensing experiments",
                "year": 2016,
                "links": {
                    "landing_page": "https://eprints.soton.ac.uk/401018/",
                    "publication": ["http://eprints.soton.ac.uk/401017", "http://dx.doi.org/10.1039/C6CP04101A"],
                    "data_doi": "https://eprints.soton.ac.uk/401018/",
                    # "related_id" and a dataset-level data link
                    # (globus_endpoint / http_host / path) are not set.
                },
                # "mrr" is not set.
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            },
        }
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not inline JSON: fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "record-0.xyz"), desc="Processing files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        record_metadata = {
            "mdf": {
                "title": "Silica Water EDL - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/silica_water_edl/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
                # Other optional record-level fields (tags, description, raw,
                # citation, data_contact, author, license, collection, year,
                # mrr, processing, structure) are not set for this dataset.
            },
            # Dataset-specific block, named after the dataset's source_name.
            "silica_water_edl": {
                "number_of_frames": 3000,
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Yeast Mediator Complex" dataset into feedstock records.

    Every file under ``input_path`` matched by ``find_files`` with the pattern
    ``"pdb"`` is parsed with ``parse_ase`` (format ``"proteindatabank"``) and
    written, one record at a time, through a ``Validator`` built from the
    dataset metadata.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset-level metadata. If falsy, the built-in defaults
            below are used. A string is first tried as inline JSON and then
            as the path of a JSON file. A dict is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it is of an unsupported type.
        ValueError: If the Validator rejects a record. Cancellation of the
            validation is attempted before raising.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Molecular architecture of the yeast Mediator complex",
                "acl": ["public"],
                "source_name": "yeast_mediator_complex",
                "data_contact": {
                    "given_name": "Benjamin",
                    "family_name": "Webb",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": ["Robinson, Philip J, Trnka, Michael J, Pellarin, Riccardo, Greenberg, Charles H, Bushnell, David A, Davis, Ralph, … Kornberg, Roger D. (2015). Molecular architecture of the yeast Mediator complex [Data set]. eLife. Zenodo. http://doi.org/10.5281/zenodo.802915"],
                "author": [
                    {
                        "given_name": "Philip J",
                        "family_name": "Robinson",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Michael J",
                        "family_name": "Trnka",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Riccardo",
                        "family_name": "Pellarin",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Charles H",
                        "family_name": "Greenberg",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "David A",
                        "family_name": "Bushnell",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Ralph",
                        "family_name": "Davis",
                        "institution": "Stanford University",
                    },
                    {
                        "given_name": "Alma L",
                        "family_name": "Burlingame",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Andrej",
                        "family_name": "Sali",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Roger D",
                        "family_name": "Kornberg",
                        "institution": "Stanford University",
                    },
                ],
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "Yeast Mediator Complex",
                "tags": ["Integrative Modeling Platform (IMP)", "Chemical crosslinks", "PMI", "X-ray crystallography"],
                "description": "The 21-subunit Mediator complex transduces regulatory information from enhancers to promoters, and performs an essential role in the initiation of transcription in all eukaryotes. This repository contains files used in the 3-D modeling of the entire Mediator complex, using an integrative modeling approach that combines information from chemical cross-linking and mass spectrometry; X-ray crystallography; homology modeling; and cryo-electron microscopy.",
                "year": 2015,
                "links": {
                    "landing_page": "https://zenodo.org/record/802915",
                    "publication": ["https://doi.org/10.7554/eLife.08719", "https://github.com/integrativemodeling/mediator/tree/v1.0.3"],
                    "data_doi": "https://doi.org/10.5281/zenodo.802915",
                    # "related_id" and a dataset-level "data_link"
                    # (globus_endpoint / http_host / path) are not set.
                },
            },
            # Optional "mrr" and "dc" blocks are not set.
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not inline JSON: fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Yeast Mediator Complex - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "pdb": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/yeast_mediator_complex/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
                # Other optional record-level fields (tags, description, raw,
                # citation, data_contact, author, year) are not set.
            },
            # Optional "dc" block is not set.
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #16
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Cytochrome QSAR" dataset into feedstock records.

    Every file under ``input_path`` matched by ``find_files`` with the pattern
    ``"sdf"`` is parsed with ``parse_ase`` (format ``"sdf"``) and written, one
    record at a time, through a ``Validator`` built from the dataset metadata.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset-level metadata. If falsy, the built-in defaults
            below are used. A string is first tried as inline JSON and then
            as the path of a JSON file. A dict is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it is of an unsupported type.
        ValueError: If the Validator rejects a record. Cancellation of the
            validation is attempted before raising.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents",
                "acl": ["public"],
                "source_name": "cytochrome_qsar",
                "data_contact": {
                    "given_name": "Tanaji T.",
                    "family_name": "Talele",
                    "email": "*****@*****.**",
                    "institution": "University of Mumbai",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Kulkarni, Vithal M. (1999/03/22). Three-Dimensional Quantitative Structure−Activity Relationship (QSAR) and Receptor Mapping of Cytochrome P-45014αDM Inhibiting Azole Antifungal Agents. Journal of Chemical Information and Computer Sciences, 39, 204-210. doi: 10.1021/ci9800413",
                ],
                "author": [
                    {
                        "given_name": "Tanaji T.",
                        "family_name": "Talele",
                        "email": "*****@*****.**",
                        "institution": "University of Mumbai",
                    },
                    {
                        "given_name": "Vithal M.",
                        "family_name": "Kulkarni",
                        "institution": "University of Mumbai",
                    },
                ],
                # "license" and "tags" are not set for this dataset.
                "collection": "Cytochrome QSAR",
                "description": "Molecular modeling was performed by a combined use of conformational analysis and 3D-QSAR methods to distinguish structural attributes common to a series of azole antifungal agents. Apex-3D program was used to recognize the common biophoric structural patterns of 13 diverse sets of azole antifungal compounds demonstrating different magnitudes of biological activity. Apex-3D identified three common biophoric features significant for activity:  N1 atom of azole ring, the aromatic ring centroid 1, and aromatic ring centroid 2. A common biophore model proposed from the Apex-3D analysis can be useful for the design of novel cytochrome P-45014αDM inhibiting antifungal agents.",
                "year": 1999,
                "links": {
                    "landing_page": "ftp://ftp.ics.uci.edu/pub/baldig/learning/Cytochrome/",
                    "publication": ["http://pubs.acs.org/doi/full/10.1021/ci9800413"],
                    # "data_doi", "related_id" and a dataset-level "data_link"
                    # (globus_endpoint / http_host / path) are not set.
                },
            },
            # Optional "mrr" and "dc" blocks are not set.
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not inline JSON: fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "sdf"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "sdf")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Cytochrome QSAR - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cytochrome_qsar/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
                # Other optional record-level fields (tags, description, raw,
                # citation, data_contact, author, year) are not set.
            },
            # Optional "dc" block is not set.
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #17
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "GroEL cnmultifit" dataset into feedstock records.

    Every file under ``input_path`` matched by ``find_files`` with the pattern
    ``"pdb$"`` is parsed with ``parse_ase`` (format ``"proteindatabank"``) and
    written, one record at a time, through a ``Validator`` built from the
    dataset metadata.

    Args:
        input_path: Root directory handed to ``find_files``.
        metadata: Dataset-level metadata. If falsy, the built-in defaults
            below are used. A string is first tried as inline JSON and then
            as the path of a JSON file. A dict is used as-is.
        verbose: When true, print begin/end messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` is a string that can be read neither as
            JSON nor as a JSON file, or if it is of an unsupported type.
        ValueError: If the Validator rejects a record. Cancellation of the
            validation is attempted before raising.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit",
                "acl": ["public"],
                "source_name": "cnmultifit_groel",
                "data_contact": {
                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California San Francisco",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Lasker, K., Velázquez-Muriel, J. A., Webb, B. M., Yang, Z., Ferrin, T. E., & Sali, A. (2012). Modeling of the bacterial molecular chaperone GroEL using 3D EM data and cnmultifit [Data set]. Methods Mol Biol. Zenodo. http://doi.org/10.5281/zenodo.46596",
                ],
                "author": [
                    {
                        "given_name": "Keren",
                        "family_name": "Lasker",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Javier A.",
                        "family_name": "Velázquez-Muriel",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Benjamin M.",
                        "family_name": "Webb",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Zheng",
                        "family_name": "Yang",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Thomas E.",
                        "family_name": "Ferrin",
                        "institution": "University of California San Francisco",
                    },
                    {
                        "given_name": "Andrej",
                        "family_name": "Sali",
                        "email": "*****@*****.**",
                        "institution": "University of California San Francisco",
                    },
                ],
                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "GroEL cnmultifit",
                "tags": [
                    "Integrative Modeling Platform (IMP)",
                    "Electron microscopy density map",
                    "MODELLER",
                    "MultiFit",
                ],
                "description": "These scripts demonstrate the use of IMP, MODELLER and Chimera in the modeling of the bacterial molecular chaperone GroEL. First, MODELLER is used to generate structures for the individual components in the GroEL complex. Then, IMP is used to fit these components together into the electron microscopy density map of the entire complex.",
                "year": 2012,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.46596",
                    "publication": [
                        "https://doi.org/10.1007/978-1-61779-588-6_15",
                        "https://github.com/integrativemodeling/multifit_groel/tree/v1.0",
                    ],
                    # "data_doi" and "related_id" are not set.
                    "zip": {
                        # No "globus_endpoint" is set for this link.
                        "http_host": "https://zenodo.org",
                        "path": "/record/46596/files/multifit_groel-v1.0.zip",
                    },
                },
            },
            # Optional "mrr" and "dc" blocks are not set.
        }
        ## End metadata
    elif isinstance(metadata, str):
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Not inline JSON: fall back to treating the string as a file path.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]),
            "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "GroEL cnmultifit - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],
                "links": {
                    "pdb": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cnmultifit_groel/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
                # Other optional record-level fields (tags, description, raw,
                # citation, data_contact, author, year) are not set.
            },
            # Optional "dc" block is not set.
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #18
0
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for "Platinum pyridine cations: the DFT optimized geometries".

    Each file returned by ``find_files(input_path, "xyz")`` is parsed with
    ``parse_ase`` and handed to a ``Validator`` as one record.

    Args:
        input_path: Location passed to ``find_files`` to locate ``xyz`` files.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in defaults below.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
        ValueError: If the Validator does not accept a record.
    """
    if verbose:
        print("Begin converting")

    # Dataset-level metadata. People entries may carry extra IDs (for example
    # "github"); extra blocks may sit beside "mdf" if named after source_name.
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Platinum pyridine cations: the DFT optimized geometries",
                "acl": ["public"],
                "source_name": "pt_pyridine_cations",
                "data_contact": {
                    "given_name": "Alexander",
                    "family_name": "Markov",
                    # NOTE(review): this is not a well-formed e-mail address;
                    # confirm the intended value with the data contact.
                    "email": "sasha-markov.net",
                    "institution": "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": [
                    "Markov, A. (2015). Platinum pyridine cations: the DFT optimized geometries [Data set]. Zenodo. http://doi.org/10.5281/zenodo.31335",
                ],
                "author": [
                    {
                        "given_name": "Alexander",
                        "family_name": "Markov",
                        "email": "sasha-markov.net",
                        "institution": "Kurnakov Institute of General and Inorganic Chemistry of RAS",
                    },
                ],
                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Platinum Pyridine Cations",
                "tags": ["platinum", "dft", "computational chemistry"],
                "description": "The geometries were optimized with the hybrid M06 functional, the mDZP all-electron basis set for platinum atom, and the def2-TZVP basis set for light atoms.",
                "year": 2015,
                "links": {
                    "landing_page": "http://doi.org/10.5281/zenodo.31335",
                },
            },
        }
    elif type(metadata) is dict:
        dataset_metadata = metadata
    elif type(metadata) is str:
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as read_error:
                sys.exit("Error: Unable to read metadata: " + repr(read_error))
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata.
    dataset_validator = Validator(dataset_metadata)

    xyz_files = find_files(input_path, "xyz")
    for data_file in tqdm(xyz_files, desc="Processing files", disable=not verbose):
        source_file = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(source_file, "xyz")
        formula = record["chemical_formula"]

        # Record-level metadata: one dictionary per parsed file.
        record_metadata = {
            "mdf": {
                "title": "Platinum Pyridine Cations - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/pt_pyridine_cations/" + data_file["filename"],
                    },
                },
            },
        }

        outcome = dataset_validator.write_record(record_metadata)
        if outcome["success"]:
            continue

        # The record was not accepted: cancel the validation, then abort.
        if not dataset_validator.cancel_validation()["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the Mg-X-Diffusion dataset.

    Each file returned by ``find_files(input_path, "OUTCAR")`` is parsed with
    ``parse_ase`` and handed to a ``Validator`` as one record. Records the
    Validator does not accept are reported on stdout; processing continues.

    Args:
        input_path: Location passed to ``find_files`` to locate ``OUTCAR`` files.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in defaults below.
        verbose: When true, print start/finish messages, enable the progress
            bar and report how many files failed to parse.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Mg-X-Diffusion",
                "acl": ['public'],
                "source_name": "trinkle_mg_x_diffusion",
                "citation": [
                    "Citation for dataset Mg-X-Diffusion with author(s): Dallas Trinkle, Ravi Agarwal"
                ],
                "data_contact": {
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois at Urbana-Champaign",
                },
                "author": [{
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "email": "*****@*****.**",
                    # Key was misspelled "instituition", unlike every sibling entry.
                    "institution": "University of Illinois at Urbana-Champaign"
                }, {
                    "given_name": "Ravi",
                    "family_name": "Agarwal",
                    "institution": "University of Illinois at Urbana-Champaign"
                }],
                "collection": "Mg-X Diffusion Dataset",
                "year": 2017,
                "links": {
                    "landing_page":
                    "https://data.materialsdatafacility.org/published/#trinkle_mg_x_diffusion",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata.
    dataset_validator = Validator(dataset_metadata)

    # Each parsed file becomes exactly one record written through the Validator.
    total_errors = 0
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]),
                "vasp-out")
        except Exception:
            # Drop any result left over from the previous file so its formula
            # is never attached to this record; the fallback below applies.
            record = None
            total_errors += 1

        record_metadata = {
            "mdf": {
                "title": "Mg-X Diffusion - ",
                "acl": ['public'],
                "links": {
                    "outcar": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/mg-x/" + data_file["no_root_path"] +
                        "/" + data_file["filename"],
                    },
                },
            }
        }

        # NOTE(review): the other converters in this file read
        # record["chemical_formula"] directly; confirm that parse_ase really
        # nests the formula under "mdf" for this dataset.
        try:
            formula = record["mdf"]["chemical_formula"]
            title = record_metadata["mdf"]["title"] + formula
        except Exception:
            # parse_ase was unable to read the composition of record 1386:
            # https://data.materialsdatafacility.org/collections/mg-x/Elements/Eu/Mg-X_Eu/OUTCAR
            # so the known composition of that material is filled in by hand.
            formula = "EuMg149"
            title = record_metadata["mdf"]["title"] + formula
        record_metadata["mdf"]["composition"] = formula
        record_metadata["mdf"]["title"] = title

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported but does not stop the conversion.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Total errors: " + str(total_errors))
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the NaFe(PO3)3 synthesis/structure dataset.

    Each file returned by ``find_files(input_path, "cif")`` is parsed with
    ``parse_ase`` and handed to a ``Validator`` as one record. Records the
    Validator does not accept are reported on stdout; processing continues.

    Args:
        input_path: Location passed to ``find_files`` to locate ``cif`` files.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in defaults below.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    # The people entries previously used the misspelled key "instituition".
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Synthesis and structural data of a Fe-base sodium metaphosphate compound, NaFe(PO3)3",
                "acl": ['public'],
                "source_name": "fe_na_metaphosphate_structure",
                "citation": [
                    "Lin, Xinghao et al. “Synthesis and Structural Data of a Fe-Base Sodium Metaphosphate Compound, NaFe(PO3)3.” Data in Brief 4 (2015): 217–221. PMC. Web. 30 June 2017."
                ],
                "data_contact": {
                    "given_name": "Yanming",
                    "family_name": "Zhao",
                    # The address had been stored character-reversed
                    # ("nc.ude.tucs@myoahz"); it is restored to reading order.
                    "email": "zhaoym@scut.edu.cn",
                    "institution": "South China University of Technology"
                },
                "author": [{
                    "given_name": "Xinghao",
                    "family_name": "Lin",
                    "institution": "South China University of Technology"
                }, {
                    "given_name": "Yanming",
                    "family_name": "Zhao",
                    "email": "zhaoym@scut.edu.cn",
                    "institution": "South China University of Technology"
                }, {
                    "given_name": "Youzhong",
                    "family_name": "Dong",
                    "institution": "South China University of Technology"
                }, {
                    "given_name": "Quan",
                    "family_name": "Kuang",
                    "institution": "South China University of Technology"
                }],
                "license": "http://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Fe-base sodium metaphosphate Synthesis and Structure",
                "description":
                "In this data article, the synthesized process of this metaphosphate compound and the morphology of the obtained sample will be provided. The high-power XRD Rietveld refinement is applied to determine the crystal structure of this metaphosphate compound and the refinement result including the main refinement parameters, atomic coordinate and some important lattace parameters are stored in the cif file.",
                "year": 2015,
                "links": {
                    "landing_page":
                    "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4510540/",
                    "zip": {
                        "http_host": "https://www.ncbi.nlm.nih.gov",
                        "path": "/pmc/articles/PMC4510540/bin/mmc2.zip",
                    }
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata.
    dataset_validator = Validator(dataset_metadata)

    # Each parsed file becomes exactly one record written through the Validator.
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        record_metadata = {
            "mdf": {
                "title":
                "Synthesis and Structure of - " + record["chemical_formula"],
                "acl": ['public'],
                "composition": record["chemical_formula"],
                "links": {
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/fe_na_metaphosphate_structure/" +
                        data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported but does not stop the conversion.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write MDF feedstock for the porous metal-organic framework dataset.

    Each file returned by ``find_files(input_path, "cif")`` is parsed with
    ``parse_ase`` and handed to a ``Validator`` as one record. Records the
    Validator does not accept are reported on stdout; processing continues.

    Args:
        input_path: Location passed to ``find_files`` to locate ``cif`` files.
        metadata: Dataset metadata as a dict, a JSON string, or the path of a
            JSON file. Any falsy value selects the built-in defaults below.
        verbose: When true, print start/finish messages and enable the
            progress bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata.
    # The people entries previously used the misspelled key "instituition".
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Pore Shape Modification of a Microporous Metal–Organic Framework Using High Pressure: Accessing a New Phase with Oversized Guest Molecules",
                "acl": ['public'],
                "source_name": "porous_mof",
                "citation": [
                    "The University of Edinburgh School of Chemistry. (2016). Pore Shape Modification of a Microporous Metal-Organic Frame-work Using High Pressure: Accessing a New Phase with Oversized Guest Molecules, [dataset]. http://dx.doi.org/10.7488/ds/371."
                ],
                "data_contact": {
                    "given_name": "Stephen A.",
                    "family_name": "Moggach",
                    "email": "*****@*****.**",
                    "institution": "University of Edinburgh"
                },
                "author": [{
                    "given_name": "Stephen A.",
                    "family_name": "Moggach",
                    "email": "*****@*****.**",
                    "institution": "University of Edinburgh"
                }, {
                    "given_name": "Scott C.",
                    "family_name": "McKellar",
                    "institution": "University of Edinburgh"
                }, {
                    "given_name": "Jorge",
                    "family_name": "Sotelo",
                    "institution": "University of Edinburgh"
                }, {
                    "given_name": "Alex",
                    "family_name": "Greenaway",
                    "institution": "University of St Andrews"
                }, {
                    "given_name": "John P. S.",
                    "family_name": "Mowat",
                    "institution": "University of St Andrews"
                }, {
                    "given_name": "Odin",
                    "family_name": "Kvam",
                    "institution": "University of Edinburgh"
                }, {
                    "given_name": "Carole A.",
                    "family_name": "Morrison",
                    "institution": "University of Edinburgh"
                }, {
                    "given_name": "Paul A.",
                    "family_name": "Wright",
                    "institution": "University of St Andrews"
                }],
                "license":
                "http://creativecommons.org/licenses/by/4.0/legalcode",
                "collection": "Porous Metal-Organic-Framework",
                "description":
                "Pressures up to 0.8 GPa have been used to squeeze a range of sterically “oversized” C5–C8 alkane guest molecules into the cavities of a small-pore Sc-based metal–organic framework. Guest inclusion causes a pronounced reorientation of the aromatic rings of one-third of the terephthalate linkers, which act as “torsion springs”, resulting in a fully reversible change in the local pore structure. The study demonstrates how pressure-induced guest uptake can be used to investigate framework flexibility relevant to “breathing” behavior and to understand the uptake of guest molecules in MOFs relevant to hydrocarbon separation.",
                "year": 2015,
                "links": {
                    "landing_page":
                    "http://datashare.is.ed.ac.uk/handle/10283/942",
                    "publication":
                    ["http://dx.doi.org/10.1021/acs.chemmater.5b02891"],
                    # Was "http://dx.doi.org//10.7488/ds/371"; the doubled
                    # slash did not match the DOI URL given in the citation.
                    "data_doi": "http://dx.doi.org/10.7488/ds/371",
                    "zip": {
                        "http_host": "http://datashare.is.ed.ac.uk",
                        "path":
                        "/download/10283/942/Pore_Shape_Modification_of_a_Microporous_Metal-Organic_Frame-work_Using_High_Pressure:_Accessing_a_New_Phase_with_Oversized_Guest_Molecules.zip",
                    }
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78"
                }]
            }
        }

    elif isinstance(metadata, str):
        # A string is tried as inline JSON first, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it is constructed from the dataset metadata.
    dataset_validator = Validator(dataset_metadata)

    # Each parsed file becomes exactly one record written through the Validator.
    for data_file in tqdm(find_files(input_path, "cif"),
                          desc="Processing files",
                          disable=not verbose):
        record = parse_ase(
            os.path.join(data_file["path"], data_file["filename"]), "cif")
        record_metadata = {
            "mdf": {
                "title":
                "Metal-Organic-Frame-Work - " + record["chemical_formula"],
                "acl": ['public'],
                "composition": record["chemical_formula"],
                "links": {
                    "cif": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/porous_mof/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },
            }
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # A rejected record is reported but does not stop the conversion.
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
# Example #22
# 0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the cyclopropene benzamidation dataset.

    Arguments:
    input_path -- root handed to find_files(); each file matching ".out$" is
                  parsed with parse_ase() using the "gaussian-out" format.
    metadata -- dataset metadata given as a dict, a JSON string, or the path of
                a JSON file. Any falsy value selects the built-in metadata.
    verbose -- when true, print begin/end messages and enable the progress bar.

    Exits through sys.exit() when the metadata cannot be read or is neither a
    str nor a dict. Raises ValueError when the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Resolve the dataset-level metadata.
    # NOTE: Entries describing people (e.g. data_contact) may carry extra IDs
    #    such as "github"; contributors are encouraged to list one.
    # Further blocks may sit next to "mdf", provided they are named after the
    # dataset's source_name.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation",
                "acl": ["public"],
                "source_name": "cyclopropenes",
                "data_contact": {
                    "given_name": "Robert S.",
                    "family_name": "Paton",
                    "email": "*****@*****.**",
                    "institution": "University of Oxford",
                },
                "data_contributor": [
                    {
                        "given_name": "Evan",
                        "family_name": "Pike",
                        "email": "*****@*****.**",
                        "institution": "The University of Chicago",
                        "github": "dep78",
                    },
                ],
                "citation": [
                    "Paton, R., & Jackson, K. (2016). Heptamethyl Indenyl (Ind*) Enables Diastereoselective Benzamidation of Cyclopropenes via Rh(III)-Catalyzed C-H Activation [Data set]. Zenodo. http://doi.org/10.5281/zenodo.53736",
                ],
                "author": [
                    {
                        "given_name": "Tomislav",
                        "family_name": "Rovis",
                        "institution": "Colorado State University",
                    },
                    {
                        "given_name": "Robert S.",
                        "family_name": "Paton",
                        "email": "*****@*****.**",
                        "institution": "University of Oxford",
                    },
                    {
                        "given_name": "Kelvin E.",
                        "family_name": "Jackson",
                        "institution": "University of Oxford",
                    },
                    {
                        "given_name": "Natthawat",
                        "family_name": "Semakul",
                        "institution": "Colorado State University",
                    },
                ],
                "license": "https://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Diastereoselective Benzamidation of Cyclopropenes",
                "tags": ["DFT", "Gaussian", "Transition State", "Stereoselectivity"],
                "description": "The diastereoselective coupling of O-substituted arylhydroxamates and cyclopropenes mediated by Rh(III) catalysis was successfully developed. Through ligand development, the diastereoselectivity of this reaction was improved using a heptamethylindenyl (Ind*) ligand, which has been rationalized using quantum chemical calculations. In addition, the nature of the O-substituted ester of benzhydroxamic acid proved important for high diastereoselectivity. This transformation tolerates a variety of benzamides and cyclopropenes that furnish cyclopropa[c]dihydroisoquinolones with high diastereocontrol, which could then be easily transformed into synthetically useful building blocks for pharmaceuticals and bio-active molecules.",
                "year": 2016,
                "links": {
                    "landing_page": "https://zenodo.org/record/53736#.WWWmjMaZPFQ",
                    "publication": [
                        "http://pubs.rsc.org/en/content/articlelanding/2016/sc/c6sc02587k#!divAbstract",
                    ],
                },
            },
        }
        ## End metadata
    elif type(metadata) is str:
        # A string is first tried as inline JSON, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as handle:
                    dataset_metadata = json.load(handle)
            except Exception as err:
                sys.exit("Error: Unable to read metadata: " + repr(err))
    elif type(metadata) is dict:
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # One Validator per dataset; it receives the dataset metadata up front and
    # is then fed the records one at a time.
    dataset_validator = Validator(dataset_metadata)

    out_files = find_files(input_path, ".out$")
    for file_info in tqdm(out_files, desc="Processing files", disable=not verbose):
        out_path = os.path.join(file_info["path"], file_info["filename"])
        parsed = parse_ase(out_path, "gaussian-out")
        formula = parsed["chemical_formula"]

        ## Metadata:record
        record_entry = {
            "mdf": {
                "title": "Diastereoselective Benzamidation of Cyclopropenes - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "out": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/cyclopropenes/" + file_info["filename"],
                    },
                },
            },
        }
        ## End metadata

        outcome = dataset_validator.write_record(record_entry)
        if outcome["success"]:
            continue

        # The record was rejected: try to cancel the validation, then abort
        # with the Validator's own message.
        if not dataset_validator.cancel_validation()["success"]:
            print("Error cancelling validation. The partial feedstock may not be removed.")
        raise ValueError(outcome["message"] + "\n" + outcome.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
Exemple #23
0
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the Reiher quantum chemical models dataset.

    Arguments:
    input_path -- root handed to find_files(); each file matching "xyz$" is
                  parsed with parse_ase() using the "xyz" format. Files whose
                  directory path contains "PaxHeaders" are skipped.
    metadata -- dataset metadata given as a dict, a JSON string, or the path of
                a JSON file. Any falsy value selects the built-in metadata.
    verbose -- when true, print begin/end messages and enable the progress bar.

    Exits through sys.exit() when the metadata cannot be read or is neither a
    str nor a dict. Raises ValueError when the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        # Every person listed below shares this affiliation.
        eth_lab = "Laboratory of Physical Chemistry, ETH Zürich"

        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                # The title previously ended in a stray tab character, which
                # leaked into the published metadata.
                "title": "Uncertainty quantification for quantum chemical models of complex reaction networks",
                "acl": ["public"],
                "source_name": "reiher_quantum_chemical_models",
                "data_contact": {
                    "given_name": "Markus",
                    "family_name": "Reiher",
                    "email": "*****@*****.**",
                    "institution": eth_lab,
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": ["(2016). Uncertainty quantification for quantum chemical models of complex reaction networks. , 195, 497-520. 10.1039/C6FD00144K"],
                "author": [
                    {
                        "given_name": "Jonny",
                        "family_name": "Proppe",
                        "institution": eth_lab,
                    },
                    {
                        "given_name": "Tamara",
                        "family_name": "Husch",
                        "institution": eth_lab,
                    },
                    {
                        "given_name": "Gregor N.",
                        "family_name": "Simma",
                        "institution": eth_lab,
                    },
                    {
                        "given_name": "Markus",
                        "family_name": "Reiher",
                        "email": "*****@*****.**",
                        "institution": eth_lab,
                    },
                ],
                "license": "http://creativecommons.org/licenses/by/3.0/",
                "collection": "Reiher Quantum Chemical Models",
                "description": "For the quantitative understanding of complex chemical reaction mechanisms, it is, in general, necessary to accurately determine the corresponding free energy surface and to solve the resulting continuous-time reaction rate equations for a continuous state space. For a general (complex) reaction network, it is computationally hard to fulfill these two requirements. However, it is possible to approximately address these challenges in a physically consistent way. On the one hand, it may be sufficient to consider approximate free energies if a reliable uncertainty measure can be provided. On the other hand, a highly resolved time evolution may not be necessary to still determine quantitative fluxes in a reaction network if one is interested in specific time scales. In this paper, we present discrete-time kinetic simulations in discrete state space taking free energy uncertainties into account.",
                "year": 2016,
                "links": {
                    "landing_page": "http://pubs.rsc.org/en/content/articlelanding/fd/2016/c6fd00144k#!divAbstract",
                    "publication": ["http://pubs.rsc.org/doi/c6fd90075e"],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is first tried as inline JSON, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type match) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "xyz$"), desc="Processing files", disable=not verbose):
        # Skip anything located under a "PaxHeaders" directory.
        if "PaxHeaders" in data_file["path"]:
            continue
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "xyz")
        formula = record["chemical_formula"]

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Reiher Quantum Chemical Models - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "xyz": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/reiher_quantum_chemical_models/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Write feedstock for the "Ohmic Contact to SiC" dataset.

    Arguments:
    input_path -- root handed to find_files(); each file matching
                  "(OUTCAR|cif$)" is parsed with parse_ase().
    metadata -- dataset metadata given as a dict, a JSON string, or the path of
                a JSON file. Any falsy value selects the built-in metadata.
    verbose -- when true, print begin/end messages, the number of files that
               could not be parsed, and enable the progress bar.

    Files that parse_ase() fails on are counted and skipped; no record is
    written for them.

    Exits through sys.exit() when the metadata cannot be read or is neither a
    str nor a dict. Raises ValueError when the Validator rejects a record.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        # Every author of this dataset shares this affiliation.
        ifm = "Department of Physics, Chemistry, and Biology (IFM), Linköping University"

        authors = [
            {"given_name": given, "family_name": family, "institution": ifm}
            for given, family in (
                ("Hossein", "Fashandi"),
                ("Martin", "Dahlqvist"),
                ("Jun", "Lu"),
                ("Justinas", "Palisaitis"),
                ("Sergei I", "Simak"),
                ("Igor A", "Abrikosov"),
                ("Johanna", "Rosen"),
                ("Lars", "Hultman"),
                ("Mike", "Andersson"),
                ("Anita Lloyd", "Spetz"),
            )
        ]
        # The corresponding author is the only entry that carries an email.
        authors.append({
            "given_name": "Per",
            "family_name": "Eklund",
            "email": "*****@*****.**",
            "institution": ifm,
        })

        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title": "Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC",
                "acl": ["public"],
                "source_name": "ohmic_si_c_contacts",
                "data_contact": {
                    "given_name": "Per",
                    "family_name": "Eklund",
                    "email": "*****@*****.**",
                    "institution": ifm,
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "Fashandi, Hossein, Dahlqvist, Martin, Lu, Jun, Palisaitis, Justinas, Simak, Sergei I, Abrikosov, Igor A, … Eklund, Per. (2017). Synthesis of Ti3AuC2, Ti3Au2C2 and Ti3IrC2 by noble-metal substitution reaction in Ti3SiC2 for high-temperature-stable ohmic contacts to SiC [Data set]. Zenodo. http://doi.org/10.5281/zenodo.376969"
                ],
                "author": authors,
                "license": "https://creativecommons.org/licenses/by/4.0/",
                "collection": "Ohmic Contact to SiC",
                "tags": [
                    "electronic structure calculations", "MAX phase", "XRD",
                    "I/V measurement", "spin-orbit coupling",
                    "density of states", "Nanoscale materials",
                    "Structure of solids and liquids",
                    "Surfaces, interfaces and thin films",
                    "Two-dimensional materials"
                ],
                "description": "The large class of layered ceramics encompasses both van der Waals (vdW) and non-vdW solids. While intercalation of noble metals in vdW solids is known, formation of compounds by incorporation of noble-metal layers in non-vdW layered solids is largely unexplored. Here, we show formation of Ti3AuC2 and Ti3Au2C2 phases with up to 31% lattice swelling by a substitutional solid-state reaction of Au into Ti3SiC2 single-crystal thin films with simultaneous out-diffusion of Si. Ti3IrC2 is subsequently produced by a substitution reaction of Ir for Au in Ti3Au2C2. These phases form Ohmic electrical contacts to SiC and remain stable after 1,000 h of ageing at 600 °C in air. The present results, by combined analytical electron microscopy and ab initio calculations, open avenues for processing of noble-metal-containing layered ceramics that have not been synthesized from elemental sources, along with tunable properties such as stable electrical contacts for high-temperature power electronics or gas sensors.",
                "year": 2017,
                "links": {
                    "landing_page": "https://doi.org/10.5281/zenodo.376969",
                    "publication": [
                        "http://www.nature.com/nmat/journal/v16/n8/full/nmat4896.html"
                    ],
                },
            },
        }
        ## End metadata
    elif isinstance(metadata, str):
        # A string is first tried as inline JSON, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # isinstance (rather than an exact type match) also accepts dict
        # subclasses such as OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(OUTCAR|cif$)"),
                          desc="Processing files",
                          disable=not verbose):
        # The text after the last "." selects the parser and also names the
        # link entry; a dot-less name such as "OUTCAR" yields the whole name.
        dtype = data_file["filename"].split(".")[-1]
        ftype = "cif" if dtype == "cif" else "vasp-out"
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), ftype)
        except Exception:
            # Skip files that cannot be parsed. Falling through would either
            # hit an unbound `record` (first file) or publish the previous
            # file's composition under this file's link.
            errors += 1
            continue
        formula = record["chemical_formula"]

        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Ohmic Contact to SiC - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/ohmic_si_c_contacts/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },
            },
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("Errors: " + str(errors))
        print("Finished converting")
Exemple #25
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the gdb9_14 dataset, writing one record per ``.xyz`` file.

    Every file that ``find_files(input_path, "xyz")`` yields is parsed with
    ``parse_ase`` and passed to a ``Validator`` built from the dataset
    metadata. Files that cannot be parsed are counted and skipped.

    Args:
        input_path: Location handed to ``find_files`` to locate ``xyz`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; otherwise a ``dict``, a JSON string, or the path of a
            JSON file is accepted.
        verbose: When true, print progress messages (including the number of
            unparseable files) and enable the progress bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Quantum Chemistry Structures and Properties of 134 kilo Molecules",
                "acl": ['public'],
                "source_name": "gdb9_14",
                "citation": [
                    "Raghunathan Ramakrishnan, Pavlo Dral, Matthias Rupp, O. Anatole von Lilienfeld: Quantum Chemistry Structures and Properties of 134 kilo Molecules, Scientific Data 1: 140022, 2014."
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                # Every author entry uses the "institution" key; the original
                # misspelled it as "instituition" for three of the four people.
                "author": [{
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                }, {
                    "given_name": "Raghunathan",
                    "family_name": "Ramakrishnan",
                    "institution": "University of Basel",
                }, {
                    "given_name": "Pavlo O.",
                    "family_name": "Dral",
                    "institution":
                    "Max-Planck-Institut für Kohlenforschung, University of Erlangen-Nuremberg",
                }, {
                    "given_name": "Matthias",
                    "family_name": "Rupp",
                    "institution": "University of Basel",
                }],
                "license":
                "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "collection": "gdb9_14",
                "tags": [
                    "Computational chemistry",
                    "Density functional theory",
                    "Quantum chemistry",
                ],
                "description":
                "133,885 small organic molecules with up to 9 C, O, N, F atoms, saturated with H. Geometries, harmonic frequencies, dipole moments, polarizabilities, energies, enthalpies, and free energies of atomization at the DFT/B3LYP/6-31G(2df,p) level of theory. For a subset of 6,095 constitutional isomers of C7H10O2, energies, enthalpies, and free energies of atomization are provided at the G4MP2 level of theory.",
                "year": 2014,
                "links": {
                    "landing_page": "http://qmml.org/datasets.html#gdb9-14",
                    "publication": ["http://dx.doi.org/10.1038/sdata.2014.22"],
                    "zip": {
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb9-14.zip",
                    },
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            },
        }
    elif isinstance(metadata, str):
        # A string is first treated as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is exactly one dictionary, written through the Validator one at a time
    errors = 0
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        file_path = os.path.join(data_file["path"], data_file["filename"])
        try:
            record = parse_ase(file_path, "xyz")
        except Exception:
            # Some files cannot be parsed because a float is written in a
            # form such as 6.2198*^-6. Count the failure and move on to the
            # next file; falling through would either hit an undefined
            # ``record`` or reuse the record of the previously parsed file.
            errors += 1
            continue
        comp = record["chemical_formula"]

        # Only files directly under "dsgdb9nsd.xyz" get a numeric index;
        # everything else keeps the empty-string placeholder.
        index = ""
        if data_file["no_root_path"] == "dsgdb9nsd.xyz":
            start = data_file["filename"].find('_')
            # index is between the underscore and ".xyz"
            index = int(data_file["filename"][start + 1:-4])

        record_metadata = {
            "mdf": {
                "title": "gdb9_14 - " + comp,
                "acl": ['public'],
                "composition": comp,
                "links": {
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/gdb9_14/" + data_file["no_root_path"] +
                        "/" + data_file["filename"],
                    },
                },
            },
            "gdb9_14": {
                "index": index,
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("TOTAL ERRORS: " + str(errors))
        print("Finished converting")
# Example #26
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the gdb7_13 dataset, writing one record per ``.xyz`` file.

    Every file that ``find_files(input_path, "xyz")`` yields is parsed with
    ``parse_ase`` and passed to a ``Validator`` built from the dataset
    metadata.

    Args:
        input_path: Location handed to ``find_files`` to locate ``xyz`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; otherwise a ``dict``, a JSON string, or the path of a
            JSON file is accepted.
        verbose: When true, print progress messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Machine learning of molecular electronic properties in chemical compound space",
                "acl": ['public'],
                "source_name": "gdb7_13",
                # The accented names are written as Unicode: the original
                # LaTeX-style \'e and \"u were plain quote escapes in Python
                # and produced "Gr'egoire" and 'M"uller'.
                "citation": [
                    "Grégoire Montavon, Matthias Rupp, Vivekanand Gobre, Alvaro Vazquez-Mayagoitia, Katja Hansen, Alexandre Tkatchenko, Klaus-Robert Müller, O. Anatole von Lilienfeld: Machine learning of molecular electronic properties in chemical compound space, New Journal of Physics, 15(9): 095003, IOP Publishing, 2013.DOI: 10.1088/1367-2630/15/9/095003"
                ],
                "data_contact": {
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                },
                # Every author entry uses the "institution" key; the original
                # misspelled it as "instituition" for most of the people.
                "author": [{
                    "given_name": "O. Anatole",
                    "family_name": "von Lilienfeld",
                    "email": "*****@*****.**",
                    "institution": "Argonne National Laboratory",
                }, {
                    "given_name": "Grégoire",
                    "family_name": "Montavon",
                    "institution": "Technical University of Berlin",
                }, {
                    "given_name": "Matthias",
                    "family_name": "Rupp",
                    "institution":
                    "Institute of Pharmaceutical Sciences, ETH Zurich",
                }, {
                    "given_name": "Vivekanand",
                    "family_name": "Gobre",
                    "institution":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                }, {
                    "given_name": "Alvaro",
                    "family_name": "Vazquez-Mayagoitia",
                    "institution": "Argonne National Laboratory",
                }, {
                    "given_name": "Katja",
                    "family_name": "Hansen",
                    "institution":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft",
                }, {
                    "given_name": "Alexandre",
                    "family_name": "Tkatchenko",
                    "email": "*****@*****.**",
                    "institution":
                    "Fritz-Haber-Institut der Max-Planck-Gesellschaft, Pohang University of Science and Technology",
                }, {
                    "given_name": "Klaus-Robert",
                    "family_name": "Müller",
                    "email": "*****@*****.**",
                    "institution":
                    "Technical University of Berlin, Korea University",
                }],
                "license": "https://creativecommons.org/licenses/by/3.0/",
                "collection": "gdb7_13",
                # "HOMO" had been mangled to "H**O" in the original text.
                "description":
                "7k small organic molecules, in their ground state, 14 combinations of properties and theory levels. 7,211 small organic molecules composed of H, C, N, O, S, Cl, saturated with H, and up to 7 non-H atoms. Molecules relaxed using DFT with PBE functional. Properties are atomization energy (DFT/PBE0), averaged polarizability (DFT/PBE0, SCS), HOMO and LUMO eigenvalues (GW, DFT/PBE0, ZINDO), and, ionization potential, electron affinity, first excitation energy, frequency of maximal absorption (all ZINDO).",
                "year": 2013,
                "links": {
                    "landing_page":
                    "http://qmml.org/datasets.html#gdb7-13",
                    "publication":
                    ["http://dx.doi.org/10.1088/1367-2630/15/9/095003"],
                    "zip": {
                        "http_host": "http://qmml.org",
                        "path": "/Datasets/gdb7-13.zip",
                    },
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            },
        }
    elif isinstance(metadata, str):
        # A string is first treated as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is exactly one dictionary, written through the Validator one at a time
    for data_file in tqdm(find_files(input_path, "xyz"),
                          desc="Processing files",
                          disable=not verbose):
        file_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(file_path, "xyz")
        record_metadata = {
            "mdf": {
                "title": "gdb7_13 " + data_file["filename"],
                "acl": ['public'],
                "composition": record["chemical_formula"],
                "links": {
                    "xyz": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/gdb7_13/gdb7_13_data/" +
                        data_file["filename"],
                    },
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
# Example #27
# 0
def convert(input_path, metadata=None, verbose=False):
    """Convert the trinkle_elastic_fe_bcc dataset, one record per OUTCAR file.

    Every file that ``find_files(input_path, "OUTCAR")`` yields is parsed
    with ``parse_ase`` (format ``"vasp-out"``) and passed to a ``Validator``
    built from the dataset metadata.

    Args:
        input_path: Location handed to ``find_files`` to locate OUTCAR files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; otherwise a ``dict``, a JSON string, or the path of a
            JSON file is accepted.
        verbose: When true, print progress messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title":
                "Ab initio calculations of the lattice parameter and elastic stiffness coefficients of bcc Fe with solutes",
                "acl": ["public"],
                "source_name": "trinkle_elastic_fe_bcc",
                "citation": [
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Comp. Mat. Sci. 126, 503 (2017).",
                    "M. R. Fellinger, L. G. Hector Jr., and D. R. Trinkle, Data in Brief 10, 147 (2017).",
                ],
                "data_contact": {
                    "given_name": "Michael",
                    "family_name": "Fellinger",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois",
                },
                "author": [{
                    "given_name": "Michael",
                    "family_name": "Fellinger",
                    "email": "*****@*****.**",
                    "institution": "University of Illinois",
                }, {
                    "given_name": "Dallas",
                    "family_name": "Trinkle",
                    "institution": "University of Illinois",
                }, {
                    "given_name": "Louis",
                    "family_name": "Hector Jr.",
                    "institution": "General Motors",
                }],
                "license":
                "http://creativecommons.org/publicdomain/zero/1.0/",
                "collection": "Elastic Fe BCC",
                "tags": ["dft"],
                "description":
                "We introduce a solute strain misfit tensor that quantifies how solutes change the lattice parameter.",
                "year": 2017,
                "links": {
                    "landing_page":
                    "https://materialsdata.nist.gov/dspace/xmlui/handle/11256/671",
                    "publication": [
                        "http://dx.doi.org/10.1016/j.commatsci.2016.09.040",
                        "http://dx.doi.org/10.1016/j.dib.2016.11.092",
                    ],
                    "data_doi": "http://hdl.handle.net/11256/671",
                },
                # A list of people, matching the shape the other converters
                # in this file use for "data_contributor" (was a bare dict).
                # NOTE(review): confirm the Validator expects a list here.
                "data_contributor": [{
                    "given_name": "Jonathon",
                    "family_name": "Gaff",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "jgaff",
                }],
            },
        }
    elif isinstance(metadata, str):
        # A string is first treated as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    dataset_validator = Validator(dataset_metadata)

    # Get the data
    for data_file in tqdm(find_files(input_path, "OUTCAR"),
                          desc="Processing files",
                          disable=not verbose):
        file_path = os.path.join(data_file["path"], data_file["filename"])
        data = parse_ase(file_path, "vasp-out")
        formula = data["chemical_formula"]
        record_metadata = {
            "mdf": {
                "title": "Elastic BCC - " + formula,
                "acl": ["public"],
                "composition": formula,
                "links": {
                    "outcar": {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/" + data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the qsar_molecular_diversity dataset, one record per ``.sdf`` file.

    Every file that ``find_files(input_path, "sdf")`` yields is parsed with
    ``parse_ase`` and passed to a ``Validator`` built from the dataset
    metadata.

    Args:
        input_path: Location handed to ``find_files`` to locate ``sdf`` files.
        metadata: Dataset metadata. A falsy value selects the built-in
            defaults; otherwise a ``dict``, a JSON string, or the path of a
            JSON file is accepted.
        verbose: When true, print progress messages and enable the progress
            bar.

    Raises:
        SystemExit: If ``metadata`` cannot be read or has an unsupported type.
    """
    if verbose:
        print("Begin converting")

    # Collect the dataset-level metadata
    if not metadata:
        dataset_metadata = {
            "mdf": {
                "title": "Neighborhood Behavior:  A Useful Concept for Validation of “Molecular Diversity” Descriptors",
                "acl": ['public'],
                "source_name": "qsar_molecular_diversity",
                # NOTE(review): the citation names "Laurence W Weinberger"
                # while the author entry below uses "Laurence E." - confirm
                # which middle initial is correct.
                "citation": ["David E Patterson, Richard D Cramer, Allan M Ferguson, Robert D Clark, Laurence W Weinberger. Neighbourhood Behaviour: A Useful Concept for Validation of \"Molecular Diversity\" Descriptors. J. Med. Chem. 1996 (39) 3049 - 3059."],
                "data_contact": {
                    "given_name": "Richard D.",
                    "family_name": "Cramer",
                    "email": "*****@*****.**",
                },
                "author": [{
                    "given_name": "David E.",
                    "family_name": "Patterson",
                }, {
                    "given_name": "Richard D.",
                    "family_name": "Cramer",
                    "email": "*****@*****.**",
                }, {
                    "given_name": "Allan M.",
                    "family_name": "Ferguson",
                }, {
                    "given_name": "Robert D.",
                    "family_name": "Clark",
                }, {
                    "given_name": "Laurence E.",
                    "family_name": "Weinberger",
                }],
                "collection": "QSAR Molecular Diversity",
                "description": "If a molecular descriptor is to be a valid and useful measure of “similarity” in drug discovery, a plot of differences in its values vs differences in biological activities for a set of related molecules will exhibit a characteristic trapezoidal distribution enhancement, revealing a “neighborhood behavior” for the descriptor. Applying this finding to 20 datasets allows 11 molecular diversity descriptors to be ranked by their validity for compound library design",
                "year": 1996,
                "links": {
                    "landing_page": "ftp://ftp.ics.uci.edu/pub/baldig/learning/Patterson/",
                    "publication": ["http://pubs.acs.org/doi/abs/10.1021/jm960290n"],
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
            },
        }
    elif isinstance(metadata, str):
        # A string is first treated as JSON text, then as a path to a JSON file.
        try:
            dataset_metadata = json.loads(metadata)
        except Exception:
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record is exactly one dictionary, written through the Validator one at a time
    for data_file in tqdm(find_files(input_path, "sdf"), desc="Processing files", disable=not verbose):
        file_path = os.path.join(data_file["path"], data_file["filename"])
        record = parse_ase(file_path, "sdf")
        formula = record["chemical_formula"]
        record_metadata = {
            "mdf": {
                "title": "QSAR Molecular Diversity - " + formula,
                "acl": ['public'],
                "composition": formula,
                "links": {
                    "sdf": {
                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",
                        "path": "/collections/qsar_molecular_diversity/" + data_file["no_root_path"] + "/" + data_file["filename"],
                    },
                },
            },
        }

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and print a message if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if result["success"] is not True:
            print("Error:", result["message"])

    # You're done!
    if verbose:
        print("Finished converting")
def convert(input_path, metadata=None, verbose=False):
    """Convert the FRETR Bayesian restraint benchmark data into feedstock.

    Resolves the dataset-level metadata, builds a ``Validator`` from it, then
    parses each file returned by ``find_files(input_path, "pdb$")`` with
    ``parse_ase`` and writes one record per file through the Validator.

    Args:
        input_path: Root location passed to ``find_files`` to locate the
            data files.
        metadata: Dataset metadata. A falsy value (the default ``None``)
            selects the built-in metadata defined below. A ``str`` is first
            tried as a JSON document and, if that fails, as the path of a
            JSON file. A ``dict`` is used as-is.
        verbose: When true, print begin/finish messages and enable the tqdm
            progress bar.

    Raises:
        SystemExit: Via ``sys.exit`` when ``metadata`` is a string that can
            neither be parsed as JSON nor read as a JSON file, or when it is
            of an unsupported type.
        ValueError: When the Validator reports a record as unsuccessful.
            ``cancel_validation()`` is called before raising.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {

                "title": "Benchmark of the FRETR Bayesian restraint",
                "acl": ["public"],
                "source_name": "fretr_bayesian_restraint",

                "data_contact": {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                },

                "data_contributor": [{

                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",

                }],

                "citation": ["Bonomi, M., Pellarin, R., Kim, S. J., Russel, D., Sundin, B. A., Riffle, M., … Sali, A. (2014). Benchmark of the FRETR Bayesian restraint [Data set]. Mol Cell Proteomics. Zenodo. http://doi.org/10.5281/zenodo.46558"],

                "author": [{

                    "given_name": "Massimiliano",
                    "family_name": "Bonomi",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco, University of Cambridge",

                },
                {

                    "given_name": "Riccardo",
                    "family_name": "Pellarin",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Seung Joong",
                    "family_name": "Kim",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Russel",
                    "institution": "University of California, San Francisco",

                },
                {

                    "given_name": "Bryan A.",
                    "family_name": "Sundin",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Michael",
                    "family_name": "Riffle",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Daniel",
                    "family_name": "Jaschob",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Richard",
                    "family_name": "Ramsden",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Trisha N.",
                    "family_name": "Davis",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Eric G. D.",
                    "family_name": "Muller",
                    "email": "*****@*****.**",
                    "institution": "University of Washington",

                },
                {

                    "given_name": "Andrej",
                    "family_name": "Sali",
                    "email": "*****@*****.**",
                    "institution": "University of California, San Francisco",

                }],

                "license": "http://www.opensource.org/licenses/LGPL-2.1",
                "collection": "FRETR Bayesian Restraint",
                "tags": ["Integrative Modeling Platform (IMP)", "Benchmark", "Förster resonance energy transfer (FRET)"],
                "description": "The use of in vivo Förster resonance energy transfer (FRET) data to determine the molecular architecture of a protein complex in living cells is challenging due to data sparseness, sample heterogeneity, signal contributions from multiple donors and acceptors, unequal fluorophore brightness, photobleaching, flexibility of the linker connecting the fluorophore to the tagged protein, and spectral cross-talk. We addressed these challenges by using a Bayesian approach that produces the posterior probability of a model, given the input data. The posterior probability is defined as a function of the dependence of our FRET metric FRETR on a structure (forward model), a model of noise in the data, as well as prior information about the structure, relative populations of distinct states in the sample, forward model parameters, and data noise.",
                "year": 2014,

                "links": {

                    "landing_page": "https://zenodo.org/record/46558",
                    "publication": ["https://doi.org/10.1074/mcp.M114.040824", "https://github.com/integrativemodeling/fret_benchmark/tree/v1.0"],
                    "data_doi": "https://doi.org/10.5281/zenodo.46558",
                    #"related_id": "",

                    "zip": {

                        #"globus_endpoint": ,
                        "http_host": "https://zenodo.org",

                        "path": "/record/46558/files/fret_benchmark-v1.0.zip",

                    },

                },

            },

            #"mrr": {

            #},

            #"dc": {

            #},


        }
        ## End metadata
    elif isinstance(metadata, str):
        # isinstance (rather than an exact type() comparison) so that str
        # subclasses are accepted too.
        try:
            # First interpretation: the string itself is a JSON document.
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Second interpretation: the string is the path of a JSON file.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # Also accepts dict subclasses such as collections.OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")



    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)


    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    for data_file in tqdm(find_files(input_path, "pdb$"), desc="Processing Files", disable=not verbose):
        record = parse_ase(os.path.join(data_file["path"], data_file["filename"]), "proteindatabank")
        ## Metadata:record
        record_metadata = {
            "mdf": {

                "title": "FRETR Bayesian Restraint - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,

                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,

                    "pdb": {

                        "globus_endpoint": "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host": "https://data.materialsdatafacility.org",

                        "path": "/collections/fretr_bayesian_restraint/" + data_file["no_root_path"] + "/" + data_file["filename"],

                    },

                },

                #"citation": ,

                #"data_contact": {

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #},

                #"author": [{

                    #"given_name": ,
                    #"family_name": ,
                    #"email": ,
                    #"institution": ,

                #}],

                #"year": ,

            },

            #"dc": {

            #},


        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print("Error cancelling validation. The partial feedstock may not be removed.")
            raise ValueError(result["message"] + "\n" + result.get("details", ""))


    # You're done!
    if verbose:
        print("Finished converting")
Exemple #30
0
def convert(input_path, metadata=None, verbose=False):
    """Convert the "Benzonitrile on Si(001)" dataset into feedstock.

    Resolves the dataset-level metadata, builds a ``Validator`` from it, then
    parses each file returned by ``find_files(input_path, "(pdb$|qe$)")`` with
    ``parse_ase`` and writes one record per successfully parsed file through
    the Validator. Files that ``parse_ase`` cannot read are counted and
    skipped.

    Args:
        input_path: Root location passed to ``find_files`` to locate the
            data files.
        metadata: Dataset metadata. A falsy value (the default ``None``)
            selects the built-in metadata defined below. A ``str`` is first
            tried as a JSON document and, if that fails, as the path of a
            JSON file. A ``dict`` is used as-is.
        verbose: When true, print begin/finish messages and the number of
            parse errors, and enable the tqdm progress bar.

    Raises:
        SystemExit: Via ``sys.exit`` when ``metadata`` is a string that can
            neither be parsed as JSON nor read as a JSON file, or when it is
            of an unsupported type.
        ValueError: When the Validator reports a record as unsuccessful.
            ``cancel_validation()`` is called before raising.
    """
    if verbose:
        print("Begin converting")

    # Collect the metadata
    # NOTE: For fields that represent people (e.g. mdf-data_contact), other IDs can be added (ex. "github": "jgaff").
    #    It is recommended that all people listed in mdf-data_contributor have a github username listed.
    #
    # If there are other useful fields not covered here, another block (dictionary at the same level as "mdf") can be created for those fields.
    # The block must be called the same thing as the source_name for the dataset.
    if not metadata:
        ## Metadata:dataset
        dataset_metadata = {
            "mdf": {
                "title":
                "Benzonitrile on Si(001). XPS, NEXAFS, and STM data. Accepted for publication in PCCP Sept. 2016",
                "acl": ["public"],
                "source_name":
                "benzonitrile_si",
                "data_contact": {
                    "given_name": "Steven",
                    "family_name": "Schofield",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                },
                "data_contributor": [{
                    "given_name": "Evan",
                    "family_name": "Pike",
                    "email": "*****@*****.**",
                    "institution": "The University of Chicago",
                    "github": "dep78",
                }],
                "citation": [
                    "O'Donnell, Kane, Hedgeland, Holly, Moore, Gareth (2016) Benzonitrile on Si(001). XPS, NEXAFS, and STM data. Accepted for publication in PCCP Sept. 2016."
                ],
                "author": [{
                    "given_name": "Kane",
                    "family_name": "O'Donnell",
                    "institution": "Curtin University",
                }, {
                    "given_name": "Holly",
                    "family_name": "Hedgeland",
                    "institution": "The Open University",
                }, {
                    "given_name": "Gareth",
                    "family_name": "Moore",
                    "institution": "University College London",
                }, {
                    "given_name": "Asif",
                    "family_name": "Suleman",
                    "institution": "University College London",
                }, {
                    "given_name": "Manuel",
                    "family_name": "Siegl",
                    "institution": "University College London",
                }, {
                    "given_name": "Lars",
                    "family_name": "Thomsen",
                    "institution": "The Australian Synchrotron",
                }, {
                    "given_name": "Oliver",
                    "family_name": "Warschkow",
                    "institution": "The University of Sydney",
                }, {
                    "given_name": "Steven",
                    "family_name": "Schofield",
                    "email": "*****@*****.**",
                    "institution": "University College London",
                }],
                "license":
                "https://creativecommons.org/licenses/by/4.0/",
                "collection":
                "Benzonitrile on Si",
                "tags":
                ["benzonitrile", "Si(001)", "adsorption", "XPS", "NEXAFS"],
                "description":
                "This data set contains original XPS and NEXAFS data collected at the Australian Synchrotron.  The data are the results of experiments investigating benzonitrile adsorption to the Si(001) surface.  The results were written up and have been accepted for publication in Physical Chemistry Chemical Physics in Sept. 2016. The publication date is not yet known.",
                "year":
                2016,
                "links": {
                    "landing_page":
                    "https://doi.org/10.5281/zenodo.154112",
                    "publication": [
                        "http://pubs.rsc.org/en/content/articlepdf/2016/CP/C6CP04328C"
                    ],
                    #"data_doi": "",
                    #"related_id": "",

                    #"data_link": {

                    #"globus_endpoint": ,
                    #"http_host": "",

                    #"path": "",

                    #},
                },
            },

            #"mrr": {

            #},

            #"dc": {

            #},
        }
        ## End metadata
    elif isinstance(metadata, str):
        # isinstance (rather than an exact type() comparison) so that str
        # subclasses are accepted too.
        try:
            # First interpretation: the string itself is a JSON document.
            dataset_metadata = json.loads(metadata)
        except Exception:
            # Second interpretation: the string is the path of a JSON file.
            try:
                with open(metadata, 'r') as metadata_file:
                    dataset_metadata = json.load(metadata_file)
            except Exception as e:
                sys.exit("Error: Unable to read metadata: " + repr(e))
    elif isinstance(metadata, dict):
        # Also accepts dict subclasses such as collections.OrderedDict.
        dataset_metadata = metadata
    else:
        sys.exit("Error: Invalid metadata parameter")

    # Make a Validator to help write the feedstock
    # You must pass the metadata to the constructor
    # Each Validator instance can only be used for a single dataset
    # If the metadata is incorrect, the constructor will throw an exception and the program will exit
    dataset_validator = Validator(dataset_metadata)

    # Get the data
    #    Each record should be exactly one dictionary
    #    You must write your records using the Validator one at a time
    #    It is recommended that you use a parser to help with this process if one is available for your datatype
    #    Each record also needs its own metadata
    errors = 0
    for data_file in tqdm(find_files(input_path, "(pdb$|qe$)"),
                          desc="Processing Files",
                          disable=not verbose):
        # The filename extension selects the parser format and is also used
        # as the key of this file's entry under "links".
        dtype = data_file["filename"].split(".")[-1]
        if dtype == "pdb":
            ftype = "proteindatabank"
        else:
            ftype = "espresso-in"
        try:
            record = parse_ase(
                os.path.join(data_file["path"], data_file["filename"]), ftype)
        except Exception:
            # Best-effort parsing: count the failure and move on to the next
            # file. Without the `continue`, `record` would be unbound on a
            # first-iteration failure, or would still hold the previous
            # file's data and be written again under this file's link.
            errors += 1
            continue
        ## Metadata:record
        record_metadata = {
            "mdf": {
                "title": "Benzonitrile on Si - " + record["chemical_formula"],
                "acl": ["public"],
                "composition": record["chemical_formula"],

                #"tags": ,
                #"description": ,
                #"raw": ,
                "links": {

                    #"landing_page": ,
                    #"publication": ,
                    #"data_doi": ,
                    #"related_id": ,
                    dtype: {
                        "globus_endpoint":
                        "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec",
                        "http_host":
                        "https://data.materialsdatafacility.org",
                        "path":
                        "/collections/benzonitrile_si/" +
                        data_file["no_root_path"] + "/" +
                        data_file["filename"],
                    },
                },

                #"citation": ,

                #"data_contact": {

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #},

                #"author": [{

                #"given_name": ,
                #"family_name": ,
                #"email": ,
                #"institution": ,

                #}],

                #"year": ,
            },

            #"dc": {

            #},
        }
        ## End metadata

        # Pass each individual record to the Validator
        result = dataset_validator.write_record(record_metadata)

        # Check if the Validator accepted the record, and stop processing if it didn't
        # If the Validator returns "success" == True, the record was written successfully
        if not result["success"]:
            if not dataset_validator.cancel_validation()["success"]:
                print(
                    "Error cancelling validation. The partial feedstock may not be removed."
                )
            raise ValueError(result["message"] + "\n" +
                             result.get("details", ""))

    # You're done!
    if verbose:
        print("ERRORS: " + str(errors))
        print("Finished converting")