Example #1
0
 def csConsistencyCheck(self):
     """Perform a consistency check of this record against ChemSpider.

     Records flagged as custom are not checked. For every other record the
     compound is fetched by ``self.CSID``; an empty name, InChI, SMILES string
     or formula is filled in from the ChemSpider compound, and a non-empty
     value that disagrees with ChemSpider is collected as an error.

     Raises:
         ValidationError: with code ``no_csid`` when the CSID is ``None`` or
             empty, or wrapping the list of every inconsistency found.
     """
     if self.custom:
         return

     cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
     # Compare by value: ``is ''`` tests object identity, which is not a
     # reliable way to detect an empty string.
     if self.CSID in (None, ''):
         raise ValidationError('No CSID set', 'no_csid')

     errorList = []
     csCompound = cs.get_compound(self.CSID)

     if self.name not in ('', None):
         # The fetched compound must be among the search hits for the name.
         nameResults = cs.simple_search(self.name)
         if csCompound not in nameResults:
             # Previously reported with the copy-pasted code 'invalid_inchi'.
             errorList.append(ValidationError(
                 'A compound was consistency checked and was found to have an invalid name', code='invalid_name'))
     else:
         self.name = csCompound.common_name

     if self.INCHI == '':
         self.INCHI = csCompound.stdinchi
     elif self.INCHI != csCompound.stdinchi:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid InChi', code='invalid_inchi'))

     if self.smiles == '':
         self.smiles = csCompound.smiles
     elif self.smiles != csCompound.smiles:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid smiles string', code='invalid_smiles'))

     if self.formula == '':
         self.formula = csCompound.molecular_formula
     elif self.formula != csCompound.molecular_formula:
         # Was ``errorsList`` (undefined name), which raised NameError
         # instead of reporting the formula mismatch.
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid formula', code="invalid_formula"))

     # Report every inconsistency at once rather than stopping at the first.
     if errorList:
         raise ValidationError(errorList)
Example #2
0
    def __init__(self):
        """
        Set up the empty data holders, the character translator and the
        ChemSpider client, then print a confirmation message.
        """
        # Data holders start out empty; nothing in this constructor fills them.
        self.reactions_dataframe = None  # reaction dataframe
        self.species_df = None  # reactant dataframe
        self.unique_species_dict = None  # unique reactants dictionary

        # Translation table for cleaning unfamiliar characters out of
        # individual reactants. Arguments to maketrans are:
        # (intab, outtab, characters that should be mapped to None)
        self.translator = str.maketrans("Î", "α", "±€™")

        # Authenticate against the ChemSpider API using the token.
        # NOTE(review): the token is hard-coded in source.
        self.security_token = "99c9f388-12be-4b22-8f83-00b6f1e2d7d0"  # Maneet's token
        self.cs = ChemSpider(
            self.security_token,
            user_agent="StudentResearcher, ChemSpiPy 1.0.5, Python 3.6",
        )

        print('--Populator Initialized--')
Example #3
0
 def get_image_url(self):
     """Return an image URL for this document, or None when there is none.

     If the decoded metadata already has a ``csid`` entry, the URL is built
     from ``self.csid``. Otherwise, if it has an ``InChIKey``/``inchikey``
     entry, ChemSpider is searched with that key; on a hit the first result's
     csid is written back into the metadata, the object is saved and the
     result's image URL is returned.
     """
     metadata = jsonpickle.decode(self.metadata)

     # A stored csid is enough to build the URL directly.
     if 'csid' in metadata:
         return 'http://www.chemspider.com/ImagesHandler.ashx?id=' + str(
             self.csid)

     # Neither spelling of the InChIKey present: no image.
     if 'InChIKey' not in metadata and 'inchikey' not in metadata:
         return None

     # Original author's note: this lookup doesn't work any more due to an
     # upgrade in ChemSpider; if you want images, get the mol.
     from chemspipy import ChemSpider
     client = ChemSpider(settings.CHEMSPIDER_APIKEY)
     inchikey = metadata.get('InChIKey', metadata.get('inchikey'))
     hits = client.search(inchikey)
     if not hits:
         return None

     # Remember the csid of the first hit before handing back its image URL.
     first_hit = hits[0]
     metadata['csid'] = first_hit.csid
     self.metadata = jsonpickle.encode(metadata)
     self.save()
     return first_hit.image_url
Example #4
0
def find_matches(matched_in_ChemSpider, massFile_Name):
    """Keep, for each product, the ChemSpider compounds whose mol file matches.

    :param matched_in_ChemSpider: mapping of product key -> iterable of
        ChemSpider IDs (candidates with almost the same mass).
    :param massFile_Name: passed to ``read_product_molFile`` together with
        the product key to load the product's mol file.
    :return: dict of product key -> list of matching CSIDs; products with
        no matching compound are left out.
    """
    from chemspipy import ChemSpider
    cs = ChemSpider('dfdc677d-e7d3-435b-a74e-bfe6167a3899')

    # Accumulates results for ALL products. This used to be re-created inside
    # the loop, so only the last product's matches survived to the return.
    matches = {}
    for i in matched_in_ChemSpider.keys():
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(i)
        # load mol file info of the product
        product_molFile = read_product_molFile(massFile_Name, i)
        # keep each candidate whose 2D mol file equals the product's
        matched_compounds = [
            CSID for CSID in matched_in_ChemSpider[i]
            if compare_two_molFiles(product_molFile,
                                    cs.get_compound(CSID).mol_2d)
        ]
        # record the product only if at least one compound matched
        if matched_compounds:
            matches[i] = matched_compounds
    return matches
Example #5
0
 def set_and_initialize_token(self, input_token):
     """
     Store the ChemSpider security token on this object and create the
     ChemSpider API client from it.

     :param input_token: your security token (for ChemSpider)
     :return: None
     """
     self.security_token = input_token
     # The client is built from the attribute that was just stored.
     client = ChemSpider(self.security_token)
     self.cs = client
Example #6
0
 def __init__(self, user, *args, **kwargs):
     """Build the form and restrict the lab group choices to the user's groups.

     ``user`` is consumed here; all other arguments are forwarded to the
     parent constructor. When the user belongs to at least one lab group,
     the empty choice is removed from the ``labGroups`` field.
     """
     super(CompoundForm, self).__init__(*args, **kwargs)
     self.compound = None
     self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)

     lab_group_field = self.fields['labGroups']
     lab_group_field.queryset = user.labgroup_set.all()
     has_lab_groups = user.labgroup_set.all().exists()
     if has_lab_groups:
         lab_group_field.empty_label = None
Example #7
0
 def clean_name(self):
     """Validate that the submitted name is a ChemSpider hit for this compound.

     Searches ChemSpider for the cleaned ``name`` and returns it when the
     instance's CSID is among the csids of the results; raises
     ValidationError otherwise.
     """
     client = ChemSpider(settings.CHEMSPIDER_TOKEN)
     hits = client.simple_search(self.cleaned_data['name'])
     matching_csids = (hit.csid for hit in hits)
     if self.instance.CSID in matching_csids:
         return self.cleaned_data['name']
     raise ValidationError(
         "That name is not a known synonym for this compound")
Example #8
0
def search_by_mass(mass, margine):
    """Return the ChemSpider IDs of all results of a search by mass.

    ``mass`` and ``margine`` are handed unchanged to
    ``ChemSpider.simple_search_by_mass``.
    """
    # requires: pip install chemspipy
    from chemspipy import ChemSpider
    # a registered security token is needed to query the database
    client = ChemSpider('dfdc677d-e7d3-435b-a74e-bfe6167a3899')
    return [hit.csid for hit in client.simple_search_by_mass(mass, margine)]
Example #9
0
def find_common_name(inchikey, formula):
    """Return the ChemSpider common name for ``inchikey``, else ``formula``.

    The formula is returned when no ``chemspikey`` is configured, when the
    InChIKey is empty, or when the search does not give exactly one result.
    """
    # Without an API key or an InChIKey there is nothing to search for.
    if not chemspikey or len(inchikey) == 0:
        return formula

    hits = ChemSpider(chemspikey).search(inchikey)
    # Only a single, unambiguous hit is trusted for the name.
    if len(hits) == 1:
        return hits[0].common_name
    return formula
Example #10
0
def find_common_name(inchikey):
    """Return the ChemSpider common name for ``inchikey``, else None.

    None is returned when no ``chemspikey`` is configured, when the InChIKey
    is empty, or when the search does not give exactly one result.
    """
    # Without an API key or an InChIKey there is nothing to search for.
    if not chemspikey or len(inchikey) == 0:
        return None

    hits = ChemSpider(chemspikey).search(inchikey)
    # Only a single, unambiguous hit is trusted for the name.
    if len(hits) == 1:
        return hits[0].common_name
    return None
Example #11
0
	def structure_url(self):
		"""Return an image URL for the structure found by searching ``self.name``.

		A fixed placeholder image URL is returned when the settings have no
		``CHEMSPIDER_KEY``; an empty string is returned when the search has
		no first result.
		"""
		from chemspipy import ChemSpider
		try:
			cs_key = settings.CHEMSPIDER_KEY
		except AttributeError:
			# No API key configured: fall back to the placeholder image.
			return 'http://discovermagazine.com/~/media/Images/Zen%20Photo/N/nanoputian/3487.gif'

		hits = ChemSpider(cs_key).simple_search(self.name)
		try:
			return hits[0].image_url
		except IndexError:
			# The search produced nothing to index into.
			return ""
Example #12
0
def database_setup():
    """
    Download 2D & 3D molecule structure
    from ChemSpider server to create a database

    For every id returned by ``get_id()`` two files are written into the
    ``DATABASE`` directory: ``<id>_2d.txt`` and ``<id>_3d.txt``. The function
    aborts (via SystemExit) when the directory already exists.

    Files are addressed with ``os.path.join`` instead of changing the working
    directory, so the caller's cwd is left untouched even when a download or
    write fails part-way through.
    """

    from chemspipy import ChemSpider

    # compile id list for calling molecules
    id_list = get_id()

    directory = DATABASE
    # make directory database_chemspider/ if needed
    if os.path.isdir(directory):
        print('Database folder already existed! Aborting... \n '
              'Please remove the folder and rerun')
        # Same effect as the site-provided exit(), without depending on it.
        raise SystemExit()
    os.mkdir(directory)

    print('downloading..')

    # access API key
    cs = ChemSpider('text')

    # go through each id
    for id_chemspider in id_list:
        path_2d = os.path.join(directory, str(id_chemspider) + '_2d.txt')
        path_3d = os.path.join(directory, str(id_chemspider) + '_3d.txt')

        if os.path.exists(path_2d):
            # pass if id already exist
            print('ID ' + str(id_chemspider) + ' already existed')
            continue

        # access molecule data; read both structures before creating any
        # file so a failed fetch does not leave a half-written entry behind
        c = cs.get_compound(id_chemspider)
        mol_2d = c.mol_2d
        mol_3d = c.mol_3d

        # write 2d coord and bond data
        with open(path_2d, 'w') as f:
            f.write(mol_2d)

        # write 3d coord and bond data
        with open(path_3d, 'w') as f:
            f.write(mol_3d)
Example #13
0
def get_chemspider_structure(csid):
    """
    Fetch a compound from ChemSpider by its id, convert its 3D mol data to
    PDB format with hydrogens added, write it to ``<csid>.pdb`` and return
    that file name.
    """
    token = 'a03b1636-afc3-4204-9a2c-ede27680577c'  # XXX
    compound = ChemSpider(token).get_compound(csid)

    # mol -> pdb conversion
    converter = ob.OBConversion()
    converter.SetInAndOutFormats('mol', 'pdb')
    molecule = ob.OBMol()
    converter.ReadString(molecule, compound.mol_3d)
    molecule.AddHydrogens()

    out_path = '{}.pdb'.format(csid)
    with open(out_path, 'w') as handle:
        handle.write(converter.WriteString(molecule))
    return out_path
Example #14
0
 def get_image_url(self):
     """Return an image URL for this document, or None when there is none.

     If the decoded metadata already has a ``csid`` entry, the URL is built
     from ``self.csid``. Otherwise, if it has an ``InChIKey`` entry,
     ChemSpider is searched with that key; on a hit the first result's csid
     is written back into the metadata, the object is saved and the result's
     image URL is returned.
     """
     metadata = jsonpickle.decode(self.metadata)

     # A stored csid is enough to build the URL directly.
     if 'csid' in metadata:
         return 'http://www.chemspider.com/ImagesHandler.ashx?id=' + str(self.csid)

     # No InChIKey either: no image.
     if 'InChIKey' not in metadata:
         return None

     from chemspipy import ChemSpider
     client = ChemSpider('b07b7eb2-0ba7-40db-abc3-2a77a7544a3d')
     hits = client.search(metadata['InChIKey'])
     if not hits:
         return None

     # Remember the csid of the first hit before handing back its image URL.
     first_hit = hits[0]
     metadata['csid'] = first_hit.csid
     self.metadata = jsonpickle.encode(metadata)
     self.save()
     return first_hit.image_url
Example #15
0
def smiles2cas(smiles_input):
    """Look up registry-number links for a SMILES string via ChemSpider.

    The SMILES string is searched through the ChemSpider API; the csid of the
    last search result is then looked up on the ChemSpider search page, and
    the ``href`` of every ``div.syn a[title="RN"]`` link is scanned for
    double-quoted substrings.

    :param smiles_input: query passed to ``ChemSpider.search``.
    :return: a list with one entry per matching link, each entry being the
        list of quoted substrings found in that link's ``href``. An empty
        list is returned when the search has no results.
    """
    myToken = 'a1d50aa3-6729-49df-a3e1-cd66240fab22'
    cs = ChemSpider(security_token=myToken)

    # Keep the LAST result, as before. Previously an empty result set left
    # the variable unbound and the function crashed with UnboundLocalError.
    last_hit = None
    for last_hit in cs.search(smiles_input):
        pass
    if last_hit is None:
        return []
    res = str(last_hit.csid)

    # The session is now closed when done instead of being left open.
    with requests.session() as http:
        url = 'http://www.chemspider.com/MassSpecApi.asmx/GetExtendedCompoundInfoArray'
        params = {'token': myToken}
        http.post(url, data=params)

        url_search = 'http://www.chemspider.com/Search.aspx?q=' + res
        r = http.get(url_search)

    soup = bs4.BeautifulSoup(r.text, "html.parser")
    hrefs = [a.attrs.get('href') for a in soup.select('div.syn a[title="RN"]')]

    # Pull the double-quoted pieces out of each href.
    cas = [re.findall(r"\"(.+?)\"", href) for href in hrefs]

    return (cas)
Example #16
0
from nose.tools import eq_, ok_, raises
import requests
import six

from chemspipy import ChemSpider, MOL2D, MOL3D, BOTH
from chemspipy.errors import ChemSpiPyAuthError, ChemSpiPyServerError


# Root logger reports warnings and above; the chemspipy logger is lowered to
# DEBUG.
logging.basicConfig(level=logging.WARN)
logging.getLogger('chemspipy').setLevel(logging.DEBUG)

# Security token is retrieved from environment variables. A missing variable
# raises KeyError as soon as this module is imported.
CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN']

# Chemspider instances with and without a security token
cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN)
cs2 = ChemSpider()


def test_no_security_token():
    """A ChemSpider built without arguments reports a security token of None."""
    token = cs2.security_token
    eq_(token, None)


def test_security_token():
    """A ChemSpider built with a token exposes that same token."""
    token = cs.security_token
    eq_(token, CHEMSPIDER_SECURITY_TOKEN)


def test_chemspider_repr():
    """Test ChemSpider object repr."""
Example #17
0
 def __init__(self):
     """Read the ChemSpider key and API URL from the settings and build the client."""
     config = SettingsConstants()
     self.key = config.get('CHEMSPI_KEY')
     self.url = config.get('CHEMSPI_API_URL')
     # The client is created from the two values stored just above.
     self.cs = ChemSpider(self.key, api_url=self.url)
Example #18
0
    if not args.from_db:
        from chemspipy import ChemSpider

    if not args.export_db_only:
        import pandas as pd

    if args.from_db or args.export_db_only or args.export_db_csv:
        import shelve

    ## ==================== set up chemspider ====================

    if not args.from_db:
        possiblefile = os.path.expanduser(args.token)
        if os.path.exists(possiblefile):  # is file
            with open(possiblefile) as f:
                csp = ChemSpider(f.read().strip())
        else:
            csp = ChemSpider(args.token)  # else is token
    else:
        csp = None

    spq = spiderquery(csp, args.prefix + '_p')

    ## ==================== list of compounds ====================

    if args.inputfile:
        with open(args.inputfile) as csvfile:
            f = csv.reader(csvfile)
            compounds = []
            j = 0
            for i, row in enumerate(f):
Example #19
0
        description="Script to obtain SMILES for a solutes in a list")
    argparser.add_argument('-db', '--db', help="the molecule database")
    argparser.add_argument('-solvent',
                           '--solvent',
                           help="the solvent",
                           default="water")
    argparser.add_argument('-solutes', '--solutes', help="the list of solutes")
    args = argparser.parse_args()

    db = dblib.SolvDb(filename=args.db, type="abs", filehandle="^0")
    solutes = [s.strip() for s in open(args.solutes, 'r').readlines()]

    if os.getenv("SPIDERKEY") is None:
        print "SPIDERKEY environmental variable not set! Exit."
        quit()
    cs = ChemSpider(os.getenv("SPIDERKEY"))

    # Loop over all the database entries in the solute lists
    n = 0
    for entry in db.itersolutelist(args.solvent, solutes):
        if os.path.exists(entry.FileHandle + ".smi"): continue
        hits = cs.search(entry.SoluteName)
        if len(hits) > 0:
            smi = hits[0].smiles
            with open(entry.FileHandle + ".smi", "w") as f:
                f.write("%s\n" % smi)
        else:
            print entry.SoluteName, entry.FileHandle
        n += 1

    print "Looped over %d solutes" % n
Example #20
0
from chemspipy import ChemSpider

# ChemSpider client created once at import time and used by the functions in
# this module.
# NOTE(review): the API token is hard-coded here — consider loading it from
# configuration instead.
cs = ChemSpider('c48d4595-ead2-40e7-85c9-1e5d2a77754c')


def get_chem(query):
    """Search ChemSpider for ``query`` and describe the first result.

    Returns a dict with the keys ``'name'`` (common name) and ``'smiles'``,
    or None when the search has no results.
    """
    hits = cs.search(query)
    if not hits:
        return None

    top_hit = hits[0]
    return {'name': top_hit.common_name, 'smiles': top_hit.smiles}


def get_smiles(query):
    """Search ChemSpider for ``query`` and return the first result's SMILES.

    Returns None when the search has no results.
    """
    results = cs.search(query)
    # The unused ``chem`` local and the redundant else branch were dropped.
    return results[0].smiles if results else None
Example #21
0
 def __init__(self, api_key):
     """Create the ChemSpider client for ``api_key`` and keep it on the instance."""
     client = ChemSpider(api_key)
     self.chemspider_web_api = client
Example #22
0
    def process(self,
                input_text: str = "",
                input_file: str = "",
                output_file: str = "",
                output_file_sdf: str = "",
                sdf_append: bool = False,
                input_type: str = "",
                lang: str = "eng",
                paged_text: bool = False,
                format_output: bool = True,
                opsin_types: list = None,
                standardize_mols: bool = True,
                convert_ions: bool = True,
                write_header: bool = True,
                iob_format: bool = False,
                dry_run: bool = False,
                csv_delimiter: str = ";",
                normalize_text: bool = True,
                remove_duplicates: bool = False,
                annotate: bool = True,
                annotation_sleep: int = 2,
                chemspider_token: str = "",
                continue_on_failure: bool = False) -> OrderedDict:
        r"""
        Process the input file with ChemSpot.

        Parameters
        ----------
        input_text : str
            String to be processed by ChemSpot.
        input_file : str
            Path to file to be processed by ChemSpot.
        output_file : str
            File to write output in.
        output_file_sdf : str
            File to write SDF output in. SDF is from OPSIN converted entities.
        sdf_append : bool
            If True, append new molecules to existing SDF file or create new one if doesn't exist. SDF is from OPSIN converted entities.
        input_type : str
            | When empty, input (MIME) type will be determined from magic bytes.
            | Or you can specify "pdf", "pdf_scan", "image" or "text" and magic bytes check will be skipped.
        lang : str
            | Language which will Tesseract use for OCR. Available languages: https://github.com/tesseract-ocr/tessdata
            | Multiple languages can be specified with "+" character, i.e. "eng+bul+fra".
        paged_text : bool
            If True and `input_type` is "text" or `input_text` is provided, try to assign pages to chemical entities.
            ASCII control character 12 (Form Feed, '\f') is expected between pages.
        format_output : bool
            | If True, the value of "content" key of returned dict will be list of OrderedDicts.
            | If True and `output_file` is set, the CSV file will be written.
            | If False, the value of "content" key of returned dict will be None.
        opsin_types : list
            | List of ChemSpot entity types. Entities of types in this list will be converted with OPSIN. If you don't want
              to convert entities, pass empty list.
            | OPSIN is designed to convert IUPAC names to linear notation (SMILES etc.) so default value of `opsin_types`
              is ["SYSTEMATIC"] (these should be only IUPAC names).
            | ChemSpot entity types: "SYSTEMATIC", "IDENTIFIER", "FORMULA", "TRIVIAL", "ABBREVIATION", "FAMILY", "MULTIPLE"
        standardize_mols : bool
            If True, use molvs (https://github.com/mcs07/MolVS) to standardize molecules converted by OPSIN.
        convert_ions : bool
            If True, try to convert ion entities (e.g. "Ni(II)") to SMILES. Entities matching ion regex won't be converted
            with OPSIN.
        write_header : bool
            If True and if `output_file` is set and `output_format` is True, write a CSV write_header:
            "smiles", "bond_length", "resolution", "confidence", "learn", "page", "coordinates"
        iob_format : bool
            If True, output will be in IOB format.
        dry_run : bool
            If True, only return list of commands to be called by subprocess.
        csv_delimiter : str
            Delimiter for output CSV file.
        normalize_text : bool
            If True, normalize text before performing NER. It is strongly recommended to do so, because without normalization
            can ChemSpot produce unpredictable results which cannot be parsed.
        remove_duplicates : bool
            If True, remove duplicated chemical entities. Note that some entities-compounds can have different names, but
            same notation (SMILES, InChI etc.). This will only remove entities with same names. Not applicable for IOB format.
        annotate : bool
            | If True, try to annotate entities in PubChem and ChemSpider. Compound IDs will be assigned by searching with
              each identifier, separately for entity name, SMILES etc.
            | If entity has InChI key yet, prefer it in searching.
            | If "*" is present in SMILES, skip annotation.
            | If textual entity has single result in DB when searched by name, fill in missing identifiers (SMILES etc.).
        annotation_sleep: int
            How many seconds to sleep between annotation of each entity. It's for preventing overloading of databases.
        chemspider_token : str
            Your personal token for accessing the ChemSpider API (needed for annotation). Make account there to obtain it.
        continue_on_failure : bool
            | If True, continue running even if ChemSpot returns non-zero exit code.
            | If False and error occurs, print it and return.

        Returns
        -------
        dict
            Keys:

            - stdout: str ... standard output from ChemSpot
            - stderr: str ... standard error output from ChemSpot
            - exit_code: int ... exit code from ChemSpot
            - content

              - list of OrderedDicts ... when `format_output` is True
              - None ... when `format_output` is False

            - normalized_text : str
        """

        if opsin_types is None:
            opsin_types = ["SYSTEMATIC"]

        if input_text and input_file:
            input_file = ""
            self.logger.warning("Both 'input_text' and 'input_file' are set, but 'input_text' will be prefered.")
        elif not input_text and not input_file:
            raise ValueError("One of 'input_text' or 'input_file' must be set.")

        if not input_type and not input_text:
            possible_input_types = ["pdf", "image", "text"]
            input_type = get_input_file_type(input_file)
            if input_type not in possible_input_types:
                raise ValueError("Input file type ({}) is not one of {}".format(input_type, possible_input_types))
        elif input_type and not input_text:
            possible_input_types = ["pdf", "pdf_scan", "image", "text"]
            if input_type not in possible_input_types:
                raise ValueError("Unknown 'input_type'. Possible 'input_type' values are {}".format(possible_input_types))

        if input_type in ["pdf", "pdf_scan", "image"]:
            input_text, _ = get_text(input_file, input_type, lang=lang, tessdata_prefix=os.environ["TESSDATA_PREFIX"])
            input_file = ""

        if annotate and not chemspider_token:
            self.logger.warning("Cannot perform annotation in ChemSpider: 'chemspider_token' is empty.")

        options = ChainMap({k: v for k, v in {"iob_format": iob_format}.items() if v},
                           self.options_internal)
        output_file_temp = None

        commands, _, _ = self.build_commands(options, self._OPTIONS_REAL, self.path_to_binary)
        commands.insert(1, str(self.options_internal["max_memory"]))
        commands.append("-t")

        if normalize_text:
            normalizer = Normalizer(strip=True, collapse=True, hyphens=True, quotes=True, slashes=True, tildes=True, ellipsis=True)

            if input_file:
                with open(input_file, mode="r") as f:
                    input_text = f.read()

            input_text = normalizer(input_text)

            if not input_text:
                raise UserWarning("'input_text' is empty after normalization.")

            input_text = self.normalize_text(text=input_text)
            input_file_normalized = NamedTemporaryFile(mode="w", encoding="utf-8")
            input_file_normalized.write(input_text)
            input_file_normalized.flush()
            input_file = input_file_normalized.name
        else:
            if input_text:
                input_file_temp = NamedTemporaryFile(mode="w", encoding="utf-8")
                input_file_temp.write(input_text)
                input_file_temp.flush()
                input_file = input_file_temp.name

        commands.append(os.path.abspath(input_file))
        commands.append("-o")
        if format_output:
            output_file_temp = NamedTemporaryFile(mode="w", encoding="utf-8")
            commands.append(os.path.abspath(output_file_temp.name))
        else:
            commands.append(os.path.abspath(output_file))

        if dry_run:
            return " ".join(commands)

        stdout, stderr, exit_code = common_subprocess(commands)

        if "OutOfMemoryError" in stderr:
            raise RuntimeError("ChemSpot memory error: {}".format(stderr))

        to_return = {"stdout": stdout, "stderr": stderr, "exit_code": exit_code, "content": None,
                     "normalized_text": input_text if normalize_text else None}

        if not continue_on_failure and exit_code > 0:
            self.logger.warning("ChemSpot error:")
            eprint("\n\t".join("\n{}".format(stderr).splitlines()))
            return to_return

        if normalize_text:
            to_return["normalized_text"] = input_text

        if not format_output:
            return to_return
        elif format_output:
            with open(output_file_temp.name, mode="r", encoding="utf-8") as f:
                output_chs = f.read()

            entities = self.parse_chemspot_iob(text=output_chs) if iob_format else self.parse_chemspot(text=output_chs)
            to_return["content"] = entities

            if remove_duplicates and not iob_format:
                seen = set()
                seen_add = seen.add
                to_return["content"] = [x for x in to_return["content"] if not (x["entity"] in seen or seen_add(x["entity"]))]

            if input_type in ["pdf", "pdf_scan"] or paged_text:
                page_ends = []
                for i, page in enumerate(input_text.split("\f")):
                    if page.strip():
                        try:
                            page_ends.append(page_ends[-1] + len(page) - 1)
                        except IndexError:
                            page_ends.append(len(page) - 1)

            if opsin_types:
                if convert_ions:
                    to_convert = [x["entity"] for x in to_return["content"] if x["type"] in opsin_types and not self.re_ion.match(x["entity"])]
                else:
                    to_convert = [x["entity"] for x in to_return["content"] if x["type"] in opsin_types]

                if to_convert:
                    opsin = OPSIN(verbosity=self.verbosity)
                    opsin_converted = opsin.process(input=to_convert, output_formats=["smiles", "inchi", "inchikey"],
                                                    standardize_mols=standardize_mols, output_file_sdf=output_file_sdf,
                                                    sdf_append=sdf_append)
                    opsin_converted = iter(opsin_converted["content"])
                else:
                    self.logger.info("Nothing to convert with OPSIN.")

            if annotate:
                chemspider = ChemSpider(chemspider_token) if chemspider_token else None

            for i, ent in enumerate(to_return["content"]):
                if input_type in ["pdf", "pdf_scan"] or paged_text:
                    ent["page"] = str(bisect.bisect_left(page_ends, int(ent["start"])) + 1)

                if convert_ions:
                    match_ion = self.re_ion.match(ent["entity"])
                    if match_ion:
                        match_ion = match_ion.groupdict()
                        match_charge = self.re_charge.search(match_ion["charge"])
                        if match_charge:
                            match_charge = match_charge.groupdict()
                            if match_charge["roman"]:
                                smiles = "[{}+{}]".format(match_ion["ion"], len(match_charge["roman"]))
                            elif match_charge["digit"]:
                                if "+" in match_ion["charge"]:
                                    smiles = "[{}+{}]".format(match_ion["ion"], match_charge["digit"])
                                elif "-" in match_ion["charge"]:
                                    smiles = "[{}-{}]".format(match_ion["ion"], match_charge["digit"])
                            elif match_charge["signs"]:
                                smiles = "[{}{}{}]".format(match_ion["ion"], match_charge["signs"][0],
                                                           len(match_charge["signs"]))

                            mol = MolFromSmiles(smiles)
                            if mol:
                                inchi = MolToInchi(mol)
                                if inchi:
                                    ent.update(OrderedDict(
                                        [("smiles", smiles), ("inchi", inchi), ("inchikey", InchiToInchiKey(inchi))]))
                                else:
                                    ent.update(OrderedDict([("smiles", smiles), ("inchi", ""), ("inchikey", "")]))
                            else:
                                ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", "")]))
                    else:
                        ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", "")]))

                if opsin_types and to_convert:
                    if ent["entity"] in to_convert:
                        ent_opsin = next(opsin_converted)
                        ent.update(OrderedDict([("smiles", ent_opsin["smiles"]), ("inchi", ent_opsin["inchi"]),
                                                ("inchikey", ent_opsin["inchikey"]), ("opsin_error", ent_opsin["error"])]))
                    elif convert_ions and self.re_ion.match(ent["entity"]):
                        ent.update(OrderedDict([("opsin_error", "")]))
                    elif (convert_ions and not self.re_ion.match(ent["entity"])) or (not convert_ions and ent["entity"] not in to_convert):
                        ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", ""), ("opsin_error", "")]))

                # TODO: this should be simplified...looks like garbage code
                if annotate:
                    self.logger.info("Annotating entity {}/{}...".format(i + 1, len(to_return["content"])))
                    ent.update(OrderedDict([("pch_cids_by_inchikey", ""), ("chs_cids_by_inchikey", ""),
                                            ("pch_cids_by_name", ""), ("chs_cids_by_name", ""),
                                            ("pch_cids_by_smiles", ""), ("chs_cids_by_smiles", ""),
                                            ("pch_cids_by_inchi", ""), ("chs_cids_by_inchi", ""),
                                            ("pch_cids_by_formula", ""),
                                            ("pch_iupac_name", ""), ("chs_common_name", ""),
                                            ("pch_synonyms", "")]))

                    # do "double-annotation": some entities can be found in only one DB, updated and then searched in second DB
                    found_in_pch = False
                    found_in_chs = False
                    for _ in range(2):
                        results = []

                        # prefer InChI key
                        if "inchikey" in ent and ent["inchikey"]:
                            try:
                                results = get_compounds(ent["inchikey"], "inchikey")
                                if results:
                                    if len(results) == 1:
                                        result = results[0]
                                        synonyms = result.synonyms
                                        if synonyms:
                                            ent["pch_synonyms"] = "\"{}\"".format("\",\"".join(synonyms))
                                        ent["pch_iupac_name"] = result.iupac_name
                                        if not found_in_chs:
                                            ent["smiles"] = result.canonical_smiles or ent["smiles"]
                                            ent["inchi"] = result.inchi or ent["inchi"]
                                            ent["inchikey"] = result.inchikey or ent["inchikey"]
                                    ent["pch_cids_by_inchikey"] = "\"{}\"".format(",".join([str(c.cid) for c in results]))
                            except (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError, TimeoutError, PubChemPyError):
                                pass

                            results = chemspider.search(ent["inchikey"]) if chemspider_token else []
                            if results:
                                if len(results) == 1:
                                    result = results[0]
                                    ent["chs_common_name"] = result.common_name
                                    if not found_in_pch:
                                        ent["smiles"] = result.smiles or ent["smiles"]
                                        ent["inchi"] = result.stdinchi or ent["inchi"]
                                        ent["inchikey"] = result.stdinchikey or ent["inchikey"]
                                ent["chs_cids_by_inchikey"] = "\"{}\"".format(",".join([str(c.csid) for c in results]))
                        else:
                            if (not found_in_pch and not found_in_chs) or (not found_in_pch and found_in_chs):
                                try:
                                    results = get_compounds(ent["entity"] or ent["abbreviation"], "name")
                                    if results:
                                        if len(results) == 1:
                                            found_in_pch = True
                                            result = results[0]
                                            synonyms = result.synonyms
                                            if synonyms:
                                                ent["pch_synonyms"] = "\"{}\"".format("\",\"".join(synonyms))
                                            # only update identifiers if they weren't found in second DB
                                            if not found_in_chs:
                                                ent["smiles"] = result.canonical_smiles or ent["smiles"]
                                                ent["inchi"] = result.inchi or ent["inchi"]
                                                ent["inchikey"] = result.inchikey or ent["inchikey"]
                                            ent["pch_iupac_name"] = result.iupac_name
                                        ent["pch_cids_by_name"] = "\"{}\"".format(",".join([str(c.cid) for c in results]))
                                except (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError, TimeoutError, PubChemPyError):
                                    pass

                            if (not found_in_pch and not found_in_chs) or (found_in_pch and not found_in_chs):
                                results = chemspider.search(ent["entity"] or ent["abbreviation"]) if chemspider_token else []
                                if results:
                                    if len(results) == 1:
                                        found_in_chs = True
                                        result = results[0]
                                        if not found_in_pch:
                                            ent["smiles"] = result.smiles or ent["smiles"]
                                            ent["inchi"] = result.stdinchi or ent["inchi"]
                                            ent["inchikey"] = result.stdinchikey or ent["inchikey"]
                                        ent["chs_common_name"] = result.common_name
                                    ent["chs_cids_by_name"] = "\"{}\"".format(",".join([str(c.csid) for c in results]))

                            for search_field, col_pch, col_chs in [("smiles", "pch_cids_by_smiles", "chs_cids_by_smiles"),
                                                                   ("inchi", "pch_cids_by_inchi", "chs_cids_by_inchi"),
                                                                   ("formula", "pch_cids_by_formula", "")]:
                                results_pch = []
                                results_chs = []

                                if search_field == "smiles" and "smiles" in ent and ent["smiles"] and "*" not in ent["smiles"]:
                                    if (not found_in_pch and not found_in_chs) or (not found_in_pch and found_in_chs):
                                        try:
                                            results_pch = get_compounds(ent["smiles"], "smiles")
                                        except (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError, TimeoutError, PubChemPyError):
                                            pass
                                    if (not found_in_pch and not found_in_chs) or (found_in_pch and not found_in_chs):
                                        results_chs = chemspider.search(ent["smiles"]) if chemspider_token else []
                                elif search_field == "inchi" and "inchi" in ent and ent["inchi"]:
                                    if (not found_in_pch and not found_in_chs) or (not found_in_pch and found_in_chs):
                                        try:
                                            results_pch = get_compounds(ent["inchi"], "inchi")
                                        except (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError, TimeoutError, PubChemPyError):
                                            pass
                                    if (not found_in_pch and not found_in_chs) or (found_in_pch and not found_in_chs):
                                        results_chs = chemspider.search(ent["inchi"]) if chemspider_token else []
                                elif search_field == "formula":
                                    if (not found_in_pch and not found_in_chs) or (not found_in_pch and found_in_chs):
                                        try:
                                            results_pch = get_compounds(ent["entity"], "formula")
                                        except (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError, TimeoutError, PubChemPyError):
                                            pass
                                    # ChemSpider doesn't have search field for 'formula'

                                if results_pch:
                                    ent[col_pch] = "\"{}\"".format(",".join([str(c.cid) for c in results_pch]))
                                if results_chs:
                                    ent[col_chs] = "\"{}\"".format(",".join([str(c.csid) for c in results_chs]))

                                sleep(0.5)

                        sleep(annotation_sleep)

                        if not found_in_pch and not found_in_chs:
                            break

            if output_file:
                dict_to_csv(to_return["content"], output_file=output_file, csv_delimiter=csv_delimiter, write_header=write_header)

        return to_return
Example #23
0
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

from dotenv import load_dotenv
load_dotenv()

# API credentials are read from the process environment.
DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
CHEMSPIDER_TOKEN = os.getenv('CHEMSPIDER_TOKEN')
WOLFRAM_TOKEN = os.getenv('WOLFRAM_TOKEN')

# Service clients shared by the bot commands.
cs = ChemSpider(CHEMSPIDER_TOKEN)
wolfram = wolframalpha.Client(WOLFRAM_TOKEN)

client = commands.Bot(command_prefix='!')

# Headless Chrome configuration; binary and driver paths come from the environment.
op = webdriver.ChromeOptions()
op.binary_location = os.getenv('GOOGLE_CHROME_BIN')
op.add_argument('--headless')
op.add_argument('--no-sandbox')
# The switch is spelled "shm" (shared memory); the previous "--disable-dev-sh-usage"
# is not a Chrome switch, so the intended setting was never applied.
op.add_argument('--disable-dev-shm-usage')

driver = webdriver.Chrome(executable_path=os.getenv('CHROMEDRIVER_PATH'),
                          chrome_options=op)
# for local testing purposes only; comment out when deployed to Heroku
#driver = webdriver.Firefox()
Example #24
0
from chemspipy import ChemSpider

# Module-level ChemSpider client used by getCompound() below.
# NOTE(review): "CHEMSPIDER_API_KEY" is passed as a literal string -- presumably a
# placeholder to be replaced with a real key; confirm.
cs = ChemSpider("CHEMSPIDER_API_KEY")


def remove_prefix(disctext):
    """Return ``disctext.content`` with the leading ``!chem `` prefix stripped.

    Returns ``None`` when the content does not start with the command prefix.
    """
    command_prefix = "!chem "
    text = disctext.content
    if not text.startswith(command_prefix):
        return None
    return text[len(command_prefix):]


def getCompound(id):
    """Return the first ChemSpider search hit for a compound name or id.

    Raises IndexError when the search yields no results.
    """
    # cs.search returns the matching compounds; the top hit is the one used.
    return cs.search(id)[0]
Example #25
0
#python script
#This uses the api package chemspider to maybe spit out information about chemical compounds

#import the ChemSpider API package
from chemspipy import ChemSpider
import csv #for importing data
import os #for working directory
import pandas as pd #apparently this is better for making a working directory
import numpy as np #this is something else for dataframes

#define the chemSpider object with my security code 
# NOTE(review): the security token is hard-coded in source; consider loading it
# from the environment or an untracked file instead of committing it.
cs = ChemSpider('6c2e700b-6a92-4551-9cc1-70f28c021f23')

#example of how to get a compound info from the ChemSpider ID
#compound = cs.get_compound(2157)
#print(compound.smiles)

#working directory info
#cwd = os.getcwd()
os.chdir('/Users/mkamarck/Documents/chemspipy') #change working directory

#figure out how to make a dataframe or matrix variable
# Read the odor list and echo each CSV row; the context manager closes the
# file handle once the rows have been printed (it was previously left open).
with open('SymriseOdorList_forChemSpiPy.csv') as f:
    csv_f = csv.reader(f)
    for row in csv_f:
        # print() with a single argument behaves the same on Python 2 and 3.
        print(row)
  
#  660002', 'ACETANISOLE CRYST.', '100-06-1', 'KETONE', 'S'
  #I can get chem ID from the CAS number
#for row in csv_f:
#	print row[3]
Example #26
0
import CoolProp
from chemspipy import ChemSpider
from chemspipy_key import key  # private file with the key (DO NOT COMMIT!!)
import glob, json
# ChemSpider client created from the key imported from chemspipy_key above.
cs = ChemSpider(key)

# Map from name to Chemspider ID
backup_map = {
    'Propyne': 6095,
    'R236EA': 71342,
    'R245ca': 62827,
    'trans-2-Butene': 56442,
    'Oxygen': 952,
    'Fluorine': 22932,
    'Hydrogen': 762,
    'Deuterium': 22931,
    'HFE143m': 66577,
    'SulfurHexafluoride': 16425,
    'R114': 13853215
}

# Make sure the key works
# Fetches compound 2157 and compares its InChIKey against a fixed expected value.
c = cs.get_compound(2157)
assert (c.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYAW')

# Load every fluid JSON file and read the fluid name from its INFO section.
for fname in glob.glob('../fluids/*.json'):
    with open(fname, 'r') as fp:
        jj = json.load(fp)

    fluid = jj['INFO']['NAME']
Example #27
0
    def handle(self, *args, **kwargs):
        """Import legacy lab data from the TSV files in ``kwargs['directory']``.

        The files are processed in this order: ``User.tsv``, ``labGroup.tsv``,
        ``labgroup_users.tsv``, ``performedReactions.tsv`` (de-duplicated into
        ``performedReactionsNoDups.tsv`` when that file does not exist yet),
        ``compound_labs.tsv``, ``compound_chemicalClasses.tsv`` and
        ``compoundquantities.tsv``.

        Raises:
            RuntimeError: if a reference matches more than one reaction, a
                compound cannot be resolved to a single ChemSpider entry, or
                a quantity row uses an unknown unit.
            ValidationError: re-raised when a compound fails its ChemSpider
                consistency check.
            PerformedReaction.DoesNotExist: re-raised when a quantity row
                refers to an unknown reaction.
        """
        folder = kwargs['directory']

        # --- Users -------------------------------------------------------
        with open(path.join(folder, 'User.tsv')) as userFile:
            reader = csv.DictReader(userFile, delimiter='\t')
            for r in reader:
                if not User.objects.filter(username=r['username']).exists():
                    u = User(
                        username=r['username'],
                        first_name=r['first_name'],
                        last_name=r['last_name'],
                        email=r['email'],
                        is_staff=int(r['is_staff']),
                        is_superuser=int(r['is_superuser']),
                    )
                    # The password column is assigned verbatim to the model field.
                    u.password = r['password']
                    u.save()

        # --- Lab groups and their members ----------------------------------
        with open(path.join(folder, 'labGroup.tsv')) as labGroups:
            reader = csv.DictReader(labGroups, delimiter='\t')
            for r in reader:
                if not LabGroup.objects.filter(title=r['title']).exists():
                    l = LabGroup(**r)
                    l.save()
        with open(path.join(folder, 'labgroup_users.tsv')) as labGroupUsers:
            reader = csv.DictReader(labGroupUsers, delimiter='\t')
            for r in reader:
                l = LabGroup.objects.get(title=r['title'])
                l.users.add(User.objects.get(username=r['username']))

        # --- De-duplicate reaction references (only done once) -------------
        if not path.isfile(path.join(folder, 'performedReactionsNoDups.tsv')):
            self.stdout.write(
                'Writing file with duplicate references disambiguated (arbitrarily)'
            )
            in_path = path.join(folder, 'performedReactions.tsv')
            out_path = path.join(folder, 'performedReactionsNoDups.tsv')
            with open(in_path) as in_file, open(out_path, 'w') as out_file:
                references = set()
                reader = csv.DictReader(in_file, delimiter='\t')
                writer = csv.DictWriter(out_file,
                                        delimiter='\t',
                                        fieldnames=reader.fieldnames)
                writer.writeheader()

                case_count = 0
                valid_case_count = 0
                dup_count = 0
                for r in reader:
                    # References are normalised to lowercase.
                    ref = r['reference'].lower()
                    if ref != r['reference']:
                        self.stderr.write(
                            'Reference {} was not in lowercase. Converted.'.
                            format(r['reference']))
                        case_count += 1
                        if r['valid'] == '1':
                            valid_case_count += 1

                    if ref in references:
                        # A repeated reference is invalidated and renamed with
                        # the first free ``_dupN`` suffix.
                        r['notes'] += ' Duplicated reference'
                        r['valid'] = 0
                        dup_count += 1
                        i = 1
                        new_ref = ref
                        while new_ref in references:
                            new_ref = '{}_dup{}'.format(ref, i)
                            i += 1
                        self.stderr.write(
                            'Reference {} duplicated {} times. Renamed and invalidated'
                            .format(ref, i))
                        ref = new_ref
                    references.add(ref)
                    r['reference'] = ref
                    writer.writerow(r)
            self.stderr.write(
                '{} references converted to lowercase. {} were valid'.format(
                    case_count, valid_case_count))
            self.stderr.write(
                '{} references with _dupX appended to remove duplicate reference'
                .format(dup_count))

        # --- Reactions: first pass creates the PerformedReaction rows ------
        with open(path.join(folder,
                            'performedReactionsNoDups.tsv')) as reactions:
            reader = csv.DictReader(reactions, delimiter='\t')
            for r in reader:
                if not PerformedReaction.objects.filter(
                        reference=r['reference'].lower()).exists():
                    p = PerformedReaction(
                        reference=r['reference'],
                        labGroup=LabGroup.objects.get(
                            title=r['labGroup.title']),
                        notes=r['notes'],
                        user=User.objects.get(username=r['user.username']),
                        valid=int(r['valid']),
                        legacyRecommendedFlag=r['legacyRecommendedFlag'] ==
                        'Yes',
                        insertedDateTime=r['insertedDateTime'],
                        public=int(r['public']))
                    self.stdout.write(
                        'Creating reaction with reference {}'.format(
                            p.reference))
                    p.validate_unique()
                    p.save(calcDescriptors=False)

        # --- Reactions: second pass links duplicates and stores descriptors -
        with open(path.join(folder,
                            'performedReactionsNoDups.tsv')) as reactions:
            reader = csv.DictReader(reactions, delimiter='\t')
            outValues = []
            outBoolValues = []
            purityValues = []
            temperatureValues = []
            timeValues = []
            pHValues = []
            preHeatStandingValues = []
            teflonValues = []

            # Buffers of new descriptor values paired with the model used to
            # bulk-insert them, in insertion order.
            pending = [
                (OrdRxnDescriptorValue, outValues),
                (BoolRxnDescriptorValue, outBoolValues),
                (OrdRxnDescriptorValue, purityValues),
                (NumRxnDescriptorValue, temperatureValues),
                (NumRxnDescriptorValue, timeValues),
                (NumRxnDescriptorValue, pHValues),
                (NumRxnDescriptorValue, preHeatStandingValues),
                (BoolRxnDescriptorValue, teflonValues),
            ]

            def store_value(model, descriptor, reaction, value, buffer):
                """Update the existing descriptor value, or buffer a new one."""
                try:
                    v = model.objects.get(descriptor=descriptor,
                                          reaction=reaction)
                    if v.value != value:
                        v.value = value
                        v.save()
                except model.DoesNotExist:
                    buffer.append(descriptor.createValue(reaction, value))

            def flush_pending():
                """Bulk-insert every buffered value, then empty the buffers."""
                self.stdout.write("Saving...")
                for model, values in pending:
                    model.objects.bulk_create(values)
                    # Cleared in place so the shared list objects stay valid.
                    del values[:]
                self.stdout.write("...saved")

            for r in reader:
                self.stdout.write(
                    'Reiterating for reaction with reference {}'.format(
                        r['reference'].lower()))
                ps = PerformedReaction.objects.filter(
                    reference=r['reference'].lower())
                if ps.count() > 1:
                    ps = ps.filter(valid=True)
                if ps.exists():
                    if ps.count() > 1:
                        raise RuntimeError(
                            '{} has more than one reaction'.format(
                                r['reference'].lower()))
                    p = ps[0]
                    try:
                        p.duplicateOf = PerformedReaction.objects.get(
                            reference=r['duplicateOf.reference'].lower())
                        p.save()
                    except PerformedReaction.DoesNotExist:
                        # No matching original reaction: leave duplicateOf unset.
                        pass

                    # Outcome: only '1'..'4' are accepted, anything else is None.
                    outcomeValue = int(r['outcome']) if (
                        r['outcome'] in ('1', '2', '3', '4')) else None
                    store_value(OrdRxnDescriptorValue, outcomeDescriptor, p,
                                outcomeValue, outValues)

                    # Boolean outcome: True for outcomes above 2. A missing
                    # outcome counts as False (explicit None guard so the
                    # comparison cannot raise on Python 3).
                    value = outcomeValue is not None and outcomeValue > 2
                    store_value(BoolRxnDescriptorValue,
                                outcomeBooleanDescriptor, p, value,
                                outBoolValues)

                    # Purity: only '1' and '2' are accepted.
                    value = int(r['purity']) if (r['purity']
                                                 in ('1', '2')) else None
                    store_value(OrdRxnDescriptorValue, purityDescriptor, p,
                                value, purityValues)

                    # Temperature: 273.15 is added to the file value
                    # (presumably Celsius to Kelvin -- confirm).
                    value = (float(r['temp']) +
                             273.15) if (r['temp'] not in ('', '?')) else None
                    store_value(NumRxnDescriptorValue, temperatureDescriptor,
                                p, value, temperatureValues)

                    # Time: the file value is multiplied by 60
                    # (presumably hours to minutes -- confirm).
                    value = float(r['time']) * 60 if (
                        r['time'] not in ['', '?']) else None
                    store_value(NumRxnDescriptorValue, timeDescriptor, p,
                                value, timeValues)

                    value = float(r['pH']) if (r['pH'] not in ('',
                                                               '?')) else None
                    store_value(NumRxnDescriptorValue, pHDescriptor, p, value,
                                pHValues)

                    # NOTE(review): bool() of any non-empty string is True and
                    # the value is stored through NumRxnDescriptorValue --
                    # confirm this is the intended encoding.
                    value = bool(r['pre_heat standing']) if (
                        r.get('pre_heat standing') not in ('', None)) else None
                    store_value(NumRxnDescriptorValue,
                                preHeatStandingDescriptor, p, value,
                                preHeatStandingValues)

                    value = bool(int(r['teflon_pouch'])) if (
                        r.get('teflon_pouch') not in (None, '')) else None
                    store_value(BoolRxnDescriptorValue, teflonDescriptor, p,
                                value, teflonValues)

                    # Insert in batches to bound memory use.
                    if len(outValues) > 500:
                        flush_pending()

            # Persist whatever is still buffered after the last full batch;
            # without this the final partial batch would never be written.
            if any(values for _, values in pending):
                flush_pending()

        # --- Compounds ---------------------------------------------------
        with open(path.join(folder, 'compound_labs.tsv')) as compounds:
            reader = csv.DictReader(compounds, delimiter='\t')
            cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
            for r in reader:
                l = LabGroup.objects.get(title=r['labGroup.title'])
                if not Compound.objects.filter(abbrev=r['abbrev']).exists():
                    self.stdout.write(
                        'Importing compound with abbreviation {} and name {}'.
                        format(r['abbrev'], r['name']))
                    if r.get('custom') != '1':
                        try:
                            # Resolve the ChemSpider ID: explicit CSID first,
                            # then a unique CAS hit, then a unique name hit.
                            if r.get('CSID') not in ('', None):
                                csid = r['CSID']
                            else:
                                if r.get('CAS_ID') not in (None, ''):
                                    CASResults = cs.simple_search(r['CAS_ID'])
                                else:
                                    CASResults = []
                                if len(CASResults) != 1:
                                    nameResults = cs.simple_search(
                                        r.get('name'))
                                    if len(nameResults) != 1:
                                        raise RuntimeError(
                                            'Could not get unambiguous chemspider entry for CAS ID {} with name{}'
                                            .format(r['CAS_ID'], r['name']))
                                    csid = nameResults[0].csid
                                else:
                                    csid = CASResults[0].csid
                            c = Compound(CSID=csid,
                                         labGroup=l,
                                         abbrev=r['abbrev'])
                            c.csConsistencyCheck()
                            c.save()
                        except ValidationError as e:
                            # NOTE(review): if the check raised before
                            # c.save(), c is still unsaved here -- confirm
                            # that delete() is safe on an unsaved Compound.
                            c.delete()
                            raise e
                    else:
                        # Custom compounds are stored as given, without any
                        # ChemSpider lookup.
                        if r.get('INCHI') is None:
                            r['INCHI'] = ''
                        if r.get('smiles') is None:
                            r['smiles'] = ''
                        c = Compound.objects.get_or_create(
                            labGroup=l,
                            custom=True,
                            name=r['name'],
                            abbrev=r['abbrev'],
                            formula=r['formula'],
                            smiles=r['smiles'],
                            INCHI=r['INCHI'])[0]
                    self.stdout.write(c.name.encode('utf-8'))
                    c.save()

        # --- Chemical classes --------------------------------------------
        with open(path.join(
                folder, 'compound_chemicalClasses.tsv')) as chemicalClasses:
            reader = csv.DictReader(chemicalClasses, delimiter='\t')
            for r in reader:
                self.stdout.write('working with class {}'.format(
                    r['chemicalClass.label']))
                # From here on ``cs`` is a Compound queryset, not the
                # ChemSpider client.
                cs = Compound.objects.filter(abbrev=r['compound.abbrev'])
                if cs.count() > 0:
                    c1 = ChemicalClass.objects.get_or_create(
                        label=r['chemicalClass.label'])[0]
                    for c2 in cs:
                        if c1 not in c2.chemicalClasses.all():
                            c2.chemicalClasses.add(c1)
                            c2.save()

        # --- Compound quantities -----------------------------------------
        with open(path.join(folder, 'compoundquantities.tsv')) as cqs:
            reader = csv.DictReader(cqs, delimiter='\t')
            for r in reader:
                try:
                    reaction = PerformedReaction.objects.get(
                        reference=r['reaction.reference'].lower())
                    compound = Compound.objects.get(
                        abbrev=r['compound.abbrev'],
                        labGroup=reaction.labGroup)
                    if r['compound.abbrev'] in ('water', 'H2O'):
                        r['density'] = 1
                    mw = NumMolDescriptorValue.objects.get(
                        compound=compound, descriptor__heading='mw').value
                    if r['compoundrole.name'] != 'pH':
                        self.stdout.write('adding {} to {}'.format(
                            compound.abbrev, reaction.reference))
                        compoundrole = CompoundRole.objects.get_or_create(
                            label=r['compoundrole.name'])[0]
                        # Convert the amount by unit ('g', 'd', 'mL'), dividing
                        # by the compound's 'mw' descriptor value.
                        if r['amount'] in ('', '?'):
                            amount = None
                        elif r['unit'] == 'g':
                            amount = float(r['amount']) / mw
                        elif r['unit'] == 'd':
                            # 0.0375 is the per-'d' factor used by the data
                            # (presumably mL per drop -- confirm).
                            amount = float(r['amount']) * \
                                0.0375 * float(r['density']) / mw
                        elif r['unit'] == 'mL':
                            amount = float(r['amount']) * \
                                float(r['density']) / mw
                        else:
                            raise RuntimeError('invalid unit entered')
                        if amount is not None:
                            amount = (amount * 1000)
                        cqq = CompoundQuantity.objects.filter(
                            role=compoundrole,
                            compound=compound,
                            reaction=reaction)
                        # Several rows for the same triple are removed so that
                        # get_or_create below yields a single row.
                        if cqq.count() > 1:
                            cqq.delete()
                        quantity = CompoundQuantity.objects.get_or_create(
                            role=compoundrole,
                            compound=compound,
                            reaction=reaction)[0]
                        quantity.amount = amount
                        quantity.save()
                    else:
                        # pH-adjusting reagents are recorded in the notes
                        # instead of as a quantity.
                        reaction.notes += ' pH adjusting reagent used: {}, {}{}'.format(
                            r['compound.abbrev'], r['amount'], r['unit'])
                        reaction.save(calcDescriptors=False)
                except Compound.DoesNotExist as e:
                    self.stderr.write(
                        'Unknown Reactant {} with amount {} {} in reaction {}'.
                        format(r['compound.abbrev'], r['amount'], r['unit'],
                               r['reaction.reference']))
                    # Pause for operator confirmation before invalidating the
                    # reaction (raw_input: this command targets Python 2).
                    raw_input("Continue?")
                    reaction.notes += ' Unknown Reactant {} with amount {} {}'.format(
                        r['compound.abbrev'], r['amount'], r['unit'])
                    reaction.valid = False
                    reaction.save(calcDescriptors=False)
                except PerformedReaction.DoesNotExist as e:
                    raise e
Example #28
0
# Base Python libraries
import sys, os, json
from types import *
from distutils.dir_util import mkpath

# NIH resolver interface
import cirpy
# PubChem interface
import pubchempy as pcp

# ChemSpider
# The security token is read from the environment; if
# CHEMSPIDER_SECURITY_TOKEN is not set, the lookup raises KeyError as soon
# as this module is loaded.
CST = os.environ['CHEMSPIDER_SECURITY_TOKEN']
from chemspipy import ChemSpider
# Module-level ChemSpider client built with that token.
cs = ChemSpider(security_token=CST)

def getJSON(inchikey):
    """Load the cached JSON record for *inchikey* and look up missing keys.

    The cache file is read from ``json/<first two characters>/<inchikey>.json``
    relative to the current working directory. If the file cannot be opened
    or does not hold valid JSON, a fresh record containing only the
    ``inchikey`` entry is started instead.

    When the record has no ``pubchem_cid`` entry, PubChem is queried by
    InChIKey and the first of the sorted results is selected.
    """
    filename = 'json/%s/%s.json' % (inchikey[0:2], inchikey)
    try:
        with open(filename) as handle:
            items = json.load(handle)
    except (IOError, ValueError):
        # The exception types must be a tuple: ``except IOError, ValueError``
        # is a syntax error on Python 3, and on Python 2 it catches only
        # IOError (binding it to the name ValueError), so a corrupt cache
        # file would propagate instead of falling back to a new record.
        items = {"inchikey": inchikey}

    # check if we need to get various keys

    # PubChem CID
    if "pubchem_cid" not in items:
        results = pcp.get_compounds(inchikey, 'inchikey')
        results.sort()
        pcpCmpd = results[0]
Example #29
0
from django.utils.text import slugify
from chemspipy import ChemSpider

from professor_oak.models import ScoreMixin

log = logging.getLogger(__name__)


# Load the chemspider API for accessing the RSC structure database.
# If the settings object has no CHEMSPIDER_KEY attribute, a warning is
# logged and ``chemspider_api`` is left as None.
try:
    cs_key = settings.CHEMSPIDER_KEY
except AttributeError:
    # Logger.warn() is a deprecated alias; warning() is the supported call.
    log.warning('CHEMSPIDER_KEY not found in localsettings.py')
    chemspider_api = None
else:
    chemspider_api = ChemSpider(cs_key)


class Hazard(models.Model):
    """A hazard type as defined by the Globally Harmonized System (GHS).

    Attributes
    ----------
    - pictogram : Image file that represents this hazard. If not
      provided, we will look in `static_files/ghs_pictograms/` for one
      that matches the `name` attribute.
    """
    # Short string codes naming the category of hazard.
    # NOTE(review): presumably used as the choices of a model field defined
    # further down the class -- confirm against the full definition.
    PHYSICAL = 'p'
    HEALTH = 'h'
    PHYSICAL_AND_HEALTH = 'ph'
    ENVIRONMENTAL = 'e'
Example #30
0
# Tool uses the ChemSpiPy library to assist in accessing the ChemSpider Database

# Syntax to run command: python ChemSpider.py [-f/-n] term
#     -f name    -> print the common name and formula of the first five matches
#     -n formula -> print the common name of the first match

import sys

# allows us to use command line arguments

# Validate the command line before importing chemspipy or contacting the
# service, so that a bad invocation fails fast. An unrecognised flag is
# rejected here instead of silently running a search and printing nothing.
if len(sys.argv) < 3 or sys.argv[1] not in ("-f", "-n"):
    print("Incorrect input.\n\t==> python ChemSpider.py [-f/-n] <argument>")
    # Non-zero status so calling shells/scripts can detect the usage error.
    sys.exit(1)

mode, term = sys.argv[1], sys.argv[2]

from chemspipy import ChemSpider

# imports the ChemSpider api, and passes our access token to it
# NOTE(review): the access token is hard-coded in source; consider loading
# it from the environment or a settings file instead.
cs = ChemSpider('3e05e0a6-9f49-4dff-ba0e-a9d6ca3d04ea')

# -f reports up to the first five matches (name and formula);
# -n reports only the first match's common name.
limit = 5 if mode == "-f" else 1
for result in cs.search(term)[:limit]:
    print(result.common_name)
    if mode == "-f":
        print(result.molecular_formula)