コード例 #1
0
    def __init__(self):
        """
        Create the empty data holders, the character translator and the ChemSpider client.
        """
        # Reaction table, species table and unique-species dictionary all start as None.
        self.reactions_dataframe = None
        self.species_df = None
        self.unique_species_dict = None

        # Translator used to clean unfamiliar characters out of individual reactants.
        # str.maketrans(intab, outtab, delete): "Î" is mapped to "α", and each of
        # the characters "±", "€" and "™" is mapped to None (i.e. removed).
        self.translator = str.maketrans("Î", "α", "±€™")

        # Authenticate against the ChemSpider API.
        # NOTE(review): the token below is hard-coded in source (Maneet's token).
        token = "99c9f388-12be-4b22-8f83-00b6f1e2d7d0"
        self.security_token = token
        self.cs = ChemSpider(
            token,
            user_agent="StudentResearcher, ChemSpiPy 1.0.5, Python 3.6")

        print('--Populator Initialized--')
コード例 #2
0
def find_matches(matched_in_ChemSpider, massFile_Name):
    """Return, for every product, the ChemSpider IDs whose mol file equals the product's.

    :param matched_in_ChemSpider: mapping of product identifier to an iterable
        of candidate ChemSpider IDs (CSIDs).
    :param massFile_Name: passed through to ``read_product_molFile`` together
        with the product identifier to load the product's mol file.
    :return: dict mapping each product identifier that has at least one
        matching compound to the list of matching CSIDs. Products without a
        match are omitted; an empty input yields an empty dict.
    """
    from chemspipy import ChemSpider
    cs = ChemSpider('dfdc677d-e7d3-435b-a74e-bfe6167a3899')

    # The accumulator must live outside the product loop: re-creating it per
    # product discarded every earlier product's matches and left it undefined
    # for an empty input.
    matches = {}
    for i, candidate_csids in matched_in_ChemSpider.items():
        print(i)
        # Load the mol file info of the product.
        product_molFile = read_product_molFile(massFile_Name, i)

        matched_compounds = []
        for CSID in candidate_csids:
            # Fetch the candidate's 2D mol file and compare it with the product's.
            ChemSpider_compound_mol_info = cs.get_compound(CSID).mol_2d
            if compare_two_molFiles(product_molFile,
                                    ChemSpider_compound_mol_info):
                matched_compounds.append(CSID)

        # Only record products for which at least one compound matched.
        if matched_compounds:
            matches[i] = matched_compounds
    return matches
コード例 #3
0
 def get_image_url(self):
     """Return a ChemSpider image URL for this document, or None when none can be found.

     The decoded metadata is checked for a 'csid' entry first. Failing that, an
     InChIKey entry ('InChIKey' or 'inchikey') is searched on ChemSpider; the
     first hit's csid is written back into the metadata and the object is saved.
     """
     md = jsonpickle.decode(self.metadata)

     # A csid is already recorded: build the URL locally.
     if 'csid' in md:
         url_prefix = 'http://www.chemspider.com/ImagesHandler.ashx?id='
         return url_prefix + str(self.csid)

     # Neither a csid nor an InChIKey: no image.
     if 'InChIKey' not in md and 'inchikey' not in md:
         return None

     # Author's note kept from the original: this lookup stopped working after
     # a ChemSpider upgrade; if you want images, get the mol instead.
     from chemspipy import ChemSpider
     client = ChemSpider(settings.CHEMSPIDER_APIKEY)
     hits = client.search(md.get('InChIKey', md.get('inchikey')))
     if not hits:
         return None

     # Remember the csid for next time, then hand back the image URL.
     first_hit = hits[0]
     md['csid'] = first_hit.csid
     self.metadata = jsonpickle.encode(md)
     self.save()
     return first_hit.image_url
コード例 #4
0
ファイル: Compound.py プロジェクト: zhaojhao/DRP
 def csConsistencyCheck(self):
     """Perform a consistency check of this record against chemspider.

     Nothing is checked when ``self.custom`` is truthy. Otherwise the compound
     for ``self.CSID`` is fetched and compared with this record's name, INCHI,
     smiles and formula. Fields that are empty strings (or, for the name,
     '' / None) are filled in from ChemSpider instead of being compared.

     :raises ValidationError: with code 'no_csid' when no CSID is set, or
         wrapping the list of all mismatches found.
     """
     if self.custom:
         return

     errorList = []
     cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
     # Compare by value: identity comparison against a string literal is unreliable.
     if self.CSID is None or self.CSID == '':
         raise ValidationError('No CSID set', 'no_csid')

     csCompound = cs.get_compound(self.CSID)

     if self.name not in ('', None):
         nameResults = cs.simple_search(self.name)
         if csCompound not in nameResults:
             # NOTE(review): the code 'invalid_inchi' is reused for a name
             # mismatch; left unchanged in case callers match on it.
             errorList.append(ValidationError(
                 'A compound was consistency checked and was found to have an invalid name', code='invalid_inchi'))
     else:
         self.name = csCompound.common_name

     if self.INCHI == '':
         self.INCHI = csCompound.stdinchi
     elif self.INCHI != csCompound.stdinchi:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid InChi', code='invalid_inchi'))

     if self.smiles == '':
         self.smiles = csCompound.smiles
     elif self.smiles != csCompound.smiles:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid smiles string', code='invalid_smiles'))

     if self.formula == '':
         self.formula = csCompound.molecular_formula
     elif self.formula != csCompound.molecular_formula:
         # Was 'errorsList', an undefined name that raised NameError here.
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid formula', code="invalid_formula"))

     if errorList:
         raise ValidationError(errorList)
コード例 #5
0
ファイル: compound.py プロジェクト: ssttv/DRP
 def csConsistencyCheck(self):
     """Perform a consistency check of this record against chemspider.

     Nothing is checked when ``self.custom`` is truthy. Otherwise the compound
     for ``self.CSID`` is fetched and compared with this record's name, INCHI,
     smiles and formula. Fields that are empty strings (or, for the name,
     '' / None) are filled in from ChemSpider instead of being compared.

     :raises ValidationError: with code 'no_csid' when no CSID is set, or
         wrapping the list of all mismatches found.
     """
     if self.custom:
         return

     errorList = []
     cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
     # Compare by value: identity comparison against a string literal is unreliable.
     if self.CSID is None or self.CSID == '':
         raise ValidationError('No CSID set', 'no_csid')

     csCompound = cs.get_compound(self.CSID)

     if self.name not in ('', None):
         nameResults = cs.simple_search(self.name)
         if csCompound not in nameResults:
             # NOTE(review): the code 'invalid_inchi' is reused for a name
             # mismatch; left unchanged in case callers match on it.
             errorList.append(ValidationError(
                 'A compound was consistency checked and was found to have an invalid name', code='invalid_inchi'))
     else:
         self.name = csCompound.common_name

     if self.INCHI == '':
         self.INCHI = csCompound.stdinchi
     elif self.INCHI != csCompound.stdinchi:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid InChi', code='invalid_inchi'))

     if self.smiles == '':
         self.smiles = csCompound.smiles
     elif self.smiles != csCompound.smiles:
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid smiles string', code='invalid_smiles'))

     if self.formula == '':
         self.formula = csCompound.molecular_formula
     elif self.formula != csCompound.molecular_formula:
         # Was 'errorsList', an undefined name that raised NameError here.
         errorList.append(ValidationError(
             'A compound was consistency checked and was found to have an invalid formula', code="invalid_formula"))

     if errorList:
         raise ValidationError(errorList)
コード例 #6
0
ファイル: modelforms.py プロジェクト: nihaoCC/DRP
 def clean_name(self):
     """Accept the cleaned name only if a ChemSpider search for it returns this instance's CSID."""
     client = ChemSpider(settings.CHEMSPIDER_TOKEN)
     submitted_name = self.cleaned_data['name']
     search_hits = client.simple_search(submitted_name)
     # Success path first; anything else is rejected below.
     if self.instance.CSID in (hit.csid for hit in search_hits):
         return self.cleaned_data['name']
     raise ValidationError("That name is not a known synonym for this compound")
コード例 #7
0
 def set_and_initialize_token(self, input_token):
     """
     Store the ChemSpider security token on the object and build the API client from it.

     :param input_token: your security token (for ChemSpider)
     :return: None
     """
     # Keep the token itself as well as the client created with it.
     self.security_token = input_token
     self.cs = ChemSpider(self.security_token)
コード例 #8
0
ファイル: modelforms.py プロジェクト: skylere27/DRP
 def __init__(self, user, *args, **kwargs):
     """Initialise the form and limit the 'labGroups' choices to the given user's lab groups."""
     super(CompoundForm, self).__init__(*args, **kwargs)
     self.compound = None
     self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)

     lab_groups_field = self.fields['labGroups']
     lab_groups_field.queryset = user.labgroup_set.all()
     # Drop the empty choice when the user belongs to at least one lab group.
     if user.labgroup_set.all().exists():
         lab_groups_field.empty_label = None
コード例 #9
0
ファイル: modelforms.py プロジェクト: skylere27/DRP
 def clean_name(self):
     """Check that a ChemSpider search for the name returns this instance's CSID."""
     client = ChemSpider(settings.CHEMSPIDER_TOKEN)
     candidate = self.cleaned_data['name']
     hits = client.simple_search(candidate)
     if self.instance.CSID in (hit.csid for hit in hits):
         # The name resolves to this compound: accept it unchanged.
         return self.cleaned_data['name']
     raise ValidationError(
         "That name is not a known synonym for this compound")
コード例 #10
0
def search_by_mass(mass, margine):
    """Return the ChemSpider IDs of all hits from a simple mass search.

    ``mass`` and ``margine`` are forwarded unchanged to
    ``ChemSpider.simple_search_by_mass``.
    """
    # Requires the chemspipy package (pip install chemspipy) and a registered
    # security code.
    from chemspipy import ChemSpider
    client = ChemSpider('dfdc677d-e7d3-435b-a74e-bfe6167a3899')
    # Query the database and keep only each hit's csid.
    return [hit.csid for hit in client.simple_search_by_mass(mass, margine)]
コード例 #11
0
def getLongNames(molsDict, pref=4, onlyLettersDigits=False, token="2228d430-a955-416b-b920-14547d28df9e"):
    """Map every key of ``molsDict`` to the common name of its ChemSpider compound.

    The compound is looked up with the key minus its first ``pref`` characters.
    The common name is encoded to ASCII (unencodable characters dropped) and,
    when ``onlyLettersDigits`` is set, passed through ``leftOnlyLettersDigits``.
    """
    client = ChemSpider(token)
    long_names = {}
    for mol_key in molsDict:
        compound = client.get_compound(mol_key[pref:])
        ascii_name = compound.common_name.encode("ascii", "ignore")
        long_names[mol_key] = (
            leftOnlyLettersDigits(ascii_name) if onlyLettersDigits else ascii_name
        )
    return long_names
コード例 #12
0
class ChemSp(object):
    """Small wrapper around a ChemSpider client configured from SettingsConstants."""

    def __init__(self):
        """Read the 'CHEMSPI_KEY' and 'CHEMSPI_API_URL' settings and create the client."""
        sett = SettingsConstants()
        self.key = sett.get('CHEMSPI_KEY')
        self.url = sett.get('CHEMSPI_API_URL')
        self.cs = ChemSpider(self.key, api_url=self.url)

    def get_cmpd(self, csid):
        """Return the ChemSpider compound for ``csid``."""
        return self.cs.get_compound(csid)

    def search(self, query):
        """Search ChemSpider for ``query`` and return a list of result dicts.

        At most the first six results are processed. Each dict holds the keys
        'csid', 'name', 'iupac_name', 'cas', 'inchi', 'formula' and 'image'.
        CAS and IUPAC name are resolved from the InChI through ``cirpy``.
        Progress and results are printed along the way.
        """
        print('Connected to ChemSpider API')
        print("Searching started")
        print("Searching for: " + query)

        results = []
        for i, result in enumerate(self.cs.search(query)):
            if i > 5:
                break
            print("Compound " + str(i))
            formula_text = str(result.molecular_formula)
            csid_text = str(result.csid)
            inchi = result.inchi
            common_name = result.common_name
            cas_lookup = cirpy.resolve(inchi, 'cas')
            iupac_name = cirpy.resolve(inchi, 'iupac_name')

            if type(cas_lookup) is not list:
                best_cas = cas_lookup
            else:
                # Several CAS numbers came back: rank them by similarity to the
                # query text and keep the closest one.
                ranked = difflib.get_close_matches(str(query), cas_lookup, 3, 0)
                print(ranked)
                best_cas = ranked[0]

            image_url = result.image_url
            print(image_url)
            results.append({
                'csid': csid_text,
                'name': common_name,
                'iupac_name': iupac_name,
                'cas': best_cas,
                'inchi': inchi,
                'formula': formula_text,
                'image': image_url,
            })

        print("Searching finished")
        print(results)

        return results

    def render_image(self, csid, image_id):
        """Write the compound's image to the temp folder and return its static URL path.

        The returned path carries a '?timestamp=' suffix built from the current time.
        """
        png_bytes = self.get_cmpd(csid).image
        # NOTE(review): absolute, machine-specific output directory.
        temp_image = '/home/marcin/Dokumenty/projekty/production/Chem/chembase/static/chembase/temp/temp' + image_id + '.png'
        with open(temp_image, 'wb+') as out_file:
            out_file.write(png_bytes)
        stamp = str(datetime.datetime.now())
        return '/static/chembase/temp/temp' + image_id + '.png?timestamp=' + stamp
コード例 #13
0
ファイル: Compound.py プロジェクト: nihaoCC/DRP
    def fromCsv(self, fileName, labGroup=None):
        """Read a CSV file and return a list of compounds which have not yet been saved.

        The file is read with ``csv.DictReader``, so the header row supplies
        the field names. The columns used are ``CSID``, ``CAS``, ``abbrev``,
        ``smiles``, ``name``, ``INCHI`` and ``chemicalClasses`` (a comma
        separated list of ChemicalClass labels; each label is fetched or
        created). A missing ``chemicalClasses`` column or value simply yields
        a compound without chemical classes.

        When a row has a CAS number but no CSID, ChemSpider is searched for
        the CAS number and the CSID is taken from the result if exactly one
        compound is returned.

        :param fileName: path of the CSV file to read.
        :param labGroup: lab group given to every compound. When omitted and
            this object has an ``instance`` attribute, that attribute is used.
        :return: list of unsaved Compound objects, one per row.
        :raises ValidationError: wrapping every per-row error collected,
            raised once after all rows have been read.
        """
        if labGroup is None and hasattr(self, 'instance'):
            # we presume that if this is being called without a labgroup that's because this manager belongs to a lab group
            labGroup = self.instance

        compoundsList = []
        errors = []
        cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
        with open(fileName) as f:
            reader = csv.DictReader(f, restkey='restKey')
            for rowCount, row in enumerate(reader, 1):
                try:
                    # Reset per row: previously this name was unbound whenever
                    # the CSV had no 'chemicalClasses' column, and a short row
                    # (value None) crashed on None.split().
                    chemicalClasses = []
                    rawClasses = row.get('chemicalClasses')
                    if rawClasses is not None:
                        for label in rawClasses.split(','):
                            chemicalClass, created = ChemicalClass.objects.get_or_create(label=label.strip())
                            chemicalClasses.append(chemicalClass)

                    hasCsid = row.get('CSID') not in ('', None)
                    if row.get('CAS') not in ('', None) and not hasCsid:
                        CASResults = cs.simple_search(row['CAS'])
                        if len(CASResults) < 1:
                            errors.append(ValidationError('CAS Number returned no results from ChemSpider on row %(rowCount)d of uploaded csv.', params={'rowCount': rowCount}))
                        elif len(CASResults) == 1:
                            row['CSID'] = CASResults[0].csid  # a little hacky, but it gets the job done
                        else:
                            errors.append(ValidationError('CAS number returns more than one ChemSpider ID on row %(rowCount)d of uploaded csv.', params={'rowCount': rowCount}))
                    elif not hasCsid:
                        errors.append(ValidationError('No CSID provided on row %(rowCount)d of uploaded csv.', params={'rowCount': rowCount}))

                    kwargs = {
                        'CSID': row.get('CSID'),
                        'abbrev': row.get('abbrev'),
                        'smiles': row.get('smiles'),
                        'name': row.get('name'),
                        'INCHI': row.get('INCHI'),
                    }
                    compound = Compound(labGroup=labGroup, **kwargs)
                    for chemicalClass in chemicalClasses:
                        compound.lazyChemicalClasses.append(chemicalClass)
                    compoundsList.append(compound)
                except ValidationError as e:
                    # Re-tag each message with the row it came from.
                    for message in e.messages:
                        errors.append(ValidationError(message + ' on row %(rowCount)d of uploaded csv', params={'rowCount': rowCount}))
            if errors:
                raise ValidationError(errors)
        return compoundsList
コード例 #14
0
def find_common_name(inchikey, formula):
    """Return the compound's ChemSpider common name, falling back to ``formula``.

    The common name is used only when a ChemSpider key is configured, the
    InChIKey is non-empty and the search returns exactly one result.
    """
    if not chemspikey:
        return formula

    client = ChemSpider(chemspikey)
    if len(inchikey) > 0:
        hits = client.search(inchikey)
        # An ambiguous or empty search falls through to the formula.
        if len(hits) == 1:
            return hits[0].common_name

    return formula
コード例 #15
0
def find_common_name(inchikey):
    """Return the compound's ChemSpider common name, or None when it cannot be determined.

    A name is returned only when a ChemSpider key is configured, the InChIKey
    is non-empty and the search returns exactly one result.
    """
    if not chemspikey:
        return None

    client = ChemSpider(chemspikey)
    if len(inchikey) > 0:
        hits = client.search(inchikey)
        # Zero or several hits are treated as "unknown".
        if len(hits) == 1:
            return hits[0].common_name

    return None
コード例 #16
0
ファイル: models.py プロジェクト: m3wolf/professor_oak
 def structure_url(self):
     """Return an image URL for this object's name.

     When ``settings.CHEMSPIDER_KEY`` is not configured a fixed placeholder
     image URL is returned. Otherwise ChemSpider is searched for ``self.name``
     and the first hit's image URL is returned, or an empty string when the
     search has no results.
     """
     from chemspipy import ChemSpider
     try:
         cs_key = settings.CHEMSPIDER_KEY
     except AttributeError:
         url = 'http://discovermagazine.com/~/media/Images/Zen%20Photo/N/nanoputian/3487.gif'
     else:
         cs = ChemSpider(cs_key)
         IUPAC = self.name
         search_results = cs.simple_search(IUPAC)
         try:
             url = search_results[0].image_url
         except IndexError:
             url = ""
     # The return used to sit inside the IndexError handler, so every other
     # path fell off the end of the function and returned None.
     return url
コード例 #17
0
	def structure_url(self):
		"""Return an image URL for this object's name.

		Without a configured ``settings.CHEMSPIDER_KEY`` a fixed placeholder
		image URL is returned. Otherwise the first ChemSpider hit for
		``self.name`` supplies the URL, or "" when there are no hits.
		"""
		from chemspipy import ChemSpider
		try:
			cs_key = settings.CHEMSPIDER_KEY
		except AttributeError:
			# No key configured: fall back to the placeholder picture.
			return 'http://discovermagazine.com/~/media/Images/Zen%20Photo/N/nanoputian/3487.gif'

		client = ChemSpider(cs_key)
		compound_name = self.name
		hits = client.simple_search(compound_name)
		try:
			return hits[0].image_url
		except IndexError:
			return ""
コード例 #18
0
def database_setup():
    """
    Download 2D & 3D molecule structure
    from ChemSpider server to create a database

    For every id returned by ``get_id()`` the compound's 2D and 3D mol data
    are written to ``<id>_2d.txt`` and ``<id>_3d.txt`` inside the ``DATABASE``
    directory. The directory must not exist yet; if it does, a message is
    printed and the interpreter exits. The working directory is changed to
    ``DATABASE`` while downloading and moved back up one level afterwards,
    also when a download or write fails.
    """

    from chemspipy import ChemSpider

    # compile id list for calling molecules
    id_list = get_id()

    directory = DATABASE
    # make directory database_chemspider/ if needed
    if os.path.isdir(directory):
        print('Database folder already existed! Aborting... \n '
              'Please remove the folder and rerun')
        # Same effect as the site-provided exit() without depending on it.
        raise SystemExit
    os.mkdir(directory)

    print('downloading..')
    os.chdir(directory)  # change dir to database_chemspider/
    try:
        # access API key
        cs = ChemSpider('text')

        # go through each id
        for id_chemspider in id_list:
            file_2d = str(id_chemspider) + '_2d.txt'
            file_3d = str(id_chemspider) + '_3d.txt'
            if os.path.exists(file_2d):
                # pass if id already exist
                print('ID ' + str(id_chemspider) + ' already existed')
                continue

            # access molecule data
            c = cs.get_compound(id_chemspider)

            # write 2d coord and bond data; 'with' closes the file even on error
            with open(file_2d, 'w') as f:
                f.write(c.mol_2d)

            # write 3d coord and bond data
            with open(file_3d, 'w') as f:
                f.write(c.mol_3d)
    finally:
        # Always step back out, so a failure does not leave the process
        # stranded inside the database directory.
        os.chdir('../')
コード例 #19
0
ファイル: modelforms.py プロジェクト: zhaojhao/DRP
 def __init__(self, user, *args, **kwargs):
     """Initialise the form and limit the 'labGroup' choices to the given user's lab groups."""
     super(CompoundForm, self).__init__(*args, **kwargs)
     self.compound = None
     self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)

     lab_group_field = self.fields['labGroup']
     lab_group_field.queryset = user.labgroup_set.all()
     # Drop the empty choice when the user belongs to at least one lab group.
     if user.labgroup_set.all().exists():
         lab_group_field.empty_label = None
コード例 #20
0
ファイル: views.py プロジェクト: woodenchopsticks/Pennapps
def index(req):
    """Render the interpreter page, or convert a POSTed chemical to LaTeX.

    GET (any non-POST) requests render "chemInterpreter/index.html".

    POST requests carry a JSON body with the keys "chemType" and "value":

    * chemType "compound": ChemSpider is searched and the LaTeX for at most
      the first four hits is returned as ``{"latex": ...}``.
    * chemType "element": the body's "name" and the first four entries of
      "value" are passed to ``textToLatex`` and returned as plain text.
    * anything else: an empty HTTP 400 response.
    """
    if req.method != "POST":
        return render(req, "chemInterpreter/index.html")

    print("POST METHOD")
    body = json.loads(req.body)
    print(body)
    print(body["chemType"])
    chemType = body["chemType"]
    values = body["value"]
    print(chemType == "compound")

    if chemType == "compound":
        print("in this function")
        CS = ChemSpider(security_token)
        # NOTE(review): the whole decoded body (a dict) is used as the search
        # query; it looks like a single field was intended — confirm.
        current_chem_symbol = CS.search(body)
        print(current_chem_symbol)
        # Guard the debug print so an empty search does not raise IndexError.
        if current_chem_symbol:
            print(current_chem_symbol[0].common_name)
        # NOTE(review): 'molecularFormula' is kept as written; current
        # ChemSpiPy documents 'molecular_formula' — confirm the attribute name.
        returned_responses = [formulaToLatex(c.molecularFormula) for c in current_chem_symbol][:4]
        # Was 'returned_resposnes', an undefined name that raised NameError.
        return JSONResponse({"latex": str(returned_responses)})

    if chemType == "element":
        name = body["name"]
        print("arrived at function")
        for value in values[:4]:
            print(value)
        print(name)
        latex = textToLatex(name, values[0], values[1], values[2], values[3])
        print(latex)
        return HttpResponse(latex, content_type="text/plain")

    # Unknown chemType: a view has to return a response object, not None.
    return HttpResponse(status=400)
コード例 #21
0
def get_chemspider_structure(csid):
    """
    Fetch a compound's 3D mol data from ChemSpider, convert it to PDB with
    hydrogens added, write it to '<csid>.pdb' and return that file name.
    """
    token = 'a03b1636-afc3-4204-9a2c-ede27680577c'  # XXX
    compound = ChemSpider(token).get_compound(csid)

    # Converter from mol input to pdb output.
    converter = ob.OBConversion()
    converter.SetInAndOutFormats('mol', 'pdb')

    molecule = ob.OBMol()
    converter.ReadString(molecule, compound.mol_3d)
    molecule.AddHydrogens()

    pdbpath = '{}.pdb'.format(csid)
    with open(pdbpath, 'w') as pdb_file:
        pdb_file.write(converter.WriteString(molecule))
    return pdbpath
コード例 #22
0
def getChemspiderCompounds(token, list, pref, delim="_", longNames=True, onlyLettersDigits=False):
    """Fetch ChemSpider compounds and return ``(molecules, names)`` as parallel lists.

    Each molecule is built from the compound's SMILES with hydrogens added.
    Each name is ``pref + delim + id``; with ``longNames`` set, ``delim`` and
    the ASCII-encoded common name are appended, and that common name is
    passed through ``leftOnlyLettersDigits`` when ``onlyLettersDigits`` is set.
    Every name is printed as it is built.
    """
    client = ChemSpider(token)
    molecules, names = [], []
    for chsId in list:
        compound = client.get_compound(chsId)
        label = pref + delim + str(chsId)
        if longNames:
            short_name = compound.common_name.encode("ascii", "ignore")
            if onlyLettersDigits:
                short_name = leftOnlyLettersDigits(short_name)
            label = label + delim
            label = label + short_name
        print(label)
        ascii_smiles = compound.smiles.encode("ascii", "ignore")
        with_hydrogens = Chem.AddHs(Chem.MolFromSmiles(ascii_smiles))
        molecules.append(with_hydrogens)
        names.append(label)
    return molecules, names
コード例 #23
0
ファイル: models.py プロジェクト: haidnguyen0909/ms2ldaviz
 def get_image_url(self):
     """Return a ChemSpider image URL for this document, or None when none can be found.

     The decoded metadata is checked for a 'csid' entry first. Failing that, its
     'InChIKey' entry is searched on ChemSpider; the first hit's csid is written
     back into the metadata and the object is saved.
     """
     md = jsonpickle.decode(self.metadata)

     # A csid is already recorded: build the URL locally.
     if 'csid' in md:
         url_prefix = 'http://www.chemspider.com/ImagesHandler.ashx?id='
         return url_prefix + str(self.csid)

     # No csid and no InChIKey: no image.
     if 'InChIKey' not in md:
         return None

     # Resolve the InChIKey to a csid through ChemSpider.
     from chemspipy import ChemSpider
     client = ChemSpider('b07b7eb2-0ba7-40db-abc3-2a77a7544a3d')
     hits = client.search(md['InChIKey'])
     if not hits:
         return None

     # Remember the csid for next time, then hand back the image URL.
     first_hit = hits[0]
     md['csid'] = first_hit.csid
     self.metadata = jsonpickle.encode(md)
     self.save()
     return first_hit.image_url
コード例 #24
0
ファイル: models.py プロジェクト: sdrogers/ms2ldaviz
 def get_image_url(self):
     """Return a ChemSpider image URL for this document, or None when none can be found.

     The decoded metadata is checked for a 'csid' entry first. Failing that, an
     InChIKey entry ('InChIKey' or 'inchikey') is searched on ChemSpider; the
     first hit's csid is written back into the metadata and the object is saved.
     """
     md = jsonpickle.decode(self.metadata)

     # A csid is already recorded: build the URL locally.
     if 'csid' in md:
         url_prefix = 'http://www.chemspider.com/ImagesHandler.ashx?id='
         return url_prefix + str(self.csid)

     # Neither a csid nor an InChIKey: no image.
     if 'InChIKey' not in md and 'inchikey' not in md:
         return None

     # Resolve the InChIKey (either spelling of the key) to a csid.
     from chemspipy import ChemSpider
     client = ChemSpider('b07b7eb2-0ba7-40db-abc3-2a77a7544a3d')
     hits = client.search(md.get('InChIKey', md.get('inchikey')))
     if not hits:
         return None

     # Remember the csid for next time, then hand back the image URL.
     first_hit = hits[0]
     md['csid'] = first_hit.csid
     self.metadata = jsonpickle.encode(md)
     self.save()
     return first_hit.image_url
コード例 #25
0
ファイル: smilesSearch.py プロジェクト: Mouihena/casfinder
def smiles2cas(smiles_input):
    """Look up CAS registry numbers for a SMILES string via ChemSpider.

    ChemSpider is searched for ``smiles_input`` and the last hit's csid is
    used to load the ChemSpider search page, from which the "RN" synonym links
    are scraped.

    :param smiles_input: SMILES string to search for.
    :return: list with one entry per "RN" link found; each entry is the list
        of double-quoted substrings extracted from that link's href. An empty
        list is returned when the search has no hits.
    """
    myToken = 'a1d50aa3-6729-49df-a3e1-cd66240fab22'
    cs = ChemSpider(security_token=myToken)

    # Keep the last hit, as before. With no hits the old code crashed on an
    # unbound variable; now there is simply nothing to report.
    last_hit = None
    for last_hit in cs.search(smiles_input):
        pass
    if last_hit is None:
        return []
    res = str(last_hit.csid)

    http = requests.session()
    url = 'http://www.chemspider.com/MassSpecApi.asmx/GetExtendedCompoundInfoArray'
    params = {'token': myToken}
    # The response is not used; the call is kept for its effect on the session.
    http.post(url, data=params)

    url_search = 'http://www.chemspider.com/Search.aspx?q=' + res
    r = http.get(url_search)
    soup = bs4.BeautifulSoup(r.text, "html.parser")
    hrefs = [a.attrs.get('href') for a in soup.select('div.syn a[title="RN"]')]

    # Pull every double-quoted substring out of each href.
    return [re.findall(r"\"(.+?)\"", href) for href in hrefs]
コード例 #26
0
ファイル: chemspider_utils.py プロジェクト: kokellab/klgists
class SpiderRecovery:
    """Recover SMILES strings from compound names by searching ChemSpider."""

    def __init__(self, chemspider_api_key: str):
        """Create the ChemSpider client and compile the stereo-descriptor pattern.

        :param chemspider_api_key: API key passed to ``ChemSpider``.
        """
        # The client used to be overwritten with None right after creation,
        # which made every later search fail with AttributeError.
        self._cs = ChemSpider(chemspider_api_key)
        # R/S/E/Z (either case), '+' or '-' in parentheses, or R/S/E/Z followed
        # by a hyphen, space or opening parenthesis.
        self._has_stero = re.compile(
            r'(?:\([RSrsEZez+\-]\))|(?:[RSrsEZez][- \(])')

    def recover_spider(self, name: str) -> Optional[str]:
        """Make a best-effort attempt to recover a SMILES string from a compound name.

        Errs slightly on the side of failure.

        * Exactly one search result: its SMILES is returned.
        * Several results that share the first 14 InChIKey characters (the
          same connectivity): if the name does not match the stereo-descriptor
          pattern, the compound is assumed to have no defined stereocenters and
          the single SMILES without '@', '/' or '\\' is returned. A warning is
          issued if more than one such SMILES exists.
        * Anything else: None.

        :param name: compound name to search for.
        :return: the SMILES string if it was found unambiguously, else None.
        """
        results = self._cs.search(name)

        if len(results) == 1:
            return results[0].smiles

        if len(results) > 0:  # try to recover if they're just enantiomers
            connectivities = {result.inchikey[0:14] for result in results}
            # NOTE(review): .match() only tests the start of the name, while
            # the intent appears to be "name contains a descriptor" — confirm
            # before switching to .search().
            if len(connectivities) == 1 and self._has_stero.match(name) is None:
                no_sterocenters = {
                    result.smiles
                    for result in results if '@' not in result.smiles and
                    '/' not in result.smiles and '\\' not in result.smiles
                }
                if len(no_sterocenters) == 1:
                    return next(iter(no_sterocenters))
                if len(no_sterocenters) > 1:
                    warnings.warn(
                        "There are somehow {} compounds with the same connectivity and no defined sterocenters for {}"
                        .format(len(no_sterocenters), name))

        return None  # give up

    def recover_spiders(
            self,
            names: Iterable[str],
            sleep_seconds: float = 0.1) -> Iterator[Tuple[str, str]]:
        """Yield ``(name, smiles)`` for every name whose SMILES could be recovered.

        The pairs can be made into a dict. After each lookup the generator
        sleeps for ``sleep_seconds`` to throttle requests.

        :param names: compound names to look up.
        :param sleep_seconds: pause after every lookup, in seconds.
        """
        for name in names:
            smiles = self.recover_spider(name)
            time.sleep(sleep_seconds)  # don't annoy the admins!
            if smiles is not None:
                yield name, smiles
コード例 #27
0
ファイル: chemspider_utils.py プロジェクト: kokellab/klgists
class ChemspiderSearcher:
    """Look up compound names on ChemSpider, keeping only unambiguous hits."""

    def __init__(self, api_key: str):
        """Create the ChemSpider client from ``api_key``."""
        self.cs = ChemSpider(api_key)

    def chemspider_names(
        self,
        names: Iterable[str],
        partial_dict: Optional[Mapping[str, "chemspipy.objects.Compound"]] = None,
        sleep_secs_between: float = 0.1
    ) -> Mapping[str, "chemspipy.objects.Compound"]:
        """Build a dictionary mapping compound names to unique ChemSpider hits.

        ``partial_dict`` is used as a starting point and is not modified; names
        it already contains are not searched again. The remaining names are
        searched once each, in the order they are first seen in ``names``.
        A warning is issued for each name that has multiple or no hits, and
        such names are left out of the result.
        Immediately pickling the fetched results may be a good idea.

        Example usage:
            hits = searcher.chemspider_names(['Trichostatin A', 'Oxamflatin', 'Vinblastine'])
            for name, result in hits.items():
                print("{} → {}".format(result.csid, result.smiles))
        Result:
            UserWarning: Multiple (2) hits found for Oxamflatin
            392575 → C[C@H](/C=C(\\C)/C=C/C(=O)NO)C(=O)c1ccc(cc1)N(C)C
            12773 → CC[C@@]1(C[C@H]2C[C@@](c3c(c4ccccc4[nH]3)CCN(C2)C1)(c5cc6c(cc5OC)N([C@@H]7[C@]68CCN9[C@H]8[C@@](C=CC9)([C@H]([C@@]7(C(=O)OC)O)OC(=O)C)CC)C)C(=O)OC)O

        :param names: compound names to look up.
        :param partial_dict: already-known name → compound entries; None (the
            default) means start from an empty mapping.
        :param sleep_secs_between: pause after every search, in seconds.
        :return: a new dict with the entries of ``partial_dict`` plus every
            newly found unique hit.
        """
        def fetch(name: str) -> Optional["chemspipy.objects.Compound"]:
            """Return the single hit for ``name``, or None (with a warning) otherwise."""
            results = list(self.cs.search(name))  # blocks
            if len(results) == 1:
                return results[0]
            if not results:
                warnings.warn("No results found for {}".format(name))
            else:
                warnings.warn('Multiple ({}) hits found for {}'.format(
                    len(results), name))
            return None

        # dict() accepts any Mapping (not only ones with .copy()) and keeps the
        # caller's mapping untouched. A None default avoids a shared mutable
        # default argument.
        new_dict = dict(partial_dict) if partial_dict is not None else {}
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # fetch order is deterministic.
        pending = [name for name in dict.fromkeys(names) if name not in new_dict]
        for name in pending:
            got = fetch(name)
            time.sleep(sleep_secs_between)
            if got is not None:
                new_dict[name] = got
        return new_dict
コード例 #28
0
ファイル: chem_spider.py プロジェクト: lbuss/module-scripting
from chemspipy import ChemSpider

# Module-level ChemSpider client used by the lookup functions in this module.
# NOTE(review): the API key is hard-coded in source — confirm this is intended.
cs = ChemSpider('c48d4595-ead2-40e7-85c9-1e5d2a77754c')


def get_chem(query):
    """Return ``{'name': ..., 'smiles': ...}`` for the first ChemSpider hit, or None if there is none."""
    results = cs.search(query)
    if not results:
        return None
    # Only the top-ranked hit is used.
    top_hit = results[0]
    return {'name': top_hit.common_name, 'smiles': top_hit.smiles}


def get_smiles(query):
    """Return the SMILES string of the first ChemSpider hit for ``query``, or None if there is none."""
    results = cs.search(query)
    return results[0].smiles if results else None
コード例 #29
0
import CoolProp
from chemspipy import ChemSpider
from chemspipy_key import key  # private file with the key (DO NOT COMMIT!!)
import glob, json
# ChemSpider client authenticated with the key from the private chemspipy_key module.
cs = ChemSpider(key)

# Map from fluid name to ChemSpider ID.
# NOTE(review): presumably a fallback for fluids that cannot be resolved by a
# ChemSpider search - nothing in the visible part of the script reads it yet.
backup_map = {
    'Propyne': 6095,
    'R236EA': 71342,
    'R245ca': 62827,
    'trans-2-Butene': 56442,
    'Oxygen': 952,
    'Fluorine': 22932,
    'Hydrogen': 762,
    'Deuterium': 22931,
    'HFE143m': 66577,
    'SulfurHexafluoride': 16425,
    'R114': 13853215
}

# Make sure the key works: fetch a known compound (CSID 2157) and compare its
# InChIKey with the expected value.
# NOTE: this runs whenever the script is executed, and `assert` statements are
# stripped when Python runs with -O, which would skip the check.
c = cs.get_compound(2157)
assert (c.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYAW')

# Walk every fluid description in ../fluids and read the fluid name from the
# 'INFO' section of its JSON document.
for fname in glob.glob('../fluids/*.json'):
    with open(fname, 'r') as fp:
        jj = json.load(fp)

    fluid = jj['INFO']['NAME']
コード例 #30
0
    def process(self,
                input_text: str = "",
                input_file: str = "",
                output_file: str = "",
                output_file_sdf: str = "",
                sdf_append: bool = False,
                input_type: str = "",
                lang: str = "eng",
                paged_text: bool = False,
                format_output: bool = True,
                opsin_types: list = None,
                standardize_mols: bool = True,
                convert_ions: bool = True,
                write_header: bool = True,
                iob_format: bool = False,
                dry_run: bool = False,
                csv_delimiter: str = ";",
                normalize_text: bool = True,
                remove_duplicates: bool = False,
                annotate: bool = True,
                annotation_sleep: int = 2,
                chemspider_token: str = "",
                continue_on_failure: bool = False) -> OrderedDict:
        r"""
        Process the input text or file with ChemSpot.

        The text is optionally normalized, written to a temporary file and handed to ChemSpot. The recognized
        entities are then parsed, optionally converted to structures (OPSIN for names, a regex-based conversion
        for ions) and optionally annotated with compound IDs from PubChem and ChemSpider.

        Parameters
        ----------
        input_text : str
            String to be processed by ChemSpot. Takes precedence over `input_file` when both are given.
        input_file : str
            Path to file to be processed by ChemSpot.
        output_file : str
            File to write output in.
        output_file_sdf : str
            File to write SDF output in. SDF is from OPSIN converted entities.
        sdf_append : bool
            If True, append new molecules to existing SDF file or create new one if doesn't exist. SDF is from OPSIN converted entities.
        input_type : str
            | When empty, input (MIME) type will be determined from magic bytes.
            | Or you can specify "pdf", "pdf_scan", "image" or "text" and magic bytes check will be skipped.
        lang : str
            | Language which will Tesseract use for OCR. Available languages: https://github.com/tesseract-ocr/tessdata
            | Multiple languages can be specified with "+" character, i.e. "eng+bul+fra".
        paged_text : bool
            If True and `input_type` is "text" or `input_text` is provided, try to assign pages to chemical entities.
            ASCII control character 12 (Form Feed, '\f') is expected between pages.
        format_output : bool
            | If True, the value of "content" key of returned dict will be list of OrderedDicts.
            | If True and `output_file` is set, the CSV file will be written.
            | If False, the value of "content" key of returned dict will be None.
        opsin_types : list
            | List of ChemSpot entity types. Entities of types in this list will be converted with OPSIN. If you don't want
              to convert entities, pass empty list.
            | OPSIN is designed to convert IUPAC names to linear notation (SMILES etc.) so default value of `opsin_types`
              is ["SYSTEMATIC"] (these should be only IUPAC names).
            | ChemSpot entity types: "SYSTEMATIC", "IDENTIFIER", "FORMULA", "TRIVIAL", "ABBREVIATION", "FAMILY", "MULTIPLE"
        standardize_mols : bool
            If True, use molvs (https://github.com/mcs07/MolVS) to standardize molecules converted by OPSIN.
        convert_ions : bool
            If True, try to convert ion entities (e.g. "Ni(II)") to SMILES. Entities matching ion regex won't be converted
            with OPSIN. Ions whose charge cannot be parsed get empty "smiles", "inchi" and "inchikey" values.
        write_header : bool
            If True and if `output_file` is set and `format_output` is True, write a CSV header:
            "smiles", "bond_length", "resolution", "confidence", "learn", "page", "coordinates"
        iob_format : bool
            If True, output will be in IOB format.
        dry_run : bool
            If True, do not run ChemSpot; return the command line that would be executed as a single
            space-joined string.
        csv_delimiter : str
            Delimiter for output CSV file.
        normalize_text : bool
            If True, normalize text before performing NER. It is strongly recommended to do so, because without normalization
            can ChemSpot produce unpredictable results which cannot be parsed.
        remove_duplicates : bool
            If True, remove duplicated chemical entities. Note that some entities-compounds can have different names, but
            same notation (SMILES, InChI etc.). This will only remove entities with same names. Not applicable for IOB format.
        annotate : bool
            | If True, try to annotate entities in PubChem and ChemSpider. Compound IDs will be assigned by searching with
              each identifier, separately for entity name, SMILES etc.
            | If entity has InChI key yet, prefer it in searching.
            | If "*" is present in SMILES, skip annotation.
            | If textual entity has single result in DB when searched by name, fill in missing identifiers (SMILES etc.).
        annotation_sleep: int
            How many seconds to sleep between annotation of each entity. It's for preventing overloading of databases.
        chemspider_token : str
            Your personal token for accessing the ChemSpider API (needed for annotation). Make account there to obtain it.
        continue_on_failure : bool
            | If True, continue running even if ChemSpot returns non-zero exit code.
            | If False and error occurs, print it and return.

        Returns
        -------
        dict
            Keys:

            - stdout: str ... standard output from ChemSpot
            - stderr: str ... standard error output from ChemSpot
            - exit_code: int ... exit code from ChemSpot
            - content

              - list of OrderedDicts ... when `format_output` is True
              - None ... when `format_output` is False, or when ChemSpot failed and `continue_on_failure` is False

            - normalized_text : str ... the normalized text, or None when `normalize_text` is False

            When `dry_run` is True a str with the ChemSpot command line is returned instead of the dict.

        Raises
        ------
        ValueError
            If neither `input_text` nor `input_file` is set, or if the input type is not supported.
        UserWarning
            If the text is empty after normalization.
        RuntimeError
            If ChemSpot reports "OutOfMemoryError" on its standard error output.
        KeyError
            If the input is a PDF or an image and the TESSDATA_PREFIX environment variable is not set.
        """

        if opsin_types is None:
            opsin_types = ["SYSTEMATIC"]

        if input_text and input_file:
            input_file = ""
            self.logger.warning("Both 'input_text' and 'input_file' are set, but 'input_text' will be prefered.")
        elif not input_text and not input_file:
            raise ValueError("One of 'input_text' or 'input_file' must be set.")

        # The input type only matters for files; it is detected from the file when not given explicitly.
        if not input_type and not input_text:
            possible_input_types = ["pdf", "image", "text"]
            input_type = get_input_file_type(input_file)
            if input_type not in possible_input_types:
                raise ValueError("Input file type ({}) is not one of {}".format(input_type, possible_input_types))
        elif input_type and not input_text:
            possible_input_types = ["pdf", "pdf_scan", "image", "text"]
            if input_type not in possible_input_types:
                raise ValueError("Unknown 'input_type'. Possible 'input_type' values are {}".format(possible_input_types))

        # Non-textual inputs are turned into text first; from here on only 'input_text' is used for them.
        if input_type in ["pdf", "pdf_scan", "image"]:
            input_text, _ = get_text(input_file, input_type, lang=lang, tessdata_prefix=os.environ["TESSDATA_PREFIX"])
            input_file = ""

        if annotate and not chemspider_token:
            self.logger.warning("Cannot perform annotation in ChemSpider: 'chemspider_token' is empty.")

        # Only truthy per-call options override the instance-level options.
        options = ChainMap({k: v for k, v in {"iob_format": iob_format}.items() if v},
                           self.options_internal)
        output_file_temp = None

        commands, _, _ = self.build_commands(options, self._OPTIONS_REAL, self.path_to_binary)
        commands.insert(1, str(self.options_internal["max_memory"]))
        commands.append("-t")

        # NOTE: the NamedTemporaryFile objects below are kept in local variables on purpose. They are deleted
        # when closed, so they have to stay open until ChemSpot has read/written them.
        if normalize_text:
            normalizer = Normalizer(strip=True, collapse=True, hyphens=True, quotes=True, slashes=True, tildes=True, ellipsis=True)

            if input_file:
                with open(input_file, mode="r") as f:
                    input_text = f.read()

            input_text = normalizer(input_text)

            if not input_text:
                raise UserWarning("'input_text' is empty after normalization.")

            input_text = self.normalize_text(text=input_text)
            input_file_normalized = NamedTemporaryFile(mode="w", encoding="utf-8")
            input_file_normalized.write(input_text)
            input_file_normalized.flush()
            input_file = input_file_normalized.name
        else:
            if input_text:
                input_file_temp = NamedTemporaryFile(mode="w", encoding="utf-8")
                input_file_temp.write(input_text)
                input_file_temp.flush()
                input_file = input_file_temp.name

        commands.append(os.path.abspath(input_file))
        commands.append("-o")
        if format_output:
            # The raw ChemSpot output is parsed below, so it only needs to live in a temporary file.
            output_file_temp = NamedTemporaryFile(mode="w", encoding="utf-8")
            commands.append(os.path.abspath(output_file_temp.name))
        else:
            commands.append(os.path.abspath(output_file))

        if dry_run:
            return " ".join(commands)

        stdout, stderr, exit_code = common_subprocess(commands)

        if "OutOfMemoryError" in stderr:
            raise RuntimeError("ChemSpot memory error: {}".format(stderr))

        to_return = {"stdout": stdout, "stderr": stderr, "exit_code": exit_code, "content": None,
                     "normalized_text": input_text if normalize_text else None}

        if not continue_on_failure and exit_code > 0:
            self.logger.warning("ChemSpot error:")
            eprint("\n\t".join("\n{}".format(stderr).splitlines()))
            return to_return

        if not format_output:
            return to_return

        with open(output_file_temp.name, mode="r", encoding="utf-8") as f:
            output_chs = f.read()

        entities = self.parse_chemspot_iob(text=output_chs) if iob_format else self.parse_chemspot(text=output_chs)
        to_return["content"] = entities

        if remove_duplicates and not iob_format:
            # Keep the first occurrence of every entity name; set.add() returns None, so the
            # 'or' expression records the name as a side effect without filtering it out.
            seen = set()
            seen_add = seen.add
            to_return["content"] = [x for x in to_return["content"] if not (x["entity"] in seen or seen_add(x["entity"]))]

        assign_pages = input_type in ["pdf", "pdf_scan"] or paged_text
        if assign_pages:
            # Cumulative end offsets of the non-empty pages, used to look up the page of an entity by bisection.
            page_ends = []
            for page in input_text.split("\f"):
                if page.strip():
                    previous_end = page_ends[-1] if page_ends else 0
                    page_ends.append(previous_end + len(page) - 1)

        if opsin_types:
            if convert_ions:
                to_convert = [x["entity"] for x in to_return["content"] if x["type"] in opsin_types and not self.re_ion.match(x["entity"])]
            else:
                to_convert = [x["entity"] for x in to_return["content"] if x["type"] in opsin_types]

            if to_convert:
                opsin = OPSIN(verbosity=self.verbosity)
                opsin_converted = opsin.process(input=to_convert, output_formats=["smiles", "inchi", "inchikey"],
                                                standardize_mols=standardize_mols, output_file_sdf=output_file_sdf,
                                                sdf_append=sdf_append)
                # Consumed in entity order by next() in the loop below.
                opsin_converted = iter(opsin_converted["content"])
            else:
                self.logger.info("Nothing to convert with OPSIN.")

        if annotate:
            chemspider = ChemSpider(chemspider_token) if chemspider_token else None
            # PubChem lookups are best-effort: any of these errors just leaves the columns empty.
            pubchem_errors = (BadRequestError, NotFoundError, PubChemHTTPError, ResponseParseError, ServerError,
                              TimeoutError, PubChemPyError)

        for i, ent in enumerate(to_return["content"]):
            if assign_pages:
                ent["page"] = str(bisect.bisect_left(page_ends, int(ent["start"])) + 1)

            if convert_ions:
                match_ion = self.re_ion.match(ent["entity"])
                if match_ion:
                    match_ion = match_ion.groupdict()
                    match_charge = self.re_charge.search(match_ion["charge"])

                    # Start from an empty SMILES for every entity. Previously 'smiles' could be unbound, or
                    # still hold the value of a previous entity, when no branch below produced one.
                    smiles = ""
                    if match_charge:
                        match_charge = match_charge.groupdict()
                        if match_charge["roman"]:
                            smiles = "[{}+{}]".format(match_ion["ion"], len(match_charge["roman"]))
                        elif match_charge["digit"]:
                            if "+" in match_ion["charge"]:
                                smiles = "[{}+{}]".format(match_ion["ion"], match_charge["digit"])
                            elif "-" in match_ion["charge"]:
                                smiles = "[{}-{}]".format(match_ion["ion"], match_charge["digit"])
                        elif match_charge["signs"]:
                            smiles = "[{}{}{}]".format(match_ion["ion"], match_charge["signs"][0],
                                                       len(match_charge["signs"]))

                    mol = MolFromSmiles(smiles) if smiles else None
                    if mol:
                        inchi = MolToInchi(mol)
                        if inchi:
                            ent.update(OrderedDict(
                                [("smiles", smiles), ("inchi", inchi), ("inchikey", InchiToInchiKey(inchi))]))
                        else:
                            ent.update(OrderedDict([("smiles", smiles), ("inchi", ""), ("inchikey", "")]))
                    else:
                        # Unparsable charge or invalid SMILES: keep the identifier columns present but empty.
                        ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", "")]))
                else:
                    ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", "")]))

            if opsin_types and to_convert:
                if ent["entity"] in to_convert:
                    ent_opsin = next(opsin_converted)
                    ent.update(OrderedDict([("smiles", ent_opsin["smiles"]), ("inchi", ent_opsin["inchi"]),
                                            ("inchikey", ent_opsin["inchikey"]), ("opsin_error", ent_opsin["error"])]))
                elif convert_ions and self.re_ion.match(ent["entity"]):
                    ent.update(OrderedDict([("opsin_error", "")]))
                elif (convert_ions and not self.re_ion.match(ent["entity"])) or (not convert_ions and ent["entity"] not in to_convert):
                    ent.update(OrderedDict([("smiles", ""), ("inchi", ""), ("inchikey", ""), ("opsin_error", "")]))

            if annotate:
                self.logger.info("Annotating entity {}/{}...".format(i + 1, len(to_return["content"])))
                ent.update(OrderedDict([("pch_cids_by_inchikey", ""), ("chs_cids_by_inchikey", ""),
                                        ("pch_cids_by_name", ""), ("chs_cids_by_name", ""),
                                        ("pch_cids_by_smiles", ""), ("chs_cids_by_smiles", ""),
                                        ("pch_cids_by_inchi", ""), ("chs_cids_by_inchi", ""),
                                        ("pch_cids_by_formula", ""),
                                        ("pch_iupac_name", ""), ("chs_common_name", ""),
                                        ("pch_synonyms", "")]))

                # do "double-annotation": some entities can be found in only one DB, updated and then searched in second DB
                found_in_pch = False
                found_in_chs = False
                for _ in range(2):
                    results = []

                    # prefer InChI key
                    if "inchikey" in ent and ent["inchikey"]:
                        try:
                            results = get_compounds(ent["inchikey"], "inchikey")
                            if results:
                                if len(results) == 1:
                                    result = results[0]
                                    synonyms = result.synonyms
                                    if synonyms:
                                        ent["pch_synonyms"] = "\"{}\"".format("\",\"".join(synonyms))
                                    ent["pch_iupac_name"] = result.iupac_name
                                    if not found_in_chs:
                                        ent["smiles"] = result.canonical_smiles or ent["smiles"]
                                        ent["inchi"] = result.inchi or ent["inchi"]
                                        ent["inchikey"] = result.inchikey or ent["inchikey"]
                                ent["pch_cids_by_inchikey"] = "\"{}\"".format(",".join([str(c.cid) for c in results]))
                        except pubchem_errors:
                            pass

                        results = chemspider.search(ent["inchikey"]) if chemspider_token else []
                        if results:
                            if len(results) == 1:
                                result = results[0]
                                ent["chs_common_name"] = result.common_name
                                if not found_in_pch:
                                    ent["smiles"] = result.smiles or ent["smiles"]
                                    ent["inchi"] = result.stdinchi or ent["inchi"]
                                    ent["inchikey"] = result.stdinchikey or ent["inchikey"]
                            ent["chs_cids_by_inchikey"] = "\"{}\"".format(",".join([str(c.csid) for c in results]))
                    else:
                        # Each database is only searched while it has not yet produced a unique hit by name.
                        if not found_in_pch:
                            try:
                                results = get_compounds(ent["entity"] or ent["abbreviation"], "name")
                                if results:
                                    if len(results) == 1:
                                        found_in_pch = True
                                        result = results[0]
                                        synonyms = result.synonyms
                                        if synonyms:
                                            ent["pch_synonyms"] = "\"{}\"".format("\",\"".join(synonyms))
                                        # only update identifiers if they weren't found in second DB
                                        if not found_in_chs:
                                            ent["smiles"] = result.canonical_smiles or ent["smiles"]
                                            ent["inchi"] = result.inchi or ent["inchi"]
                                            ent["inchikey"] = result.inchikey or ent["inchikey"]
                                        ent["pch_iupac_name"] = result.iupac_name
                                    ent["pch_cids_by_name"] = "\"{}\"".format(",".join([str(c.cid) for c in results]))
                            except pubchem_errors:
                                pass

                        if not found_in_chs:
                            results = chemspider.search(ent["entity"] or ent["abbreviation"]) if chemspider_token else []
                            if results:
                                if len(results) == 1:
                                    found_in_chs = True
                                    result = results[0]
                                    if not found_in_pch:
                                        ent["smiles"] = result.smiles or ent["smiles"]
                                        ent["inchi"] = result.stdinchi or ent["inchi"]
                                        ent["inchikey"] = result.stdinchikey or ent["inchikey"]
                                    ent["chs_common_name"] = result.common_name
                                ent["chs_cids_by_name"] = "\"{}\"".format(",".join([str(c.csid) for c in results]))

                        for search_field, col_pch, col_chs in [("smiles", "pch_cids_by_smiles", "chs_cids_by_smiles"),
                                                               ("inchi", "pch_cids_by_inchi", "chs_cids_by_inchi"),
                                                               ("formula", "pch_cids_by_formula", "")]:
                            results_pch = []
                            results_chs = []

                            if search_field == "smiles" and "smiles" in ent and ent["smiles"] and "*" not in ent["smiles"]:
                                if not found_in_pch:
                                    try:
                                        results_pch = get_compounds(ent["smiles"], "smiles")
                                    except pubchem_errors:
                                        pass
                                if not found_in_chs:
                                    results_chs = chemspider.search(ent["smiles"]) if chemspider_token else []
                            elif search_field == "inchi" and "inchi" in ent and ent["inchi"]:
                                if not found_in_pch:
                                    try:
                                        results_pch = get_compounds(ent["inchi"], "inchi")
                                    except pubchem_errors:
                                        pass
                                if not found_in_chs:
                                    results_chs = chemspider.search(ent["inchi"]) if chemspider_token else []
                            elif search_field == "formula":
                                if not found_in_pch:
                                    try:
                                        results_pch = get_compounds(ent["entity"], "formula")
                                    except pubchem_errors:
                                        pass
                                # ChemSpider doesn't have search field for 'formula'

                            if results_pch:
                                ent[col_pch] = "\"{}\"".format(",".join([str(c.cid) for c in results_pch]))
                            if results_chs:
                                ent[col_chs] = "\"{}\"".format(",".join([str(c.csid) for c in results_chs]))

                            sleep(0.5)

                    sleep(annotation_sleep)

                    # A second pass is only useful when the first one found a unique hit by name,
                    # because only then were the identifiers of the entity filled in.
                    if not found_in_pch and not found_in_chs:
                        break

        if output_file:
            dict_to_csv(to_return["content"], output_file=output_file, csv_delimiter=csv_delimiter, write_header=write_header)

        return to_return
コード例 #31
0
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

from dotenv import load_dotenv
# Load the variables of a local .env file into the environment before they are read below.
load_dotenv()

# API credentials, read from the environment (None when a variable is not set).
DISCORD_TOKEN = os.getenv('DISCORD_TOKEN')
CHEMSPIDER_TOKEN = os.getenv('CHEMSPIDER_TOKEN')
WOLFRAM_TOKEN = os.getenv('WOLFRAM_TOKEN')

# Service clients shared by the bot commands.
cs = ChemSpider(CHEMSPIDER_TOKEN)
wolfram = wolframalpha.Client(WOLFRAM_TOKEN)

client = commands.Bot(command_prefix='!')

# Headless Chrome configuration; the binary and driver locations come from the environment.
op = webdriver.ChromeOptions()
op.binary_location = os.getenv('GOOGLE_CHROME_BIN')
op.add_argument('--headless')
op.add_argument('--no-sandbox')
# The switch is spelled '--disable-dev-shm-usage'; the previous spelling
# ('--disable-dev-sh-usage') is not a Chrome switch, so /dev/shm was still used.
op.add_argument('--disable-dev-shm-usage')

driver = webdriver.Chrome(executable_path=os.getenv('CHROMEDRIVER_PATH'),
                          chrome_options=op)
# for local testing purposes only; comment out when deployed to Heroku
#driver = webdriver.Firefox()
コード例 #32
0
ファイル: modelforms.py プロジェクト: zhaojhao/DRP
class CompoundForm(forms.ModelForm):

    """
    A form for users to add compounds to the compound guide.

    Forces a check against the chemspider database to ensure no spurious compounds make their way into the compound guide.
    """

    CAS_ID = forms.CharField(label='CAS ID', required=False)
    """Adding this field, not in the database, allows users to match compounds to a CAS_ID without us incurring issues for storing them."""
    CSID = forms.IntegerField(label='Chemspider ID', min_value=1, error_messages={
                              'required': 'This value must be set or selected'})
    """If the user already knows the right value for this it allows them to skip a step."""

    class Meta:
        fields = ('labGroup', 'abbrev', 'CSID',
                  'name', 'CAS_ID', 'chemicalClasses')
        model = Compound
        help_texts = {
            'abbrev': 'A local abbreviation by which the compound is known.',
            'name': 'A common or IUPAC name for the compound.',
            'CAS_ID': 'The CAS number for the compound. Optional.',
            'CSID': 'The Chemspider ID for the compound. If this is not included, a list will be provided for you to choose from.'
        }

    def __init__(self, user, *args, **kwargs):
        """
        Restrict the lab group choices to those of the given user.

        Also creates the ChemSpider client used by the validation and save
        methods, and resets the cached ChemSpider compound to None.
        """
        super(CompoundForm, self).__init__(*args, **kwargs)
        # ChemSpider search result matching the submitted CSID; populated by
        # clean_CSID once the CSID has been validated.
        self.compound = None
        self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)
        self.fields['labGroup'].queryset = user.labgroup_set.all()
        if user.labgroup_set.all().exists():
            # The user has at least one lab group, so do not offer the
            # empty choice.
            self.fields['labGroup'].empty_label = None

    def clean_CSID(self):
        """
        Check that the CSID is actually a valid id from chemspider.

        On success the first search result is cached on ``self.compound`` and
        the CSID is returned; otherwise a ValidationError with code
        ``invalid_csid`` is raised.
        """
        searchResults = self.chemSpider.simple_search(
            self.cleaned_data['CSID'])
        if(len(searchResults) < 1):
            raise ValidationError(
                'The CSID you have provided is invalid', code='invalid_csid')
        else:
            self.compound = searchResults[0]
        return self.cleaned_data['CSID']

    def clean(self):
        """
        Verify that the CSID, CAS_ID (where supplied) and name are consistent.

        When a name is given but no valid CSID is known, the CSID field is
        replaced by a radio select over (at most ten) ChemSpider matches.
        When a CSID is known but no name was given, the compound's common
        name is written into the form data as a suggestion and an error is
        attached to the name field so the user has to review it.

        Raises ValidationError with code ``no_compounds``,
        ``name_csid_conflict`` or ``name_cas_id_conflict`` when the supplied
        values cannot be reconciled.
        """
        self.cleaned_data = super(CompoundForm, self).clean()
        # Use a truthiness test rather than comparing against '': the key is
        # missing from cleaned_data when the CAS_ID field itself failed
        # validation, and the old "!= ''" test then went on to raise KeyError.
        # This also matches the truthiness check used further down.
        cas_id = self.cleaned_data.get('CAS_ID')
        if self.cleaned_data.get('name'):
            nameResults = self.chemSpider.simple_search(
                self.cleaned_data['name'])
            if cas_id:
                CAS_IDResults = self.chemSpider.simple_search(cas_id)
                compoundChoices = [
                    compound for compound in nameResults if compound in CAS_IDResults][0:10]
                # the CAS_ID always generates a more restrictive set
            else:
                compoundChoices = nameResults[0:10]
                # if the CAS_ID is not supplied, then we just create a subset
                # based on the name search alone

            if self.compound is None and len(compoundChoices) > 0:
                self.fields['CSID'] = forms.ChoiceField(choices=(
                    (choice.csid, choice.common_name) for choice in compoundChoices), widget=forms.widgets.RadioSelect)
                # in essence, if a CSID was not supplied, but the chemspider
                # search returned chemspider results, then we offer those
                # results to the user to make a selection.
                return self.cleaned_data
            elif self.compound is None:
                raise ValidationError(
                    'Your search terms failed to validate against the Chemspider database. Please contact a local administrator.', code='no_compounds')
            else:
                if self.compound not in nameResults:
                    raise ValidationError(
                        'The name provided was not valid for the CSID provided. Please change the entry, or contact your local administrator.', code='name_csid_conflict')
                elif cas_id and self.compound not in CAS_IDResults:
                    raise ValidationError(
                        'The CAS ID provided is not valid for the CSID provided. Remove, replace, or contact your local administrator.', code='name_cas_id_conflict')
                else:
                    return self.cleaned_data
        else:
            if self.compound is not None:
                # this is probably some of the most horrible code I have
                # written, but it is the only way to get this to work - Phil.
                data = self.data.copy()  # because otherwise the query dict is immutable
                # replace the data directly, as bad as that is...
                data['name'] = self.compound.common_name
                # manually input an error message which is less demanding (this
                # is actually canonical method)
                self._errors['name'] = self.error_class(
                    ['Please review this suggestion'])
                self.data = data  # override the old data
            return self.cleaned_data

    def save(self, commit=True):
        """
        Create (and if appropriate, save) the compound instance.

        The InChI, SMILES and molecular formula are fetched from chemspider
        for the instance's CSID and copied onto it before it is saved. With
        ``commit=False`` the unsaved instance is returned and the caller is
        responsible for saving it and its many-to-many data.
        """
        compound = super(CompoundForm, self).save(commit=False)
        csCompound = self.chemSpider.get_compound(compound.CSID)
        compound.INCHI = csCompound.inchi
        compound.smiles = csCompound.smiles
        compound.formula = csCompound.molecular_formula
        if commit:
            compound.save()
            self.save_m2m()
        return compound
コード例 #33
0
ファイル: make_descs.py プロジェクト: DajeRoma/clicc-flask
 def __init__(self, argv):
     """Remember the CAS input file and set up the ChemSpider client.

     ``argv`` is indexed like a command-line argument list: the element at
     position 1 is stored as ``self.CAS_file``.
     """
     # Personal ChemSpider API token used to authenticate the client.
     api_token = 'd1778a9f-c41f-41f6-920e-fc6d9ff739ca'
     cas_path = argv[1]
     self.CAS_file = cas_path
     self.cs = ChemSpider(api_token)
コード例 #34
0
ファイル: reimport_reactions.py プロジェクト: zhaojhao/DRP
    def handle(self, *args, **kwargs):
        """Handle the command call."""
        folder = kwargs['directory']
        start_at_reactions = kwargs['reactions']
        start_at_descriptors = kwargs['descriptors']
        start_at_quantities = kwargs['quantities']
        start_number = kwargs['start_number']
        delete_all = kwargs['delete_all']
        no_compound_prompts = kwargs['no_compound_prompts']
        start_at_delete = not (
            start_at_reactions or start_at_descriptors or start_at_quantities)

        if start_at_delete:
            self.stdout.write('Deleting reactions')
            if delete_all:
                PerformedReaction.objects.all().delete()
            else:
                with transaction.atomic():
                    with open(path.join(folder, 'performedReactions.tsv')) as reactions:
                        reader = csv.DictReader(reactions, delimiter='\t')
                        for i, r in enumerate(reader):
                            if start_at_delete and i < start_number:
                                continue
                            ref = convert_legacy_reference(r['reference'])
                            legacyID = r['id']
                            ps = PerformedReaction.objects.filter(
                                reference=ref)
                            if ps:
                                self.stdout.write(
                                    '{}: Deleting reaction with reference {}'.format(i, ref))
                                ps.delete()
                            ps = PerformedReaction.objects.filter(
                                reference=ref.lower())
                            if ps:
                                self.stdout.write(
                                    '{}: Deleting reaction with converted legacy reference {}'.format(i, ref))
                                ps.delete()
                            ps = PerformedReaction.objects.filter(
                                convertedLegacyRef=ref)
                            if ps:
                                self.stdout.write(
                                    '{}: Deleting reaction with converted legacy reference {}'.format(i, ref))
                                ps.delete()
                            ps = PerformedReaction.objects.filter(
                                legacyID=legacyID)
                            if ps:
                                self.stdout.write(
                                    '{}: Deleting reaction with legacy id {}'.format(i, legacyID))
                                ps.delete()

        if start_at_reactions or start_at_delete:
            warnings.simplefilter('error')
            with open(path.join(folder, 'performedReactions.tsv')) as reactions:
                self.stdout.write('Creating reactions')
                reader = csv.DictReader(reactions, delimiter='\t')
                for i, r in enumerate(reader):
                    if start_at_reactions and i < start_number:
                        continue
                    ref = convert_legacy_reference(r['reference'])
                    convertedLegacyRef = ref
                    ps = PerformedReaction.objects.filter(
                        convertedLegacyRef=convertedLegacyRef)
                    if ps.exists():
                        ref = '{}_{}'.format(ref, r['id'])
                        valid = False
                        notes = r[
                            'notes'] + ' Duplicate reference disambiguated with legacy id.'
                        for p in ps:
                            if p.convertedLegacyRef == p.reference:
                                p.valid = False
                                p.notes += u' Duplicate reference disambiguated with legacy id.'
                                p.reference = '{}_{}'.format(
                                    p.convertedLegacyRef, p.legacyID)
                                p.save(calcDescriptors=False)
                    else:
                        valid = bool(int(r['valid']))
                        notes = r['notes']

                    p = PerformedReaction(
                        reference=ref,
                        legacyRef=r['reference'],
                        convertedLegacyRef=convertedLegacyRef,
                        labGroup=LabGroup.objects.get(
                            title=r['labGroup.title']),
                        legacyID=r['id'],
                        notes=notes,
                        user=User.objects.get(username=r['user.username']),
                        valid=valid,
                        legacyRecommendedFlag=(
                            r['legacyRecommendedFlag'] == 'Yes'),
                        insertedDateTime=r['insertedDateTime'],
                        public=int(r['public'])
                    )
                    self.stdout.write(
                        '{}: Creating reaction with reference {}'.format(i, ref))
                    p.full_clean()
                    p.save(calcDescriptors=False)
        if start_at_delete or start_at_reactions or start_at_descriptors:
            with open(path.join(folder, 'performedReactions.tsv')) as reactions:
                self.stdout.write('Creating manual descriptors')
                reader = csv.DictReader(reactions, delimiter='\t')
                outValues = []
                outBoolValues = []
                purityValues = []
                temperatureValues = []
                timeValues = []
                pHValues = []
                preHeatStandingValues = []
                teflonValues = []
                slowCoolValues = []
                leakValues = []

                for i, r in enumerate(reader):
                    if start_at_descriptors and i < start_number:
                        continue
                    ref = convert_legacy_reference(r['reference'])

                    id = r['id']
                    self.stdout.write(
                        '{}: Reiterating for reaction with reference {}, legacyID {}'.format(i, ref, id))
                    p = PerformedReaction.objects.get(legacyID=id)
                    if r['duplicateOf.reference']:
                        convertedDupRef = convert_legacy_reference(
                            r['duplicateOf.reference'])
                        try:
                            p.duplicateOf = PerformedReaction.objects.get(
                                convertedLegacyRef=convertedDupRef)
                            p.save(calcDescriptors=False)
                        except PerformedReaction.DoesNotExist:
                            self.stderr.write('Reaction {} marked as duplicate of reaction {}, but the latter does not exist'.format(
                                ref, r['duplicateOf.reference']))
                            p.notes += 'Marked as duplicate of reaction with legacy reference {}, but it does not exist'.format(r[
                                                                                                                                'duplicateOf.reference'])
                            p.valid = False
                            p.save(calcDescriptors=False)
                        except PerformedReaction.MultipleObjectsReturned:
                            self.stderr.write('Reaction {} marked as duplicate of reaction {}, but more than one of the latter exists'.format(
                                ref, r['duplicateOf.reference']))
                            p.notes += 'Marked as duplicate of reaction with legacy reference {}, but more than one reaction with that reference exists'.format(r[
                                                                                                                                                                'duplicateOf.reference'])
                            p.valid = False
                            p.save(calcDescriptors=False)

                    outcomeValue = int(r['outcome']) if (
                        r['outcome'] in (str(x) for x in range(1, 5))) else None
                    try:
                        v = OrdRxnDescriptorValue.objects.get(
                            descriptor=outcomeDescriptor, reaction=p)
                        if v.value != outcomeValue:
                            v.value = outcomeValue
                            v.save()
                    except OrdRxnDescriptorValue.DoesNotExist:
                        outValue = outcomeDescriptor.createValue(
                            p, outcomeValue)
                        outValues.append(outValue)

                    value = True if (outcomeValue > 2) else False
                    try:
                        v = BoolRxnDescriptorValue.objects.get(
                            descriptor=outcomeBooleanDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except BoolRxnDescriptorValue.DoesNotExist:
                        outBoolValue = outcomeBooleanDescriptor.createValue(
                            p, value)
                        outBoolValues.append(outBoolValue)

                    value = int(r['purity']) if (
                        r['purity'] in ('1', '2')) else None
                    try:
                        v = OrdRxnDescriptorValue.objects.get(
                            descriptor=purityDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except OrdRxnDescriptorValue.DoesNotExist:
                        purityValue = purityDescriptor.createValue(p, value)
                        purityValues.append(purityValue)

                    value = (float(r['temp']) + 273.15) if (r['temp']
                                                            not in ('', '?')) else None
                    try:
                        v = NumRxnDescriptorValue.objects.get(
                            descriptor=temperatureDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except NumRxnDescriptorValue.DoesNotExist:
                        temperatureDescriptorValue = temperatureDescriptor.createValue(
                            p, value)
                        temperatureValues.append(temperatureDescriptorValue)

                    value = float(r['time']) * 60 if (r['time']
                                                      not in ['', '?']) else None
                    try:
                        v = NumRxnDescriptorValue.objects.get(
                            descriptor=timeDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except NumRxnDescriptorValue.DoesNotExist:
                        timeDescriptorValue = timeDescriptor.createValue(
                            p, value)
                        timeValues.append(timeDescriptorValue)

                    value = float(r['pH']) if (
                        r['pH'] not in ('', '?')) else None
                    try:
                        v = NumRxnDescriptorValue.objects.get(
                            descriptor=pHDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except NumRxnDescriptorValue.DoesNotExist:
                        pHDescriptorValue = pHDescriptor.createValue(p, value)
                        pHValues.append(pHDescriptorValue)

                    value = bool(r['pre_heat standing']) if (
                        r.get('pre_heat standing') not in ('', None)) else None
                    try:
                        v = NumRxnDescriptorValue.objects.get(
                            descriptor=preHeatStandingDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except NumRxnDescriptorValue.DoesNotExist:
                        preHeatStandingDescriptorValue = preHeatStandingDescriptor.createValue(
                            p, value)
                        preHeatStandingValues.append(
                            preHeatStandingDescriptorValue)

                    value = bool(int(r['teflon_pouch'])) if (
                        r.get('teflon_pouch') not in(None, '')) else None
                    try:
                        v = BoolRxnDescriptorValue.objects.get(
                            descriptor=teflonDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except BoolRxnDescriptorValue.DoesNotExist:
                        teflonDescriptorValue = teflonDescriptor.createValue(
                            p, value)
                        teflonValues.append(teflonDescriptorValue)

                    leak_string = r['leak']
                    if leak_string in (None, '', '?'):
                        value = None
                    elif leak_string.lower() == 'yes':
                        value = True
                    elif leak_string.lower() == 'no':
                        value = False
                    else:
                        raise RuntimeError(
                            "Unrecognized string '{}' in leak column".format(leak_string))
                    try:
                        v = BoolRxnDescriptorValue.objects.get(
                            descriptor=leakDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except BoolRxnDescriptorValue.DoesNotExist:
                        leakDescriptorValue = leakDescriptor.createValue(
                            p, value)
                        leakValues.append(leakDescriptorValue)

                    slow_cool_string = r['slow_cool']
                    if slow_cool_string in (None, '', '?'):
                        value = None
                    elif slow_cool_string.lower() == 'yes':
                        value = True
                    elif slow_cool_string.lower() == 'no':
                        value = False
                    else:
                        raise RuntimeError(
                            "Unrecognized string '{}' in slow_cool column".format(slow_cool_string))
                    try:
                        v = BoolRxnDescriptorValue.objects.get(
                            descriptor=slowCoolDescriptor, reaction=p)
                        if v.value != value:
                            v.value = value
                            v.save()
                    except BoolRxnDescriptorValue.DoesNotExist:
                        slowCoolDescriptorValue = slowCoolDescriptor.createValue(
                            p, value)
                        slowCoolValues.append(slowCoolDescriptorValue)

                    if len(outValues) > save_at_once:
                        self.stdout.write("Saving outValues...")
                        OrdRxnDescriptorValue.objects.bulk_create(outValues)
                        outValues = []
                        self.stdout.write("...saved")
                    if len(outBoolValues) > save_at_once:
                        self.stdout.write("Saving outBoolValues...")
                        BoolRxnDescriptorValue.objects.bulk_create(
                            outBoolValues)
                        outBoolValues = []
                        self.stdout.write("...saved")
                    if len(purityValues) > save_at_once:
                        self.stdout.write("Saving purityValues...")
                        OrdRxnDescriptorValue.objects.bulk_create(purityValues)
                        purityValues = []
                        self.stdout.write("...saved")
                    if len(temperatureValues) > save_at_once:
                        self.stdout.write("Saving temperatureValues...")
                        NumRxnDescriptorValue.objects.bulk_create(
                            temperatureValues)
                        temperatureValues = []
                        self.stdout.write("...saved")
                    if len(timeValues) > save_at_once:
                        self.stdout.write("Saving timeValues...")
                        NumRxnDescriptorValue.objects.bulk_create(timeValues)
                        timeValues = []
                        self.stdout.write("...saved")
                    if len(pHValues) > save_at_once:
                        self.stdout.write("Saving pHValues...")
                        NumRxnDescriptorValue.objects.bulk_create(pHValues)
                        pHValues = []
                        self.stdout.write("...saved")
                    if len(preHeatStandingValues) > save_at_once:
                        self.stdout.write("Saving preHeatStandingValues...")
                        NumRxnDescriptorValue.objects.bulk_create(
                            preHeatStandingValues)
                        preHeatStandingValues = []
                        self.stdout.write("...saved")
                    if len(teflonValues) > save_at_once:
                        self.stdout.write("Saving teflonValues...")
                        BoolRxnDescriptorValue.objects.bulk_create(
                            teflonValues)
                        teflonValues = []
                        self.stdout.write("...saved")
                    if len(leakValues) > save_at_once:
                        self.stdout.write("Saving leakValues...")
                        BoolRxnDescriptorValue.objects.bulk_create(leakValues)
                        leakValues = []
                        self.stdout.write("...saved")
                    if len(slowCoolValues) > save_at_once:
                        self.stdout.write("Saving slowCoolValues...")
                        BoolRxnDescriptorValue.objects.bulk_create(
                            slowCoolValues)
                        slowCoolValues = []
                        self.stdout.write("...saved")

                self.stdout.write("Saving all remaining values...")
                OrdRxnDescriptorValue.objects.bulk_create(outValues)
                BoolRxnDescriptorValue.objects.bulk_create(outBoolValues)
                OrdRxnDescriptorValue.objects.bulk_create(purityValues)
                NumRxnDescriptorValue.objects.bulk_create(temperatureValues)
                NumRxnDescriptorValue.objects.bulk_create(timeValues)
                NumRxnDescriptorValue.objects.bulk_create(pHValues)
                NumRxnDescriptorValue.objects.bulk_create(
                    preHeatStandingValues)
                BoolRxnDescriptorValue.objects.bulk_create(teflonValues)
                BoolRxnDescriptorValue.objects.bulk_create(leakValues)
                BoolRxnDescriptorValue.objects.bulk_create(slowCoolValues)

                outValues = []
                outBoolValues = []
                purityValues = []
                temperatureValues = []
                timeValues = []
                pHValues = []
                preHeatStandingValues = []
                teflonValues = []
                leakValues = []
                slowCoolValues = []
                self.stdout.write("...saved")

        with open(path.join(folder, 'compoundquantities.tsv')) as cqs, open(path.join(folder, 'compoundquantities_fixed.tsv'), 'a') as fixed_cqs:
            self.stdout.write('Creating or updating compound quantities')
            reader = csv.DictReader(cqs, delimiter='\t')
            writer = csv.DictWriter(
                fixed_cqs, reader.fieldnames + ['compound.old_abbrev'], delimiter='\t')
            if not (start_at_quantities and start_number > 0):
                writer.writeheader()
            quantities = []
            cs = ChemSpider(settings.CHEMSPIDER_TOKEN)
            for i, r in enumerate(reader):
                if start_at_quantities and (i < start_number):
                    continue
                if not (r['compound.abbrev'] or r['compoundrole.name'] or r['amount']):
                    # this is just a blank entry
                    continue

                legacyID = r['reaction.id']
                reaction = PerformedReaction.objects.get(legacyID=legacyID)
                compound_abbrev = r['compound.abbrev']

                correct_abbrev = reagent_dict[
                    compound_abbrev] if compound_abbrev in reagent_dict else compound_abbrev

                compound_found = False
                while correct_abbrev and not compound_found:
                    try:
                        compound = Compound.objects.get(
                            abbrev=correct_abbrev, labGroup=reaction.labGroup)
                        compound_found = True
                        r['compound.old_abbrev'] = r['compound.abbrev']
                        r['compound.abbrev'] = correct_abbrev
                    except Compound.DoesNotExist:
                        if no_compound_prompts:
                            correct_abbrev = ''
                        else:
                            self.stderr.write(
                                'Could not find compound with abbreviation {}. Checking chemspider...'.format(correct_abbrev))
                            results = cs.simple_search(correct_abbrev)
                            if len(results) != 1:
                                CSID = raw_input(
                                    'Could not find unique compound with abbreviation {}. Do you know the CSID? '.format(correct_abbrev))
                                if CSID.isdigit():
                                    results = cs.simple_search(CSID)
                            if len(results) == 1:
                                compound = None
                                try:
                                    compound = Compound.objects.get(
                                        CSID=results[0].csid, labGroup=reaction.labGroup)
                                except Compound.DoesNotExist:
                                    pass
                                user_response = None
                                while user_response is None:
                                    if compound is None:
                                        user_verification = raw_input('Found unique compound with CSID {} and name {} for abbreviation {}. This is NOT IN THE COMPOUND GUIDE. Is this correct? (y/n): '.format(
                                            results[0].csid, results[0].common_name, correct_abbrev))
                                    else:
                                        user_verification = raw_input('Found unique compound with CSID {}, name {}, and abbreviation {} for abbreviation {} in the compound guide. Is this correct? (y/n): '.format(
                                            compound.CSID, compound.name, compound.abbrev, correct_abbrev))
                                    if user_verification and user_verification.lower()[0] == 'y':
                                        user_response = True
                                    elif user_verification and user_verification.lower()[0] == 'n':
                                        user_response = False
                                if user_response:
                                    if compound is not None:
                                        correct_abbrev = compound.abbrev
                                        reagent_dict[
                                            compound_abbrev] = correct_abbrev
                                        continue
                                    else:
                                        self.stderr.write('Creating compound with CSID {}, abbrevation {}, name {}'.format(
                                            results[0].csid, correct_abbrev, results[0].common_name))
                                        c = Compound(
                                            CSID=results[0].csid, labGroup=reaction.labGroup, abbrev=correct_abbrev)
                                        try:
                                            c.csConsistencyCheck()
                                            c.save(invalidateReactions=False)
                                            continue
                                        except ValidationError:
                                            c.delete()
                                            raise

                            self.stderr.write(
                                'Could not get unambiguous chemspider entry for abbreviation {}'.format(correct_abbrev))
                            self.stderr.write('Unknown Reactant {} with amount {} {} in reaction {}'.format(
                                r['compound.abbrev'], r['amount'], r['unit'], r['reaction.reference']))
                            correct_abbrev = raw_input(
                                'What is the correct abbreviation for this? ')
                            reagent_dict[compound_abbrev] = correct_abbrev

                if compound_found:
                    self.stdout.write('{}: Creating quantity for compound {} and reaction {}'.format(
                        i, compound.abbrev, reaction.reference))
                    if r['compound.abbrev'] in ('water', 'H2O'):
                        r['density'] = 1
                    try:
                        mw = NumMolDescriptorValue.objects.get(
                            compound=compound, descriptor__heading='mw').value
                    except NumMolDescriptorValue.DoesNotExist:
                        compound.save(invalidateReactions=False)
                        mw = NumMolDescriptorValue.objects.get(
                            compound=compound, descriptor__heading='mw').value

                    if r['compound.old_abbrev'] is not None and r['compound.old_abbrev'] != r['compound.abbrev']:
                        reaction.notes += ' Compound abbreviation {} changed to {}'.format(
                            r['compound.old_abbrev'], r['compound.abbrev'])
                        reaction.save(calcDescriptors=False)
                    if r['compoundrole.name'] == 'pH':
                        reaction.notes += ' pH adjusting reagent used: {}, {}{}'.format(
                            compound, r['amount'], r['unit'])
                        reaction.save(calcDescriptors=False)
                    else:
                        compoundrole = None
                        while compoundrole is None and r['compoundrole.name'] in (None, '', '?'):
                            if r['compoundrole.name'] in (None, '', '?'):
                                classes = compound.chemicalClasses.all()
                                if classes.count() > 1:
                                    self.stderr.write(
                                        '{} has more than one chemical class: {}'.format(compound, classes))
                                    role_label = raw_input('Which is the correct role for reagent {} in reaction {} with amount {} {}'.format(
                                        compound, reaction, r['amount'], r['unit']))
                                elif classes.count() == 0:
                                    self.stderr.write(
                                        '{} has no chemical classes'.format(compound))
                                    role_label = raw_input(
                                        'What chemical class does {} belong to? '.format(compound))
                                    cc = ChemicalClass.objects.get(
                                        label=role_label)
                                    compound.chemicalClasses.add(cc)
                                    # Sanity check
                                    assert(
                                        compound.chemicalClasses.all().count() == 1)
                                else:  # count == 1
                                    role_label = classes[0].label
                                self.stderr.write('No reaction role listed for reagent {} with amount {} {} in reaction {}. '
                                                  'Using chemical class {}'.format(compound, reaction, r['amount'], r['unit'], role_label))
                                r['compoundrole.name'] = role_label
                            else:
                                role_label = r['compoundrole.name']
                            if not role_label:
                                reaction.notes += ' No role for reactant {} with amount {} {}'.format(
                                    r['compound.abbrev'], r['amount'], r['unit'])
                                reaction.save(calcDescriptors=False)
                            else:
                                try:
                                    compoundrole = CompoundRole.objects.get(
                                        label=role_label)
                                except CompoundRole.DoesNotExist:
                                    user_response = None
                                    if role_label in role_dict:
                                        new_role_label = role_dict[role_label]
                                    else:
                                        while user_response is None:
                                            user_verification = raw_input(
                                                'Compound role {} does not exist. Would you like to add it? ')
                                            if user_verification and user_verification.lower()[0] == 'y':
                                                user_response = True
                                            elif user_verification and user_verification.lower()[0] == 'n':
                                                user_response = False
                                        if user_response:
                                            compoundrole = CompoundRole.objects.create(
                                                label=role_label)
                                        else:
                                            new_role_label = raw_input(
                                                'What should this label be? ')
                                            role_dict[
                                                role_label] = new_role_label
                                            r['compoundrole.name'] = new_role_label

                                self.stdout.write('\tadding {} with role {} to {}'.format(
                                    compound.abbrev, role_label, reaction.reference))
                                if r['amount'] in ('', '?'):
                                    amount = None
                                    reaction.notes += ' No amount for reactant {} with role {}'.format(
                                        r['compound.abbrev'], r['compoundrole.name'])
                                    reaction.save(calcDescriptors=False)
                                elif r['unit'] == 'g':
                                    amount = float(r['amount']) / mw
                                elif r['unit'] == 'd' or r['unit'] == 'mL':
                                    valid_density = False
                                    while not valid_density:
                                        if compound.abbrev in density_dict:
                                            r['density'] = density_dict[
                                                compound.abbrev]
                                        try:
                                            density = float(r['density'])
                                            valid_density = True
                                        except (TypeError, ValueError):
                                            self.stderr.write("Density '{}' cannot be converted to float. (Compound {} with amount {} {} in reaction {})".format(
                                                r['density'], compound, r['amount'], r['unit'], reaction))
                                            r['density'] = raw_input(
                                                'What is the density? ')
                                    density_dict[compound.abbrev] = r[
                                        'density']
                                    if r['unit'] == 'd':
                                        amount = float(
                                            r['amount']) * 0.0375 * density / mw
                                    elif r['unit'] == 'mL':
                                        amount = float(
                                            r['amount']) * density / mw
                                else:
                                    raise RuntimeError('invalid unit entered')
                                # convert to millimoles
                                if amount is not None:
                                    amount = (amount * 1000)
                                cqq = CompoundQuantity.objects.filter(
                                    compound=compound, reaction=reaction)
                                if cqq.exists():
                                    cqq.delete()

                                quantity = CompoundQuantity(
                                    compound=compound, reaction=reaction, role=compoundrole, amount=amount)
                                quantities.append(quantity)

                            if len(quantities) > save_at_once:
                                self.stdout.write('Saving...')
                                CompoundQuantity.objects.bulk_create(
                                    quantities)
                                quantities = []

                else:
                    self.stderr.write('Unknown Reactant {} with amount {} {} in reaction {}'.format(
                        r['compound.abbrev'], r['amount'], r['unit'], r['reaction.reference']))
                    reaction.notes += ' Unknown Reactant {} with amount {} {}'.format(
                        r['compound.abbrev'], r['amount'], r['unit'])
                    reaction.valid = False
                    reaction.save(calcDescriptors=False)

                writer.writerow(r)

        self.stdout.write('Saving...')
        CompoundQuantity.objects.bulk_create(quantities)
        quantities = []
コード例 #35
0
ファイル: collect_smiles.py プロジェクト: yyfdemajia/Scripts
        description="Script to obtain SMILES for a solutes in a list")
    argparser.add_argument('-db', '--db', help="the molecule database")
    argparser.add_argument('-solvent',
                           '--solvent',
                           help="the solvent",
                           default="water")
    argparser.add_argument('-solutes', '--solutes', help="the list of solutes")
    args = argparser.parse_args()

    db = dblib.SolvDb(filename=args.db, type="abs", filehandle="^0")
    solutes = [s.strip() for s in open(args.solutes, 'r').readlines()]

    if os.getenv("SPIDERKEY") is None:
        print "SPIDERKEY environmental variable not set! Exit."
        quit()
    cs = ChemSpider(os.getenv("SPIDERKEY"))

    # Loop over all the database entries in the solute lists
    n = 0
    for entry in db.itersolutelist(args.solvent, solutes):
        if os.path.exists(entry.FileHandle + ".smi"): continue
        hits = cs.search(entry.SoluteName)
        if len(hits) > 0:
            smi = hits[0].smiles
            with open(entry.FileHandle + ".smi", "w") as f:
                f.write("%s\n" % smi)
        else:
            print entry.SoluteName, entry.FileHandle
        n += 1

    print "Looped over %d solutes" % n
コード例 #36
0
    def handle(self, *args, **kwargs):
        """Import legacy reaction data from the TSV files in ``kwargs['directory']``.

        The import runs four stages in order, each reading one tab-separated
        file from that directory:

        1. ``performedReactionsNoDupsLower.tsv`` - reaction descriptor values.
        2. ``compound_labs.tsv`` - compounds (resolved through ChemSpider
           unless the row is flagged as custom).
        3. ``compound_chemicalClasses.tsv`` - chemical classes of compounds.
        4. ``compoundquantities.tsv`` - reactant quantities of each reaction.

        Raises:
            RuntimeError: if a reference matches more than one reaction, a
                compound cannot be resolved unambiguously, or a quantity uses
                an unsupported unit.
        """
        folder = kwargs['directory']
        # Disabled earlier stages, kept for reference: they wrote
        # performedReactionsNoDupsLower.tsv (lower-cased, de-duplicated
        # references) and created the PerformedReaction rows used below.
        #if not path.isfile(path.join(folder, 'performedReactionsNoDupsLower.tsv')):
            #self.stdout.write('Writing file with all references that were uppercase (now lower) and duplicate references disambiguated (arbitrarily)')
            #with open(path.join(folder, 'performedReactions.tsv')) as in_file, open(path.join(folder, 'performedReactionsNoDupsLower.tsv'), 'w') as out_file:
                #references = set()
                #reader = csv.DictReader(in_file, delimiter='\t')
                #writer = csv.DictWriter(out_file, delimiter='\t', fieldnames=reader.fieldnames)
                #writer.writeheader()

                #case_count = 0
                #valid_case_count = 0
                #dup_count = 0
                #for r in reader:
                    #ref = r['reference'].lower()
                    #if ref != r['reference']:
                        #self.stderr.write('Reference {} was not in lowercase. Converted.'.format(r['reference']))
                        #case_count += 1
                        #if r['valid'] == '1':
                            #valid_case_count += 1

                        #if ref in references:
                            #r['notes'] += ' Duplicated reference'
                            #r['valid'] = 0
                            #dup_count += 1
                            #i = 1
                            #new_ref = ref
                            #while new_ref in references:
                                #new_ref = '{}_dup{}'.format(ref, i)
                                #i += 1
                            #self.stderr.write('Reference {} duplicated {} times. Renamed and invalidated'.format(ref, i))
                            #ref = new_ref
                        #references.add(ref)
                        #r['reference'] = ref
                        #writer.writerow(r)
            #self.stderr.write('{} references converted to lowercase. {} were valid'.format(case_count, valid_case_count))
            #self.stderr.write('{} references with _dupX appended to remove duplicate reference'.format(dup_count))
        #with open(path.join(folder, 'performedReactionsNoDupsLower.tsv')) as reactions:
            #reader = csv.DictReader(reactions, delimiter='\t')
            #for r in reader:
                #if not PerformedReaction.objects.filter(reference=r['reference'].lower()).exists():
                    #p = PerformedReaction(
                        #reference = r['reference'],
                        #labGroup = LabGroup.objects.get(title=r['labGroup.title']),
                        #notes = r['notes'],
                        #user = User.objects.get(username=r['user.username']),
                        #valid = int(r['valid']),
                        #legacyRecommendedFlag=r['legacyRecommendedFlag']=='Yes',
                        #insertedDateTime=r['insertedDateTime'],
                        #public=int(r['public'])
                        #)
                    #self.stdout.write('Creating reaction with reference {}'.format(p.reference))
                    #p.validate_unique()
                    #p.save(calcDescriptors=False)
        self._import_reaction_descriptors(folder)
        self._import_compounds(folder)
        self._import_chemical_classes(folder)
        self._import_compound_quantities(folder)

    def _sync_descriptor_value(self, model, descriptor, reaction, value, pending):
        """Update the stored value of ``descriptor`` for ``reaction``, or queue a new one in ``pending``."""
        try:
            existing = model.objects.get(descriptor=descriptor, reaction=reaction)
            if existing.value != value:
                existing.value = value
                existing.save()
        except model.DoesNotExist:
            # New values are not saved here; they are bulk-created in batches.
            pending.append(descriptor.createValue(reaction, value))

    def _save_pending(self, pending):
        """Bulk-create every queued descriptor value and empty the queues in place."""
        self.stdout.write("Saving...")
        for model, values in pending:
            model.objects.bulk_create(values)
            del values[:]
        self.stdout.write("...saved")

    def _import_reaction_descriptors(self, folder):
        """Create or update the descriptor values of the reactions in performedReactionsNoDupsLower.tsv.

        Rows whose reference matches no reaction are skipped.

        Raises:
            RuntimeError: if a reference still matches several reactions after
                restricting the match to valid ones.
        """
        outValues = []
        outBoolValues = []
        purityValues = []
        temperatureValues = []
        timeValues = []
        pHValues = []
        preHeatStandingValues = []
        teflonValues = []
        # (model, unsaved values) pairs, in the order they are bulk-created.
        pending = [
            (OrdRxnDescriptorValue, outValues),
            (BoolRxnDescriptorValue, outBoolValues),
            (OrdRxnDescriptorValue, purityValues),
            (NumRxnDescriptorValue, temperatureValues),
            (NumRxnDescriptorValue, timeValues),
            (NumRxnDescriptorValue, pHValues),
            (NumRxnDescriptorValue, preHeatStandingValues),
            (BoolRxnDescriptorValue, teflonValues),
        ]

        with open(path.join(folder, 'performedReactionsNoDupsLower.tsv')) as reactions:
            reader = csv.DictReader(reactions, delimiter='\t')
            for r in reader:
                reference = r['reference'].lower()
                self.stdout.write('Reiterating for reaction with reference {}'.format(reference))
                ps = PerformedReaction.objects.filter(reference=reference)
                if ps.count() > 1:
                    # Several reactions share the reference: only valid ones count.
                    ps = ps.filter(valid=True)
                if not ps.exists():
                    continue
                if ps.count() > 1:
                    raise RuntimeError('{} has more than one reaction'.format(reference))
                p = ps[0]
                try:
                    p.duplicateOf = PerformedReaction.objects.get(reference=r['duplicateOf.reference'].lower())
                    p.save()
                except PerformedReaction.DoesNotExist:
                    # No such reaction to point at: leave duplicateOf untouched.
                    pass

                # Outcome is only accepted when it is one of '1'..'4'.
                outcomeValue = int(r['outcome']) if r['outcome'] in ('1', '2', '3', '4') else None
                self._sync_descriptor_value(OrdRxnDescriptorValue, outcomeDescriptor, p, outcomeValue, outValues)

                # Boolean outcome: True only for outcomes above 2. A missing
                # outcome maps to False, spelled out explicitly instead of
                # relying on Python 2's ordering of None against integers.
                value = outcomeValue is not None and outcomeValue > 2
                self._sync_descriptor_value(BoolRxnDescriptorValue, outcomeBooleanDescriptor, p, value, outBoolValues)

                value = int(r['purity']) if r['purity'] in ('1', '2') else None
                self._sync_descriptor_value(OrdRxnDescriptorValue, purityDescriptor, p, value, purityValues)

                # 273.15 is added to the file's temperature before storing it
                # (presumably Celsius to Kelvin - confirm).
                value = (float(r['temp']) + 273.15) if r['temp'] not in ('', '?') else None
                self._sync_descriptor_value(NumRxnDescriptorValue, temperatureDescriptor, p, value, temperatureValues)

                # The file's time is multiplied by 60 before storing it
                # (presumably hours to minutes - confirm).
                value = float(r['time']) * 60 if r['time'] not in ('', '?') else None
                self._sync_descriptor_value(NumRxnDescriptorValue, timeDescriptor, p, value, timeValues)

                value = float(r['pH']) if r['pH'] not in ('', '?') else None
                self._sync_descriptor_value(NumRxnDescriptorValue, pHDescriptor, p, value, pHValues)

                # NOTE(review): bool() of any non-empty string is True (even
                # '0'), and the result is stored as a numeric descriptor -
                # confirm this is intended before changing it.
                value = bool(r['pre_heat standing']) if r.get('pre_heat standing') not in ('', None) else None
                self._sync_descriptor_value(NumRxnDescriptorValue, preHeatStandingDescriptor, p, value, preHeatStandingValues)

                value = bool(int(r['teflon_pouch'])) if r.get('teflon_pouch') not in (None, '') else None
                self._sync_descriptor_value(BoolRxnDescriptorValue, teflonDescriptor, p, value, teflonValues)

                # Batches are triggered by the number of queued outcome values only.
                if len(outValues) > 500:
                    self._save_pending(pending)

        # Flush what is left: a batch is only written once more than 500
        # outcome values are queued, so without this the values collected for
        # the last rows would never reach the database.
        self._save_pending(pending)

    def _resolve_csid(self, chemspider, r):
        """Return the ChemSpider ID for row ``r``: its CSID, else a unique CAS match, else a unique name match.

        Raises:
            RuntimeError: if neither the CAS ID nor the name yields exactly one result.
        """
        if r.get('CSID') not in ('', None):
            return r['CSID']
        if r.get('CAS_ID') not in (None, ''):
            cas_results = chemspider.simple_search(r['CAS_ID'])
        else:
            cas_results = []
        if len(cas_results) == 1:
            return cas_results[0].csid
        name_results = chemspider.simple_search(r.get('name'))
        if len(name_results) != 1:
            # r.get() keeps a missing CAS_ID column from masking this error with a KeyError.
            raise RuntimeError('Could not get unambiguous chemspider entry for CAS ID {} with name {}. Got {} responses'.format(r.get('CAS_ID'), r['name'], len(cas_results)))
        return name_results[0].csid

    def _import_compounds(self, folder):
        """Create every compound of compound_labs.tsv whose abbreviation does not exist yet.

        Raises:
            RuntimeError: if a non-custom compound cannot be resolved in ChemSpider.
            ValidationError: re-raised when the consistency check or save of a
                ChemSpider-backed compound fails.
        """
        with open(path.join(folder, 'compound_labs.tsv')) as compounds:
            reader = csv.DictReader(compounds, delimiter='\t')
            chemspider = ChemSpider(settings.CHEMSPIDER_TOKEN)
            for r in reader:
                lab_group = LabGroup.objects.get(title=r['labGroup.title'])
                if Compound.objects.filter(abbrev=r['abbrev']).exists():
                    continue
                self.stdout.write('Importing compound with abbreviation {} and name {}'.format(r['abbrev'], r['name']))
                if r.get('custom') != '1':
                    # Built outside the try block so the handler below always
                    # refers to the compound of this row.
                    c = Compound(CSID=self._resolve_csid(chemspider, r), labGroup=lab_group, abbrev=r['abbrev'])
                    try:
                        c.csConsistencyCheck()
                        c.save()
                    except ValidationError:
                        c.delete()
                        raise
                else:
                    # Custom compounds are taken from the file as-is; missing
                    # structure columns are stored as empty strings.
                    if r.get('INCHI') is None:
                        r['INCHI'] = ''
                    if r.get('smiles') is None:
                        r['smiles'] = ''
                    c = Compound.objects.get_or_create(labGroup=lab_group, custom=True, name=r['name'], abbrev=r['abbrev'], formula=r['formula'], smiles=r['smiles'], INCHI=r['INCHI'])[0]
                self.stdout.write(c.name.encode('utf-8'))
                c.save()

    def _import_chemical_classes(self, folder):
        """Attach the chemical classes of compound_chemicalClasses.tsv to the matching compounds."""
        with open(path.join(folder, 'compound_chemicalClasses.tsv')) as chemicalClasses:
            reader = csv.DictReader(chemicalClasses, delimiter='\t')
            for r in reader:
                self.stdout.write('working with class {}'.format(r['chemicalClass.label']))
                matching = Compound.objects.filter(abbrev=r['compound.abbrev'])
                if not matching.exists():
                    # The class is only created when some compound uses it.
                    continue
                chemical_class = ChemicalClass.objects.get_or_create(label=r['chemicalClass.label'])[0]
                for compound in matching:
                    if chemical_class not in compound.chemicalClasses.all():
                        compound.chemicalClasses.add(chemical_class)
                        compound.save()

    def _amount_in_millimoles(self, r, mw):
        """Convert the amount of row ``r`` to millimoles using molecular weight ``mw``.

        Returns None when the amount is blank or '?'.

        Raises:
            RuntimeError: if the unit is not 'g', 'd' or 'mL'.
        """
        if r['amount'] in ('', '?'):
            return None
        if r['unit'] == 'g':
            amount = float(r['amount']) / mw
        elif r['unit'] == 'd':
            # 0.0375 scales the 'd' unit before the density is applied
            # (presumably millilitres per drop - confirm).
            amount = float(r['amount']) * 0.0375 * float(r['density']) / mw
        elif r['unit'] == 'mL':
            amount = float(r['amount']) * float(r['density']) / mw
        else:
            raise RuntimeError('invalid unit entered')
        # convert to millimoles
        return amount * 1000

    def _import_compound_quantities(self, folder):
        """Create the reactant quantities listed in compoundquantities.tsv.

        Rows with role 'pH' are recorded as a note on the reaction instead.
        A row whose compound is unknown marks its reaction as invalid.

        Raises:
            PerformedReaction.DoesNotExist: if a row refers to an unknown reaction.
            RuntimeError: if a row uses an unsupported unit.
        """
        with open(path.join(folder, 'compoundquantities.tsv')) as cqs:
            reader = csv.DictReader(cqs, delimiter='\t')
            for r in reader:
                try:
                    reaction = PerformedReaction.objects.get(reference=r['reaction.reference'].lower())
                    compound = Compound.objects.get(abbrev=r['compound.abbrev'], labGroup=reaction.labGroup)
                    if r['compound.abbrev'] in ('water', 'H2O'):
                        r['density'] = 1
                    mw = NumMolDescriptorValue.objects.get(compound=compound, descriptor__heading='mw').value
                    if r['compoundrole.name'] == 'pH':
                        reaction.notes += ' pH adjusting reagent used: {}, {}{}'.format(r['compound.abbrev'], r['amount'], r['unit'])
                        reaction.save(calcDescriptors=False)
                        continue
                    self.stdout.write('adding {} to {}'.format(compound.abbrev, reaction.reference))
                    compoundrole = CompoundRole.objects.get_or_create(label=r['compoundrole.name'])[0]
                    amount = self._amount_in_millimoles(r, mw)
                    cqq = CompoundQuantity.objects.filter(role=compoundrole, compound=compound, reaction=reaction)
                    existing = cqq.count()
                    if existing > 1:
                        # NOTE(review): duplicates are removed but no
                        # replacement is created in this pass - confirm that a
                        # second run is expected to recreate the quantity.
                        cqq.delete()
                    elif existing == 0:
                        quantity = CompoundQuantity(role=compoundrole, compound=compound, reaction=reaction)
                        quantity.amount = amount
                        quantity.save(recalculate=False)
                except Compound.DoesNotExist:
                    # `reaction` is bound here: the compound lookup only runs
                    # after the reaction lookup has succeeded.
                    self.stderr.write('Unknown Reactant {} with amount {} {} in reaction {}'.format(r['compound.abbrev'], r['amount'], r['unit'], r['reaction.reference']))
                    reaction.notes += ' Unknown Reactant {} with amount {} {}'.format(r['compound.abbrev'], r['amount'], r['unit'])
                    reaction.valid = False
                    reaction.save(calcDescriptors=False)
コード例 #37
0
ファイル: make_descs.py プロジェクト: DajeRoma/clicc-flask
class make_descs:
    """Look up SMILES strings for a CSV list of chemicals via ChemSpider and run Dragon on them."""

    def __init__(self,argv):
        """Store the input file path (``argv[1]``) and create the ChemSpider client."""
        self.CAS_file = argv[1]
        # ChemSpider API token.
        # NOTE(review): the token is hard-coded in source; consider loading it
        # from configuration or the environment instead.
        self.cs=ChemSpider('d1778a9f-c41f-41f6-920e-fc6d9ff739ca')

    def querySMILEs(self):
        """Query ChemSpider for the SMILES of every chemical in the input CSV.

        Only the first column of each row is used, truncated at the first
        '//'. Chemicals without a search hit are left out of ``self.CAS`` and
        ``self.SMILEs``; their row numbers are collected in ``self.missing``.
        Blank rows are skipped. The SMILES are then cleaned and written to
        disk.

        Returns:
            The list of cleaned SMILES strings (``self.SMILEs``).
        """
        self.CAS = []
        self.SMILEs = []
        self.missing = []
        # Binary mode is what the Python 2 csv module expects.
        with open(self.CAS_file, 'rb') as csvfile:
            for index_num, row in enumerate(csv.reader(csvfile, delimiter=',')):
                if not row:
                    # A blank line has no first column to read.
                    continue
                cas = row[0].split('//')[0]
                chem_this = self.cs.search(cas)
                print("Working on chemical  {} {}".format(index_num, cas))
                try:
                    smiles = chem_this[0].smiles
                except IndexError:
                    # No search hit: record the row and keep both result
                    # lists aligned by appending to neither.
                    print("Can't find index:  {} {}".format(index_num, cas))
                    self.missing.append(index_num)
                    continue
                self.CAS.append(cas)
                self.SMILEs.append(smiles)

        # Clean up SMILEs
        self._cleanSMILEs()

        # Write SMILEs
        self._writeSEMILs()

        return self.SMILEs

    def _writeSEMILs(self):
        """Write ``self.SMILEs`` to ./cas/SMILEs.csv, one SMILES per row."""
        # The context manager closes the file, so every row is flushed to disk.
        with open('./cas/SMILEs.csv', 'wb') as resultsfile:
            wr = csv.writer(resultsfile, dialect='excel')
            for eachSMILEs in self.SMILEs:
                wr.writerow([eachSMILEs])

    def _cleanSMILEs(self):
        """Remove every '+', '-' and '.' from the strings in ``self.SMILEs``, in place."""
        assert self.SMILEs is not None
        # Slice assignment keeps the same list object, as the element-wise
        # update did before.
        self.SMILEs[:] = [
            smiles.replace("+", "").replace("-", "").replace(".", "")
            for smiles in self.SMILEs
        ]
        print("SMILEs have been cleaned up!")

    def calculateDescs(self):
        """Launch the Dragon command line shell with the predefined descriptor script.

        These descriptors are suited for the LCIA module at this time.
        The process is started but not waited for.
        """
        dragonShellCall = "./Dragon/dragon6shell.exe"
        dragonScriptCall = "./Dragon/test_script.drs"
        proc = sb.Popen([dragonShellCall,"-s" ,dragonScriptCall])
コード例 #38
0
 def __init__(self):
     """Create a ChemSpider client from the CHEMSPI_KEY and CHEMSPI_API_URL settings."""
     constants = SettingsConstants()
     # Keep the raw settings on the instance as well as the client built from them.
     api_key = constants.get('CHEMSPI_KEY')
     self.key = api_key
     api_url = constants.get('CHEMSPI_API_URL')
     self.url = api_url
     self.cs = ChemSpider(api_key, api_url=api_url)
コード例 #39
0
ファイル: inject_InChI.py プロジェクト: JonWel/CoolProp-f
import CoolProp
from chemspipy import ChemSpider
from chemspipy_key import key # private file with the key (DO NOT COMMIT!!)
import glob, json
# ChemSpider client authenticated with the key from the private chemspipy_key module.
cs = ChemSpider(key)

# Map from name to Chemspider ID
# NOTE(review): not referenced in the visible part of this script; presumably a
# fallback for fluids that cannot be found by name - confirm against the rest
# of the file.
backup_map = {
    'Propyne': 6095,
    'R236EA': 71342,
    'R245ca': 62827,
    'trans-2-Butene': 56442,
    'Oxygen': 952,
    'Fluorine': 22932,
    'Hydrogen': 762,
    'Deuterium': 22931,
    'HFE143m': 66577,
    'SulfurHexafluoride': 16425,
    'R114': 13853215
}

# Make sure the key works
# Fetches one compound by ID and compares its InChIKey with the expected value.
c = cs.get_compound(2157)
assert(c.inchikey == 'BSYNRYMUTXBXSQ-UHFFFAOYAW')

# Walk over every fluid description file and load it as JSON.
for fname in glob.glob('../fluids/*.json'):
    with open(fname,'r') as fp:
        jj = json.load(fp)

    # Fluid name as stored under INFO/NAME in the JSON file.
    fluid = jj['INFO']['NAME']
コード例 #40
0
ファイル: ChemSpider.py プロジェクト: TSP-Molecule/ATOM
# Tool uses the ChemSpiPy library to assist in accessing the ChemSpider Database

# Syntax to run command: python ChemSpider.py -(f/n) term
#     -f name -> get the formula for the common name formula
#     -n formula -> get the common name for the formula

import sys

# allows us to use command line arguments

# Validate the command line before doing any network work: both a flag and a
# search term are required, and the flag must be one this tool understands.
if len(sys.argv) < 3 or sys.argv[1] not in ("-f", "-n"):
    print("Incorrect input.\n\t==> python ChemSpider.py [-f/-n] <argument>")
    # Non-zero status so that calling shells/scripts can detect the usage error.
    sys.exit(1)

# Imported only after the arguments are known to be valid.
from chemspipy import ChemSpider

# NOTE(review): the access token is hard-coded; consider reading it from the
# environment instead of keeping it in source control.
cs = ChemSpider('3e05e0a6-9f49-4dff-ba0e-a9d6ca3d04ea')

mode = sys.argv[1]

# Look at no more than the first five search results.
for result in cs.search(sys.argv[2])[:5]:
    print(result.common_name)
    if mode == "-n":
        # -n: only the common name of the first match is wanted.
        break
    # -f: also print the molecular formula of each match.
    print(result.molecular_formula)
コード例 #41
0
    if not args.from_db:
        from chemspipy import ChemSpider

    if not args.export_db_only:
        import pandas as pd

    if args.from_db or args.export_db_only or args.export_db_csv:
        import shelve

    ## ==================== set up chemspider ====================

    if not args.from_db:
        possiblefile = os.path.expanduser(args.token)
        if os.path.exists(possiblefile):  # is file
            with open(possiblefile) as f:
                csp = ChemSpider(f.read().strip())
        else:
            csp = ChemSpider(args.token)  # else is token
    else:
        csp = None

    spq = spiderquery(csp, args.prefix + '_p')

    ## ==================== list of compounds ====================

    if args.inputfile:
        with open(args.inputfile) as csvfile:
            f = csv.reader(csvfile)
            compounds = []
            j = 0
            for i, row in enumerate(f):
コード例 #42
0
ファイル: find_mol.py プロジェクト: SGenheden/Scripts
import os

from chemspipy import ChemSpider

from sgenlib import smiles

# Script entry point: search ChemSpider for every molecule named in a list
# file and report the names that produce no hits.
if __name__ == "__main__":

    # Command line: one positional argument, the file listing the molecules.
    parser = argparse.ArgumentParser(description="Program to find molecules in ChemSpdie",)
    parser.add_argument('filename',help="the filename of a list")
    args = parser.parse_args()

    # The ChemSpider key is taken from the SPIDERKEY environment variable;
    # without it the script prints a message and stops.
    if os.getenv("SPIDERKEY") is None :
        print "SPIDERKEY environmental variable not set! Exit."
        quit()
    cs = ChemSpider(os.getenv("SPIDERKEY"))

    # One molecule name per line, with surrounding whitespace removed.
    molecules = []
    with open(args.filename,"r") as f :
        molecules = [line.strip() for line in f.readlines()]

    for mol in molecules:
        hits = cs.search(mol)
        if len(hits) == 0 :
            # No match: print the name followed by a tab and "!!".
            print mol+"\t!!"
        else :
            # The string literal below is disabled code kept as a bare
            # expression; it has no effect when the script runs.
            """try :
                print "//".join([h.common_name for h in hits])
            except :
                print mol+"\t!!!"
                """
コード例 #43
0
ファイル: collect_smiles.py プロジェクト: SGenheden/Scripts
# Script entry point: for each solute of the chosen solvent in the database,
# look the solute up on ChemSpider and write the SMILES string of the first
# hit to "<FileHandle>.smi".  Solutes without any hit are printed instead.
if __name__ == '__main__':

    argparser = argparse.ArgumentParser(description="Script to obtain SMILES for a solutes in a list")
    argparser.add_argument('-db', '--db', help="the molecule database")
    argparser.add_argument('-solvent', '--solvent', help="the solvent", default="water")
    argparser.add_argument('-solutes','--solutes',help="the list of solutes")
    args = argparser.parse_args()

    db = dblib.SolvDb(filename=args.db,type="abs",filehandle="^0")

    # Read the solute names inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(args.solutes, 'r') as solute_file:
        solutes = [s.strip() for s in solute_file]

    # The ChemSpider key comes from the environment; read it once.
    spider_key = os.getenv("SPIDERKEY")
    if spider_key is None:
        print("SPIDERKEY environmental variable not set! Exit.")
        # Exit with a failure status; does not rely on the interactive
        # quit() helper being installed by the site module.
        raise SystemExit(1)
    cs = ChemSpider(spider_key)

    # Loop over all the database entries in the solute lists
    n = 0
    for entry in db.itersolutelist(args.solvent, solutes):
        smi_path = entry.FileHandle + ".smi"
        # Entries that already have a .smi file are skipped and not counted.
        if os.path.exists(smi_path):
            continue
        hits = cs.search(entry.SoluteName)
        if len(hits) > 0:
            # Only the first hit is used.
            smi = hits[0].smiles
            with open(smi_path, "w") as f:
                f.write("%s\n" % smi)
        else:
            # Nothing found: report the solute name and its file handle.
            print("%s %s" % (entry.SoluteName, entry.FileHandle))
        n += 1

    print("Looped over %d solutes" % n)
コード例 #44
0
from chemspipy import ChemSpider

from sgenlib import smiles

# Script entry point: read molecule names from a list file, look each unique
# name up on ChemSpider and take the SMILES string of the first hit.
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description="Program to build molecules", )
    parser.add_argument('filename', help="the filename of a list")
    args = parser.parse_args()

    # The ChemSpider key comes from the environment; read it once.
    spider_key = os.getenv("SPIDERKEY")
    if spider_key is None:
        print("SPIDERKEY environmental variable not set! Exit.")
        # Exit with a failure status; does not rely on the interactive
        # quit() helper being installed by the site module.
        raise SystemExit(1)
    cs = ChemSpider(spider_key)

    with open(args.filename, "r") as f:
        lines = [line.strip() for line in f]

    # Unique names in order of first appearance.  This yields the same list
    # as sorting the set by lines.index(), but in a single pass and without
    # the Python-2-only ``cmp=`` argument of sorted().
    seen = set()
    molecules = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            molecules.append(line)

    for mol in molecules:
        hits = cs.search(mol)
        if len(hits) == 0:
            # No match: print the name followed by a tab and "!!".
            print(mol + "\t!!")
        else:
            # Only the first hit is used.
            molsmiles = hits[0].smiles
            # str.replace returns a new string, so its result must be kept;
            # previously it was discarded and the spaces were never replaced.
            mol2 = mol.strip().replace(" ", "_")
コード例 #45
0
from nose.tools import eq_, ok_, raises
import requests
import six

from chemspipy import ChemSpider, MOL2D, MOL3D, BOTH
from chemspipy.errors import ChemSpiPyAuthError, ChemSpiPyServerError


logging.basicConfig(level=logging.WARN)
logging.getLogger('chemspipy').setLevel(logging.DEBUG)

# Security token is retrieved from environment variables
CHEMSPIDER_SECURITY_TOKEN = os.environ['CHEMSPIDER_SECURITY_TOKEN']

# Chemspider instances with and without a security token
cs = ChemSpider(security_token=CHEMSPIDER_SECURITY_TOKEN)
cs2 = ChemSpider()


def test_no_security_token():
    """A ChemSpider created without arguments reports no security token."""
    token = cs2.security_token
    eq_(token, None)


def test_security_token():
    """The token passed to the ChemSpider constructor is kept unchanged."""
    token = cs.security_token
    eq_(token, CHEMSPIDER_SECURITY_TOKEN)


def test_chemspider_repr():
    """Test ChemSpider object repr."""
コード例 #46
0
        return random.choice(security_token)
    else:
        print("You need Security_token.txt providing security token. Please contact me as soon as.")


# print(tokenchoice())
if os.path.isfile('chemspiderdb.json'):
    spiderjsonfileid = []
    with open('chemspiderdb.json', 'r') as jsonfile:
        for f in jsonfile.readlines():
            the_dict = json.loads(f)
            spiderjsonfileid.append(the_dict['_id'])
            # print(spiderjsonfileid)
    for csid in csids:
        # cskey = random.choice(cs_security_key)
        cs = ChemSpider(tokenchoice())
        if csid in spiderjsonfileid:
            print('{0} has been in the file'.format(str(csid)))
            continue
        compound = cs.get_compound(csid)
        try:
            doc = {'_id': int(compound.csid), 'common_name': compound.common_name}
            sleep(random.uniform(0.2, 0.5))
            doc['molecular_weight'] = compound.molecular_weight
            sleep(random.uniform(0, 0.5))
            doc['molecular_formula'] = compound.molecular_formula
            doc['stdinchi'] = compound.stdinchi
            sleep(random.uniform(0.1, 0.5))
            doc['stdinchikey'] = compound.stdinchikey
            doc['smiles'] = compound.smiles
            # sleep(random.uniform(1, 1.1))
コード例 #47
0
from django.utils.text import slugify
from chemspipy import ChemSpider

from professor_oak.models import ScoreMixin

log = logging.getLogger(__name__)


# Load the chemspider API for accessing the RSC structure database.
# When the settings object has no CHEMSPIDER_KEY attribute a warning is
# logged and ``chemspider_api`` is left as None; otherwise it holds a
# ChemSpider client built from that key.
try:
    cs_key = settings.CHEMSPIDER_KEY
except AttributeError:
    # Logger.warn() is a deprecated alias; warning() is the supported name.
    log.warning('CHEMSPIDER_KEY not found in localsettings.py')
    chemspider_api = None
else:
    chemspider_api = ChemSpider(cs_key)


class Hazard(models.Model):
    """A hazard type as defined by the Globally Harmonized System (GHS).

    Attributes
    ----------
    - pictogram : Image file that represents this hazard. If not
      provided, we will look in `static_files/ghs_pictograms/` for one
      that matches the `name` attribute
    """
    # Short string codes for the hazard categories.
    # NOTE(review): presumably used as the stored values of a choices field
    # defined further down in the class -- confirm.
    PHYSICAL = 'p'
    HEALTH = 'h'
    PHYSICAL_AND_HEALTH = 'ph'
    ENVIRONMENTAL = 'e'
コード例 #48
0
ファイル: util.py プロジェクト: tyrochymicus/lavoisier
from chemspipy import ChemSpider

cs = ChemSpider("CHEMSPIDER_API_KEY")


def remove_prefix(disctext):
    """Return the message text with the leading "!chem " prefix stripped.

    ``disctext`` is an object that exposes its text as ``.content``.  When
    the text does not start with the command prefix, None is returned.
    """
    prefix = "!chem "
    text = disctext.content
    if not text.startswith(prefix):
        return None
    return text[len(prefix):]


def getCompound(id):
    """Return the top ChemSpider search result for a compound name or id.

    The query is handed to ``cs.search`` and the first item of the result
    is returned.  An empty result makes the ``[0]`` lookup fail, so callers
    must be prepared for that error.
    """
    # Index straight into the search results; only the top hit is wanted.
    return cs.search(id)[0]
コード例 #49
0
ファイル: add_mol.py プロジェクト: sdrogers/ms2ldaviz
import os
import pickle
import numpy as np
import sys
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ms2ldaviz.settings_simon")

import django
import jsonpickle
django.setup()

from chemspipy import ChemSpider

from basicviz.models import *

# Script entry point: for every document of the named experiment, look its
# InChIKey up on ChemSpider and copy the 2D mol record of the first hit onto
# the document.
if __name__ == '__main__':
    cs = ChemSpider('b07b7eb2-0ba7-40db-abc3-2a77a7544a3d')

    # The experiment name is the first command line argument.
    exp_name = sys.argv[1]
    e = Experiment.objects.get(name = exp_name)
    print e
    docs = Document.objects.filter(experiment = e)
    for doc in docs:
        # Document metadata is stored as a jsonpickle-encoded string.
        md = jsonpickle.decode(doc.metadata)
        # Accept either spelling of the key; ik is None when neither exists.
        ik = md.get('InChIKey',md.get('inchikey',None))
        print ik
        # search in chemspi
        # NOTE(review): ik is passed to the search even when it is None.
        results = cs.search(ik)
        if len(results) > 0:
            # Only the first hit is used.
            m = results[0].mol_2d
            # Keep the mol text only when it is non-empty.
            if len(m) > 0:
                doc.mol_string = m
コード例 #50
-1
def find_common_name(inchikey):
    """Look up a compound's common name on ChemSpider by its InChIKey.

    The name is returned only when the search yields exactly one result.
    None is returned when no ChemSpider key is configured, when the
    InChIKey is empty, or when the number of results is not one.
    """
    # Without a configured key there is nothing to query.
    if not chemspikey:
        return None

    client = ChemSpider(chemspikey)

    if len(inchikey) == 0:
        return None

    matches = client.search(inchikey)
    # Ambiguous (several hits) and unknown (no hits) keys both give None.
    if len(matches) != 1:
        return None
    return matches[0].common_name