def csConsistencyCheck(self):
    """Perform a consistency check of this record against chemspider.

    Records with a truthy ``custom`` attribute are not checked at all. For
    every other record the compound is looked up by ``self.CSID`` and compared
    field by field:

    * name: a non-blank name must return the compound from a simple search;
      a blank name is replaced by the compound's common name.
    * INCHI, smiles, formula: a blank (``''``) value is filled in from
      chemspider; a non-blank value that differs is recorded as an error.

    Raises:
        ValidationError: immediately with code ``no_csid`` when no CSID is
            set, or after all checks with the list of collected errors.
    """
    if self.custom:
        return

    errorList = []
    cs = ChemSpider(settings.CHEMSPIDER_TOKEN)

    # Compare by equality: `is ''` tests object identity and only worked by
    # accident of string interning.
    if self.CSID is None or self.CSID == '':
        raise ValidationError('No CSID set', 'no_csid')

    csCompound = cs.get_compound(self.CSID)

    if self.name not in ('', None):
        nameResults = cs.simple_search(self.name)
        if csCompound not in nameResults:
            # NOTE(review): the code 'invalid_inchi' looks copy-pasted from
            # the InChi check below; it is kept because callers may match on
            # it -- confirm before renaming.
            errorList.append(ValidationError(
                'A compound was consistency checked and was found to have an invalid name',
                code='invalid_inchi'))
    else:
        self.name = csCompound.common_name

    if self.INCHI == '':
        self.INCHI = csCompound.stdinchi
    elif self.INCHI != csCompound.stdinchi:
        errorList.append(ValidationError(
            'A compound was consistency checked and was found to have an invalid InChi',
            code='invalid_inchi'))

    if self.smiles == '':
        self.smiles = csCompound.smiles
    elif self.smiles != csCompound.smiles:
        errorList.append(ValidationError(
            'A compound was consistency checked and was found to have an invalid smiles string',
            code='invalid_smiles'))

    if self.formula == '':
        self.formula = csCompound.molecular_formula
    elif self.formula != csCompound.molecular_formula:
        # The original appended to the misspelled `errorsList`, which raised
        # NameError instead of reporting the formula mismatch.
        errorList.append(ValidationError(
            'A compound was consistency checked and was found to have an invalid formula',
            code="invalid_formula"))

    if errorList:
        raise ValidationError(errorList)
def find_matches(matched_in_ChemSpider, massFile_Name):
    """Return, for each product, the ChemSpider IDs whose mol file matches it.

    Args:
        matched_in_ChemSpider: mapping of product key -> iterable of candidate
            ChemSpider IDs (per the original comments, compounds with almost
            the same mass as the product).
        massFile_Name: passed through to ``read_product_molFile`` together
            with the product key to load the product's mol file.

    Returns:
        dict mapping each product key to the list of candidate IDs for which
        ``compare_two_molFiles`` reported a match. Products without any match
        are omitted; an empty input yields an empty dict.
    """
    from chemspipy import ChemSpider

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    cs = ChemSpider('dfdc677d-e7d3-435b-a74e-bfe6167a3899')

    # Accumulate across all products. Initialising this inside the loop would
    # discard every product but the last and leave the name undefined for an
    # empty input.
    matches = {}

    for product in matched_in_ChemSpider:
        print(product)

        # Load the mol file info of the product.
        product_molFile = read_product_molFile(massFile_Name, product)

        # Keep every candidate whose 2D mol file is the same as the product's.
        matched_compounds = [
            CSID
            for CSID in matched_in_ChemSpider[product]
            if compare_two_molFiles(product_molFile,
                                    cs.get_compound(CSID).mol_2d)
        ]

        # Record the product only if at least one compound matched.
        if matched_compounds:
            matches[product] = matched_compounds

    # Return the whole set of matches for all products.
    return matches
def getLongNames(molsDict, pref=4, onlyLettersDigits=False, token="2228d430-a955-416b-b920-14547d28df9e"):
    """Map every key of ``molsDict`` to the common name of its ChemSpider compound.

    The compound id is the key with its first ``pref`` characters removed.
    The common name is encoded to ASCII (unencodable characters are dropped)
    and, when ``onlyLettersDigits`` is set, passed through
    ``leftOnlyLettersDigits``.

    Returns:
        dict of key -> processed name.
    """
    client = ChemSpider(token)
    longNames = {}
    for key in molsDict:
        compound = client.get_compound(key[pref:])
        asciiName = compound.common_name.encode("ascii", "ignore")
        longNames[key] = (
            leftOnlyLettersDigits(asciiName) if onlyLettersDigits else asciiName
        )
    return longNames
class ChemSp(object):
    """Thin wrapper around a ChemSpider client configured from SettingsConstants."""

    def __init__(self):
        """Read the API key and URL from the settings and build the client."""
        constants = SettingsConstants()
        self.key = constants.get('CHEMSPI_KEY')
        self.url = constants.get('CHEMSPI_API_URL')
        self.cs = ChemSpider(self.key, api_url=self.url)

    def get_cmpd(self, csid):
        """Return the compound the client gives for ``csid``."""
        return self.cs.get_compound(csid)

    def search(self, query):
        """Search ChemSpider for ``query`` and describe the first hits.

        At most six hits (positions 0 to 5) are processed. CAS number and
        IUPAC name are resolved through ``cirpy`` from each hit's InChI; when
        several CAS numbers come back, the one closest to ``query`` is kept.

        Returns:
            list of dicts with the keys csid, name, iupac_name, cas, inchi,
            formula and image.
        """
        print('Connected to ChemSpider API')
        print("Searching started")
        print("Searching for: " + query)

        found = []
        for position, hit in enumerate(self.cs.search(query)):
            if position > 5:
                break
            print("Compound " + str(position))

            formula = str(hit.molecular_formula)
            csid = str(hit.csid)
            inchi = hit.inchi
            name = hit.common_name
            cas = cirpy.resolve(inchi, 'cas')
            iupac_name = cirpy.resolve(inchi, 'iupac_name')

            if type(cas) is list:
                # Several CAS numbers: rank them by similarity to the query
                # (cutoff 0 keeps every candidate) and take the best one.
                closest = difflib.get_close_matches(str(query), cas, 3, 0)
                print(closest)
                chosen_cas = closest[0]
            else:
                chosen_cas = cas

            image = hit.image_url
            print(image)

            found.append({
                'csid': csid,
                'name': name,
                'iupac_name': iupac_name,
                'cas': chosen_cas,
                'inchi': inchi,
                'formula': formula,
                'image': image,
            })

        print("Searching finished")
        print(found)
        return found

    def render_image(self, csid, image_id):
        """Write the compound's image to the temp directory and return its static URL.

        The returned path carries the current timestamp as a query string.
        """
        png_bytes = self.get_cmpd(csid).image
        file_name = 'temp' + image_id + '.png'
        target = '/home/marcin/Dokumenty/projekty/production/Chem/chembase/static/chembase/temp/' + file_name
        with open(target, 'wb+') as destination:
            destination.write(png_bytes)
        stamp = str(datetime.datetime.now())
        return '/static/chembase/temp/' + file_name + '?timestamp=' + stamp
def database_setup():
    """Download 2D & 3D molecule structures from the ChemSpider server to create a database.

    The ids come from ``get_id()`` and the files are written into the
    ``DATABASE`` directory as ``<id>_2d.txt`` and ``<id>_3d.txt``. If the
    directory already exists the function prints a message and exits the
    interpreter instead of overwriting it.

    Files are addressed by full path rather than by changing the working
    directory, so the caller's working directory is left untouched even when
    a download fails part-way.
    """
    from chemspipy import ChemSpider
    import sys

    # Compile the id list for calling molecules.
    id_list = get_id()
    directory = DATABASE

    # Make the database directory, refusing to reuse an existing one.
    if os.path.isdir(directory):
        print('Database folder already existed! Aborting... \n '
              'Please remove the folder and rerun')
        sys.exit()
    os.mkdir(directory)
    print('downloading..')

    # NOTE(review): 'text' looks like a placeholder for the real API key.
    cs = ChemSpider('text')

    for id_chemspider in id_list:
        path_2d = os.path.join(directory, str(id_chemspider) + '_2d.txt')
        path_3d = os.path.join(directory, str(id_chemspider) + '_3d.txt')

        # Skip ids that were already written (e.g. duplicates in id_list).
        if os.path.exists(path_2d):
            print('ID ' + str(id_chemspider) + ' already existed')
            continue

        # Fetch both structures before creating any file, so a failed request
        # does not leave an empty file that would later be taken as "done".
        c = cs.get_compound(id_chemspider)
        mol_2d = c.mol_2d
        mol_3d = c.mol_3d

        # Write 2D coordinate and bond data.
        with open(path_2d, 'w') as f:
            f.write(mol_2d)

        # Write 3D coordinate and bond data.
        with open(path_3d, 'w') as f:
            f.write(mol_3d)
def get_chemspider_structure(csid):
    """
    Fetch a compound's 3D structure from ChemSpider and save it as a PDB file.

    The mol-format structure is converted to PDB through an ``ob``
    conversion object after hydrogens have been added. The file is written
    to ``<csid>.pdb`` in the current directory and that file name is returned.
    """
    token = 'a03b1636-afc3-4204-9a2c-ede27680577c'  # XXX
    compound = ChemSpider(token).get_compound(csid)

    converter = ob.OBConversion()
    converter.SetInAndOutFormats('mol', 'pdb')

    molecule = ob.OBMol()
    converter.ReadString(molecule, compound.mol_3d)
    molecule.AddHydrogens()

    pdbpath = '{}.pdb'.format(csid)
    with open(pdbpath, 'w') as handle:
        handle.write(converter.WriteString(molecule))
    return pdbpath
def getChemspiderCompounds(token, list, pref, delim="_", longNames=True, onlyLettersDigits=False):
    """Fetch ChemSpider compounds and build hydrogen-added molecules with names.

    For every id in ``list`` the name starts as ``pref + delim + id``. With
    ``longNames`` the ASCII-encoded common name is appended after another
    ``delim`` (optionally filtered through ``leftOnlyLettersDigits``). Each
    name is printed. The molecule is built from the compound's ASCII-encoded
    SMILES string and then has hydrogens added.

    Returns:
        tuple ``(molecules, names)`` of two lists in input order.
    """
    client = ChemSpider(token)
    labels = []
    hydrogenated = []
    for chsId in list:
        compound = client.get_compound(chsId)

        label = pref + delim + str(chsId)
        if longNames:
            shortName = compound.common_name.encode("ascii", "ignore")
            if onlyLettersDigits:
                shortName = leftOnlyLettersDigits(shortName)
            label = label + delim + shortName
        print(label)

        asciiSmiles = compound.smiles.encode("ascii", "ignore")
        hydrogenated.append(Chem.AddHs(Chem.MolFromSmiles(asciiSmiles)))
        labels.append(label)
    return hydrogenated, labels
# Read back the final list (the two chem lists combined), then extract the
# SMILES string and molecular weight for every row using its ChemSpider ID.
df = pd.read_csv("chem_cidList_total.csv")

# Columns for the new data; rows without a usable CSID keep the value 0.
# The dtypes are chosen up front (float for weights, object for strings) so
# the per-row assignments below never have to change a column's dtype.
df['MW'] = 0.0
df['SMILE'] = pd.Series(0, index=df.index, dtype=object)

# Fill in the new columns row by row. `DataFrame.ix` no longer exists in
# current pandas, so rows are addressed by their index label with `.at`.
for index, row in df.iterrows():
    # The CSID sits in the eighth column; `.iloc` makes the positional
    # access explicit instead of relying on integer-key fallback.
    csid = row.iloc[7]
    if csid != 0:
        compound = cs.get_compound(csid)
        df.at[index, 'MW'] = compound.molecular_weight
        df.at[index, 'SMILE'] = compound.smiles

# Write the enriched table to csv.
df.to_csv('ChemID_MW_SMILES_everything.csv')
class CompoundForm(forms.ModelForm):
    """
    A form for users to add compounds to the compound guide.

    Forces a check against the chemspider database to ensure no spurious
    compounds make their way into the compound guide.
    """

    # Adding this field, not in the database, allows users to match compounds
    # to a CAS_ID without us incurring issues for storing them.
    CAS_ID = forms.CharField(label='CAS ID', required=False)

    # If the user already knows the right value for this it allows them to
    # skip a step.
    CSID = forms.IntegerField(
        label='Chemspider ID',
        min_value=1,
        error_messages={'required': 'This value must be set or selected'})

    abbrev = forms.CharField(label="Abbreviation", max_length=100)

    class Meta:
        fields = ('labGroups', 'CSID', 'abbrev', 'name', 'CAS_ID',
                  'chemicalClasses')
        model = Compound
        help_texts = {
            'name': 'A common or IUPAC name for the compound.',
            'CAS_ID': 'The CAS number for the compound. Optional.',
            'CSID': 'The Chemspider ID for the compound. If this is not included, a list will be provided for you to choose from.'
        }

    def __init__(self, user, *args, **kwargs):
        """Overridden version of the init method allows us to place the user's lab groups as a restricted set."""
        super(CompoundForm, self).__init__(*args, **kwargs)
        # Set by clean_CSID once the supplied CSID has been confirmed.
        self.compound = None
        self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)
        self.fields['labGroups'].queryset = user.labgroup_set.all()
        if user.labgroup_set.all().exists():
            self.fields['labGroups'].empty_label = None

    def clean_CSID(self):
        """Check that the CSID is actually a valid id from chemspider.

        On success the first search result is remembered as ``self.compound``.

        Raises:
            ValidationError: code ``invalid_csid`` when the search is empty.
        """
        searchResults = self.chemSpider.simple_search(
            self.cleaned_data['CSID'])
        if len(searchResults) < 1:
            raise ValidationError('The CSID you have provided is invalid',
                                  code='invalid_csid')
        self.compound = searchResults[0]
        return self.cleaned_data['CSID']

    def clean(self):
        """Verify that the CSID, CAS_ID (where supplied) and name are consistent.

        Also adds an error on ``abbrev`` when another compound already uses
        the abbreviation in one of the selected lab groups.

        Raises:
            ValidationError: ``no_compounds`` when nothing matches the search
                terms, ``name_csid_conflict`` or ``name_cas_id_conflict`` when
                the name or CAS ID does not belong to the confirmed compound.
        """
        self.cleaned_data = super(CompoundForm, self).clean()

        if 'labGroups' in self.cleaned_data:
            for labGroup in self.cleaned_data.get('labGroups'):
                if CompoundGuideEntry.objects.filter(
                        abbrev=self.cleaned_data.get('abbrev'),
                        labGroup=labGroup).exclude(
                            compound=self.instance).exists():
                    self.add_error(
                        'abbrev',
                        'A compound with this abbreviation already exists for the selected labgroup.'
                    )

        if self.cleaned_data.get('name'):
            nameResults = self.chemSpider.simple_search(
                self.cleaned_data['name'])

            # Truthiness rather than `!= ''`: a CAS_ID that is absent from
            # cleaned_data reads as None and must count as "not supplied"
            # instead of triggering a lookup of a missing key. This also
            # matches the test used for the conflict check below.
            casId = self.cleaned_data.get('CAS_ID')
            if casId:
                CAS_IDResults = self.chemSpider.simple_search(casId)
                # the CAS_ID always generates a more restrictive set
                compoundChoices = [
                    compound for compound in nameResults
                    if compound in CAS_IDResults
                ][0:10]
            else:
                # if the CAS_ID is not supplied, then we just create a subset
                # based on the name search alone
                compoundChoices = nameResults[0:10]

            if self.compound is None and len(compoundChoices) > 0:
                # in essence, if a CSID was not supplied, but the chemspider
                # search returned chemspider results, then we offer those
                # results to the user to make a selection.
                self.fields['CSID'] = forms.ChoiceField(
                    choices=((choice.csid, choice.common_name)
                             for choice in compoundChoices),
                    widget=forms.widgets.RadioSelect)
                return self.cleaned_data
            elif self.compound is None:
                raise ValidationError(
                    'Your search terms failed to validate against the Chemspider database. Please contact a local administrator.',
                    code='no_compounds')
            else:
                if self.compound not in nameResults:
                    raise ValidationError(
                        'The name provided was not valid for the CSID provided. Please change the entry, or contact your local administrator.',
                        code='name_csid_conflict')
                elif casId and self.compound not in CAS_IDResults:
                    raise ValidationError(
                        'The CAS ID provided is not valid for the CSID provided. Remove, replace, or contact your local administrator.',
                        'name_cas_id_conflict')
                else:
                    return self.cleaned_data
        else:
            if self.compound is not None:
                # No name was given but the CSID is known: suggest the
                # compound's common name back to the user.
                # The submitted data is copied because the query dict is
                # otherwise immutable, then replaced directly.
                data = self.data.copy()
                data['name'] = self.compound.common_name
                # manually input an error message which is less demanding
                self._errors['name'] = self.error_class(
                    ['Please review this suggestion'])
                self.data = data  # override the old data
            return self.cleaned_data

    def save(self, commit=True):
        """Create (and if appropriate, saves) the compound instance, and adds Inchi and smiles from chemspider.

        An existing compound with the same CSID is reused as the form
        instance. With ``commit`` the compound is saved and one
        CompoundGuideEntry per selected lab group is created or re-pointed
        at it; the guide entries need a saved compound, so they are only
        written in that case.
        """
        try:
            self.instance = Compound.objects.get(
                CSID=self.cleaned_data['CSID'])
        except Compound.DoesNotExist:
            # No such compound yet: keep the fresh form instance.
            pass

        compound = super(CompoundForm, self).save(commit=False)
        csCompound = self.chemSpider.get_compound(compound.CSID)
        compound.INCHI = csCompound.inchi
        compound.smiles = csCompound.smiles
        compound.formula = csCompound.molecular_formula

        if commit:
            compound.save()
            if 'labGroups' in self.cleaned_data:
                for labGroup in self.cleaned_data['labGroups']:
                    try:
                        cgEntry = CompoundGuideEntry.objects.get(
                            labGroup=labGroup,
                            abbrev=self.cleaned_data['abbrev'])
                        cgEntry.compound = compound
                        cgEntry.save()
                    except CompoundGuideEntry.DoesNotExist:
                        CompoundGuideEntry.objects.create(
                            labGroup=labGroup,
                            abbrev=self.cleaned_data['abbrev'],
                            compound=compound)
        return compound
# Fluid name -> ChemSpider ID.
# NOTE(review): presumably a fallback for fluids whose search is ambiguous;
# the lookup that uses it is not visible in this fragment.
backup_map = {
    'Propyne': 6095,
    'R236EA': 71342,
    'R245ca': 62827,
    'trans-2-Butene': 56442,
    'Oxygen': 952,
    'Fluorine': 22932,
    'Hydrogen': 762,
    'Deuterium': 22931,
    'HFE143m': 66577,
    'SulfurHexafluoride': 16425,
    'R114': 13853215,
}

# Make sure the key works: a known compound must come back with its known
# InChIKey. An explicit raise is used because `assert` statements are
# stripped when Python runs with -O, which would silently skip the check.
c = cs.get_compound(2157)
if c.inchikey != 'BSYNRYMUTXBXSQ-UHFFFAOYAW':
    raise AssertionError(
        'ChemSpider sanity check failed: unexpected InChIKey %r' % (c.inchikey,))

for fname in glob.glob('../fluids/*.json'):
    with open(fname, 'r') as fp:
        jj = json.load(fp)

    fluid = jj['INFO']['NAME']

    def doset(result):
        """Copy the identifiers of a ChemSpider result into this fluid's INFO block."""
        jj['INFO']['INCHI_STRING'] = result.inchi
        jj['INFO']['INCHI_KEY'] = result.inchikey
        jj['INFO']['CHEMSPIDER_ID'] = result.csid
        jj['INFO']['2DPNG_URL'] = result.image_url
        jj['INFO']['SMILES'] = result.smiles
# Fluid name -> ChemSpider ID.
# NOTE(review): presumably a fallback for fluids whose search is ambiguous;
# the lookup that uses it is not visible in this fragment.
backup_map = {
    'Propyne': 6095,
    'R236EA': 71342,
    'R245ca': 62827,
    'trans-2-Butene': 56442,
    'Oxygen': 952,
    'Fluorine': 22932,
    'Hydrogen': 762,
    'Deuterium': 22931,
    'HFE143m': 66577,
    'SulfurHexafluoride': 16425,
    'R114': 13853215,
}

# Make sure the key works: a known compound must come back with its known
# InChIKey. An explicit raise is used because `assert` statements are
# stripped when Python runs with -O, which would silently skip the check.
c = cs.get_compound(2157)
if c.inchikey != 'BSYNRYMUTXBXSQ-UHFFFAOYAW':
    raise AssertionError(
        'ChemSpider sanity check failed: unexpected InChIKey %r' % (c.inchikey,))

for fname in glob.glob('../fluids/*.json'):
    with open(fname, 'r') as fp:
        jj = json.load(fp)

    fluid = jj['INFO']['NAME']

    def doset(result):
        """Copy the identifiers of a ChemSpider result into this fluid's INFO block."""
        jj['INFO']['INCHI_STRING'] = result.inchi
        jj['INFO']['INCHI_KEY'] = result.inchikey
        jj['INFO']['CHEMSPIDER_ID'] = result.csid
        jj['INFO']['2DPNG_URL'] = result.image_url
        jj['INFO']['SMILES'] = result.smiles
# Read the API key from file and instantiate the ChemSpider API client.
with open("apikey", "r") as key:
    cs = ChemSpider(key.read().strip())

index = 0
# The "startfrom" file persists the ID of the molecule we last requested;
# resume from it when it exists.
try:
    with open("startfrom", "r") as f:
        index = int(f.read().strip())
except (OSError, ValueError):
    # Missing or unreadable file, or content that is not a number: start
    # from 0. Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
    pass

# NOTE(review): nothing visible in this fragment advances `index` or writes
# "startfrom" back, so the loop as shown never terminates -- confirm against
# the full script.
while True:
    try:
        compound = cs.get_compound(index)
        # Fetch the image before creating the file, so a failing ID does not
        # leave an empty .png behind.
        image = compound.image
        # Save the image with the ID as the name.
        with open("images/" + str(index) + ".png", "wb") as f:
            f.write(image)
        print(".", end="", flush=True)
    except ChemSpiPyServerError as err:
        # Skip over invalid IDs; report anything else.
        if "Invalid ID" in err.args[0]:
            print("x", end="", flush=True)
        else:
            print("ERROR", index)
class CompoundForm(forms.ModelForm):
    """
    A form for users to add compounds to the compound guide.

    Forces a check against the chemspider database to ensure no spurious
    compounds make their way into the compound guide.
    """

    # Adding this field, not in the database, allows users to match compounds
    # to a CAS_ID without us incurring issues for storing them.
    CAS_ID = forms.CharField(label='CAS ID', required=False)

    # If the user already knows the right value for this it allows them to
    # skip a step.
    CSID = forms.IntegerField(
        label='Chemspider ID',
        min_value=1,
        error_messages={'required': 'This value must be set or selected'})

    class Meta:
        fields = ('labGroup', 'abbrev', 'CSID', 'name', 'CAS_ID',
                  'chemicalClasses')
        model = Compound
        help_texts = {
            'abbrev': 'A local abbreviation by which the compound is known.',
            'name': 'A common or IUPAC name for the compound.',
            'CAS_ID': 'The CAS number for the compound. Optional.',
            'CSID': 'The Chemspider ID for the compound. If this is not included, a list will be provided for you to choose from.'
        }

    def __init__(self, user, *args, **kwargs):
        """Overridden version of the init method allows us to place the user's lab groups as a restricted set."""
        super(CompoundForm, self).__init__(*args, **kwargs)
        # Set by clean_CSID once the supplied CSID has been confirmed.
        self.compound = None
        self.chemSpider = ChemSpider(settings.CHEMSPIDER_TOKEN)
        self.fields['labGroup'].queryset = user.labgroup_set.all()
        if user.labgroup_set.all().exists():
            self.fields['labGroup'].empty_label = None

    def clean_CSID(self):
        """Check that the CSID is actually a valid id from chemspider.

        On success the first search result is remembered as ``self.compound``.

        Raises:
            ValidationError: code ``invalid_csid`` when the search is empty.
        """
        searchResults = self.chemSpider.simple_search(
            self.cleaned_data['CSID'])
        if len(searchResults) < 1:
            raise ValidationError(
                'The CSID you have provided is invalid', code='invalid_csid')
        self.compound = searchResults[0]
        return self.cleaned_data['CSID']

    def clean(self):
        """Verify that the CSID, CAS_ID (where supplied) and name are consistent.

        Raises:
            ValidationError: ``no_compounds`` when nothing matches the search
                terms, ``name_csid_conflict`` or ``name_cas_id_conflict`` when
                the name or CAS ID does not belong to the confirmed compound.
        """
        self.cleaned_data = super(CompoundForm, self).clean()

        if self.cleaned_data.get('name'):
            nameResults = self.chemSpider.simple_search(
                self.cleaned_data['name'])

            # Truthiness rather than `!= ''`: a CAS_ID that is absent from
            # cleaned_data reads as None and must count as "not supplied"
            # instead of triggering a lookup of a missing key. This also
            # matches the test used for the conflict check below.
            casId = self.cleaned_data.get('CAS_ID')
            if casId:
                CAS_IDResults = self.chemSpider.simple_search(casId)
                # the CAS_ID always generates a more restrictive set
                compoundChoices = [
                    compound for compound in nameResults
                    if compound in CAS_IDResults
                ][0:10]
            else:
                # if the CAS_ID is not supplied, then we just create a subset
                # based on the name search alone
                compoundChoices = nameResults[0:10]

            if self.compound is None and len(compoundChoices) > 0:
                # in essence, if a CSID was not supplied, but the chemspider
                # search returned chemspider results, then we offer those
                # results to the user to make a selection.
                self.fields['CSID'] = forms.ChoiceField(
                    choices=((choice.csid, choice.common_name)
                             for choice in compoundChoices),
                    widget=forms.widgets.RadioSelect)
                return self.cleaned_data
            elif self.compound is None:
                raise ValidationError(
                    'Your search terms failed to validate against the Chemspider database. Please contact a local administrator.',
                    code='no_compounds')
            else:
                if self.compound not in nameResults:
                    raise ValidationError(
                        'The name provided was not valid for the CSID provided. Please change the entry, or contact your local administrator.',
                        code='name_csid_conflict')
                elif casId and self.compound not in CAS_IDResults:
                    raise ValidationError(
                        'The CAS ID provided is not valid for the CSID provided. Remove, replace, or contact your local administrator.',
                        'name_cas_id_conflict')
                else:
                    return self.cleaned_data
        else:
            if self.compound is not None:
                # No name was given but the CSID is known: suggest the
                # compound's common name back to the user.
                # The submitted data is copied because the query dict is
                # otherwise immutable, then replaced directly.
                data = self.data.copy()
                data['name'] = self.compound.common_name
                # manually input an error message which is less demanding
                self._errors['name'] = self.error_class(
                    ['Please review this suggestion'])
                self.data = data  # override the old data
            return self.cleaned_data

    def save(self, commit=True):
        """Create (and if appropriate, saves) the compound instance, and adds Inchi and smiles from chemspider.

        With ``commit`` the compound and its many-to-many data are saved;
        otherwise the unsaved, populated instance is returned.
        """
        compound = super(CompoundForm, self).save(commit=False)
        csCompound = self.chemSpider.get_compound(compound.CSID)
        compound.INCHI = csCompound.inchi
        compound.smiles = csCompound.smiles
        compound.formula = csCompound.molecular_formula
        if commit:
            compound.save()
            self.save_m2m()
        return compound
# print(tokenchoice()) if os.path.isfile('chemspiderdb.json'): spiderjsonfileid = [] with open('chemspiderdb.json', 'r') as jsonfile: for f in jsonfile.readlines(): the_dict = json.loads(f) spiderjsonfileid.append(the_dict['_id']) # print(spiderjsonfileid) for csid in csids: # cskey = random.choice(cs_security_key) cs = ChemSpider(tokenchoice()) if csid in spiderjsonfileid: print('{0} has been in the file'.format(str(csid))) continue compound = cs.get_compound(csid) try: doc = {'_id': int(compound.csid), 'common_name': compound.common_name} sleep(random.uniform(0.2, 0.5)) doc['molecular_weight'] = compound.molecular_weight sleep(random.uniform(0, 0.5)) doc['molecular_formula'] = compound.molecular_formula doc['stdinchi'] = compound.stdinchi sleep(random.uniform(0.1, 0.5)) doc['stdinchikey'] = compound.stdinchikey doc['smiles'] = compound.smiles # sleep(random.uniform(1, 1.1)) with open('chemspiderdb.json', 'a') as jsonfile: json.dump(doc, jsonfile) jsonfile.write('\n') print("{0} Thanks! ".format(str(csid)))
cid = pcpCmpd.cid items["pubchem_cid"] = cid else: cid = items["pubchem_cid"] pcpCmpd = pcp.Compound.from_cid(cid) # ChemSpider if not "chemspider_id" in items: results = cs.simple_search(inchikey) results.sort() csCmpd = results[0] csid = csCmpd.csid items["chemspider_id"] = csid items["name"] = csCmpd.common_name.lower() else: csid = items["chemspider_id"] csCmpd = cs.get_compound(csid) # NIH resolver nihCmpd = cirpy.Molecule(inchikey, ['stdinchikey']) if not "iupac_name" in items: name = nihCmpd.iupac_name # try NIH resolver if name is None: # try PubChem name = pcpCmpd.iupac_name if type(name) is ListType: name = name[0] if name is None: name = items["name"] # chemspider common name items["iupac_name"] = name.lower() if not "molwt" in items: items["molwt"] = pcpCmpd.molecular_weight
def getMolecularWeight(self):
    """Return the chemspider molecular weight of this record as a Decimal.

    Returns None when the record has no (truthy) CSID.
    """
    client = ChemSpider(settings.CHEMSPIDER_TOKEN)
    if not self.CSID:
        return None
    weight = client.get_compound(self.CSID).molecular_weight
    return Decimal(weight)