def list_all_cid_from_sdf(sdffile, ID_tag, outfile):
    """Write the ID-tag value of every record in an SDF, one per line.

    Args:
        sdffile: SDF input, passed unchanged to ``sdfiterator.sdf_iter``.
        ID_tag: Name of the SDF data tag whose value identifies a record.
        outfile: Path of the text file to create; opened in ``'w'`` mode,
            so an existing file is overwritten.

    Raises:
        Whatever the ``ID_tag`` lookup on the result of ``get_sdf_tags``
        raises when a record lacks the tag (``KeyError`` if it is a dict).
    """
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the output even when a record is missing the tag.
    with open(outfile, 'w') as fp:
        for sdf in sdfiterator.sdf_iter(sdffile):
            fp.write('%s\n' % get_sdf_tags(sdf)[ID_tag])
def batch_sdf_to_smiles(sdfs):
    """Convert every record of an SDF text to SMILES.

    Args:
        sdfs: SDF content; it is passed through ``str()`` and wrapped in a
            ``StringIO`` before being handed to ``sdf_iter``.

    Returns:
        tuple: ``(smiles, errors)`` where ``smiles`` is the concatenation of
        the ``sdf_to_smiles`` results for the records that converted, in
        input order, and ``errors`` is the number of records for which
        ``sdf_to_smiles`` raised ``InputError``.
    """
    from compounddb.sdfiterator import sdf_iter
    from io import StringIO

    # Collect the pieces and join once instead of growing a string with +=,
    # which is quadratic in the worst case.
    converted = []
    err = 0
    for sdf in sdf_iter(StringIO(str(sdfs))):
        try:
            converted.append(sdf_to_smiles(sdf))
        except InputError:
            # A record that cannot be converted is counted, not fatal.
            err += 1
    return (''.join(converted), err)
def main():
    """Run a similarity search for every SDF record read from stdin.

    Reads the whole of stdin as SDF text, converts each record with
    ``sdf_to_smiles``, queries ``SimilaritySearch`` and writes one line per
    returned match to ``args['outfile']`` as ``"<query_id> <match>"``.

    Reads from the module-level ``args`` mapping:
        similarity: Fractional cutoff (e.g. ``"0.85"``), converted to an
            integer percentage.
        compounds: Converted with ``int()`` and passed as the third
            argument of ``SimilaritySearch``.
        outfile: Path of the output file (overwritten).
    """
    sdfs = sys.stdin.read()

    # Round away binary floating-point noise before truncating; otherwise
    # e.g. 0.29 * 100 == 28.999999999999996 would silently become 28.
    # Genuine fractional percentages are still truncated as before.
    similarity = int(round(float(args['similarity']) * 100, 9))
    compounds = int(args['compounds'])

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    whitespace = re.compile(r'\s+')

    with open(args['outfile'], 'w') as f:
        for sdf in sdf_iter(StringIO(str(sdfs))):
            # The converter output is split into the SMILES string and an
            # optional second field used as the query id.
            parts = whitespace.split(sdf_to_smiles(sdf).strip())
            smile_query = parts[0]
            query_id = parts[1] if len(parts) > 1 else ""

            output = SimilaritySearch(smile_query, similarity, compounds)
            for match in output.strip().split('\n'):
                f.write(query_id + " " + match + "\n")
def main():
    """Run a ChEMBL similarity search for every SDF record read from stdin.

    Reads the whole of stdin as SDF text, converts each record with
    ``sdf_to_smiles``, queries ``chembl_similarity`` and writes one line per
    match to ``args['outfile']`` as ``"<query_id> <match[0]> <match[1]>"``.
    Records for which ``chembl_similarity`` returns ``None`` produce no
    output.

    Reads from the module-level ``args`` mapping:
        similarity: Fractional cutoff (e.g. ``"0.85"``), converted to an
            integer percentage.
        compounds: Converted with ``int()`` and passed as the third
            argument of ``chembl_similarity``.
        outfile: Path of the output file (overwritten).
    """
    sdfs = sys.stdin.read()

    # Round away binary floating-point noise before truncating; otherwise
    # e.g. 0.29 * 100 == 28.999999999999996 would silently become 28.
    # Genuine fractional percentages are still truncated as before.
    similarity = int(round(float(args['similarity']) * 100, 9))
    numResults = int(args['compounds'])

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    whitespace = re.compile(r'\s+')

    with open(args['outfile'], 'w') as f:
        for sdf in sdf_iter(StringIO(str(sdfs))):
            # The converter output is split into the SMILES string and an
            # optional second field used as the query id.
            parts = whitespace.split(sdf_to_smiles(sdf).strip())
            smile_query = parts[0]
            query_id = parts[1] if len(parts) > 1 else ""

            matches = chembl_similarity(smile_query, similarity, numResults)
            # Identity test: None is the "no result" sentinel here.
            if matches is None:
                continue
            for match in matches:
                f.write(query_id + " " + match[0] + " " + match[1] + "\n")
def format_sdf_for_qsar(sdffile, output, ID_tag):
    """Rewrite an SDF so that each record's first line holds its ID.

    Cerius2 uses the first line of an SDF record as the ID tag, but some
    SDFs leave that line blank. Every record is therefore re-emitted with
    the value of ``ID_tag`` as its first line, followed by the rest of the
    original molblock, a single ``ID_tag`` data item and the ``$$$$``
    record separator. Any other data items of the input record are dropped.

    Args:
        sdffile: SDF input, passed unchanged to ``sdfiterator.sdf_iter``.
        output: Path of the SDF file to create; opened in ``'w'`` mode, so
            an existing file is overwritten.
        ID_tag: Name of the SDF data tag whose value identifies a record.

    Raises:
        Whatever the ``ID_tag`` lookup on the result of ``get_sdf_tags``
        raises when a record lacks the tag (``KeyError`` if it is a dict).
    """
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the output even when a record fails part-way through.
    with open(output, 'w') as fp:
        for sdf in sdfiterator.sdf_iter(sdffile):
            cid = get_sdf_tags(sdf)[ID_tag]

            # New first line: the compound id.
            fp.write('%s\n' % cid)
            # Drop the original first line and keep everything up to the
            # molfile terminator, which is "M  END" with two spaces.
            fp.write(sdf.split('\n', 1)[1].split('M  END')[0])
            fp.write('M  END\n')
            # SDF data item: header line, value line, blank line.
            fp.write('> <%s>\n%s\n\n' % (ID_tag, cid))
            fp.write('$$$$\n')