def OnBlipSubmitted(properties, context):
    """Expand ``chem[<name>;image]`` markup in a submitted blip.

    The blip named by ``properties['blipId']`` is fetched from ``context``
    and its text is searched (case-insensitively) for ``chem[<name>;image]``
    where ``<name>`` is 1-20 letters, digits or hyphens.  For every match:

    * ``<name>`` is looked up with ``ChemSpiPy.simplesearch``;
    * a gadget is inserted at the start of the match and its state is set
      to ``{'molfile': compound.getMolFile()}``;
    * the markup is replaced by the bare ``<name>``;
    * the name is annotated with a ``link/manual`` pointing at the
      compound's ChemSpider page.

    Matches are processed from the last to the first.  All offsets come
    from the text as it was when the handler started, so editing the end
    of the document first keeps the offsets of the earlier matches valid.
    """
    blip = context.GetBlipById(properties['blipId'])
    contents = blip.GetDocument().GetText()

    # Pattern pieces: group 1 is the 'chem' key, group 2 the compound name,
    # group 3 the ';image' flag.
    key = '(chem)'
    leftdelim = '\\['
    query = '([a-zA-Z0-9-]{1,20})'
    image = '(;image)'
    rightdelim = '\\]'
    compiledregex = re.compile(key + leftdelim + query + image + rightdelim,
                               re.IGNORECASE | re.DOTALL)

    gadgeturl = 'http://www.danhagon.me.uk/Wave/ChemSpiderDoodleGadgetMVCDev.xml'

    # finditer() always yields an iterator (never None), so no guard is
    # needed.  Materialise it so the matches can be walked back-to-front.
    matches = list(compiledregex.finditer(contents))
    for chemicalname in reversed(matches):
        r = document.Range(0, 0)
        r.start = chemicalname.start()
        # NOTE(review): the range ends one past the regex match, exactly as
        # in the original code - confirm against the Range semantics.
        r.end = chemicalname.end() + 1
        name = chemicalname.group(2)

        # Obtain the ChemSpider ID and build the link to its page.
        compound = ChemSpiPy.simplesearch(name)
        chemspiderlink = 'http://www.chemspider.com/Chemical-Structure.%s.html' % compound

        # Insert the gadget and hand it the molfile for this compound.
        gadget = document.Gadget(gadgeturl)
        blip.GetDocument().InsertElement(r.start, gadget)
        delta = {'molfile': compound.getMolFile()}
        blip.GetDocument().GadgetSubmitDelta(gadget, delta)

        # Remove the markup, leaving just the compound name.
        blip.GetDocument().SetTextInRange(r, name)

        # Re-find the name to get the range for the link.  The search is
        # anchored at the match start so that an earlier occurrence of the
        # same name elsewhere in the blip is not annotated by mistake.
        linkstart = blip.GetDocument().GetText().find(name, r.start)
        if linkstart == -1:
            # Name not found after the edit: skip the link rather than
            # annotate a range built from -1.
            continue
        r.start = linkstart
        r.end = r.start + len(name)
        blip.GetDocument().SetAnnotation(r, 'link/manual', chemspiderlink)
def chemify(text):
    """Accepts a text string and returns dictionary of information and annotations

    Text string is parsed and sent to chemspider to return chemspider ID,
    image url, and molecular weight information that makes it feasible to
    convert weight to moles and similar.

    Returns a dictionary that contains at a minimum the keys
    'replacementtext', 'imageurl', and 'annotations', which is a list of
    dictionaries containing the keys 'text', 'annotation', 'value',
    'offset', 'length' for each of the annotations required.  The
    dictionary built here also carries 'originaltext' (the ``text``
    argument) and, when the ChemSpider search raises IndexError, an
    'error' key set to 'IndexError' with an apology in 'replacementtext'.

    Example of the intended shape:

    test = {'replacementtext': 'chemchemchem',
            'imageurl': 'http://www.chemspider.com/ImagesHandler.ashx?id=236',
            'annotations': [{'text': 'chem',
                             'annotation': 'link/manual',
                             'value': 'http://www.chemspider.com',
                             'offset': 4,
                             'length': 4},
                            {'text': 'chem',
                             'annotation': 'chemspidey.appspot.com/csid',
                             'value': '666',
                             'offset': 8,
                             'length': 4}
                            ]
            }
    """
    # Set up the dictionary to be returned
    chemified = {'replacementtext': 'chemchem',
                 'originaltext': text,
                 'imageurl': 'none',
                 'annotations': []
                 }

    # Parse the text using chemparse which returns a dictionary
    parsed = ChemParsing.chemparse(text)

    # Create local variables for the relevant returned information.
    # NOTE(review): amount, units and role (and the ChemSpider ID below) are
    # looked up but not yet folded into the result in the code visible
    # here - confirm whether more of this function exists elsewhere.
    parsedname = parsed.get('name')
    quantity = parsed.get('amount')
    quantityunits = parsed.get('units')
    role = parsed.get('role')

    # Obtain chemspider ID and other information via ChemSpider simple search
    try:
        chemspiderID = ChemSpiPy.simplesearch(parsedname)
    except IndexError:
        # The search raises IndexError when it has no result to return;
        # report that to the caller through the result dictionary.
        errorstring = "Sorry I can't find %s in ChemSpider - try a different name?" % parsedname
        chemified['replacementtext'] = errorstring
        chemified['error'] = 'IndexError'
        logging.debug('Failed to find a match in simplesearch')
        return chemified
    else:
        logging.debug('Success connecting to ChemSpider')

    # Honour the documented contract on the success path as well: always
    # hand back the dictionary rather than falling off the end with None.
    return chemified
def _quantity_suffix(amounttext, units, compound):
    """Return the ', <amount><units>, <n> <moles>' text for one match.

    ``amounttext`` and ``units`` are regex groups 3 and 4 of the match
    (either may be None or, for the amount, an empty/partial string).
    An empty string is returned when the amount is not a number or the
    units are not 'mg' or 'g'; ``compound.molweight()`` is only called
    when a quantity is actually going to be reported.
    """
    # 1000 * mg / (g/mol) is micromoles; 1000 * g / (g/mol) is millimoles.
    molelabels = {'mg': 'micromoles', 'g': 'millimoles'}

    if amounttext is None or units is None:
        return ''
    # The pattern is compiled with IGNORECASE, so 'MG' or 'G' can match.
    units = units.lower()
    if units not in molelabels:
        return ''
    try:
        amount = float(amounttext)
    except ValueError:
        # The amount group can match '' or '.', which are not numbers.
        return ''

    moles = round(1000 * (amount / compound.molweight()), 2)
    return ", " + amounttext + units + ', ' + str(moles) + " " + molelabels[units]


def OnBlipSubmitted(properties, context):
    """Expand ``chem[<name>...]`` markup in a submitted blip.

    The blip named by ``properties['blipId']`` is fetched from ``context``
    and its text is searched (case-insensitively) for
    ``chem[<name>;<amount> <units>]``, where ``<name>`` is 1-20 letters,
    digits or hyphens and the ``;``, amount and units are all optional.
    Each name is looked up with ``ChemSpiPy.simplesearch`` and the markup
    is replaced by ``<name> (csid:<id>) ``.  When an amount in 'mg' or 'g'
    is given, the amount and the matching number of micromoles or
    millimoles (rounded to two decimals) are added inside the brackets.

    All replacements are collected first and then applied from the last
    match to the first, so the offsets taken from the original text stay
    valid while the document is being edited.  After each replacement
    ``SetManualLink`` is called once with the compound and its ChemSpider
    URL, and once with the inserted text, 'chem' and 'lang'.
    """
    blip = context.GetBlipById(properties['blipId'])
    contents = blip.GetDocument().GetText()

    # Pattern pieces: group 1 is the 'chem' key, group 2 the compound name,
    # group 3 the optional amount and group 4 the optional units.
    key = '(chem)'
    leftdelim = '\\['
    query = '([a-zA-Z0-9-]{1,20})'
    optintspacer = ';?'
    optfloat = '\\s?(\\d{0,5}\\.?\\d{0,5})?'
    optunits = '\\s?([mgl]{1,2})?'
    optional = optintspacer + optfloat + optunits
    rightdelim = '\\]'
    compiledregex = re.compile(key + leftdelim + query + optional + rightdelim,
                               re.IGNORECASE | re.DOTALL)

    # finditer() always yields an iterator (never None), so no guard is
    # needed before looping over it.
    changeslist = []
    for chemicalname in compiledregex.finditer(contents):
        r = doc.Range(0, 0)
        r.start = chemicalname.start()
        # NOTE(review): the range ends one past the regex match, exactly as
        # in the original code - confirm against the Range semantics.
        r.end = chemicalname.end() + 1

        name = chemicalname.group(2)
        compound = ChemSpiPy.simplesearch(name)
        url = "http://www.chemspider.com/Chemical-Structure.%s.html" % compound

        insert = name + " (csid:" + compound
        insert = insert + _quantity_suffix(chemicalname.group(3),
                                           chemicalname.group(4),
                                           compound)
        insert = insert + ") "
        changeslist.append([r, insert, compound, url])

    # Apply the edits back-to-front so earlier offsets are not disturbed.
    for r, insert, compound, url in reversed(changeslist):
        blip.GetDocument().SetTextInRange(r, insert)
        # NOTE(review): SetManualLink is defined elsewhere and is called
        # here with both three and four arguments, as in the original.
        SetManualLink(blip, compound, url)
        SetManualLink(blip, insert, 'chem', 'lang')