def _addLevels(self, DGs):
    """Compute the tree depth of each disease group and save it as DBLevel.

    For every entry in DGs, the record with the same DBID is looked up and
    its parent chain is followed (one lookup per hop, by DBID) until a
    record whose DBParent is None is reached.  A parentless record has
    level 1 and every hop up the chain adds one.  The entry is then saved
    again as a DiseaseGroup carrying the same fields plus the computed
    DBLevel.

    Args:
        DGs: iterable of DiseaseGroup-like objects exposing the DB*
            attributes read below.

    Returns:
        List of DBIDs whose level could not be determined, either because
        a record on the parent chain was missing or because the chain
        loops back on itself.
    """
    disq = db.GqlQuery('select DBParent from DiseaseGroup where DBID = :1')
    errors = []

    # enumerate() keeps the progress log firing once per 100 inputs even
    # when some of them fail; a success-only counter would stall on a
    # multiple of 100 and log on every subsequent failure.
    for index, DG in enumerate(DGs):
        if index % 100 == 0:
            logging.info('DG.DBID = %s', DG.DBID)
            logging.info('addLevels still working')

        disq.bind(DG.DBID)
        disease = disq.get()
        if disease is None:
            errors.append(DG.DBID)
            continue

        level = 1
        seen = set([DG.DBID])
        resolved = True
        while disease.DBParent is not None:
            parent_id = disease.DBParent
            if parent_id in seen:
                # Cyclic parent chain (e.g. a record that names itself as
                # its parent): bail out instead of looping forever.
                resolved = False
                break
            seen.add(parent_id)
            disq.bind(parent_id)
            disease = disq.get()
            if disease is None:
                # Dangling parent reference.
                resolved = False
                break
            level += 1

        if not resolved:
            errors.append(DG.DBID)
            continue

        # Parentless groups are keyed by their bare DBID; concatenating a
        # None parent would raise TypeError.
        if DG.DBParent is None:
            key_name = DG.DBID
        else:
            key_name = DG.DBID + '_' + DG.DBParent

        newDG = DiseaseGroup(key_name=key_name,
                             DBID=DG.DBID,
                             DBName=DG.DBName,
                             DBAlt_ids=DG.DBAlt_ids,
                             DBDef=DG.DBDef,
                             DBSyns=DG.DBSyns,
                             DBSubset=DG.DBSubset,
                             DBXrefs=DG.DBXrefs,
                             DBParent=DG.DBParent,
                             DBChildren=DG.DBChildren,
                             DBLevel=level)
        newDG.put()

    return errors
def _addLevels(self, DGs):
    """Compute the tree depth of each disease group and save it as DBLevel.

    For every entry in DGs, the record with the same DBID is looked up and
    its parent chain is followed (one lookup per hop, by DBID) until a
    record whose DBParent is None is reached.  A parentless record has
    level 1 and every hop up the chain adds one.  The entry is then saved
    again as a DiseaseGroup carrying the same fields plus the computed
    DBLevel.

    Args:
        DGs: iterable of DiseaseGroup-like objects exposing the DB*
            attributes read below.

    Returns:
        List of DBIDs whose level could not be determined, either because
        a record on the parent chain was missing or because the chain
        loops back on itself.
    """
    disq = db.GqlQuery('select DBParent from DiseaseGroup where DBID = :1')
    errors = []

    # enumerate() keeps the progress log firing once per 100 inputs even
    # when some of them fail; a success-only counter would stall on a
    # multiple of 100 and log on every subsequent failure.
    for index, DG in enumerate(DGs):
        if index % 100 == 0:
            logging.info('DG.DBID = %s', DG.DBID)
            logging.info('addLevels still working')

        disq.bind(DG.DBID)
        disease = disq.get()
        if disease is None:
            errors.append(DG.DBID)
            continue

        level = 1
        seen = set([DG.DBID])
        resolved = True
        while disease.DBParent is not None:
            parent_id = disease.DBParent
            if parent_id in seen:
                # Cyclic parent chain (e.g. a record that names itself as
                # its parent): bail out instead of looping forever.
                resolved = False
                break
            seen.add(parent_id)
            disq.bind(parent_id)
            disease = disq.get()
            if disease is None:
                # Dangling parent reference.
                resolved = False
                break
            level += 1

        if not resolved:
            errors.append(DG.DBID)
            continue

        # Parentless groups are keyed by their bare DBID; concatenating a
        # None parent would raise TypeError.
        if DG.DBParent is None:
            key_name = DG.DBID
        else:
            key_name = DG.DBID + '_' + DG.DBParent

        newDG = DiseaseGroup(key_name=key_name,
                             DBID=DG.DBID,
                             DBName=DG.DBName,
                             DBAlt_ids=DG.DBAlt_ids,
                             DBDef=DG.DBDef,
                             DBSyns=DG.DBSyns,
                             DBSubset=DG.DBSubset,
                             DBXrefs=DG.DBXrefs,
                             DBParent=DG.DBParent,
                             DBChildren=DG.DBChildren,
                             DBLevel=level)
        newDG.put()

    return errors
def get(self):
    """Write two hand-built DiseaseGroup records and fill in their children.

    Both records are created with hard-coded fields and stored.  Each one
    then has DBChildren set to the DBIDs of up to 100 DiseaseGroup records
    whose DBParent equals its DBID, and is stored again.  Finally a short
    HTML body is written and the request is redirected to
    /admin/updateDiseaseLevels.
    """
    def1 = u'A lysosomal storage disease that involves the accumulation of harmful amounts of lipids (fats) in some of the body’s cells and tissues.'
    rogue1 = DiseaseGroup(key_name=u'9455_3211',
                          DBID=u'9455',
                          DBName=u'lipid storage disorder',
                          DBAlt_ids=[],
                          DBDef=def1,
                          DBSyns=[],
                          DBSubset=None,
                          DBXrefs=[],
                          DBParent=u'3211',
                          DBChildren=[],
                          DBLevel=5)
    rogue1.put()

    def2 = u'A mullerian mixed tumor that is composed_of carcinoma of different müllerian types and composed_of homologous or heterologous sarcoma-like components.'
    # key_name follows the '<DBID>_<parent>' convention, so the parent is
    # 154.  Using the record's own DBID here would make it its own parent
    # (and its own child in the query below).
    rogue2 = DiseaseGroup(key_name=u'4236_154',
                          DBID=u'4236',
                          DBName=u'carcinosarcoma',
                          DBAlt_ids=[],
                          DBDef=def2,
                          DBSyns=[],
                          DBSubset=None,
                          DBXrefs=[],
                          DBParent=u'154',
                          DBChildren=[],
                          DBLevel=6)
    rogue2.put()

    pq = db.GqlQuery('select DBID from DiseaseGroup where DBParent = :1')
    added = 0
    for disease in (rogue1, rogue2):
        pq.bind(disease.DBID)
        disease.DBChildren = [child.DBID for child in pq.fetch(100)]
        disease.put()
        added += 1

    logging.info('added = %s', added)

    # NOTE(review): the redirect below presumably supersedes this body;
    # confirm against the webapp framework before relying on it.
    self.response.out.write(''' <html><body> DONE </body></html> ''')
    self.redirect('/admin/updateDiseaseLevels')
def get(self):
    """Reload every DiseaseGroup record from the Disease Ontology OBO file.

    Deletes the existing DiseaseGroup records, parses the ontology, and
    stores one record per (term, parent) pair, keyed '<DOID>_<parent>'.
    A term without parents is stored once, keyed by its bare DOID, with
    DBParent set to None.  DBLevel is left as None for every record; the
    page written at the end says to update the disease levels next.
    Definitions longer than 500 characters are truncated to 500.

    Terms whose record raises UnicodeDecodeError while being built or
    stored are skipped and their DOID is listed in the response.  The
    request is finally redirected to /admin/updateRogueDiseases.
    """
    old = DiseaseGroup.all().fetch(10**6)
    for d in old:
        d.delete()

    added = 0
    errorsCaught = []
    parser = Parsers.DOParser('http://diseaseontology.svn.sourceforge.net/viewvc/diseaseontology/trunk/HumanDO.obo')
    terms = parser.parse()
    # terms = {'DOID': ('name', ['alt_ids'], 'def', ['synonyms'], 'subset', ['xrefs'], 'is_a')}

    for DOID, attrs in terms.items():
        defi = attrs[2]
        if defi is not None and len(defi) > 500:
            defi = defi[:500]  # 500 character limit cutoff
        parents = attrs[-1]
        children = self._getChildren(DOID, terms)

        # A parentless term is stored once with DBParent=None.  Reading the
        # loop variable `parent` in that case would raise NameError on the
        # first such term, or reuse the parent of a previously processed
        # term.
        for parent in (parents or [None]):
            if parent is None:
                keyName = DOID
            else:
                keyName = '{0}_{1}'.format(DOID, parent)
            try:
                DG = DiseaseGroup(key_name=keyName,
                                  DBID=DOID,
                                  DBName=attrs[0],
                                  DBAlt_ids=attrs[1],
                                  DBDef=defi,
                                  DBSyns=attrs[3],
                                  DBSubset=attrs[4],
                                  DBXrefs=attrs[5],
                                  DBParent=parent,
                                  DBChildren=children,
                                  DBLevel=None)
                DG.put()
                added += 1
            except UnicodeDecodeError:
                errorsCaught.append(DOID)

    self.response.out.write(''' <html> <body> DGs added:''' + str(added) +
                            '<br> length of terms:' + str(len(terms)) +
                            '<br> errors caught:' + str(errorsCaught) +
                            '<br> UPDATE DISEASE LEVELS NOW')
    self.response.out.write(''' <form action='/' method = 'get'> <div><input type='submit' value='Done'></div> </form> ''')
    self.response.out.write(''' </body> </html> ''')
    # NOTE(review): the redirect presumably supersedes the body written
    # above; confirm against the webapp framework before relying on it.
    self.redirect('/admin/updateRogueDiseases')
def get(self):
    """Write two hand-built DiseaseGroup records and fill in their children.

    Both records are created with hard-coded fields and stored.  Each one
    then has DBChildren set to the DBIDs of up to 100 DiseaseGroup records
    whose DBParent equals its DBID, and is stored again.  Finally a short
    HTML body is written and the request is redirected to
    /admin/updateDiseaseLevels.
    """
    def1 = u'A lysosomal storage disease that involves the accumulation of harmful amounts of lipids (fats) in some of the body’s cells and tissues.'
    rogue1 = DiseaseGroup(key_name=u'9455_3211',
                          DBID=u'9455',
                          DBName=u'lipid storage disorder',
                          DBAlt_ids=[],
                          DBDef=def1,
                          DBSyns=[],
                          DBSubset=None,
                          DBXrefs=[],
                          DBParent=u'3211',
                          DBChildren=[],
                          DBLevel=5)
    rogue1.put()

    def2 = u'A mullerian mixed tumor that is composed_of carcinoma of different müllerian types and composed_of homologous or heterologous sarcoma-like components.'
    # key_name follows the '<DBID>_<parent>' convention, so the parent is
    # 154.  Using the record's own DBID here would make it its own parent
    # (and its own child in the query below).
    rogue2 = DiseaseGroup(key_name=u'4236_154',
                          DBID=u'4236',
                          DBName=u'carcinosarcoma',
                          DBAlt_ids=[],
                          DBDef=def2,
                          DBSyns=[],
                          DBSubset=None,
                          DBXrefs=[],
                          DBParent=u'154',
                          DBChildren=[],
                          DBLevel=6)
    rogue2.put()

    pq = db.GqlQuery('select DBID from DiseaseGroup where DBParent = :1')
    added = 0
    for disease in (rogue1, rogue2):
        pq.bind(disease.DBID)
        disease.DBChildren = [child.DBID for child in pq.fetch(100)]
        disease.put()
        added += 1

    logging.info('added = %s', added)

    # NOTE(review): the redirect below presumably supersedes this body;
    # confirm against the webapp framework before relying on it.
    self.response.out.write(''' <html><body> DONE </body></html> ''')
    self.redirect('/admin/updateDiseaseLevels')
def get(self):
    """Reload every DiseaseGroup record from the Disease Ontology OBO file.

    Deletes the existing DiseaseGroup records, parses the ontology, and
    stores one record per (term, parent) pair, keyed '<DOID>_<parent>'.
    A term without parents is stored once, keyed by its bare DOID, with
    DBParent set to None.  DBLevel is left as None for every record; the
    page written at the end says to update the disease levels next.
    Definitions longer than 500 characters are truncated to 500.

    Terms whose record raises UnicodeDecodeError while being built or
    stored are skipped and their DOID is listed in the response.  The
    request is finally redirected to /admin/updateRogueDiseases.
    """
    old = DiseaseGroup.all().fetch(10**6)
    for d in old:
        d.delete()

    added = 0
    errorsCaught = []
    parser = Parsers.DOParser('http://diseaseontology.svn.sourceforge.net/viewvc/diseaseontology/trunk/HumanDO.obo')
    terms = parser.parse()
    # terms = {'DOID': ('name', ['alt_ids'], 'def', ['synonyms'], 'subset', ['xrefs'], 'is_a')}

    for DOID, attrs in terms.items():
        defi = attrs[2]
        if defi is not None and len(defi) > 500:
            defi = defi[:500]  # 500 character limit cutoff
        parents = attrs[-1]
        children = self._getChildren(DOID, terms)

        # A parentless term is stored once with DBParent=None.  Reading the
        # loop variable `parent` in that case would raise NameError on the
        # first such term, or reuse the parent of a previously processed
        # term.
        for parent in (parents or [None]):
            if parent is None:
                keyName = DOID
            else:
                keyName = '{0}_{1}'.format(DOID, parent)
            try:
                DG = DiseaseGroup(key_name=keyName,
                                  DBID=DOID,
                                  DBName=attrs[0],
                                  DBAlt_ids=attrs[1],
                                  DBDef=defi,
                                  DBSyns=attrs[3],
                                  DBSubset=attrs[4],
                                  DBXrefs=attrs[5],
                                  DBParent=parent,
                                  DBChildren=children,
                                  DBLevel=None)
                DG.put()
                added += 1
            except UnicodeDecodeError:
                errorsCaught.append(DOID)

    self.response.out.write(''' <html> <body> DGs added:''' + str(added) +
                            '<br> length of terms:' + str(len(terms)) +
                            '<br> errors caught:' + str(errorsCaught) +
                            '<br> UPDATE DISEASE LEVELS NOW')
    self.response.out.write(''' <form action='/' method = 'get'> <div><input type='submit' value='Done'></div> </form> ''')
    self.response.out.write(''' </body> </html> ''')
    # NOTE(review): the redirect presumably supersedes the body written
    # above; confirm against the webapp framework before relying on it.
    self.redirect('/admin/updateRogueDiseases')