コード例 #1
0
ファイル: search.py プロジェクト: tjworks/precon
def handler(req):
    """
    Autocomplete lookup against the 'indices' collection.

    Reads the request parameter "term" and finds every index document
    whose _id starts with the lower-cased term. Each entry of a matching
    document's 'names' mapping (object id -> name) becomes one result
    dict with the keys 'label', 'value' and '_id'. The optional request
    parameter "filter" keeps only results whose model, looked up in
    models.prefix_mapping by the first four characters of the object id,
    equals the filter value.

    Returns a SmartResponse wrapping the result list, or wrapping an
    Exception when "term" is missing.
    """
    import re  # local import: the file's import header is not visible here

    term = req.REQUEST.get("term")
    if not term:
        return SmartResponse(Exception("Missing parameter: term"), req)
    # The term comes straight from the request; escape it so regex
    # metacharacters are matched literally by the prefix search.
    query = {'_id': {'$regex': '^%s' % re.escape(term.lower())}}
    # filter by entity/network/people etc
    model_filter = req.REQUEST.get("filter") or None
    ret = []
    seen = set()  # object ids already handled, even if they were skipped
    for r in mongo.getCollection('indices').find(query):
        if 'names' not in r:
            continue
        for obj_id, name in r['names'].items():
            if obj_id in seen:
                continue
            seen.add(obj_id)
            if not name:
                continue
            name = ("%s" % name)[0:80]
            md = ''
            # Fall back to the bare name so that ids with an unknown prefix
            # no longer hit an unassigned (or stale) label.
            label = name
            prefix = obj_id[0:4]
            if prefix in models.prefix_mapping:
                md = models.prefix_mapping[prefix]
                label = "%s: %s" % (models.model_label_mapping[md], name)

            if model_filter and model_filter != md:
                continue
            ret.append({'label': label, 'value': name, '_id': obj_id})
        # Checked once per index document: stop reading further documents
        # as soon as more than 50 results have been collected.
        if len(ret) > 50:
            break

    return SmartResponse(ret, req)
コード例 #2
0
ファイル: ukpmc.py プロジェクト: tjworks/precon
def ukpmc(ids=None):
    """
    Load the entities annotated on UKPMC abstract pages into the DB.

    `ids` is a comma-separated string of pubmed ids; when it is empty or
    omitted, demo_pubmeds is used instead. For every id the publication
    document 'publ<id>' is looked up, the UKPMC abstract page is fetched,
    and each annotated span (disease, protein, geneOntology, species,
    chemical) found in the page becomes one entity on the publication.
    The publication is saved only when at least one entity was found.
    """
    pubmed_ids = ids.split(",") if ids else demo_pubmeds

    base_url = 'http://ukpmc.ac.uk/abstract/MED/'
    import requests
    import re
    publications = mongo.getCollection('publication')
    annotation = r'<span class="(disease|protein|geneOntology|species|chemical)".*?_blank">(.*?)</a></span>'
    for pubmed_id in pubmed_ids:
        print("#### proceesing %s" % pubmed_id)
        pub = Publication(publications.find_one({'_id': 'publ%s' % pubmed_id}))
        response = requests.get("%s%s" % (base_url, pubmed_id))
        if response.status_code != 200:
            # Pages that did not load are skipped without any message.
            continue
        from django.utils.encoding import smart_str, smart_unicode
        page = smart_str(response.text)
        # Keyed by lower-cased name, so repeated mentions of one name
        # collapse into a single entity (the last occurrence wins).
        found = {}
        for match in re.finditer(annotation, page):
            kind = match.group(1)
            kind = 'go' if kind == 'geneOntology' else kind.lower()
            entity_name = match.group(2).lower()
            found[entity_name] = {'name': entity_name, 'group': kind}
        pub.entities = list(found.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(found))
コード例 #3
0
ファイル: search.py プロジェクト: linkai1208/precon
def handler(req):
    """
    Autocomplete lookup against the 'indices' collection.

    Reads the request parameter "term" and finds every index document
    whose _id starts with the lower-cased term. Each entry of a matching
    document's 'names' mapping (object id -> name) becomes one result
    dict with the keys 'label', 'value' and '_id'. The optional request
    parameter "filter" keeps only results whose model, looked up in
    models.prefix_mapping by the first four characters of the object id,
    equals the filter value.

    Returns a SmartResponse wrapping the result list, or wrapping an
    Exception when "term" is missing.
    """
    import re  # local import: the file's import header is not visible here

    term = req.REQUEST.get("term")
    if not term:
        return SmartResponse(Exception("Missing parameter: term"), req)
    # The term comes straight from the request; escape it so regex
    # metacharacters are matched literally by the prefix search.
    query = {'_id': {'$regex': '^%s' % re.escape(term.lower())}}
    # filter by entity/network/people etc
    model_filter = req.REQUEST.get("filter") or None
    ret = []
    seen = set()  # object ids already handled, even if they were skipped
    for r in mongo.getCollection('indices').find(query):
        if 'names' not in r:
            continue
        for obj_id, name in r['names'].items():
            if obj_id in seen:
                continue
            seen.add(obj_id)
            if not name:
                continue
            name = ("%s" % name)[0:80]
            md = ''
            # Fall back to the bare name so that ids with an unknown prefix
            # no longer hit an unassigned (or stale) label.
            label = name
            prefix = obj_id[0:4]
            if prefix in models.prefix_mapping:
                md = models.prefix_mapping[prefix]
                label = "%s: %s" % (models.model_label_mapping[md], name)

            if model_filter and model_filter != md:
                continue
            ret.append({'label': label, 'value': name, '_id': obj_id})
        # Checked once per index document: stop reading further documents
        # as soon as more than 50 results have been collected.
        if len(ret) > 50:
            break

    return SmartResponse(ret, req)
コード例 #4
0
ファイル: load.py プロジェクト: linkai1208/precon
def ukpmc(ids=None):
    """
    Load the entities annotated on UKPMC abstract pages into the DB.

    `ids` is a comma-separated string of pubmed ids; when it is empty or
    omitted, demo_pubmeds is used instead. For every id the publication
    document 'publ<id>' is looked up, the UKPMC abstract page is fetched,
    and each annotated span (disease, protein, geneOntology, species,
    chemical) found in the page becomes one entity on the publication.
    The publication is saved only when at least one entity was found.
    """
    pubmed_ids = ids.split(",") if ids else demo_pubmeds

    base_url = 'http://ukpmc.ac.uk/abstract/MED/'
    import requests
    import re
    publications = mongo.getCollection('publication')
    annotation = r'<span class="(disease|protein|geneOntology|species|chemical)".*?_blank">(.*?)</a></span>'
    for pubmed_id in pubmed_ids:
        print("#### proceesing %s" % pubmed_id)
        pub = Publication(publications.find_one({'_id': 'publ%s' % pubmed_id}))
        response = requests.get("%s%s" % (base_url, pubmed_id))
        if response.status_code != 200:
            # Pages that did not load are skipped without any message.
            continue
        from django.utils.encoding import smart_str, smart_unicode
        page = smart_str(response.text)
        # Keyed by lower-cased name, so repeated mentions of one name
        # collapse into a single entity (the last occurrence wins).
        found = {}
        for match in re.finditer(annotation, page):
            kind = match.group(1)
            kind = 'go' if kind == 'geneOntology' else kind.lower()
            entity_name = match.group(2).lower()
            found[entity_name] = {'name': entity_name, 'group': kind}
        pub.entities = list(found.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(found))
コード例 #5
0
ファイル: views.py プロジェクト: tjworks/precon
def index(keywords, obj):
    """
    Register `obj` under each keyword in the 'indices' collection.

    `keywords` is either a whitespace-separated string or an iterable of
    words. For every non-empty word, the index document whose _id is the
    lower-cased word is upserted: obj.name is stored under
    'names.<obj._id>' and obj._id is added to the 'ids' set.
    """
    words = keywords.split() if isinstance(keywords, basestring) else keywords
    for word in words:
        if not word:
            continue
        key = word.lower()
        name_entry = {'names.' + obj._id: obj.name}
        indices = mongo.getCollection('indices')
        indices.update(
            {'_id': key},
            {'$set': name_entry, '$addToSet': {'ids': obj._id}},
            upsert=True)
コード例 #6
0
def index(keywords, obj):
    """
    Register `obj` under each keyword in the 'indices' collection.

    `keywords` is either a whitespace-separated string or an iterable of
    words. For every non-empty word, the index document whose _id is the
    lower-cased word is upserted: obj.name is stored under
    'names.<obj._id>' and obj._id is added to the 'ids' set.
    """
    words = keywords.split() if isinstance(keywords, basestring) else keywords
    for word in words:
        if not word:
            continue
        key = word.lower()
        name_entry = {'names.' + obj._id: obj.name}
        indices = mongo.getCollection('indices')
        indices.update(
            {'_id': key},
            {'$set': name_entry, '$addToSet': {'ids': obj._id}},
            upsert=True)
コード例 #7
0
ファイル: models.py プロジェクト: tjworks/precon
 def save(self):
     """
     Persist this object into the collection named by self._col.

     Runs the validate and beforeSave hooks when they are set, normalises
     the id with cleanup_id, stamps create_tm (only when not already
     set), update_tm and tm (only when not already set), saves the
     document, then runs the afterSave hook when it is set.
     """
     if self.validate:
         self.validate()
     if self.beforeSave:
         self.beforeSave()

     collection = mongo.getCollection(self._col)

     self._id = self.cleanup_id(self._id)

     message = "Persisting %s: %s" % (self._col, self._id)
     logger.debug(message)

     # Creation stamps are kept once set; update_tm is refreshed every save.
     self.create_tm = self.create_tm or getTime()
     self.update_tm = getTime()
     self.tm = self.tm or time.time()

     collection.save(self, safe=True)

     if self.afterSave:
         self.afterSave()
コード例 #8
0
    def save(self):
        """
        Persist this object into the collection named by self._col.

        Runs the validate and beforeSave hooks when they are set,
        normalises the id with cleanup_id, stamps create_tm (only when not
        already set), update_tm and tm (only when not already set), saves
        the document, then runs the afterSave hook when it is set.
        """
        if self.validate:
            self.validate()
        if self.beforeSave:
            self.beforeSave()

        collection = mongo.getCollection(self._col)

        self._id = self.cleanup_id(self._id)

        message = "Persisting %s: %s" % (self._col, self._id)
        logger.debug(message)

        # Creation stamps are kept once set; update_tm is refreshed every save.
        self.create_tm = self.create_tm or getTime()
        self.update_tm = getTime()
        self.tm = self.tm or time.time()

        collection.save(self, safe=True)

        if self.afterSave:
            self.afterSave()
コード例 #9
0
ファイル: importer.py プロジェクト: linkai1208/precon
def importEdges(filename=None, start=8378):
    """
    Import edges, one pair per line.

    Edge property: - no direction, -> left to right, <- right to left

    Anything from "///" to the end of a line is treated as a comment and
    dropped. Lines that contain none of the separators "->", "<-" or
    " - " are reported through error() and skipped.

    filename -- path of the edge file; defaults to the GBM CSV path.
    start    -- 1-based position, among the valid edge lines, of the first
                edge to import; earlier edges are counted but skipped. The
                default keeps the resume point that used to be hard-coded.

    Raises ValueError when no entity exists for one of the two symbols of
    an edge.
    """
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv'

    # add a network
    n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network()
    n.name = "GBM Predicted Tumor Network"
    n.group = "predicted"
    n.owner = 'precon'
    n.save()

    col = mongo.db()['entity']
    count = 0
    now = time.time()
    with open(filename) as f:
        for line in f:
            # Cut the trailing comment, including one that starts the line.
            line = line.partition("///")[0].strip()

            items = line.split("->")
            if len(items) == 1:
                items = line.split("<-")
            if len(items) == 1:
                items = line.split(" - ")
            if len(items) == 1:
                error("Ignore invalid line: [%s]" % line)
                continue
            count += 1
            if count < start:
                continue

            tmp = [items[0].lower().strip(), items[1].lower().strip()]
            entities = [None, None]
            # Query once and reuse the rows for both the debug count and
            # the symbol -> entity resolution.
            rows = list(col.find({'symbol': {'$in': tmp}}))
            print("!!! %d " % len(rows))
            for r in rows:
                if r['symbol'] == tmp[0]:
                    entities[0] = Entity(r)
                if r['symbol'] == tmp[1]:
                    entities[1] = Entity(r)

            # The list always has two slots, so check the slots themselves
            # and fail with a real exception instead of a string.
            missing = [sym for sym, ent in zip(tmp, entities) if ent is None]
            if missing:
                raise ValueError(
                    "Invalid nodes %s: no entity found for symbol(s) %s" %
                    (tmp, missing))

            node1 = Node.findOne({'network': n._id, 'entity': entities[0]._id})
            if not node1:
                node1 = Node({'network': n._id}, entities[0])
                node1.save()
            node2 = Node.findOne({'network': n._id, 'entity': entities[1]._id})
            if not node2:
                node2 = Node({'network': n._id}, entities[1])
                node2.save()

            con = Connection()
            con._id = "conn_%s_%s" % (tmp[0], tmp[1])
            con.nodes = [node1._id, node2._id]
            con.entities = [entities[0]._id, entities[1]._id]
            con.type = 'predicted'
            con.network = n._id
            con.label = ""
            con.save()

            print("Saving connection %d %s" % (count, con._id))

    finish = time.time()
    print("Imported %d edges, time elpased %d seconds" % (count, finish - now))
コード例 #10
0
ファイル: models.py プロジェクト: tjworks/precon
 def findOne(cls, query):
     """Return the first document matching `query` in the class's
     collection (cls._col) wrapped in `cls`, or None when nothing matches."""
     record = mongo.getCollection(cls._col).find_one(query)
     return cls(record) if record else None
コード例 #11
0
ファイル: mif.py プロジェクト: tjworks/precon
def importmif():
    """
    Import the IntAct PSI-MI 2.5 dataset files into MongoDB.

    Only the 'Parkinsons' category directory under
    data/IntAct/psi25/datasets is processed. Every file in it is parsed
    with parseFile into a Network; networks whose id was already seen are
    reported and dropped. Each connection node is then linked to the
    entity whose 'intact' reference equals the node's 'entity' reference,
    and connections, networks, resolved nodes and distinct entities are
    inserted into their collections.

    Returns the list of imported networks.
    """
    basedir = "data/IntAct/psi25/datasets"
    cats = os.listdir(basedir)

    networks = []
    entities = []
    connections = []
    nodes = []
    seen_networks = set()

    for c in cats:
        if c != 'Parkinsons':
            continue
        print("Processing category %s" % c)
        # os.path.join instead of a hard-coded "\\" so the paths also
        # resolve on platforms whose separator is not a backslash.
        catdir = os.path.join(basedir, c)
        for filename in os.listdir(catdir):
            path = os.path.join(catdir, filename)
            if os.path.isdir(path):
                continue
            log("Processing %s" % path)
            res = Network()
            res.group = c
            res.refs = {}
            res.connections = []
            res.entities = []
            res.refs['intact'] = filename.replace(".xml", "")
            parseFile(path, res)

            if res._id in seen_networks:
                error("Duplicated id: %s/%s" % (c, path))
                continue
            networks.append(res)
            seen_networks.add(res._id)
            if res.entities:
                entities.extend(res.entities)

            connections.extend(res.connections)
            tmp_nodes = []
            for con in res.connections:
                if con.nodes:
                    tmp_nodes.extend(con.nodes)
            nodes.extend(tmp_nodes)

            log("Connections: %d  Participants %d Interactors: %d" %
                (len(res.connections), len(tmp_nodes), len(res.entities)))

    nc = mongo.getCollection('network')
    ec = mongo.getCollection('entity')
    cc = mongo.getCollection('connection')
    nodec = mongo.getCollection('node')

    # Resolve each node's IntAct interactor reference to an entity id and
    # keep only the resolved nodes on the connection.
    for con in connections:
        node_ids = []
        con.entities = []
        for node in con.nodes:
            ent_id = ''
            if node.refs and node.refs['entity']:
                # node.refs['entity'] is the IntAct internal ID
                intact_id = node.refs['entity']
                for item in entities:
                    if item.refs and item.refs['intact'] == intact_id:
                        ent_id = item._id
                        break
            if not ent_id:
                error("Unresolved interactorRef for %s" % node)
            else:
                node.entity = ent_id
                node_ids.append(node._id)
                con.entities.append(ent_id)
        con.nodes = node_ids

    for con in connections:
        cc.insert(con, safe=True)
        log("Saved connection %s" % con._id)

    for network in networks:
        # The child lists are stored in their own collections.
        del network['entities']
        del network['connections']
        nc.insert(network, safe=True)
        log("Saved network %s" % network._id)

    for node in nodes:
        if not node.entity:
            continue
        nodec.insert(node, safe=True)
        log("Saved node %s" % node._id)

    # The same entity can come from several files; insert each id once.
    saved_entities = set()
    for entity in entities:
        if entity._id in saved_entities:
            continue
        ec.insert(entity, safe=True)
        saved_entities.add(entity._id)
        log("Saved entity %s" % entity._id)

    log("###########################")
    log("Total networks: %d" % len(networks))
    log("Total interactors: %d" % len(entities))
    log("Total nodes: %d" % (len(nodes)))

    log("Done")
    return networks
コード例 #12
0
ファイル: load.py プロジェクト: linkai1208/precon
def load_pubmeds(ids=None):
    """
    Fetch PubMed records from TogoWS and store them as publications.

    `ids` is a comma-separated string of pubmed ids; when it is empty or
    omitted, demo_pubmeds is used. For each id the article XML is loaded,
    a Publication ('publ_pubmed<id>') is built with title, abstract and
    language, and every author with a last name is inserted into the
    'people' collection (or looked up there when the insert fails) and
    linked to the publication. Failures for a single id are printed and
    do not stop the run.

    Returns the list of Publication objects that were built.
    """
    ids = ids.split(",") if ids else demo_pubmeds

    url = "http://togows.dbcls.jp/entry/pubmed/$ID?format=xml"
    # Shape of a stored publication:
    #   {'_id': '', 'name': '', 'refs': {'pubmed': ''}, 'abstract': '',
    #    'local': 0, 'url': '', 'published': 1, 'authors': []}
    pc = mongo.getCollection('people')
    try:
        pc.create_index([("last", 1), ("middle", 1), ("first", 1)],
                        unique=True)
    except Exception:
        # Best effort: index creation failures are deliberately ignored.
        pass

    pubs = []
    # Iterate the ids computed above; the loop used to read an undefined
    # name, so the `ids` argument was never honoured.
    for pid in ids:
        try:
            uri = url.replace('$ID', pid)
            print("Loading %s" % uri)
            doc = XML2Dict().fromurl(uri)
            article = doc.PubmedArticleSet.PubmedArticle.MedlineCitation.Article

            pub = Publication()
            pub._id = "publ_pubmed%s" % (pid)
            pub.refs = {'pubmed': pid}
            pub.name = article['ArticleTitle']['value'] if article.ArticleTitle else ''
            pub.abstract = ''
            if article.Abstract and article.Abstract.AbstractText:
                texts = article.Abstract.AbstractText
                if not isinstance(texts, list):
                    texts = [texts]
                pub.abstract = "\n\n".join([text['value'] for text in texts])

            pub.language = article['Language']['value'] if article.Language else ''
            pubs.append(pub)

            pub.authors = []
            authors = article['AuthorList']['Author']
            # Same normalisation as AbstractText above: presumably a lone
            # <Author> arrives as a single mapping rather than a list
            # (TODO confirm against XML2Dict).
            if not isinstance(authors, list):
                authors = [authors]
            for author in authors:
                people = {
                    'first': author.ForeName.value
                    if author.ForeName and author.ForeName.value else '',
                    'last': author.LastName.value
                    if author.LastName and author.LastName.value else '',
                    'middle': author.Initials.value
                    if author.Initials and author.Initials.value else '',
                }
                if not people['last']:
                    continue
                people['namekey'] = "%s.%s.%s" % (people['first'].lower(),
                                                  people['middle'].lower(),
                                                  people['last'].lower())

                people['_id'] = idtool.generate('peop')
                try:
                    pc.insert(people, safe=True)
                    print("Inserted %s" % people)
                except Exception:
                    # Insert failed (presumably the unique name index):
                    # look the existing person up by the name fields.
                    del people['_id']
                    people = pc.find_one(people)
                if people:
                    pc.update({'_id': people['_id']},
                              {'$addToSet': {'publications': pub._id}},
                              safe=True)
                    pub.authors.append(people)
        except Exception:
            print("ERROR: %s" % traceback.format_exc())

    pubc = mongo.getCollection('publication')
    for pub in pubs:
        try:
            pubc.insert(pub)
            print("Inserted pub: %s" % pub)
        except Exception:
            print("ERROR %s" % traceback.format_exc())

    log("Done")

    return pubs
コード例 #13
0
ファイル: importer.py プロジェクト: tjworks/precon
def importEdges(filename=None, start=8378):
    """
    Import edges, one pair per line.

    Edge property: - no direction, -> left to right, <- right to left

    Anything from "///" to the end of a line is treated as a comment and
    dropped. Lines that contain none of the separators "->", "<-" or
    " - " are reported through error() and skipped.

    filename -- path of the edge file; defaults to the GBM CSV path.
    start    -- 1-based position, among the valid edge lines, of the first
                edge to import; earlier edges are counted but skipped. The
                default keeps the resume point that used to be hard-coded.

    Raises ValueError when no entity exists for one of the two symbols of
    an edge.
    """
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv'

    # add a network
    n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network()
    n.name = "GBM Predicted Tumor Network"
    n.group = "predicted"
    n.owner = 'precon'
    n.save()

    col = mongo.db()['entity']
    count = 0
    now = time.time()
    with open(filename) as f:
        for line in f:
            # Cut the trailing comment, including one that starts the line.
            line = line.partition("///")[0].strip()

            items = line.split("->")
            if len(items) == 1:
                items = line.split("<-")
            if len(items) == 1:
                items = line.split(" - ")
            if len(items) == 1:
                error("Ignore invalid line: [%s]" % line)
                continue
            count += 1
            if count < start:
                continue

            tmp = [items[0].lower().strip(), items[1].lower().strip()]
            entities = [None, None]
            # Query once and reuse the rows for both the debug count and
            # the symbol -> entity resolution.
            rows = list(col.find({'symbol': {'$in': tmp}}))
            print("!!! %d " % len(rows))
            for r in rows:
                if r['symbol'] == tmp[0]:
                    entities[0] = Entity(r)
                if r['symbol'] == tmp[1]:
                    entities[1] = Entity(r)

            # The list always has two slots, so check the slots themselves
            # and fail with a real exception instead of a string.
            missing = [sym for sym, ent in zip(tmp, entities) if ent is None]
            if missing:
                raise ValueError(
                    "Invalid nodes %s: no entity found for symbol(s) %s" %
                    (tmp, missing))

            node1 = Node.findOne({'network': n._id, 'entity': entities[0]._id})
            if not node1:
                node1 = Node({'network': n._id}, entities[0])
                node1.save()
            node2 = Node.findOne({'network': n._id, 'entity': entities[1]._id})
            if not node2:
                node2 = Node({'network': n._id}, entities[1])
                node2.save()

            con = Connection()
            con._id = "conn_%s_%s" % (tmp[0], tmp[1])
            con.nodes = [node1._id, node2._id]
            con.entities = [entities[0]._id, entities[1]._id]
            con.type = 'predicted'
            con.network = n._id
            con.label = ""
            con.save()

            print("Saving connection %d %s" % (count, con._id))

    finish = time.time()
    print("Imported %d edges, time elpased %d seconds" % (count, finish - now))
コード例 #14
0
def importmif():
    """
    Import the IntAct PSI-MI 2.5 dataset files into MongoDB.

    Only the 'Parkinsons' category directory under
    data/IntAct/psi25/datasets is processed. Every file in it is parsed
    with parseFile into a Network; networks whose id was already seen are
    reported and dropped. Each connection node is then linked to the
    entity whose 'intact' reference equals the node's 'entity' reference,
    and connections, networks, resolved nodes and distinct entities are
    inserted into their collections.

    Returns the list of imported networks.
    """
    basedir = "data/IntAct/psi25/datasets"
    cats = os.listdir(basedir)

    networks = []
    entities = []
    connections = []
    nodes = []
    seen_networks = set()

    for c in cats:
        if c != 'Parkinsons':
            continue
        print("Processing category %s" % c)
        # os.path.join instead of a hard-coded "\\" so the paths also
        # resolve on platforms whose separator is not a backslash.
        catdir = os.path.join(basedir, c)
        for filename in os.listdir(catdir):
            path = os.path.join(catdir, filename)
            if os.path.isdir(path):
                continue
            log("Processing %s" % path)
            res = Network()
            res.group = c
            res.refs = {}
            res.connections = []
            res.entities = []
            res.refs['intact'] = filename.replace(".xml", "")
            parseFile(path, res)

            if res._id in seen_networks:
                error("Duplicated id: %s/%s" % (c, path))
                continue
            networks.append(res)
            seen_networks.add(res._id)
            if res.entities:
                entities.extend(res.entities)

            connections.extend(res.connections)
            tmp_nodes = []
            for con in res.connections:
                if con.nodes:
                    tmp_nodes.extend(con.nodes)
            nodes.extend(tmp_nodes)

            log("Connections: %d  Participants %d Interactors: %d" %
                (len(res.connections), len(tmp_nodes), len(res.entities)))

    nc = mongo.getCollection('network')
    ec = mongo.getCollection('entity')
    cc = mongo.getCollection('connection')
    nodec = mongo.getCollection('node')

    # Resolve each node's IntAct interactor reference to an entity id and
    # keep only the resolved nodes on the connection.
    for con in connections:
        node_ids = []
        con.entities = []
        for node in con.nodes:
            ent_id = ''
            if node.refs and node.refs['entity']:
                # node.refs['entity'] is the IntAct internal ID
                intact_id = node.refs['entity']
                for item in entities:
                    if item.refs and item.refs['intact'] == intact_id:
                        ent_id = item._id
                        break
            if not ent_id:
                error("Unresolved interactorRef for %s" % node)
            else:
                node.entity = ent_id
                node_ids.append(node._id)
                con.entities.append(ent_id)
        con.nodes = node_ids

    for con in connections:
        cc.insert(con, safe=True)
        log("Saved connection %s" % con._id)

    for network in networks:
        # The child lists are stored in their own collections.
        del network['entities']
        del network['connections']
        nc.insert(network, safe=True)
        log("Saved network %s" % network._id)

    for node in nodes:
        if not node.entity:
            continue
        nodec.insert(node, safe=True)
        log("Saved node %s" % node._id)

    # The same entity can come from several files; insert each id once.
    saved_entities = set()
    for entity in entities:
        if entity._id in saved_entities:
            continue
        ec.insert(entity, safe=True)
        saved_entities.add(entity._id)
        log("Saved entity %s" % entity._id)

    log("###########################")
    log("Total networks: %d" % len(networks))
    log("Total interactors: %d" % len(entities))
    log("Total nodes: %d" % (len(nodes)))

    log("Done")
    return networks
コード例 #15
0
ファイル: ukpmc.py プロジェクト: tjworks/precon
def load_pubmeds(ids=None):
    """
    Fetch PubMed records from TogoWS and store them as publications.

    `ids` is a comma-separated string of pubmed ids; when it is empty or
    omitted, demo_pubmeds is used. For each id the article XML is loaded,
    a Publication ('publ_pubmed<id>') is built with title, abstract and
    language, and every author with a last name is inserted into the
    'people' collection (or looked up there when the insert fails) and
    linked to the publication. Failures for a single id are printed and
    do not stop the run.

    Returns the list of Publication objects that were built.
    """
    ids = ids.split(",") if ids else demo_pubmeds

    url = "http://togows.dbcls.jp/entry/pubmed/$ID?format=xml"
    # Shape of a stored publication:
    #   {'_id': '', 'name': '', 'refs': {'pubmed': ''}, 'abstract': '',
    #    'local': 0, 'url': '', 'published': 1, 'authors': []}
    pc = mongo.getCollection('people')
    try:
        pc.create_index([("last", 1), ("middle", 1), ("first", 1)],
                        unique=True)
    except Exception:
        # Best effort: index creation failures are deliberately ignored.
        pass

    pubs = []
    for pid in ids:
        try:
            uri = url.replace('$ID', pid)
            print("Loading %s" % uri)
            doc = XML2Dict().fromurl(uri)
            article = doc.PubmedArticleSet.PubmedArticle.MedlineCitation.Article

            pub = Publication()
            pub._id = "publ_pubmed%s" % (pid)
            pub.refs = {'pubmed': pid}
            pub.name = article['ArticleTitle']['value'] if article.ArticleTitle else ''
            pub.abstract = ''
            if article.Abstract and article.Abstract.AbstractText:
                texts = article.Abstract.AbstractText
                if not isinstance(texts, list):
                    texts = [texts]
                pub.abstract = "\n\n".join([text['value'] for text in texts])

            pub.language = article['Language']['value'] if article.Language else ''
            pubs.append(pub)

            pub.authors = []
            authors = article['AuthorList']['Author']
            # Same normalisation as AbstractText above: presumably a lone
            # <Author> arrives as a single mapping rather than a list
            # (TODO confirm against XML2Dict).
            if not isinstance(authors, list):
                authors = [authors]
            for author in authors:
                people = {
                    'first': author.ForeName.value
                    if author.ForeName and author.ForeName.value else '',
                    'last': author.LastName.value
                    if author.LastName and author.LastName.value else '',
                    'middle': author.Initials.value
                    if author.Initials and author.Initials.value else '',
                }
                if not people['last']:
                    continue
                people['namekey'] = "%s.%s.%s" % (people['first'].lower(),
                                                  people['middle'].lower(),
                                                  people['last'].lower())

                people['_id'] = idtool.generate('peop')
                try:
                    pc.insert(people, safe=True)
                    print("Inserted %s" % people)
                except Exception:
                    # Insert failed (presumably the unique name index):
                    # look the existing person up by the name fields.
                    del people['_id']
                    people = pc.find_one(people)
                if people:
                    pc.update({'_id': people['_id']},
                              {'$addToSet': {'publications': pub._id}},
                              safe=True)
                    pub.authors.append(people)
        except Exception:
            print("ERROR: %s" % traceback.format_exc())

    pubc = mongo.getCollection('publication')
    for pub in pubs:
        try:
            pubc.insert(pub)
            print("Inserted pub: %s" % pub)
        except Exception:
            print("ERROR %s" % traceback.format_exc())

    log("Done")

    return pubs
コード例 #16
0
 def findOne(cls, query):
     """Return the first document matching `query` in the class's
     collection (cls._col) wrapped in `cls`, or None when nothing matches."""
     record = mongo.getCollection(cls._col).find_one(query)
     return cls(record) if record else None