def test_savenetwork(self):
    """Persist a Network built from a raw JSON payload, then rename and save it.

    Uses the 'octest' MongoDB database so the test does not touch real data.
    NOTE(review): depends on module-level `settings`, `json`, and `Network`.

    BUG FIX: removed a dead first assignment to `raw` (a single-quoted,
    Python-repr-style string that is not valid JSON and was immediately
    overwritten by the real payload below).
    """
    settings.MONGODB_NAME = 'octest'
    # Raw payload exactly as a client would POST it (double-quoted, valid JSON).
    raw = """ {"owner":"abc4_onechart.com","name":"abcde","_connections":[{"nodes":["node120810102011296361","node13448668499547680-new"], "type":"inhibits","id":"conn13448668609468747-new","_id":"conn13448668609469805-new","entities":["enti_up_P49815",""]}], "connections":["conn_mll_tsc2","conn13448668609469805-new"],"nodes":["node13448668499547680-new"],"_nodes":[{"entity":"","label":"TTT","_id":"node13448668499547680-new"}], "_id":"netw13448668795386140-new"} """
    data = json.loads(raw)
    m = Network(data)
    m.name = "TestNetwork-1"
    m.save()
def persist(req):
    """Persist a Connection or Network posted as JSON in the request.

    The JSON document is read from the "data" request parameter. Documents
    whose `_id` starts with 'conn' are saved as a Connection; everything
    else is saved as a Network and also indexed by name. Returns a
    SmartResponse carrying the saved object's _id, or wrapping the caught
    Error on failure.

    NOTE(review): an identical `persist` definition appears later in this
    file; at import time the later definition shadows this one.
    """
    jsondata = req.REQUEST.get("data")
    initdata = json.loads(jsondata)
    logger.debug("Persisting")
    logger.debug( "%s" % (initdata) )
    # Debug dump of the raw payload to disk.
    fileutil.writeFile("test1", jsondata)
    try:
        # The id prefix encodes the document type ('conn...' => Connection).
        if(initdata['_id'][:4] == 'conn'):
            con = Connection(initdata)
            con.save()
            return SmartResponse(con._id, req)
        else:
            network = Network(initdata)
            network.save()
            # Make the network searchable by name.
            index(network.name, network)
            return SmartResponse(network._id, req)
    except Error as e:
        logger.error("Error saving data: %s" %traceback.format_exc())
        return SmartResponse(e, req)
def persist(req):
    """Save the JSON document from the request as a Connection or Network.

    The "data" request parameter holds the JSON payload. An `_id` prefixed
    with 'conn' marks a Connection; anything else becomes a Network, which
    is additionally indexed by name. Returns a SmartResponse with the new
    object's _id, or one wrapping the exception if saving fails.
    """
    jsondata = req.REQUEST.get("data")
    initdata = json.loads(jsondata)
    logger.debug("Persisting")
    logger.debug("%s" % (initdata))
    fileutil.writeFile("test1", jsondata)
    try:
        # The id prefix tells us which document type to instantiate.
        is_connection = initdata['_id'][:4] == 'conn'
        if is_connection:
            record = Connection(initdata)
            record.save()
            return SmartResponse(record._id, req)
        network = Network(initdata)
        network.save()
        # Networks are also registered with the search index.
        index(network.name, network)
        return SmartResponse(network._id, req)
    except Error as e:
        logger.error("Error saving data: %s" % traceback.format_exc())
        return SmartResponse(e, req)
def importEdges(filename=None): """ import edges, one pair per line Edge property: - no direction, -> left to right, <- right to left """ filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv' # add a network n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network() n.name = "GBM Predicted Tumor Network" n.group = "predicted" n.owner = 'precon' n.save() col = mongo.db()['entity'] count = 0 now = time.time() ec = mongo.getCollection('edge') with open(filename) as f: content = f.read() for line in content.split('\n'): doc = {} line = line.strip() pos = line.find("///") if pos > 0: line = line[0:pos].strip() doc['comment'] = line[pos:] items = line.split("->") if (len(items) == 1): items = line.split("<-") if (len(items) == 1): items = line.split(" - ") if (len(items) == 1): error("Ignore invalid line: [%s]" % line) continue count += 1 if (count < 8378): continue tmp = [] tmp.append(items[0].lower().strip()) tmp.append(items[1].lower().strip()) entities = ['', ''] print "!!! %d " % (col.find({'symbol': {'$in': tmp}}).count()) for r in col.find({'symbol': {'$in': tmp}}): if (r['symbol'] == tmp[0]): entities[0] = Entity(r) if (r['symbol'] == tmp[1]): entities[1] = Entity(r) if (len(entities) != 2): raise "Invalid nodes %s, continue" % entities node1 = Node.findOne({'network': n._id, 'entity': entities[0]._id}) if not node1: node1 = Node({'network': n._id}, entities[0]) node1.save() node2 = Node.findOne({'network': n._id, 'entity': entities[1]._id}) if not node2: node2 = Node({'network': n._id}, entities[1]) node2.save() con = Connection() con._id = "conn_%s_%s" % (tmp[0], tmp[1]) con.nodes = [node1._id, node2._id] con.entities = [entities[0]._id, entities[1]._id] con.type = 'predicted' con.network = n._id con.label = "" con.save() print "Saving connection %d %s" % (count, con._id) finish = time.time() print "Imported %d edges, time elpased %d seconds" % (count, finish - now)
def load(self):
    """Load rows from self.rows (CSV-style) into entity/node/network graphs.

    The first row is a header whose columns must all be in
    SUPPORTED_COLUMNS. Each subsequent row describes an edge between NodeA
    and NodeB; connections are grouped into Network objects keyed by the
    'Network' column ('DEFAULT' when absent).

    Returns:
        (networks, errors): dict of Network keyed by name, and self.errors.
    """
    self.columnIndexes = {}
    headers = self.rows.next()  # Python 2 iterator protocol
    for i in range (len(headers)):
        col = headers[i]
        if col not in SUPPORTED_COLUMNS:
            raise Exception("Invalid column: %s. Accepted columns are: %s" %(col, SUPPORTED_COLUMNS))
        self.columnIndexes[col] = i
    #networks, connections, entities, nodes
    entities = {}
    nodes = {}
    networks = {}
    for row in self.rows:
        # --- Node A entity: get-or-create, cached by dbid ---
        entityA = None
        dbid = self.getCol('NodeA_DBID',row )
        if dbid:
            dbid = dbid.lower()
            _id = 'enti_%s' %dbid
            if _id not in entities:
                entityA = Entity()
                entityA._id = _id
                entityA.dbref = {}
                # dbid format is "source:accession"
                pair = dbid.split(':')
                if(len(pair)!=2):
                    self._error("Invalid NodeA_DBID: %s" %dbid)
                    continue
                entityA.dbref[pair[0]] = pair[1]
                entityA.group = self.getCol('NodeA_Category',row)
                #entity.label = 'Metformin'
                entityA.name = self.getCol('NodeA',row)
                entities[_id] = entityA
            else:
                entityA=entities[_id]
        # NOTE(review): if NodeA_DBID is empty, entityA stays None and the
        # code below would fail on entityA._id -- presumably the column is
        # required; confirm against the input format.
        #Node B
        entityB = None
        dbid = self.getCol('NodeB_DBID',row, False)
        if dbid:
            dbid = dbid.lower()
            _id = 'enti_%s' %dbid
            if _id not in entities:
                entityB = Entity()
                entityB._id = _id
                entityB.dbref = {}
                pair = dbid.split(':')
                if(len(pair)!=2):
                    self._error("Invalid NodeB_DBID: %s" %dbid)
                    continue
                entityB.dbref[pair[0].strip()] = pair[1].strip()
                entityB.group = self.getCol('NodeB_Category',row)
                #entity.label = 'Metformin'
                entityB.name = self.getCol('NodeB' ,row )
                entities[_id] = entityB
            else:
                entityB = entities[_id]
        edgeType = self.getCol('Edge',row, False)
        if not edgeType:
            # no edge on this row; nothing to connect
            continue
        con = Connection()
        con.type = edgeType
        # get-or-create the graph nodes backing each entity
        nodeA = nodes[entityA._id] if entityA._id in nodes else self.newNode(entityA)
        nodeB = nodes[entityB._id] if entityB._id in nodes else self.newNode(entityB)
        nodes[entityA._id] = nodeA
        nodes[entityB._id] = nodeB
        con.entities=[entityA._id, entityB._id] # this is for search
        con.nodes = [nodeA._id, nodeB._id]
        con._nodes = [nodeA, nodeB]
        con.refs = {}
        edgeRefs = self.getCol('Edge_Ref',row, False)
        if edgeRefs:
            # comma separated
            # pairs look like "source:ref1;ref2" (newline-separated fallback)
            pairs = edgeRefs.split(",")
            if len(pairs) == 1:
                pairs = edgeRefs.split("\n")
            for p in pairs:
                # NOTE(review): crashes with IndexError if a pair has no ':'
                pair = p.split(':')
                refs = pair[1].split(';')
                if len(refs) == 1:
                    # single ref: store the plain string, not a 1-element list
                    refs = pair[1]
                con.refs[pair[0].lower().strip()] = refs
        # --- attach the connection to its (get-or-create) network ---
        networkName = self.getCol('Network',row) or 'DEFAULT'
        network = networks[networkName] if networkName in networks else Network()
        network.name = networkName
        network.owner='peop_precon'
        networks[networkName] = network
        network._connections = network._connections or []
        network._connections.append(con)
        con.network = network._id
        con.owner='precon'
    #TBD, error
    return networks, self.errors
def importmif(): dups = {} basedir = "data/IntAct/psi25/datasets" cats = os.listdir(basedir) networks = [] entities = [] connections = [] nodes = [] for c in cats: if c != 'Parkinsons': continue print "Processing category %s" %c files = os.listdir("%s\\%s" %(basedir,c) ) for filename in files: file = "%s\\%s\\%s" %(basedir, c, filename) if os.path.isdir(file): continue log( "Processing %s" %file) res = Network() res.group = c res.refs = {} res.connections = [] res.entities = [] res.refs['intact'] = filename.replace(".xml", "") parseFile(file, res) if res._id in dups: error("Duplicated id: %s/%s"%(c, file)) continue networks.append(res) dups[res._id ] = 1 if res.entities: entities.extend(res.entities) connections.extend(res.connections) tmp_nodes = [] for con in res.connections: if con.nodes: tmp_nodes.extend(con.nodes) nodes.extend(tmp_nodes) log("Connections: %d Participants %d Interactors: %d" %(len(res.connections), len(tmp_nodes), len(res.entities) )) #interactors.extend(a) #interactions.extend(b) #log("interactors : %d" % len(res.entities)) #log("interactions: %d" % len(res.entities)) #break #log( "Total interactions: %d" % len(interactions)) nc = mongo.getCollection('network') ec = mongo.getCollection('entity') cc = mongo.getCollection('connection') nodec=mongo.getCollection('node') for con in connections: node_ids = [] con.entities = [] for node in con.nodes: ent_id = '' if node.refs and node.refs['entity']: # node.entity is IntAct internal ID intact_id = node.refs['entity'] for item in entities: if item.refs and item.refs['intact'] == intact_id: ent_id = item._id break if not ent_id: error("Unresolved interactorRef for %s" %node) else: node.entity = ent_id node_ids.append(node._id) con.entities.append(ent_id) con.nodes = node_ids for con in connections: cc.insert(con, safe=True) log("Saved connection %s" %con._id) for network in networks: del network['entities'] del network['connections'] nc.insert(network, safe=True) log("Saved network %s" %network._id) 
for node in nodes: if not node.entity: continue nodec.insert(node, safe=True) log("Saved node %s" %node._id) dups = [] for entity in entities: if entity._id in dups: continue ec.insert(entity, safe=True) dups.append(entity._id) log("Saved entity %s" %entity._id) log( "###########################") log( "Total networks: %d" % len(networks)) log( "Total interactors: %d" % len(entities)) log( "Total nodes: %d" %(len(nodes))) log("Done") return networks
def importEdges(filename=None):
    """ import edges, one pair per line
    Edge property: - no direction, -> left to right, <- right to left
    """
    # NOTE(review): duplicate of an earlier importEdges definition in this
    # file; the definition that appears later wins at import time.
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv'
    # add a network
    n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network()
    n.name = "GBM Predicted Tumor Network"
    n.group = "predicted"
    n.owner = 'precon'
    n.save()
    col = mongo.db()['entity']
    count = 0
    now = time.time()
    ec = mongo.getCollection('edge')  # NOTE(review): never used below
    with open(filename) as f:
        content = f.read()
        for line in content.split('\n'):
            doc = {}
            line = line.strip()
            # strip an optional trailing "///" comment
            pos = line.find("///")
            if pos>0:
                line = line[0:pos].strip()
                # NOTE(review): `doc` is never persisted, and `line` was
                # already truncated above, so this slice is taken from the
                # shortened line -- likely not the intended comment text.
                doc['comment'] = line[pos:]
            # try each edge notation in turn
            items = line.split("->")
            if (len(items) == 1):
                items = line.split("<-")
            if (len(items) == 1):
                items = line.split(" - ")
            if (len(items) == 1):
                error("Ignore invalid line: [%s]" %line)
                continue
            count+=1
            # hard-coded resume point from a previous partial run
            if(count<8378):
                continue
            tmp =[]
            tmp.append( items[0].lower().strip())
            tmp.append( items[1].lower().strip())
            entities = ['','']
            print "!!! %d " %( col.find({'symbol': {'$in': tmp } }).count() )
            for r in col.find({'symbol': {'$in': tmp } }):
                if(r['symbol'] == tmp[0]):
                    entities[0] = Entity(r)
                if(r['symbol'] == tmp[1]):
                    entities[1] = Entity(r)
            # NOTE(review): this condition is always false -- `entities`
            # always has exactly 2 elements; the intent was probably to
            # verify both symbol lookups succeeded. Also, raising a string
            # is a TypeError on Python >= 2.6.
            if(len(entities)!=2 ):
                raise "Invalid nodes %s, continue" % entities
            # get-or-create the two graph nodes
            node1 = Node.findOne({'network': n._id, 'entity': entities[0]._id})
            if not node1:
                node1 = Node({'network':n._id}, entities[0])
                node1.save()
            node2 = Node.findOne({'network': n._id, 'entity': entities[1]._id})
            if not node2:
                node2 = Node({'network':n._id}, entities[1])
                node2.save()
            con = Connection()
            con._id = "conn_%s_%s" %(tmp[0], tmp[1])
            con.nodes = [node1._id, node2._id]
            con.entities = [ entities[0]._id, entities[1]._id ]
            con.type = 'predicted'
            con.network = n._id
            con.label = ""
            con.save()
            print "Saving connection %d %s" % (count, con._id)
    finish = time.time()
    print "Imported %d edges, time elpased %d seconds" %(count, finish - now)
def importmif():
    """Import PSI-MI 2.5 XML datasets (Parkinsons only) into MongoDB.

    Parses every file under data/IntAct/psi25/datasets/Parkinsons into a
    Network plus its entities, connections and nodes, resolves IntAct
    interactorRefs to entity ids, then inserts the results into the
    network, entity, connection and node collections. Returns the list of
    imported Network objects.

    NOTE(review): duplicate of an earlier importmif definition in this
    file; the definition that appears later wins at import time.
    """
    dups = {}
    basedir = "data/IntAct/psi25/datasets"
    cats = os.listdir(basedir)
    networks = []
    entities = []
    connections = []
    nodes = []
    for c in cats:
        if c != 'Parkinsons':
            # only the Parkinsons dataset is imported for now
            continue
        print "Processing category %s" % c
        # backslash-joined paths: Windows-only import script
        files = os.listdir("%s\\%s" % (basedir, c))
        for filename in files:
            file = "%s\\%s\\%s" % (basedir, c, filename)  # NOTE(review): shadows builtin `file`
            if os.path.isdir(file):
                continue
            log("Processing %s" % file)
            res = Network()
            res.group = c
            res.refs = {}
            res.connections = []
            res.entities = []
            res.refs['intact'] = filename.replace(".xml", "")
            parseFile(file, res)
            if res._id in dups:
                error("Duplicated id: %s/%s" % (c, file))
                continue
            networks.append(res)
            dups[res._id] = 1
            if res.entities:
                entities.extend(res.entities)
            connections.extend(res.connections)
            # collect this network's participant nodes
            tmp_nodes = []
            for con in res.connections:
                if con.nodes:
                    tmp_nodes.extend(con.nodes)
            nodes.extend(tmp_nodes)
            log("Connections: %d Participants %d Interactors: %d" % (len(res.connections), len(tmp_nodes), len(res.entities)))
            #interactors.extend(a)
            #interactions.extend(b)
            #log("interactors : %d" % len(res.entities))
            #log("interactions: %d" % len(res.entities))
            #break
    #log( "Total interactions: %d" % len(interactions))
    nc = mongo.getCollection('network')
    ec = mongo.getCollection('entity')
    cc = mongo.getCollection('connection')
    nodec = mongo.getCollection('node')
    # resolve each node's IntAct interactorRef to a stored entity id
    for con in connections:
        node_ids = []
        con.entities = []
        for node in con.nodes:
            ent_id = ''
            if node.refs and node.refs['entity']:
                # node.entity is IntAct internal ID
                intact_id = node.refs['entity']
                # NOTE(review): linear scan per node -- O(nodes*entities)
                for item in entities:
                    if item.refs and item.refs['intact'] == intact_id:
                        ent_id = item._id
                        break
            if not ent_id:
                error("Unresolved interactorRef for %s" % node)
            else:
                node.entity = ent_id
                node_ids.append(node._id)
                con.entities.append(ent_id)
        # keep only the nodes whose interactorRef resolved
        con.nodes = node_ids
    for con in connections:
        cc.insert(con, safe=True)
        log("Saved connection %s" % con._id)
    for network in networks:
        # drop denormalized children before persisting the network itself
        del network['entities']
        del network['connections']
        nc.insert(network, safe=True)
        log("Saved network %s" % network._id)
    for node in nodes:
        if not node.entity:
            # skip nodes whose interactorRef could not be resolved
            continue
        nodec.insert(node, safe=True)
        log("Saved node %s" % node._id)
    # NOTE(review): `dups` is rebound from dict to list here; list
    # membership is O(n) per entity -- a set would be cheaper.
    dups = []
    for entity in entities:
        if entity._id in dups:
            continue
        ec.insert(entity, safe=True)
        dups.append(entity._id)
        log("Saved entity %s" % entity._id)
    log("###########################")
    log("Total networks: %d" % len(networks))
    log("Total interactors: %d" % len(entities))
    log("Total nodes: %d" % (len(nodes)))
    log("Done")
    return networks