def _splitEdgeSymbols(line):
    """Return the two lower-cased symbols of an edge line, or None.

    The separators "->", "<-" and " - " are tried in that order; the first
    one that actually splits the line wins.  Only the first two fields are
    used, any further fields are ignored.
    """
    for separator in ("->", "<-", " - "):
        items = line.split(separator)
        if len(items) > 1:
            return items[0].lower().strip(), items[1].lower().strip()
    return None


def _findOrCreateNode(network, entity):
    """Return the node of `entity` in `network`, saving a new one if absent."""
    node = Node.findOne({'network': network._id, 'entity': entity._id})
    if not node:
        node = Node({'network': network._id}, entity)
        node.save()
    return node


def importEdges(filename=None, resumeFrom=8378):
    """
    Import edges, one pair per line, into "GBM Predicted Tumor Network".

    Edge property: " - " no direction, "->" left to right, "<-" right to left.
    The separator only delimits the two symbols: the direction itself is not
    stored, and the nodes are saved in the order they appear on the line.
    Anything after "///" on a line is a comment and is dropped.  Blank and
    comment-only lines are skipped silently; lines without a separator are
    reported through error() and skipped.

    Parameters:
        filename   -- path of the edge file; defaults to the GBM_BN-Massaged
                      CSV on the original author's machine.
        resumeFrom -- 1-based index of the first valid edge to import.  Edges
                      before it are parsed and counted but not written.  The
                      default 8378 keeps the historical behaviour
                      (NOTE(review): presumably the resume point of an
                      interrupted run -- confirm, pass 0 for a full import).

    Side effects: saves the Network, and a Node/Connection per imported edge.
    Edges whose symbols cannot both be resolved in the 'entity' collection
    are reported through error() and skipped (the previous code tried to
    raise a string here, which is itself a TypeError, and its length check
    could never fire).
    """
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv'

    # add a network (or reuse the existing one with that name)
    n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network()
    n.name = "GBM Predicted Tumor Network"
    n.group = "predicted"
    n.owner = 'precon'
    n.save()

    col = mongo.db()['entity']
    count = 0
    now = time.time()
    # NOTE(review): the returned collection handle was never used; the call is
    # kept only in case getCollection() has side effects -- confirm and remove.
    mongo.getCollection('edge')

    with open(filename) as f:
        for raw in f:
            line = raw.strip()
            pos = line.find("///")
            if pos >= 0:
                line = line[:pos].strip()
            if not line:
                continue

            symbols = _splitEdgeSymbols(line)
            if symbols is None:
                error("Ignore invalid line: [%s]" % line)
                continue

            count += 1
            if count < resumeFrom:
                continue

            # One query serves both the debug count and the entity lookup.
            matches = list(col.find({'symbol': {'$in': list(symbols)}}))
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("!!! %d " % len(matches))

            entities = [None, None]
            for r in matches:
                if r['symbol'] == symbols[0]:
                    entities[0] = Entity(r)
                if r['symbol'] == symbols[1]:
                    entities[1] = Entity(r)
            if entities[0] is None or entities[1] is None:
                error("Invalid nodes %s, continue" % list(symbols))
                continue

            node1 = _findOrCreateNode(n, entities[0])
            node2 = _findOrCreateNode(n, entities[1])

            con = Connection()
            con._id = "conn_%s_%s" % (symbols[0], symbols[1])
            con.nodes = [node1._id, node2._id]
            con.entities = [entities[0]._id, entities[1]._id]
            con.type = 'predicted'
            con.network = n._id
            con.label = ""
            con.save()
            print("Saving connection %d %s" % (count, con._id))

    finish = time.time()
    print("Imported %d edges, time elpased %d seconds" % (count, finish - now))
def newNode(self, entity):
    """Build and return a Node constructed with the given entity."""
    return Node(entity=entity)
def _splitEdgeSymbols(line):
    """Return the two lower-cased symbols of an edge line, or None.

    The separators "->", "<-" and " - " are tried in that order; the first
    one that actually splits the line wins.  Only the first two fields are
    used, any further fields are ignored.
    """
    for separator in ("->", "<-", " - "):
        items = line.split(separator)
        if len(items) > 1:
            return items[0].lower().strip(), items[1].lower().strip()
    return None


def _findOrCreateNode(network, entity):
    """Return the node of `entity` in `network`, saving a new one if absent."""
    node = Node.findOne({'network': network._id, 'entity': entity._id})
    if not node:
        node = Node({'network': network._id}, entity)
        node.save()
    return node


def importEdges(filename=None, resumeFrom=8378):
    """
    Import edges, one pair per line, into "GBM Predicted Tumor Network".

    Edge property: " - " no direction, "->" left to right, "<-" right to left.
    The separator only delimits the two symbols: the direction itself is not
    stored, and the nodes are saved in the order they appear on the line.
    Anything after "///" on a line is a comment and is dropped.  Blank and
    comment-only lines are skipped silently; lines without a separator are
    reported through error() and skipped.

    Parameters:
        filename   -- path of the edge file; defaults to the GBM_BN-Massaged
                      CSV on the original author's machine.
        resumeFrom -- 1-based index of the first valid edge to import.  Edges
                      before it are parsed and counted but not written.  The
                      default 8378 keeps the historical behaviour
                      (NOTE(review): presumably the resume point of an
                      interrupted run -- confirm, pass 0 for a full import).

    Side effects: saves the Network, and a Node/Connection per imported edge.
    Edges whose symbols cannot both be resolved in the 'entity' collection
    are reported through error() and skipped (the previous code tried to
    raise a string here, which is itself a TypeError, and its length check
    could never fire).
    """
    filename = filename or r'C:\work\caida\Dropbox\precon\engineering\Contents\GBM_BN-Massaged.csv'

    # add a network (or reuse the existing one with that name)
    n = Network.findOne({'name': "GBM Predicted Tumor Network"}) or Network()
    n.name = "GBM Predicted Tumor Network"
    n.group = "predicted"
    n.owner = 'precon'
    n.save()

    col = mongo.db()['entity']
    count = 0
    now = time.time()
    # NOTE(review): the returned collection handle was never used; the call is
    # kept only in case getCollection() has side effects -- confirm and remove.
    mongo.getCollection('edge')

    with open(filename) as f:
        for raw in f:
            line = raw.strip()
            pos = line.find("///")
            if pos >= 0:
                line = line[:pos].strip()
            if not line:
                continue

            symbols = _splitEdgeSymbols(line)
            if symbols is None:
                error("Ignore invalid line: [%s]" % line)
                continue

            count += 1
            if count < resumeFrom:
                continue

            # One query serves both the debug count and the entity lookup.
            matches = list(col.find({'symbol': {'$in': list(symbols)}}))
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("!!! %d " % len(matches))

            entities = [None, None]
            for r in matches:
                if r['symbol'] == symbols[0]:
                    entities[0] = Entity(r)
                if r['symbol'] == symbols[1]:
                    entities[1] = Entity(r)
            if entities[0] is None or entities[1] is None:
                error("Invalid nodes %s, continue" % list(symbols))
                continue

            node1 = _findOrCreateNode(n, entities[0])
            node2 = _findOrCreateNode(n, entities[1])

            con = Connection()
            con._id = "conn_%s_%s" % (symbols[0], symbols[1])
            con.nodes = [node1._id, node2._id]
            con.entities = [entities[0]._id, entities[1]._id]
            con.type = 'predicted'
            con.network = n._id
            con.label = ""
            con.save()
            print("Saving connection %d %s" % (count, con._id))

    finish = time.time()
    print("Imported %d edges, time elpased %d seconds" % (count, finish - now))
def parseInteractions(dom, network):
    """
    Turn every <interaction> element of `dom` into a Connection and store
    the resulting list on `network.connections`.

    Each <participant> inside an interaction becomes a Node.  Node ids are
    built from the network id (minus its first four characters) and a
    counter that keeps running across all interactions.  Nothing is
    returned; `network` is modified in place.

    Excerpt of the expected input:

    <interaction id="678614" imexId="IM-12113-2">
      <names>
        <shortLabel>pf1130-pf1128-2</shortLabel>
        <fullName>Affinity purification by hydroxy apatite chromatography</fullName>
      </names>
      <xref>
        <primaryRef refTypeAc="MI:0356" refType="identity" id="EBI-2507294" dbAc="MI:0469" db="intact"/>
        <secondaryRef refType="imex source" id="MI:0469" dbAc="MI:0488" db="psi-mi"/>
        <secondaryRef refTypeAc="MI:0662" refType="imex-primary" id="IM-12113-2" dbAc="MI:0670" db="imex"/>
      </xref>
      <experimentList>
        <experimentRef>678502</experimentRef>
      </experimentList>
      <participantList>
        <participant id="678615">
          <names>
            <shortLabel>n/a</shortLabel>
          </names>
          <xref>
            <primaryRef refTypeAc="MI:0356" refType="identity" id="EBI-2507299" dbAc="MI:0469" db="intact"/>
          </xref>
          <interactorRef>678518</interactorRef>
          <biologicalRole>
            <names>
              <shortLabel>unspecified role</shortLabel>
              <fullName>unspecified role</fullName>
            </names>
            <xref>
              <primaryRef refTypeAc="MI:0356" refType="identity" id="MI:0499" dbAc="MI:0488" db="psi-mi"/>
              <secondaryRef refTypeAc="MI:0356" refType="identity" id="EBI-77781" dbAc="MI:0469" db="intact"/>
              <secondaryRef refTypeAc="MI:0358" refType="primary-reference" id="14755292" dbAc="MI:0446" db="pubmed"/>
            </xref>
          </biologicalRole>
          <experimentalRoleList>
            <experimentalRole>
              <names>
                <shortLabel>neutral component</shortLabel>
                <fullName>neutral component</fullName>
              </names>
              <xref>
                <primaryRef refTypeAc="MI:0356" refType="identity" id="MI:0497" dbAc="MI:0488" db="psi-mi"/>
                <secondaryRef refTypeAc="MI:0356" refType="identity" id="EBI-55" dbAc="MI:0469" db="intact"/>
                <secondaryRef refTypeAc="MI:0358" refType="primary-reference" id="14755292" dbAc="MI:0446" db="pubmed"/>
              </xref>
            </experimentalRole>
          </experimentalRoleList>
        </participant>
    """
    parsed = []
    serial = 0  # running node number, shared by all interactions

    for interaction in dom.getElementsByTagName("interaction"):
        members = []
        for participant in interaction.getElementsByTagName("participant"):
            serial += 1
            node = Node()
            node.network = network._id
            node._id = "node%s_%d" % (node.network[4:], serial)
            node.role = getText(participant, "biologicalRole", "names", "shortLabel")
            setPrimaryRef(participant, node)
            interactor = getText(participant, 'interactorRef')
            if interactor:
                # temporary: holds the interactor reference of the participant
                node.refs['entity'] = interactor
            members.append(node)

        # Only the first <interactionType> element is consulted.
        kind = interaction.getElementsByTagName("interactionType")[0]

        link = Connection()
        link._id = "conn_intact_%s" % interaction.getAttribute("id")
        link.refs = {}
        setPrimaryRef(interaction, link)
        link.type = getText(kind, 'shortLabel')
        link.label = getText(interaction, 'shortLabel')
        link.network = network._id
        link.nodes = members
        parsed.append(link)

    network.connections = parsed
def parseInteractions(dom, network):
    """
    Turn every <interaction> element of `dom` into a Connection and store
    the resulting list on `network.connections`.

    Each <participant> inside an interaction becomes a Node.  Node ids are
    built from the network id (minus its first four characters) and a
    counter that keeps running across all interactions.  Nothing is
    returned; `network` is modified in place.

    Excerpt of the expected input:

    <interaction id="678614" imexId="IM-12113-2">
      <names>
        <shortLabel>pf1130-pf1128-2</shortLabel>
        <fullName>Affinity purification by hydroxy apatite chromatography</fullName>
      </names>
      <xref>
        <primaryRef refTypeAc="MI:0356" refType="identity" id="EBI-2507294" dbAc="MI:0469" db="intact"/>
        <secondaryRef refType="imex source" id="MI:0469" dbAc="MI:0488" db="psi-mi"/>
        <secondaryRef refTypeAc="MI:0662" refType="imex-primary" id="IM-12113-2" dbAc="MI:0670" db="imex"/>
      </xref>
      <experimentList>
        <experimentRef>678502</experimentRef>
      </experimentList>
      <participantList>
        <participant id="678615">
          <names>
            <shortLabel>n/a</shortLabel>
          </names>
          <xref>
            <primaryRef refTypeAc="MI:0356" refType="identity" id="EBI-2507299" dbAc="MI:0469" db="intact"/>
          </xref>
          <interactorRef>678518</interactorRef>
          <biologicalRole>
            <names>
              <shortLabel>unspecified role</shortLabel>
              <fullName>unspecified role</fullName>
            </names>
            <xref>
              <primaryRef refTypeAc="MI:0356" refType="identity" id="MI:0499" dbAc="MI:0488" db="psi-mi"/>
              <secondaryRef refTypeAc="MI:0356" refType="identity" id="EBI-77781" dbAc="MI:0469" db="intact"/>
              <secondaryRef refTypeAc="MI:0358" refType="primary-reference" id="14755292" dbAc="MI:0446" db="pubmed"/>
            </xref>
          </biologicalRole>
          <experimentalRoleList>
            <experimentalRole>
              <names>
                <shortLabel>neutral component</shortLabel>
                <fullName>neutral component</fullName>
              </names>
              <xref>
                <primaryRef refTypeAc="MI:0356" refType="identity" id="MI:0497" dbAc="MI:0488" db="psi-mi"/>
                <secondaryRef refTypeAc="MI:0356" refType="identity" id="EBI-55" dbAc="MI:0469" db="intact"/>
                <secondaryRef refTypeAc="MI:0358" refType="primary-reference" id="14755292" dbAc="MI:0446" db="pubmed"/>
              </xref>
            </experimentalRole>
          </experimentalRoleList>
        </participant>
    """
    parsed = []
    serial = 0  # running node number, shared by all interactions

    for interaction in dom.getElementsByTagName("interaction"):
        members = []
        for participant in interaction.getElementsByTagName("participant"):
            serial += 1
            node = Node()
            node.network = network._id
            node._id = "node%s_%d" % (node.network[4:], serial)
            node.role = getText(participant, "biologicalRole", "names", "shortLabel")
            setPrimaryRef(participant, node)
            interactor = getText(participant, 'interactorRef')
            if interactor:
                # temporary: holds the interactor reference of the participant
                node.refs['entity'] = interactor
            members.append(node)

        # Only the first <interactionType> element is consulted.
        kind = interaction.getElementsByTagName("interactionType")[0]

        link = Connection()
        link._id = "conn_intact_%s" % interaction.getAttribute("id")
        link.refs = {}
        setPrimaryRef(interaction, link)
        link.type = getText(kind, 'shortLabel')
        link.label = getText(interaction, 'shortLabel')
        link.network = network._id
        link.nodes = members
        parsed.append(link)

    network.connections = parsed