def readPublicDatasets( nwdata, c ):
    # OK, NOW we dump in the public datasets
    for pd in c['public_dicts'] :
        # (done) make emili follow biogrid field conventions
        # TODO ditto bioplex
        if pd['infilename'] in os.listdir('.') :
            pdsf = open(pd['infilename'])
        elif os.path.isfile(pd['infilename']) :
            pdsf = open(pd['infilename'])
        else :
            pdsf = open(c['publicDatadir'] + pd['infilename'])

        temporaryds = I.dataSet( i_filter = c['iact_filter'] )

        if pd.get('convert') == 'm2h' :
            temporaryds.parse( pdsf, fd = I.fd_biogrid, m2h = True,
                               qualify = pd.get('qualify',''),
                               directed = False, force_qualify = True )
        elif pd.get('convert') == 'h2m' :
            temporaryds.parse( pdsf, fd = I.fd_biogrid, h2m = True,
                               qualify = pd.get('qualify',''),
                               directed = False, force_qualify = True )
        else :
            temporaryds.parse( pdsf, fd = I.fd_biogrid,
                               qualify = pd.get('qualify',''),
                               directed = False, force_qualify = True )

        print( 'saving public dataset ' + pd['infilename'] )
        sio = StringIO()
        temporaryds.save( sio, edges = { e for e in temporaryds.edges.values()
                                         if e.weight >= pd.get('minweight',0)
                                         and e.totalscore >= pd.get('minscore',0) } )
        sio.seek(0)

        print( 'reloading public dataset ' + pd['infilename'] )
        nwdata.load_from( sio )
        sio.close()
        pdsf.close()
def load_biogrid(filters='default',m2h=False,h2m=False,force_qualify=True,superdebug=False,qualify='') :
    global biogrid
    bgpath = REFERENCEPATH + 'biogrid_latest'
    from lib import interactors as I
    from lib import interactors_extras as ie

    mynf = None
    myif = None
    (myif,mynf) = ie.bg_regex_assembler()

    # normalize None up front so the substring checks below are safe;
    # note that filter names are matched as substrings (e.g. 'exo' selects 'exogenous')
    if filters is None :
        filters = 'none'

    if filters == 'default' :
        pass
    elif filters.lower() in 'exogenous' :
        mynf = ie.exogenous_regex_assembler()
    elif filters.lower() in 'ionly' :
        mynf = None
    elif filters.lower() in 'physical' :
        # exclude genetic interactions so that only physical ones remain
        physical_subf = ie.subfilter(r'genetic','systemType')
        myif = ie.Filter(excludes={physical_subf,})
        mynf = None
    elif filters.lower() in 'none' :
        mynf = None
        myif = None

    biogrid = I.dataSet(i_filter=myif,n_filter=mynf,superdebug=superdebug)
    bgfile = open(bgpath)
    biogrid.parse(bgfile,fd=I.fd_biogrid,m2h=m2h,h2m=h2m,force_qualify=force_qualify,qualify=qualify)
    bgfile.close()
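# Hedged usage sketch for load_biogrid() above: a throwaway helper (not part of
# the pipeline) showing how the loader might be called interactively. The
# 'physical' filter name and the m2h/qualify arguments come from the signature
# above; the qualifier string 'bg' is a hypothetical choice here.
def _example_load_biogrid():
    load_biogrid(filters='physical', m2h=True, qualify='bg')
    return biogrid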
def load_preppi() :
    global preppi
    from lib import interactors as I
    from lib import interactors_extras as ie

    preppipath = REFERENCEPATH + 'preppi_150727_lr600.i'
    preppi = I.dataSet()
    preppi_f = open(preppipath)
    preppi.parse(preppi_f,fd=I.fdms)
    preppi_f.close()
def load_refsuite(filters='default', m2h=False, h2m=False, superdebug=False, force=False):
    from lib import interactors as I
    from lib import interactors_extras as ie
    global refsuite

    # don't rebuild the reference suite if it is already loaded, unless forced
    if refsuite is not None and len(refsuite.nodes) > 0 and not force:
        return

    bgpath = REFERENCEPATH + 'biogrid_latest'
    empath = REFERENCEPATH + '/complexes/emiliome.i'
    bppath = REFERENCEPATH + '/bioplex.i'

    mynf = None
    myif = None
    (myif, mynf) = ie.bg_regex_assembler()

    # normalize None up front so the substring checks below are safe
    if filters is None:
        filters = 'none'

    if filters == 'default':
        pass
    elif filters.lower() in 'exogenous':
        mynf = ie.exogenous_regex_assembler()
    elif filters.lower() in 'ionly':
        mynf = None
    elif filters.lower() in 'none':
        mynf = None
        myif = None

    refsuite = I.dataSet(i_filter=myif, n_filter=mynf, superdebug=superdebug)

    bgfile = open(bgpath)
    refsuite.parse(bgfile, fd=I.fd_biogrid, m2h=m2h, h2m=h2m, force_qualify=True,
                   force_score=0.0, qualify='bg')
    bgfile.close()

    emfile = open(empath)
    refsuite.parse(emfile, fd=I.fd_emili, m2h=m2h, h2m=h2m, force_qualify=True,
                   force_score=0.0, qualify='em')
    emfile.close()

    bpfile = open(bppath)
    refsuite.parse(bpfile, fd=I.fd_emili, m2h=m2h, h2m=h2m, force_qualify=True, directed=True,
                   force_score=0.0, qualify='bp')
    bpfile.close()
def createNetwork(yamlfile):
    readYAMLfile(yamlfile, config)
    loadObjects(config)

    theds = I.dataSet(n_filter=config['node_filter'])

    readInDatasets(theds, config)
    # filter experimental data by background dists
    filterNodesByBackground(theds, config)
    readPublicDatasets(theds, config)
    secondaryFiltration(theds, config)
    makeOutput(theds, config)

    return theds, config
def readControls( c ):
    cntrl = I.dataSet(n_filter = config['node_filter'], debug = DB)

    for dsd in c['ds_dicts'] :
        cfile = dsd['control_ori']
        # assumed that this is the same in all datasets and
        # it is a file of a list of ifilenames
        break

    c_dicts = list()
    with open( cfile, 'rt' ) as fh:
        for line in fh:
            ifname = line.rstrip()
            bait = ifname.split('_')[0]
            c_dicts.append( { 'infilename': ifname,
                              'bait': bait,
                              'qualify': ifname } )
    c['c_dicts'] = c_dicts

    readInDatasets( cntrl, c, 'c_dicts', 'cntrlkeys' )

    return cntrl
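# Hedged sketch of the control-origin file consumed by readControls() above:
# one interactor filename per line, with everything before the first '_'
# taken as the bait name. The path and filenames below are purely hypothetical.
def _example_write_control_ori( path = 'example_controls.txt' ):
    lines = [ 'BAITA_ctrl_rep1.i', 'BAITA_ctrl_rep2.i', 'BAITB_ctrl_rep1.i' ]
    with open( path, 'wt' ) as fh :
        fh.write( '\n'.join(lines) + '\n' )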
def createNetwork( yamlfile ) :
    readYAMLfile( yamlfile, config )
    loadObjects( config )

    theds = I.dataSet(n_filter = config['node_filter'], debug = DB)
    readInDatasets( theds, config, 'ds_dicts', 'baitkeys' )

    if config['mt_method'] == 'fdr_bh':
        # filter experimental data by background dists
        filterNodesByBackground( theds, config )
    elif config['mt_method'] == 'saintx':
        scoreBySaintx( theds, config )
    else:
        config['joint_hits'] = set(theds.edges.keys())
        config['node_pass1_all'] = set(theds.nodes.keys())
        config['node_pass1_strong'] = set(theds.nodes.keys())

    readPublicDatasets( theds, config )

    if not config['rescueAll']:
        secondaryFiltration( theds, config )
    else :
        config['nodes_pass2'] = config['node_pass1_all']
        config['edges_pass1'] = set()
        for e in list( theds.edges.values() ) :
            if {e.to.key, e.whence.key}.issubset( config['node_pass1_all'] ) and e.to.key != e.whence.key :
                config['edges_pass1'].add(e)
        config['edges_pass2'] = config['edges_pass1']
        config['nnodes_rescued'] = 0

    makeOutput( theds, config )

    return theds, config
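# Hedged usage sketch for the createNetwork() entry point above. The YAML
# filename is hypothetical; the function reads it into the module-level
# `config`, assembles and filters the network, writes output, and returns the
# dataSet together with the populated config.
def _example_create_network( yamlfile = 'network_run.yaml' ) :
    theds, cfg = createNetwork( yamlfile )
    print( '{} nodes, {} edges'.format( len(theds.nodes), len(theds.edges) ) )
    return theds, cfg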
def readPublicDatasets( nwdata, c ):
    if type(c['public_dicts']) is not list or len(c['public_dicts']) == 0:
        return True

    # OK, NOW we dump in the public datasets
    for pd in c['public_dicts'] :
        # (done) make emili follow biogrid field conventions
        # TODO ditto bioplex
        if pd['infilename'] in os.listdir('.') :
            pdsf = open(pd['infilename'])
        elif os.path.isfile(pd['infilename']) :
            pdsf = open(pd['infilename'])
        else :
            pdsf = open(c['publicDatadir'] + pd['infilename'])

        temporaryds = I.dataSet( i_filter = c['iact_filter'], debug = DB )
        convert = pd.get('convert', None)
        temporaryds.parse( pdsf, fd = I.fd_biogrid, convert = convert,
                           qualify = pd.get('qualify',''), directed = False,
                           force_qualify = True, user = scruser )

        sio = StringIO()
        print( 'saving public dataset ' + pd['infilename'] )

        # filter nodes of the dataset if bait is defined
        if 'bait' in pd:
            node_set = trim( temporaryds, pd.get('bait'), pd.get( 'radius', 1 ) )
            temporaryds.save( sio, nodes = node_set )
        else :
            edge_set = { e for e in temporaryds.edges.values()
                         if e.weight >= pd.get('minweight',0)
                         and e.totalscore >= pd.get('minscore',0) }
            temporaryds.save( sio, edges = edge_set )

        sio.seek(0)
        print( 'import public data into network' )
        nwdata.load_from( sio, scruser )
        sio.close()
        pdsf.close()
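# Hedged sketch of one entry of c['public_dicts'] as consumed by
# readPublicDatasets() above. Only 'infilename' is required; 'convert',
# 'qualify', 'minweight', 'minscore', 'bait', and 'radius' are optional with
# the defaults visible in the code. The filename and values here are
# hypothetical.
_EXAMPLE_PUBLIC_DICT = {
    'infilename' : 'biogrid_subset.i',   # looked up in '.', then in publicDatadir
    'convert'    : 'm2h',                # species conversion passed through to parse()
    'qualify'    : 'bg',                 # qualifier tag applied to interactions
    'minweight'  : 2,                    # edge filters used when no 'bait' is given
    'minscore'   : 0.0,
    # 'bait'   : 'BAITA',                # if present, keep nodes within 'radius'
    # 'radius' : 1,                      #   of this bait instead of edge filtering
}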