Example #1
 def login(self):
     # Step 1: load the login page once to obtain the session headers.
     url = data_formats.urls['msigdb']['login1']
     self.pre_session = dataio.curl(
         url, init_url=url, silent=False, cache=False, init_headers=True)
     # Step 2: POST the credentials, replaying the headers captured above;
     # the authenticated headers are kept in self.session for later requests.
     url = data_formats.urls['msigdb']['login2']
     post = {'j_username': self.user, 'j_password': '******'}  # password redacted
     self.session = dataio.curl(
         url,
         init_url=url,
         post=post,
         req_headers=self.pre_session,
         silent=False,
         cache=False,
         init_headers=True)
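For context: this login is a two-step handshake, first fetching the login page to obtain session headers, then POSTing the credentials with those headers replayed. Below is a minimal sketch of the same flow using the requests library; the j_username/j_password field names come from the snippet above, while the function name and parameters are illustrative:

    import requests

    def msigdb_login(login_page_url, login_post_url, username, password):
        # Step 1: GET the login page so the server issues session cookies.
        session = requests.Session()
        session.get(login_page_url)
        # Step 2: POST the credentials, reusing the cookies from step 1.
        session.post(login_post_url,
                     data={'j_username': username, 'j_password': password})
        return session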
Example #2
 def load_set(self, setname, map_ids=True):
     # Download a single gene set as plain text.
     url = data_formats.urls['msigdb']['one_set'] % setname
     data = dataio.curl(url, req_headers=self.session, silent=True)
     data = data.split('\n')
     # The second line carries the description; drop its 2-character prefix.
     self.info[setname] = data[1][2:]
     # The remaining lines are gene symbols; skip the empty ones.
     self.write_set((j for j in (i.strip() for i in data[2:])
                     if len(j) > 0), setname, 'symbol', map_ids)
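The text downloaded here appears to be a plain set file: line 1 the set name, line 2 the description (with a 2-character prefix), and the remaining lines one gene symbol each. A standalone parser under that assumption (the function name is illustrative):

    def parse_set_file(text):
        lines = text.split('\n')
        description = lines[1][2:]  # drop the 2-character prefix
        symbols = [l.strip() for l in lines[2:] if l.strip()]
        return description, symbols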
Example #3
 def load_collection(self,
                     collname,
                     id_type='entrez',
                     map_ids=True,
                     cachedir='cache'):
     # Serve from the pickle cache if this collection was saved before.
     if os.path.exists(os.path.join(cachedir, 'gsea-%s.pickle' % collname)):
         self.load([collname])
         return None
     # Otherwise download the collection for the requested identifier type.
     url = self.collections[collname]['urls'][id_type]
     data = dataio.curl(
         url,
         req_headers=self.session,
         silent=False,
         cache=False,
         write_cache=True)
     data = data.split('\n')
     names = []
     prg = progress.Progress(len(data), 'Loading gene sets', 1)
     # One gene set per tab-separated line: name, description, member IDs.
     for line in (l.split('\t') for l in data if len(l) > 0):
         prg.step()
         setname = line[0].strip()
         self.write_set(line[2:], setname, id_type, map_ids)
         self.get_desc(setname)
         names.append(setname)
     prg.terminate()
     # Remember which sets belong to this collection and cache the result.
     self.groups[collname] = set(names)
     self.save([collname], cachedir=cachedir)
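The collection file parsed by the loop above follows MSigDB's GMT convention: one gene set per tab-separated line, with the set name first, a description second, and the member IDs after that. A minimal standalone parser of that layout, assuming the same input format:

    def parse_gmt(text):
        sets = {}
        for line in text.split('\n'):
            if not line:
                continue
            fields = line.split('\t')
            # fields[0]: set name; fields[1]: description; rest: member IDs
            sets[fields[0].strip()] = set(fields[2:])
        return sets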
Example #4
 def list_collections(self):
     # Extracts name and count from headings like
     # 'hallmark gene sets (50 gene sets)'.
     renm = re.compile(r'(.+)\([^0-9]*([0-9]*)[^0-9]*\)')
     url = data_formats.urls['msigdb']['coll']
     html = dataio.curl(url, req_headers=self.session, silent=False)
     soup = bs4.BeautifulSoup(html, 'lxml')
     # Each table row describes one collection.
     for col in soup.find('table', class_='lists1').find_all('tr'):
         lname, num = renm.findall(col.find('th').text.replace('\n', ''))[0]
         sname = col.find('a').attrs['name']
         # The last three links are the downloads; key each URL by the
         # identifier type taken from the file name (the token before
         # the extension, e.g. 'entrez' or 'symbols').
         urls = dict(
             [(d.attrs['href'].split('.')[-2],
               data_formats.urls['msigdb']['url_stem'] % d.attrs['href'])
              for d in col.find_all('a')[-3:]])
         self.collections[sname] = {
             'name': lname,
             'count': int(num),
             'urls': urls
         }
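The regular expression splits each table heading into the collection name and the number of sets it contains. A quick standalone check of that behavior (the sample heading is illustrative):

    import re

    renm = re.compile(r'(.+)\([^0-9]*([0-9]*)[^0-9]*\)')
    lname, num = renm.findall('hallmark gene sets (50 gene sets)')[0]
    # lname == 'hallmark gene sets ', num == '50'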
Example #5
 def get_desc(self, setname):
     # Fetch a single set and keep only its description line
     # (the same 2-character prefix stripping as in load_set).
     url = data_formats.urls['msigdb']['one_set'] % setname
     txt = dataio.curl(url, req_headers=self.session, silent=True)
     self.info[setname] = txt.split('\n')[1][2:]
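A hypothetical usage sketch, assuming g is an instance of the class these methods belong to and login() has already been called (the set name is a real MSigDB identifier, used here only as an example):

    g.get_desc('HALLMARK_APOPTOSIS')
    print(g.info['HALLMARK_APOPTOSIS'])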