def full_io(url, path, remove=True):
    """Fetch `url` and parse the resulting gzip-compressed file.

    The download is delegated to ``UnsyncFetch.fetch_file`` as a ``'get'``
    request and waited on with ``.result()``. The value it resolves to is
    opened as gzip text and handed to ``MMCIF2DictPlus`` together with the
    tuple ``('_pdbe_chain_remapping.', )``.

    Args:
        url: Address forwarded to the fetcher as ``dict(url=url)``.
        path: Destination forwarded to ``UnsyncFetch.fetch_file``.
        remove: When true, ``unlink()`` the fetched file once it is parsed.

    Returns:
        The object built by ``MMCIF2DictPlus``.
    """
    # NOTE(review): `semaphore` is not a parameter here; it must come from an
    # enclosing/module scope. The trailing positional `1` is presumably a
    # rate/delay setting -- confirm against `UnsyncFetch.fetch_file`.
    request = dict(url=url)
    fetched = UnsyncFetch.fetch_file(semaphore, 'get', request, path, 1)
    local_file = fetched.result()
    with gzip_open(local_file, 'rt') as stream:
        parsed = MMCIF2DictPlus(stream, ('_pdbe_chain_remapping.', ))
    if not remove:
        return parsed
    local_file.unlink()
    return parsed
Example #2
0
 def single_retrieve(cls,
                     pdb,
                     suffix: str,
                     folder: Path,
                     semaphore,
                     rate: float = 1.5):
     """Build one task with ``cls.task_unit`` and submit it as a single task.

     Args:
         pdb: First argument forwarded to ``cls.task_unit``.
         suffix: Second argument forwarded to ``cls.task_unit``.
         folder: Third argument forwarded to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     task = cls.task_unit(pdb, suffix, folder)
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #3
0
 def retrieve_metadata(cls,
                       organism: str,
                       dataset: str = 'complete',
                       suffix: str = 'user_data/{organism}/download/{dataset}/',
                       ret_res: bool = True):
     """Submit the ``interactions.dat`` download task for one organism.

     Args:
         organism: Must be a member of ``cls.organisms``.
         dataset: Substituted into `suffix` and into the output name.
         suffix: Template formatted with `organism` and `dataset`; the
             string ``interactions.dat`` is appended to the result.
         ret_res: When true, call ``.result()`` on the submitted task and
             return that; otherwise return the submitted task object itself.

     Returns:
         Either the submitted task's ``.result()`` or the task object.

     Raises:
         AssertionError: If `organism` is not in ``cls.organisms``.
     """
     assert organism in cls.organisms, f"Invalid organism!\nValid set:{cls.organisms}"
     task = cls.task_unit(
         cls.folder,
         f'{suffix.format(organism=organism, dataset=dataset)}interactions.dat',
         '.tsv',
         f'{organism}_{dataset}_interactions')
     pending = UnsyncFetch.single_task(task, cls.get_web_semaphore())
     return pending.result() if ret_res else pending
Example #4
0
 def single_retrieve(self,
                     pdb_id: str,
                     suffix: str,
                     params: Dict,
                     folder: Union[Path, str],
                     semaphore,
                     rate: float = 1.5):
     """Build one task with ``self.task_unit`` and submit it as a single task.

     Args:
         pdb_id: First argument forwarded to ``self.task_unit``.
         suffix: Second argument forwarded to ``self.task_unit``.
         params: Third argument forwarded to ``self.task_unit``.
         folder: Fourth argument forwarded to ``self.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     task = self.task_unit(pdb_id, suffix, params, folder)
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #5
0
 def single_retrieve(cls,
                     suffix: str,
                     params: Dict,
                     folder: Union[Path, str],
                     semaphore,
                     rate: float = 1.5):
     """Validate `suffix`, build one task and submit it as a single task.

     Args:
         suffix: Must be a member of ``cls.api_set``.
         params: Forwarded to ``cls.task_unit``.
         folder: Converted to ``Path`` before being given to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.

     Raises:
         AssertionError: If `suffix` is not in ``cls.api_set``.
     """
     assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
     task = cls.task_unit(suffix, params, Path(folder))
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #6
0
 def single_retrieve(cls,
                     pdb,
                     suffix: str,
                     folder: Path,
                     semaphore,
                     file_suffix: Optional[str] = None,
                     rate: float = 1.5):
     """Build one task with ``cls.task_unit`` and submit it as a single task.

     Args:
         pdb: First argument forwarded to ``cls.task_unit``.
         suffix: Second argument forwarded to ``cls.task_unit``.
         folder: Fourth argument forwarded to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         file_suffix: Third argument forwarded to ``cls.task_unit``; when
             ``None`` it is obtained from ``cls.get_file_suffix(suffix)``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     resolved_suffix = (cls.get_file_suffix(suffix)
                        if file_suffix is None else file_suffix)
     task = cls.task_unit(pdb, suffix, resolved_suffix, folder)
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #7
0
 def single_retrieve(cls,
                     pdb: str,
                     suffix: str,
                     method: str,
                     folder: Union[Path, str],
                     semaphore,
                     rate: float = 1.5,
                     **kwargs):
     """Submit the first task yielded by ``cls.yieldTasks`` for a single `pdb`.

     Args:
         pdb: Wrapped in a one-element tuple and given to ``cls.yieldTasks``.
         suffix: Forwarded to ``cls.yieldTasks``.
         method: Forwarded to ``cls.yieldTasks``.
         folder: Forwarded to ``cls.yieldTasks``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.
         **kwargs: Only ``to_do_func`` is read; it defaults to ``cls.process``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     task_stream = cls.yieldTasks((pdb, ), suffix, method, folder)
     first_task = next(task_stream)
     callback = kwargs.get('to_do_func', cls.process)
     return UnsyncFetch.single_task(task=first_task,
                                    semaphore=semaphore,
                                    to_do_func=callback,
                                    rate=rate)
Example #8
0
 def single_retrieve(cls,
                     suffix: str,
                     identifier: str,
                     params: Optional[Dict],
                     folder: Union[Path, str],
                     semaphore,
                     rate: float = 1.5,
                     headers: Optional[Dict] = None):
     """Validate `suffix`, build one task and submit it as a single task.

     Args:
         suffix: Must be a member of ``cls.api_set``.
         identifier: Forwarded to ``cls.task_unit``.
         params: Forwarded to ``cls.task_unit``.
         folder: Converted to ``Path`` before being given to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.
         headers: Forwarded to ``cls.task_unit`` as its last argument.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.

     Raises:
         AssertionError: If `suffix` is not in ``cls.api_set``.
     """
     assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
     task = cls.task_unit(suffix, identifier, params, Path(folder), headers)
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #9
0
 def retrieve(cls,
              pdbs,
              suffix: str,
              folder: Path,
              file_suffix: Optional[str] = None,
              concur_req: int = 20,
              rate: float = 1.5,
              ret_res: bool = True,
              **kwargs):
     """Submit the tasks produced by ``cls.yieldTasks`` as a batch.

     Args:
         pdbs: Forwarded to ``cls.yieldTasks``.
         suffix: Forwarded to ``cls.yieldTasks``.
         folder: Forwarded to ``cls.yieldTasks``.
         file_suffix: Forwarded to ``cls.yieldTasks`` (may be ``None``).
         concur_req: Passed through to ``UnsyncFetch.multi_tasks``.
         rate: Passed through to ``UnsyncFetch.multi_tasks``.
         ret_res: Passed through to ``UnsyncFetch.multi_tasks``.
         **kwargs: Only ``semaphore`` is read; ``None`` is used when absent.

     Returns:
         Whatever ``UnsyncFetch.multi_tasks`` returns.
     """
     tasks = cls.yieldTasks(pdbs, suffix, file_suffix, folder)
     return UnsyncFetch.multi_tasks(tasks,
                                    concur_req=concur_req,
                                    rate=rate,
                                    ret_res=ret_res,
                                    semaphore=kwargs.get('semaphore'))
Example #10
0
 def retrieve(cls,
              suffix: str,
              params_collection: Iterable[Dict],
              folder: Union[Path, str],
              concur_req: int = 20,
              rate: float = 1.5,
              ret_res: bool = True,
              **kwargs):
     """Validate `suffix` and submit the tasks from ``cls.yieldTasks`` as a batch.

     Args:
         suffix: Must be a member of ``cls.api_set``.
         params_collection: Forwarded to ``cls.yieldTasks``.
         folder: Converted to ``Path`` before being given to ``cls.yieldTasks``.
         concur_req: Passed through to ``UnsyncFetch.multi_tasks``.
         rate: Passed through to ``UnsyncFetch.multi_tasks``.
         ret_res: Passed through to ``UnsyncFetch.multi_tasks``.
         **kwargs: Only ``semaphore`` is read; ``None`` is used when absent.

     Returns:
         Whatever ``UnsyncFetch.multi_tasks`` returns.

     Raises:
         AssertionError: If `suffix` is not in ``cls.api_set``.
     """
     assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
     tasks = cls.yieldTasks(suffix, params_collection, Path(folder))
     return UnsyncFetch.multi_tasks(tasks,
                                    concur_req=concur_req,
                                    rate=rate,
                                    ret_res=ret_res,
                                    semaphore=kwargs.get('semaphore'))
Example #11
0
 def retrieve(cls,
              unps,
              folder: Optional[Union[Path, str]] = None,
              params: Optional[Dict] = None,
              concur_req: int = 20,
              rate: float = 1.5,
              file_format: str = 'json',
              ret_res: bool = True,
              **kwargs):
     """Submit the tasks from ``cls.yieldTasks`` as a batch, processed by ``cls.process``.

     Args:
         unps: Forwarded to ``cls.yieldTasks``.
         folder: Target folder; ``cls.folder`` is used when ``None``.
         params: Forwarded to ``cls.yieldTasks``. When ``None`` a fresh
             ``dict(provider='swissmodel')`` is created for this call.
         concur_req: Passed through to ``UnsyncFetch.multi_tasks``.
         rate: Passed through to ``UnsyncFetch.multi_tasks``.
         file_format: Either ``'json'`` or ``'pdb'``.
         ret_res: Passed through to ``UnsyncFetch.multi_tasks``.
         **kwargs: Only ``semaphore`` is read; ``cls.web_semaphore`` is used
             when it is absent.

     Returns:
         Whatever ``UnsyncFetch.multi_tasks`` returns.

     Raises:
         AssertionError: If `file_format` is not ``'json'`` or ``'pdb'``.
     """
     assert file_format in ('json', 'pdb'), "Invalid file format"
     if params is None:
         # Build the default per call: a dict literal in the signature would be
         # one object shared (and mutable) across every invocation.
         params = dict(provider='swissmodel')
     target_folder = cls.folder if folder is None else folder
     tasks = cls.yieldTasks(unps, params, file_format, target_folder)
     return UnsyncFetch.multi_tasks(
         tasks,
         cls.process,
         concur_req=concur_req,
         rate=rate,
         ret_res=ret_res,
         semaphore=kwargs.get('semaphore', cls.web_semaphore))
Example #12
0
 def single_retrieve(cls,
                     unp: str,
                     folder: Optional[Union[Path, str]] = None,
                     semaphore=None,
                     params: Optional[Dict] = None,
                     rate: float = 1.5,
                     file_format: str = 'json'):
     """Submit one task for `unp`, reusing an existing ``tsv`` file when present.

     Args:
         unp: Forwarded to ``cls.task_unit``.
         folder: Target folder; ``cls.folder`` is used when ``None``.
         semaphore: Passed to ``UnsyncFetch.single_task``; ``cls.web_semaphore``
             is used when ``None``.
         params: Forwarded to ``cls.task_unit``. When ``None`` a fresh
             ``dict(provider='swissmodel')`` is created for this call.
         rate: Passed through to ``UnsyncFetch.single_task``.
         file_format: Either ``'json'`` or ``'pdb'``.

     Returns:
         ``unsync_wrap(candidate)`` when `file_format` is ``'json'`` and the
         derived ``tsv`` path already exists; otherwise whatever
         ``UnsyncFetch.single_task`` returns.

     Raises:
         AssertionError: If `file_format` is not ``'json'`` or ``'pdb'``.
     """
     assert file_format in ('json', 'pdb'), "Invalid file format"
     if params is None:
         # Build the default per call: a dict literal in the signature would be
         # one object shared (and mutable) across every invocation.
         params = dict(provider='swissmodel')
     task = cls.task_unit(unp, params, file_format,
                          cls.folder if folder is None else folder)
     if file_format == 'json':
         # NOTE(review): this replaces every 'json' substring in the path
         # (including directory names), not only the extension -- confirm it
         # matches the name `cls.process` writes to before changing it.
         candidate = Path(str(task[2]).replace('json', 'tsv'))
         if candidate.exists():
             return unsync_wrap(candidate)
     return UnsyncFetch.single_task(
         task=task,
         semaphore=cls.web_semaphore if semaphore is None else semaphore,
         to_do_func=cls.process,
         rate=rate)
Example #13
0
 def query_sequence(cls,
                    params: Dict,
                    data: Dict,
                    folder: Union[Path, str],
                    fileName: str,
                    semaphore,
                    rate: float = 1.5):
     """Implement `uniparc/sequence` as a single ``post`` task.

     Args:
         params: Placed under ``params`` in the request arguments.
         data: Placed under ``data`` in the request arguments.
         folder: Directory part of the output path.
         fileName: Stem of the output file; the extension comes from
             ``cls.get_file_suffix()``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     out_dir = Path(folder)
     request = dict(url=f'{BASE_URL}uniparc/sequence',
                    headers=cls.headers,
                    params=params,
                    data=data)
     destination = out_dir / f'{fileName}.{cls.get_file_suffix()}'
     return UnsyncFetch.single_task(task=('post', request, destination),
                                    semaphore=semaphore,
                                    rate=rate)
Example #14
0
 def single_retrieve(cls,
                     pdb: str,
                     suffix: str,
                     method: str,
                     folder: Union[Path, str],
                     semaphore,
                     params=None,
                     data_collection=None,
                     rate: float = 1.5,
                     filename='subset'):
     """Build one task with ``cls.task_unit`` and submit it as a single task.

     Args:
         pdb: Forwarded to ``cls.task_unit``.
         suffix: Forwarded to ``cls.task_unit``.
         method: Forwarded to ``cls.task_unit``.
         folder: Forwarded to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         params: Forwarded to ``cls.task_unit``; when ``None`` or empty it is
             replaced by ``{'model_nums': 1, 'encoding': 'cif'}``.
         data_collection: Forwarded to ``cls.task_unit``.
         rate: Passed through to ``UnsyncFetch.single_task``.
         filename: Forwarded to ``cls.task_unit`` as the ``filename`` keyword.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     if params is None or not len(params):
         params = {'model_nums': 1, 'encoding': 'cif'}
     task = cls.task_unit(pdb, suffix, method, folder, data_collection,
                          params, filename=filename)
     return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
Example #15
0
 def retrieve(cls,
              pdbs: Union[Iterable, Iterator],
              suffix: str,
              method: str,
              folder: Union[str, Path],
              chunksize: int = 20,
              concur_req: int = 20,
              rate: float = 1.5,
              task_id: int = 0,
              ret_res: bool = True,
              **kwargs):
     """Submit the tasks from ``cls.yieldTasks`` as a batch, processed by ``cls.process``.

     Args:
         pdbs: Forwarded to ``cls.yieldTasks``.
         suffix: Forwarded to ``cls.yieldTasks``.
         method: Forwarded to ``cls.yieldTasks``.
         folder: Forwarded to ``cls.yieldTasks``.
         chunksize: Forwarded to ``cls.yieldTasks``.
         concur_req: Passed through to ``UnsyncFetch.multi_tasks``.
         rate: Passed through to ``UnsyncFetch.multi_tasks``.
         task_id: Forwarded to ``cls.yieldTasks``.
         ret_res: Passed through to ``UnsyncFetch.multi_tasks``.
         **kwargs: Only ``semaphore`` is read; ``None`` is used when absent.

     Returns:
         Whatever ``UnsyncFetch.multi_tasks`` returns.
     """
     tasks = cls.yieldTasks(pdbs, suffix, method, folder, chunksize, task_id)
     return UnsyncFetch.multi_tasks(tasks,
                                    cls.process,
                                    concur_req=concur_req,
                                    rate=rate,
                                    ret_res=ret_res,
                                    semaphore=kwargs.get('semaphore'))
Example #16
0
 def retrieve_all_meta(cls,
                       dataset: str = 'complete',
                       suffix: str = 'user_data/{organism}/download/{dataset}/'):
     """Submit one ``interactions.dat`` task per entry of ``cls.organisms``.

     Args:
         dataset: Substituted into `suffix` and into each output name.
         suffix: Template formatted with each organism and `dataset`; the
             string ``interactions.dat`` is appended to the result.

     Returns:
         Whatever ``UnsyncFetch.multi_tasks`` returns for the task list.
     """
     tasks = []
     for organism in cls.organisms:
         tasks.append(cls.task_unit(
             cls.folder,
             f'{suffix.format(organism=organism, dataset=dataset)}interactions.dat',
             '.tsv',
             f'{organism}_{dataset}_interactions'))
     return UnsyncFetch.multi_tasks(tasks, semaphore=cls.get_web_semaphore())
Example #17
0
 def graphql_retrieve(cls,
                      query,
                      folder,
                      semaphore,
                      to_do_func=None,
                      rate: float = 1.5):
     """Submit a single ``get`` task that sends `query` to ``cls.graphql_root``.

     The output file is placed in `folder` and named after the SHA-1 hex
     digest of the UTF-8 encoded `query`, with a ``.json`` extension.

     Args:
         query: Query string sent as the ``query`` request parameter.
         folder: Directory for the output file.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         to_do_func: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     request = dict(url=cls.graphql_root,
                    params=dict(query=query),
                    headers=cls.headers)
     digest = sha1(bytes(query, encoding="utf-8")).hexdigest()
     destination = Path(folder)/f'{digest}.json'
     return UnsyncFetch.single_task(task=('get', request, destination),
                                    semaphore=semaphore,
                                    to_do_func=to_do_func,
                                    rate=rate)
Example #18
0
 def single_retrieve(cls,
                     identifier: str,
                     suffix: str,
                     folder: Union[Path, str],
                     semaphore,
                     to_do_func=None,
                     rate: float = 1.5):
     """Build one task with ``cls.task_unit`` and submit it as a single task.

     Args:
         identifier: First argument forwarded to ``cls.task_unit``.
         suffix: Second argument forwarded to ``cls.task_unit``.
         folder: Third argument forwarded to ``cls.task_unit``.
         semaphore: Passed through to ``UnsyncFetch.single_task``.
         to_do_func: Passed through to ``UnsyncFetch.single_task``.
         rate: Passed through to ``UnsyncFetch.single_task``.

     Returns:
         Whatever ``UnsyncFetch.single_task`` returns.
     """
     task = cls.task_unit(identifier, suffix, folder)
     return UnsyncFetch.single_task(task=task,
                                    semaphore=semaphore,
                                    to_do_func=to_do_func,
                                    rate=rate)