def save_snps(self):
    """ Write this object's SNPs to a CSV file in the output directory.

    The file is named after ``self.get_var_name()`` with a ``.csv``
    extension. Missing values are written as ``--``.

    Returns
    -------
    bool
        True if the SNPs were written; False if there are no SNPs, the
        output directory could not be created, or an error occurred
    """
    # guard clause: nothing to write
    if self._snps is None:
        print('no SNPs to save...')
        return False

    try:
        if not lineage.create_dir(self._output_dir):
            return False

        csv_path = os.path.join(
            self._output_dir, self.get_var_name() + '.csv')

        print('Saving ' + os.path.relpath(csv_path))

        self._snps.to_csv(
            csv_path,
            na_rep='--',
            header=['chromosome', 'position', 'genotype'])
    except Exception as err:
        # best effort: report the problem and signal failure to the caller
        print(err)
        return False
    else:
        return True
def _get_path_hapmap_h36(self):
    """ Get local path to HapMap for hg18 / NCBI36, downloading if necessary.

    If the archive is not already present in the resources directory, each
    ``.txt`` file in the NCBI HapMap rates directory is fetched over FTP
    and added to a gzip-compressed tar. If the download fails before the
    archive is complete, the partial archive is removed so that a later
    call retries instead of returning a truncated file.

    Returns
    -------
    str
        path to hapmap_h36.tar.gz, None if error

    References
    ----------
    ..[1] "The International HapMap Consortium (2007). A second generation
      human haplotype map of over 3.1 million SNPs. Nature 449: 851-861."
    """
    if not lineage.create_dir(self._resources_dir):
        return None

    hapmap = 'hapmap_h36'
    destination = os.path.join(self._resources_dir, hapmap + '.tar.gz')

    if os.path.exists(destination):
        return destination

    # tracks whether the tar was fully written and closed, so the error
    # handler knows whether `destination` is safe to keep
    archive_complete = False

    try:
        # make FTP connection to NCBI; leaving the `with` block ends the
        # session, so no explicit quit is needed
        with ftplib.FTP('ftp.ncbi.nlm.nih.gov') as ftp:
            ftp.login()
            ftp.cwd('hapmap/recombination/2008-03_rel22_B36/rates')

            # download each HapMap file and add to compressed tar
            with tarfile.open(destination, 'w:gz') as out_tar:
                for filename in ftp.nlst():
                    if '.txt' not in filename:
                        continue

                    path = os.path.join(destination, hapmap, filename)
                    self._print_download_msg(path)

                    # delete=False so the file can be closed (flushed)
                    # before tarfile re-opens it by name
                    fp = tempfile.NamedTemporaryFile(delete=False)
                    try:
                        with fp:
                            ftp.retrbinary('RETR ' + filename, fp.write)

                        # add temp file to archive
                        out_tar.add(fp.name, arcname=os.path.join(
                            hapmap, filename))
                    finally:
                        # remove temp file even if the transfer failed
                        os.remove(fp.name)

            archive_complete = True
    except Exception as err:
        print(err)

        if not archive_complete:
            # don't leave a truncated archive that would look like a
            # finished download on the next call
            try:
                if os.path.exists(destination):
                    os.remove(destination)
            except OSError as cleanup_err:
                print(cleanup_err)

        return None

    return destination
def _download_file(self, url, filename, compress=False, timeout=30):
    """ Download a file to the resources folder.

    Download data from `url`, save as `filename`, and optionally compress
    with gzip. Nothing is downloaded if the destination already exists.
    The response is read completely before the destination is opened, and
    a destination left behind by a failed write is removed, so an
    interrupted download is retried on the next call instead of being
    mistaken for a finished one.

    Parameters
    ----------
    url : str
        URL to download data from
    filename : str
        name of file to save; if compress, ensure '.gz' is appended
    compress : bool
        compress with gzip
    timeout : int
        seconds for timeout of download request

    Returns
    -------
    str
        path to downloaded file, None if error
    """
    if not lineage.create_dir(self._resources_dir):
        return None

    if compress and not filename.endswith('.gz'):
        filename += '.gz'

    destination = os.path.join(self._resources_dir, filename)

    # get file if it hasn't already been downloaded
    if os.path.exists(destination):
        return destination

    open_func = gzip.open if compress else open

    try:
        self._print_download_msg(destination)

        # http://stackoverflow.com/a/7244263
        # fetch everything first so a network failure never creates the file
        with urllib.request.urlopen(url, timeout=timeout) as response:
            data = response.read()  # a `bytes` object

        with open_func(destination, 'wb') as f:
            f.write(data)
    except Exception as err:
        print(err)

        # the destination did not exist before this call, so anything
        # there now is an incomplete write; remove it
        try:
            if os.path.exists(destination):
                os.remove(destination)
        except OSError as cleanup_err:
            print(cleanup_err)

        return None

    return destination
def save_snps(self):
    """ Write this object's SNPs to a CSV file.

    The file is named after ``self.get_var_name()`` with a ``.csv``
    extension and is placed in the output directory. If that directory
    cannot be created, an empty directory component is used instead, so
    the path is relative to the current working directory. Missing values
    are written as ``--``. Errors are printed rather than raised.
    """
    # guard clause: nothing to write
    if self._snps is None:
        print('no SNPs to save...')
        return

    try:
        # fall back to a bare filename when the output directory is unusable
        target_dir = (
            self._output_dir if lineage.create_dir(self._output_dir) else '')

        csv_path = os.path.join(target_dir, self.get_var_name() + '.csv')

        print('Saving ' + os.path.relpath(csv_path))

        self._snps.to_csv(
            csv_path,
            na_rep='--',
            header=['chromosome', 'position', 'genotype'])
    except Exception as err:
        # best effort: report the problem without propagating it
        print(err)