Esempio n. 1
0
    def save_snps(self):
        """ Write the loaded SNPs to a CSV file in the output directory.

        The file is named ``<get_var_name()>.csv``; missing values are written as
        ``--`` and the columns are labelled chromosome, position and genotype.

        Returns
        -------
        bool
            True if the SNPs were written; False if there are no SNPs, if
            `lineage.create_dir` reports failure for the output directory, or if
            any exception is raised while writing (the exception is printed)
        """
        # guard clause: nothing has been loaded
        if self._snps is None:
            print('no SNPs to save...')
            return False

        try:
            if not lineage.create_dir(self._output_dir):
                return False

            csv_name = self.get_var_name() + '.csv'
            csv_path = os.path.join(self._output_dir, csv_name)

            print('Saving ' + os.path.relpath(csv_path))

            column_labels = ['chromosome', 'position', 'genotype']
            self._snps.to_csv(csv_path, na_rep='--', header=column_labels)
        except Exception as err:
            # best-effort save: report the problem and signal failure to the caller
            print(err)
            return False

        return True
Esempio n. 2
0
    def _get_path_hapmap_h36(self):
        """ Get local path to HapMap for hg18 / NCBI36, downloading if necessary.

        If the archive is not already present in the resources directory, every file
        whose name contains '.txt' in the NCBI FTP directory is downloaded and added
        to a gzip-compressed tar under the ``hapmap_h36/`` prefix. The archive is
        built under a temporary ``.part`` name and only moved to its final path once
        complete, so a failed or interrupted download is never mistaken for a
        finished archive on a later call.

        Returns
        -------
        str
            path to hapmap_h36.tar.gz, None if error

        References
        ----------
        ..[1] "The International HapMap Consortium (2007).  A second generation human haplotype
          map of over 3.1 million SNPs.  Nature 449: 851-861."

        """
        if not lineage.create_dir(self._resources_dir):
            return None

        hapmap = 'hapmap_h36'
        destination = os.path.join(self._resources_dir, hapmap + '.tar.gz')

        # archive already downloaded
        if os.path.exists(destination):
            return destination

        # in-progress archive; lives next to `destination` so the final rename stays on
        # the same filesystem
        partial = destination + '.part'

        try:
            try:
                # make FTP connection to NCBI (the context manager closes the session)
                with ftplib.FTP('ftp.ncbi.nlm.nih.gov') as ftp:
                    ftp.login()
                    ftp.cwd('hapmap/recombination/2008-03_rel22_B36/rates')

                    # download each HapMap file and add to compressed tar
                    with tarfile.open(partial, 'w:gz') as out_tar:
                        for filename in ftp.nlst():
                            if '.txt' not in filename:
                                continue

                            # path is only used for the progress message
                            path = os.path.join(destination, hapmap, filename)
                            self._print_download_msg(path)

                            fp = tempfile.NamedTemporaryFile(delete=False)
                            try:
                                # download HapMap file; temp file must be closed before
                                # it is added to the archive
                                with fp:
                                    ftp.retrbinary('RETR ' + filename, fp.write)

                                out_tar.add(fp.name,
                                            arcname=os.path.join(hapmap, filename))
                            finally:
                                # remove temp file even if the download or add failed
                                os.remove(fp.name)

                    # archive is complete; publish it under its final name
                    os.replace(partial, destination)
            finally:
                # discard an incomplete archive (no-op after a successful rename)
                if os.path.exists(partial):
                    os.remove(partial)
        except Exception as err:
            print(err)
            return None

        return destination
Esempio n. 3
0
    def _download_file(self, url, filename, compress=False, timeout=30):
        """ Download a file to the resources folder.

        Download data from `url`, save as `filename`, and optionally compress with gzip.
        If the destination file already exists, nothing is downloaded. The response is
        read completely before the destination file is created, and a file left behind
        by a failed write is removed, so a failed download is not mistaken for a
        finished one on a later call.

        Parameters
        ----------
        url : str
            URL to download data from
        filename : str
            name of file to save; if compress, '.gz' is appended when not already present
        compress : bool
            compress with gzip
        timeout : int
            seconds for timeout of download request

        Returns
        -------
        str
            path to downloaded file, None if error
        """
        if not lineage.create_dir(self._resources_dir):
            return None

        if compress and not filename.endswith('.gz'):
            filename += '.gz'

        destination = os.path.join(self._resources_dir, filename)

        # file has already been downloaded
        if os.path.exists(destination):
            return destination

        try:
            self._print_download_msg(destination)

            open_func = gzip.open if compress else open

            # fetch the whole payload before touching the filesystem
            # http://stackoverflow.com/a/7244263
            with urllib.request.urlopen(url, timeout=timeout) as response:
                data = response.read()  # a `bytes` object

            try:
                with open_func(destination, 'wb') as f:
                    f.write(data)
            except BaseException:
                # do not leave a partial file that later calls would return as complete
                if os.path.exists(destination):
                    os.remove(destination)
                raise
        except Exception as err:
            print(err)
            return None

        return destination
Esempio n. 4
0
    def save_snps(self):
        """ Save SNPs to file.

        The file is named ``<get_var_name()>.csv``. It is written to the output
        directory when `lineage.create_dir` succeeds for it; otherwise the bare file
        name is used, i.e. a path relative to the current working directory.

        Returns
        -------
        bool
            True if the SNPs were written; False if there are no SNPs to save or an
            exception was raised while saving (the exception is printed)
        """
        if self._snps is None:
            print('no SNPs to save...')
            return False

        try:
            # an empty directory component makes os.path.join return just the file name
            output_dir = self._output_dir if lineage.create_dir(self._output_dir) else ''

            file = os.path.join(output_dir, self.get_var_name() + '.csv')
            print('Saving ' + os.path.relpath(file))
            self._snps.to_csv(
                file,
                na_rep='--',
                header=['chromosome', 'position', 'genotype'])
        except Exception as err:
            # best-effort save: report the problem and let the caller see the failure
            print(err)
            return False

        return True