Example #1
0
def download_kegg_info_files(kegg_set_ids, species_ini_file):
    """
    Fetch the per-set KEGG information files for one species.

    Each info file describes a single KEGG set (title, abstract, supporting
    publications, etc.).

    Arguments:
    kegg_set_ids -- Iterable of KEGG set identifiers (e.g. hsa00010); one
    info file is requested per identifier.

    species_ini_file -- String path to the species INI config file.

    Returns:
    Nothing. Files are saved in the folder obtained by joining the
    SPECIES_DOWNLOAD_FOLDER value from the INI file with the module-level
    KEGGSET_INFO_FOLDER constant.

    """
    config = SafeConfigParser()
    config.read(species_ini_file)

    # Destination folder: <species download folder>/<KEGGSET_INFO_FOLDER>.
    species_root = config.get('species_info', 'SPECIES_DOWNLOAD_FOLDER')
    target_dir = os.path.join(species_root, KEGGSET_INFO_FOLDER)
    check_create_folder(target_dir)

    # Every info URL is this prefix immediately followed by the set id.
    root_url = config.get('KEGG', 'KEGG_ROOT_URL')
    info_dir = config.get('KEGG', 'SET_INFO_DIR')
    url_prefix = root_url + info_dir

    for set_id in kegg_set_ids:
        download_from_url(url_prefix + set_id, target_dir)
Example #2
0
    def __init__(self, verbose=False, download_dir=None):
        """Store the download location and remote endpoints from settings.

        download_dir -- folder to download into; when it is None or empty,
        settings.DOWNLOAD_DIR is used instead.
        verbose -- accepted for interface compatibility; this method does
        not read it.
        """
        # A falsy download_dir means "use the configured default".
        self.download_dir = download_dir or settings.DOWNLOAD_DIR
        self.google = settings.GOOGLE_STORAGE
        self.s3 = settings.S3_LANDSAT

        # The target folder must exist before anything is downloaded into it.
        check_create_folder(self.download_dir)
Example #3
0
    def __init__(self, player, *args, **kwargs):
        """Build the podcast page widgets, open the database and load content.

        player -- stored on self.player; not otherwise used in this method.
        Any further positional/keyword arguments are forwarded unchanged to
        the parent class initializer.
        """
        super(PodcastPage, self).__init__(*args, **kwargs)

        self.player = player

        # Feed entry plus its two action buttons.
        self.podcast_entry = Gtk.Entry()
        self.podcast_entry.set_placeholder_text('Insert a new podcast feed.')

        btn_add = Gtk.Button.new_from_stock('gtk-add')
        btn_add.connect('clicked', self.add_podcast)

        btn_refresh = Gtk.Button.new_from_stock('gtk-refresh')
        btn_refresh.connect('clicked', self.on_update_all)

        # Header widgets for the episode column.
        lbl_episodes = Gtk.Label('Episodes')
        btn_all = Gtk.Button('All')
        btn_all.connect('clicked', self.all_button_pressed)

        # Podcast list, wrapped in a scrolled window.
        self.podcast_box = Gtk.ListBox()
        self.podcast_box.connect('row-activated', self.on_podcast_selected)
        podcast_scroll = create_scrolled_window(self.podcast_box)

        # Episode list; the scrolled window is kept on self because its
        # 'edge-overshot' signal is handled by this page.
        self.episode_box = Gtk.ListBox()
        self.episode_box.connect('row-activated', self.on_episode_selected)
        self.episode_scroll = create_scrolled_window(self.episode_box)
        self.episode_scroll.connect('edge-overshot', self.on_scroll_overshot)

        self.paned = Gtk.Paned(orientation=Gtk.Orientation.VERTICAL)
        self.paned.set_position(300)
        self.paned.add1(self.episode_scroll)

        # Only the entry expands; the buttons keep their natural width.
        entry_row = Gtk.HBox()
        for widget, expand in ((self.podcast_entry, True),
                               (btn_add, False),
                               (btn_refresh, False)):
            entry_row.pack_start(widget, expand, True, 0)

        # Each tuple is (child, left, top, width, height) as passed to attach().
        placements = (
            (entry_row, 0, 0, 1, 1),
            (podcast_scroll, 0, 1, 1, 1),
            (lbl_episodes, 2, 0, 2, 1),
            (btn_all, 4, 0, 1, 1),
            (self.paned, 1, 1, 4, 1),
        )
        for child, left, top, width, height in placements:
            self.attach(child, left, top, width, height)

        self.set_column_spacing(3)

        check_create_folder('./temp')
        self.database = PodcastDB()

        self.on_load()

        # Selection state is reset after on_load() has run, so anything
        # on_load() stored in these attributes is overwritten here.
        self.pod_row_selected = self.ep_selected_link = self.episode_info = None
Example #4
0
    def _unzip(self, src, dst, scene):
        """ Unzip tar files

        src -- path of the tar archive to extract
        dst -- folder to extract into
        scene -- scene name, used only in the progress message

        The archive is first read with the `tarfile` module. If that raises
        tarfile.ReadError, `dst` is created with check_create_folder() and
        the system `tar` command is run instead; a failure of that command
        propagates as subprocess.CalledProcessError.
        """
        self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True)

        try:
            # The context manager closes the archive even when extraction
            # fails part-way, so the file handle is never leaked.
            with tarfile.open(src, 'r') as tar:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this on archives from a trusted
                # source.
                tar.extractall(path=dst)
        except tarfile.ReadError:
            # Fall back to the external tar binary, which needs dst to exist.
            check_create_folder(dst)
            subprocess.check_call(['tar', '-xf', src, '-C', dst])
Example #5
0
    def _unzip(self, src, dst, scene):
        """ Unzip tar files

        src -- path of the tar archive to extract
        dst -- folder to extract into
        scene -- scene name, used only in the progress message

        The archive is first read with the `tarfile` module. If that raises
        tarfile.ReadError, `dst` is created with check_create_folder() and
        the system `tar` command is run instead; a failure of that command
        propagates as subprocess.CalledProcessError.
        """
        self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True)

        try:
            # The context manager closes the archive even when extraction
            # fails part-way, so the file handle is never leaked.
            with tarfile.open(src, 'r') as tar:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this on archives from a trusted
                # source.
                tar.extractall(path=dst)
        except tarfile.ReadError:
            # Fall back to the external tar binary, which needs dst to exist.
            check_create_folder(dst)
            subprocess.check_call(['tar', '-xf', src, '-C', dst])
Example #6
0
    def download(self, scenes, bands=None):
        """
        Download scenes from Google Storage, or from Amazon S3 if bands are
        provided

        @params
        scenes - A list of sceneIDs
        bands - A list of bands (optional). The list passed in is never
                modified.

        @returns
        True once every scene has been handled.

        @raises
        Exception - when `scenes` is not a list, or when `bands` is given but
                    is not a list (raised when the first Amazon-eligible
                    scene is reached).
        """

        if not isinstance(scenes, list):
            raise Exception('Expected sceneIDs list')

        for scene in scenes:
            # Amazon is used only when bands are requested and the character
            # at index 12 of the scene ID is a digit greater than 4.
            # NOTE(review): the original comment says "2015 or later";
            # presumably index 12 is the last digit of the acquisition year -
            # confirm this single-digit test matches that intent.
            if (bands and int(scene[12]) > 4):
                if not isinstance(bands, list):
                    raise Exception('Expected bands list')

                # Create a folder to download the specific bands into
                path = check_create_folder(join(self.download_dir, scene))
                try:
                    # Always grab MTL.txt if bands are specified. Work on a
                    # copy so the caller's list does not gain one extra 'MTL'
                    # per scene.
                    bands_plus = list(bands)
                    if 'MTL' not in bands_plus:
                        bands_plus.append('MTL')

                    for band in bands_plus:
                        self.amazon_s3(scene, band, path)
                except RemoteFileDoesntExist:
                    # Not available on Amazon: fetch the scene from Google.
                    self.google_storage(scene, self.download_dir)
            else:
                self.google_storage(scene, self.download_dir)

        return True
Example #7
0
    def _unzip(self, src, dst, scene, force_unzip=False):
        """ Unzip tar files

        src -- path of the tar archive to extract
        dst -- folder to extract into
        scene -- scene name, used only in the progress messages
        force_unzip -- extract even when `dst` already exists as a folder

        When `dst` already exists and `force_unzip` is false nothing is
        extracted. Otherwise the archive is read with the `tarfile` module;
        if that raises tarfile.ReadError, `dst` is created with
        check_create_folder() and the system `tar` command is run instead
        (its failure propagates as subprocess.CalledProcessError).
        """
        self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True)

        # check if file is already unzipped, skip
        if isdir(dst) and not force_unzip:
            self.output("%s is already unzipped." % scene, normal=True, arrow=True)
            return

        try:
            # The context manager closes the archive even when extraction
            # fails part-way, so the file handle is never leaked.
            with tarfile.open(src, 'r') as tar:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this on archives from a trusted
                # source.
                tar.extractall(path=dst)
        except tarfile.ReadError:
            # Fall back to the external tar binary, which needs dst to exist.
            check_create_folder(dst)
            subprocess.check_call(['tar', '-xf', src, '-C', dst])
Example #8
0
    def __init__(self, path, bands=None, dst_path=None, verbose=False):
        """
        @params
        path - Path to the source scene file; the scene name is the part of
               its file name before the first '.'
        bands - The band sequence for the final image. Must be a python list;
                any other value falls back to [4, 3, 2]
        dst_path - The destination path; settings.PROCESSED_IMAGE is used
                   when it is not given
        verbose - Whether to show verbose output
        """
        # presumably get_file() returns the file-name component of the path;
        # verify against its definition.
        file_name = get_file(path)

        self.projection = {'init': 'epsg:3857'}
        self.dst_crs = {'init': u'epsg:3857'}
        self.scene = file_name.split('.')[0]
        if isinstance(bands, list):
            self.bands = bands
        else:
            self.bands = [4, 3, 2]

        # Landsat source path: the given path with the file name removed
        self.src_path = path.replace(file_name, '')

        # Build the per-scene destination folder if it doesn't exist
        output_root = dst_path or settings.PROCESSED_IMAGE
        self.dst_path = check_create_folder(join(output_root, self.scene))
        self.verbose = verbose

        # Path to the unzipped folder
        self.scene_path = join(self.src_path, self.scene)

        if self._check_if_zipped(path):
            self._unzip(join(self.src_path, file_name), self.scene_path, self.scene)

        # One full file path per requested band, in band order
        self.bands_path = [
            join(self.scene_path, self._get_full_filename(band))
            for band in self.bands
        ]
Example #9
0
    def _unzip(self, src, dst, scene, force_unzip=False):
        """ Unzip tar files

        src -- path of the tar archive to extract
        dst -- folder to extract into
        scene -- scene name, used only in the progress messages
        force_unzip -- extract even when `dst` already exists as a folder

        When `dst` already exists and `force_unzip` is false nothing is
        extracted. Otherwise the archive is read with the `tarfile` module;
        if that raises tarfile.ReadError, `dst` is created with
        check_create_folder() and the system `tar` command is run instead
        (its failure propagates as subprocess.CalledProcessError).
        """
        self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True)

        # check if file is already unzipped, skip
        if isdir(dst) and not force_unzip:
            self.output("%s is already unzipped." % scene, normal=True, arrow=True)
            return

        try:
            # The context manager closes the archive even when extraction
            # fails part-way, so the file handle is never leaked.
            with tarfile.open(src, "r") as tar:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this on archives from a trusted
                # source.
                tar.extractall(path=dst)
        except tarfile.ReadError:
            # Fall back to the external tar binary, which needs dst to exist.
            check_create_folder(dst)
            subprocess.check_call(["tar", "-xf", src, "-C", dst])
Example #10
0
    def download(self, scenes, bands=None):
        """
        Fetch each scene: band by band from Amazon S3 when bands are given,
        otherwise as a whole from Google Storage.

        @params
        scenes - A list of sceneIDs
        bands - A list of bands (optional)

        Returns True after all scenes were handled. Raises Exception when
        `scenes` is not a list, or when `bands` is truthy but not a list
        (checked per scene, so never raised for an empty scene list).
        """

        if not isinstance(scenes, list):
            raise Exception('Expected sceneIDs list')

        for scene_id in scenes:
            # No bands requested: take the full scene from Google Storage.
            if not bands:
                self.google_storage(scene_id, self.download_dir)
                continue

            if not isinstance(bands, list):
                raise Exception('Expected bands list')

            # Each scene gets its own folder for the individual band files.
            target = check_create_folder(join(self.download_dir, scene_id))
            for band in bands:
                self.amazon_s3(scene_id, band, target)

        return True
Example #11
0
    def _unzip(self, src, dst, scene, force_unzip=False):
        """ Unzip tar files

        src -- path of the tar archive to extract
        dst -- folder to extract into
        scene -- scene name, used only in the progress messages
        force_unzip -- extract even when `dst` already exists as a folder

        When `dst` already exists and `force_unzip` is false nothing is
        extracted. Otherwise the archive is read with the `tarfile` module;
        if that raises tarfile.ReadError, `dst` is created with
        check_create_folder() and the system `tar` command is run instead
        (its failure propagates as subprocess.CalledProcessError).
        """
        self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True)

        # check if file is already unzipped, skip
        if isdir(dst) and not force_unzip:
            self.output('%s is already unzipped.' % scene, normal=True, color='green', indent=1)
            return

        try:
            # The context manager closes the archive even when extraction
            # fails part-way, so the file handle is never leaked.
            with tarfile.open(src, 'r') as tar:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only use this on archives from a trusted
                # source.
                tar.extractall(path=dst)
        except tarfile.ReadError:
            # Fall back to the external tar binary, which needs dst to exist.
            check_create_folder(dst)
            subprocess.check_call(['tar', '-xf', src, '-C', dst])
Example #12
0
    def amazon_s3(self, scene, bands):
        """
        Amazon S3 downloader

        scene - scene ID; passed to self.scene_interpreter() and used as the
                name of the download sub-folder
        bands - list of bands to fetch. A QA band and 'MTL' are added to a
                private copy when missing; the caller's list is not modified.

        Returns the folder path returned by check_create_folder() for
        <download_dir>/<scene>.
        """

        sat = self.scene_interpreter(scene)

        # Work on a copy: appending to the caller's list made repeated calls
        # with the same list accumulate duplicate entries.
        wanted = list(bands)

        # Always grab MTL.txt and QA band if bands are specified. Either
        # spelling of the QA band counts as "already requested"; the original
        # test looked for 'BQA' but appended 'QA', so 'QA' was duplicated.
        if 'BQA' not in wanted and 'QA' not in wanted:
            wanted.append('QA')

        if 'MTL' not in wanted:
            wanted.append('MTL')

        # Resolve and check every URL before any folder is created or any
        # file is fetched.
        urls = []
        for band in wanted:
            # get url for the band
            url = self.amazon_s3_url(sat, band)

            # make sure it exists; the return value is not used, so
            # presumably this raises when the file is missing - verify
            # against remote_file_exists().
            self.remote_file_exists(url)
            urls.append(url)

        # create folder
        path = check_create_folder(join(self.download_dir, scene))

        self.output('Source: AWS S3', normal=True, arrow=True)
        for url in urls:
            self.fetch(url, path)

        return path
Example #13
0
    def __init__(self, path, bands=None, dst_path=None, verbose=False):
        """
        @params
        path - Path to the source scene file; the scene name is the part of
               its file name before the first '.'
        bands - The band sequence for the final image. Must be a python list;
                any other value falls back to [4, 3, 2]
        dst_path - The destination path; settings.PROCESSED_IMAGE is used
                   when it is not given
        verbose - Whether to show verbose output
        """
        # presumably get_file() returns the file-name component of the path;
        # verify against its definition.
        file_name = get_file(path)

        self.projection = {'init': 'epsg:3857'}
        self.dst_crs = {'init': u'epsg:3857'}
        self.scene = file_name.split('.')[0]
        if isinstance(bands, list):
            self.bands = bands
        else:
            self.bands = [4, 3, 2]

        # Landsat source path: the given path with the file name removed
        self.src_path = path.replace(file_name, '')

        # Build the per-scene destination folder if it doesn't exist
        output_root = dst_path or settings.PROCESSED_IMAGE
        self.dst_path = check_create_folder(join(output_root, self.scene))
        self.verbose = verbose

        # Path to the unzipped folder
        self.scene_path = join(self.src_path, self.scene)

        if self._check_if_zipped(path):
            self._unzip(join(self.src_path, file_name), self.scene_path, self.scene)

        # One full file path per requested band, in band order
        self.bands_path = [
            join(self.scene_path, self._get_full_filename(band))
            for band in self.bands
        ]
Example #14
0
    def __init__(self, path, bands=None, dst_path=None, verbose=False, force_unzip=False, bounds=None):
        """
        @params
        path - Path to the source scene file; the scene name is the part of
               its file name before the first '.'
        bands - The band sequence for the final image. Must be a python list;
                any other value falls back to [4, 3, 2]
        dst_path - The destination path; the current working directory is
                   used when it is not given
        verbose - Whether to show verbose output
        force_unzip - Passed through to _unzip()
        bounds - When truthy, stored on self.bounds and the scene is clipped
                 via self.clip() before the band paths are built
        """
        # presumably get_file() returns the file-name component of the path;
        # verify against its definition.
        file_name = get_file(path)

        self.projection = {'init': 'epsg:3857'}
        self.dst_crs = {'init': u'epsg:3857'}
        self.scene = file_name.split('.')[0]
        if isinstance(bands, list):
            self.bands = bands
        else:
            self.bands = [4, 3, 2]
        self.clipped = False

        # Landsat source path: the given path with the file name removed
        self.src_path = path.replace(file_name, '')

        # Build the per-scene destination folder if it doesn't exist
        output_root = dst_path or os.getcwd()
        self.dst_path = check_create_folder(join(output_root, self.scene))
        self.verbose = verbose

        # Path to the unzipped folder
        self.scene_path = join(self.src_path, self.scene)

        # Unzip files
        if self._check_if_zipped(path):
            self._unzip(join(self.src_path, file_name), self.scene_path, self.scene, force_unzip)

        # After clipping, band files are read from the folder clip() returns.
        if bounds:
            self.bounds = bounds
            self.scene_path = self.clip()
            self.clipped = True

        # One full file path per requested band, in band order
        self.bands_path = [
            join(self.scene_path, self._get_full_filename(band))
            for band in self.bands
        ]
Example #15
0
    def clip(self):
        """ Clip images based on bounds provided
        Implementation is borrowed from
        https://github.com/brendan-ward/rasterio/blob/e3687ce0ccf8ad92844c16d913a6482d5142cf48/rasterio/rio/convert.py

        Reads self.bands, self.bounds, self.scene and self.scene_path. Every
        band in self.bands plus 'QA' is cut to the window given by
        self.bounds (treated as WGS84 longitude/latitude and transformed to
        the raster's CRS) and written as GeoTIFF into <scene_path>/clipped.
        The scene's MTL file is copied alongside.

        Returns the path of the clipped folder. On IOError, exit() is called
        with the error text and status 1.
        """

        self.output("Clipping", normal=True)

        # create new folder for clipped images
        path = check_create_folder(join(self.scene_path, 'clipped'))

        try:
            # Copy so that appending 'QA' does not alter self.bands.
            temp_bands = copy(self.bands)
            temp_bands.append('QA')
            for band in temp_bands:
                band_name = self._get_full_filename(band)
                band_path = join(self.scene_path, band_name)

                self.output("Band %s" % band,
                            normal=True,
                            color='green',
                            indent=1)
                with rasterio.open(band_path) as src:
                    bounds = transform_bounds(
                        {
                            'proj': 'longlat',
                            'ellps': 'WGS84',
                            'datum': 'WGS84',
                            'no_defs': True
                        }, src.crs, *self.bounds)

                    # Requested area lies outside the raster: let the helper
                    # derive usable bounds from the raster's own extent.
                    if disjoint_bounds(bounds, src.bounds):
                        bounds = adjust_bounding_box(src.bounds, bounds)

                    window = src.window(*bounds)

                    # Same metadata as the source, resized to the window.
                    out_kwargs = src.meta.copy()
                    out_kwargs.update({
                        'driver': 'GTiff',
                        'height': window[0][1] - window[0][0],
                        'width': window[1][1] - window[1][0],
                        'transform': src.window_transform(window)
                    })

                    with rasterio.open(join(path, band_name), 'w',
                                       **out_kwargs) as out:
                        out.write(src.read(window=window))

            # Copy MTL to the clipped folder
            copyfile(join(self.scene_path, self.scene + '_MTL.txt'),
                     join(path, self.scene + '_MTL.txt'))

            return path

        except IOError as e:
            # Exceptions have no `.message` attribute on Python 3, so reading
            # it raised AttributeError and hid the real error; str(e) works
            # on every version.
            # NOTE(review): exit() is called with (message, code), so it is
            # presumably a project helper rather than the builtin - confirm.
            exit(str(e), 1)
Example #16
0
    def clip(self):
        """ Clip images based on bounds provided
        Implementation is borrowed from
        https://github.com/brendan-ward/rasterio/blob/e3687ce0ccf8ad92844c16d913a6482d5142cf48/rasterio/rio/convert.py

        Reads self.bands, self.bounds, self.scene and self.scene_path. Every
        band in self.bands plus 'QA' is cut to the window given by
        self.bounds (treated as WGS84 longitude/latitude and transformed to
        the raster's CRS) and written as GeoTIFF into <scene_path>/clipped.
        The scene's MTL file is copied alongside.

        Returns the path of the clipped folder. On IOError, exit() is called
        with the error text and status 1.
        """

        self.output("Clipping", normal=True)

        # create new folder for clipped images
        path = check_create_folder(join(self.scene_path, 'clipped'))

        try:
            # Copy so that appending 'QA' does not alter self.bands.
            temp_bands = copy(self.bands)
            temp_bands.append('QA')
            for band in temp_bands:
                band_name = self._get_full_filename(band)
                band_path = join(self.scene_path, band_name)

                self.output("Band %s" % band, normal=True, color='green', indent=1)
                with rasterio.open(band_path) as src:
                    bounds = transform_bounds(
                        {
                            'proj': 'longlat',
                            'ellps': 'WGS84',
                            'datum': 'WGS84',
                            'no_defs': True
                        },
                        src.crs,
                        *self.bounds
                    )

                    # Requested area lies outside the raster: let the helper
                    # derive usable bounds from the raster's own extent.
                    if disjoint_bounds(bounds, src.bounds):
                        bounds = adjust_bounding_box(src.bounds, bounds)

                    window = src.window(*bounds)

                    # Same metadata as the source, resized to the window.
                    out_kwargs = src.meta.copy()
                    out_kwargs.update({
                        'driver': 'GTiff',
                        'height': window[0][1] - window[0][0],
                        'width': window[1][1] - window[1][0],
                        'transform': src.window_transform(window)
                    })

                    with rasterio.open(join(path, band_name), 'w', **out_kwargs) as out:
                        out.write(src.read(window=window))

            # Copy MTL to the clipped folder
            copyfile(join(self.scene_path, self.scene + '_MTL.txt'), join(path, self.scene + '_MTL.txt'))

            return path

        except IOError as e:
            # Exceptions have no `.message` attribute on Python 3, so reading
            # it raised AttributeError and hid the real error; str(e) works
            # on every version.
            # NOTE(review): exit() is called with (message, code), so it is
            # presumably a project helper rather than the builtin - confirm.
            exit(str(e), 1)
Example #17
0
    def download(self, scenes, bands=None):
        """
        Download scenes from Google Storage or Amazon S3 if bands are provided

        :param scenes:
            A list of scene IDs
        :type scenes:
            List
        :param bands:
            A list of bands. Default value is None. The list passed in is
            never modified.
        :type bands:
            List

        :returns:
            (Dict) includes downloaded scenes as key and source as value (aws or google)

        :raises Exception:
            If ``scenes`` is not a list, or if ``bands`` is given but is not
            a list (raised when the first Amazon-eligible scene is reached).
        """

        if not isinstance(scenes, list):
            raise Exception('Expected sceneIDs list')

        output = {}

        for scene in scenes:
            # Return value unused here; presumably called to validate the
            # scene ID - verify against scene_interpreter().
            self.scene_interpreter(scene)

            # Amazon is used only when bands are requested and the character
            # at index 12 of the scene ID is a digit greater than 4.
            # NOTE(review): the original comment says "2015 or later";
            # presumably index 12 is the last digit of the acquisition year -
            # confirm this single-digit test matches that intent.
            if (bands and int(scene[12]) > 4):
                if not isinstance(bands, list):
                    raise Exception('Expected bands list')

                # Create a folder to download the specific bands into
                path = check_create_folder(join(self.download_dir, scene))
                try:
                    # Always grab MTL.txt and a QA band if bands are
                    # specified. Work on a copy: appending to the caller's
                    # list added another 'QA' for every scene, because the
                    # test looked for 'BQA' while 'QA' was what got appended.
                    wanted = list(bands)
                    if 'BQA' not in wanted and 'QA' not in wanted:
                        wanted.append('QA')

                    if 'MTL' not in wanted:
                        wanted.append('MTL')

                    for band in wanted:
                        self.amazon_s3(scene, band, path)
                    output[scene] = 'aws'
                except RemoteFileDoesntExist:
                    # Not available on Amazon: fetch the scene from Google.
                    self.google_storage(scene, self.download_dir)
                    output[scene] = 'google'
            else:
                self.google_storage(scene, self.download_dir)
                output[scene] = 'google'

        return output
    def download(self, scenes, bands=None):
        """
        Download scenes from Google Storage or Amazon S3 if bands are provided

        :param scenes:
            A list of scene IDs
        :type scenes:
            List
        :param bands:
            A list of bands. Default value is None. The list passed in is
            never modified.
        :type bands:
            List

        :returns:
            (Dict) includes downloaded scenes as key and source as value (aws or google)

        :raises Exception:
            If ``scenes`` is not a list, or if ``bands`` is given but is not
            a list (raised when the first Amazon-eligible scene is reached).
        """

        if not isinstance(scenes, list):
            raise Exception('Expected sceneIDs list')

        output = {}

        for scene in scenes:
            # Return value unused here; presumably called to validate the
            # scene ID - verify against scene_interpreter().
            self.scene_interpreter(scene)

            # Amazon is used only when bands are requested and the character
            # at index 12 of the scene ID is a digit greater than 4.
            # NOTE(review): the original comment says "2015 or later";
            # presumably index 12 is the last digit of the acquisition year -
            # confirm this single-digit test matches that intent.
            if (bands and int(scene[12]) > 4):
                if not isinstance(bands, list):
                    raise Exception('Expected bands list')

                # Create a folder to download the specific bands into
                path = check_create_folder(join(self.download_dir, scene))
                try:
                    # Always grab MTL.txt and a QA band if bands are
                    # specified. Work on a copy: appending to the caller's
                    # list added another 'QA' for every scene, because the
                    # test looked for 'BQA' while 'QA' was what got appended.
                    wanted = list(bands)
                    if 'BQA' not in wanted and 'QA' not in wanted:
                        wanted.append('QA')

                    if 'MTL' not in wanted:
                        wanted.append('MTL')

                    for band in wanted:
                        self.amazon_s3(scene, band, path)
                    output[scene] = 'aws'
                except RemoteFileDoesntExist:
                    # Not available on Amazon: fetch the scene from Google.
                    self.google_storage(scene, self.download_dir)
                    output[scene] = 'google'
            else:
                self.google_storage(scene, self.download_dir)
                output[scene] = 'google'

        return output
Example #19
0
    def __init__(self,
                 path,
                 bands=None,
                 dst_path=None,
                 verbose=False,
                 force_unzip=False,
                 bounds=None):
        """
        @params
        path - Path to the source scene file; the scene name is the part of
               its file name before the first '.'
        bands - The band sequence for the final image. Must be a python list;
                any other value falls back to [4, 3, 2]
        dst_path - The destination path; the current working directory is
                   used when it is not given
        verbose - Whether to show verbose output
        force_unzip - Passed through to _unzip()
        bounds - When truthy, stored on self.bounds and the scene is clipped
                 via self.clip() before the band paths are built
        """
        # presumably get_file() returns the file-name component of the path;
        # verify against its definition.
        file_name = get_file(path)

        self.projection = {'init': 'epsg:3857'}
        self.dst_crs = {'init': u'epsg:3857'}
        self.scene = file_name.split('.')[0]
        if isinstance(bands, list):
            self.bands = bands
        else:
            self.bands = [4, 3, 2]
        self.clipped = False

        # Landsat source path: the given path with the file name removed
        self.src_path = path.replace(file_name, '')

        # Build the per-scene destination folder if it doesn't exist
        output_root = dst_path or os.getcwd()
        self.dst_path = check_create_folder(join(output_root, self.scene))
        self.verbose = verbose

        # Path to the unzipped folder
        self.scene_path = join(self.src_path, self.scene)

        # Unzip files
        if self._check_if_zipped(path):
            archive = join(self.src_path, file_name)
            self._unzip(archive, self.scene_path, self.scene, force_unzip)

        # After clipping, band files are read from the folder clip() returns.
        if bounds:
            self.bounds = bounds
            self.scene_path = self.clip()
            self.clipped = True

        # One full file path per requested band, in band order
        self.bands_path = [
            join(self.scene_path, self._get_full_filename(band))
            for band in self.bands
        ]
Example #20
0
    def __init__(self, path, bands=None, dst_path=None, verbose=False, force_unzip=False):
        """
        @params
        path - Path to the source scene file; the scene name is the part of
               its file name before the first "."
        bands - The band sequence for the final image. Must be a python list;
                any other value falls back to [4, 3, 2]
        dst_path - The destination path; settings.PROCESSED_IMAGE is used
                   when it is not given
        verbose - Whether to show verbose output
        force_unzip - Passed through to _unzip()
        """
        # presumably get_file() returns the file-name component of the path;
        # verify against its definition.
        file_name = get_file(path)

        self.projection = {"init": "epsg:3857"}
        self.dst_crs = {"init": u"epsg:3857"}
        self.scene = file_name.split(".")[0]
        if isinstance(bands, list):
            self.bands = bands
        else:
            self.bands = [4, 3, 2]

        # Landsat source path: the given path with the file name removed
        self.src_path = path.replace(file_name, "")

        # Build the per-scene destination folder if it doesn't exist
        output_root = dst_path or settings.PROCESSED_IMAGE
        self.dst_path = check_create_folder(join(output_root, self.scene))
        self.verbose = verbose

        # Path to the unzipped folder
        self.scene_path = join(self.src_path, self.scene)

        if self._check_if_zipped(path):
            self._unzip(join(self.src_path, file_name), self.scene_path, self.scene, force_unzip)

        # One full file path per requested band, in band order
        self.bands_path = [
            join(self.scene_path, self._get_full_filename(band))
            for band in self.bands
        ]
Example #21
0
def download_all_files(species_ini_file,
                       base_download_folder,
                       secrets_location=None):
    """
    Reads config INI file for a species, which contains the files (and
    their locations, or URLs) that must be loaded for this species, and calls
    the download_from_url function for each of those files.

    The 'GO', 'KEGG' and 'DO' sections are each processed only when the
    section exists and its DOWNLOAD option is true.

    Arguments:
    species_ini_file -- Path to the particular species INI file. This
    is a string.

    base_download_folder -- A string. Path of the root folder where download
    folders for other species will be created and where common downloaded files
    will be saved. This is stored in the main configuration INI file.

    secrets_location -- Optional string of location of the secrets INI
    file. Required when the 'DO' section is enabled.

    Returns:
    Nothing, just downloads and saves files to download_folder. Logs an
    error and calls sys.exit(1) when the 'DO' section is enabled but the
    secrets file is missing or has no 'OMIM API secrets' section.

    """
    check_create_folder(base_download_folder)

    # NOTE(review): SafeConfigParser was removed from the standard library in
    # Python 3.12 - confirm the import at the top of this file still resolves
    # on the interpreter this runs on.
    species_file = SafeConfigParser()
    species_file.read(species_ini_file)

    sd_folder = species_file.get('species_info', 'SPECIES_DOWNLOAD_FOLDER')
    check_create_folder(sd_folder)

    if _section_enabled(species_file, 'GO'):
        _download_go_files(species_file, sd_folder, base_download_folder)

    if _section_enabled(species_file, 'KEGG'):
        _download_kegg_files(species_file, sd_folder, base_download_folder)

    if _section_enabled(species_file, 'DO'):
        _download_do_files(species_file, sd_folder, secrets_location)


def _section_enabled(species_file, section):
    """Return True when `section` exists and its DOWNLOAD option is true."""
    return (species_file.has_section(section) and
            species_file.getboolean(section, 'DOWNLOAD'))


def _download_go_files(species_file, sd_folder, base_download_folder):
    """Download the GO OBO file and every GO association file."""
    # The ontology file is shared, so it goes to the base folder.
    obo_url = species_file.get('GO', 'GO_OBO_URL')
    download_from_url(obo_url, base_download_folder)

    go_dir = os.path.join(sd_folder, 'GO')
    check_create_folder(go_dir)

    # Comma-separated list; all whitespace (including newlines) is dropped
    # before splitting.
    goa_urls = species_file.get('GO', 'ASSOC_FILE_URLS')
    goa_urls = re.sub(r'\s', '', goa_urls).split(',')

    for goa_url in goa_urls:
        download_from_url(goa_url, go_dir)


def _download_kegg_files(species_file, sd_folder, base_download_folder):
    """Download the KEGG database info file and the configured KEGG sets."""
    kegg_root_url = species_file.get('KEGG', 'KEGG_ROOT_URL')

    kegg_info_url = kegg_root_url + species_file.get('KEGG', 'DB_INFO_URL')

    download_from_url(kegg_info_url, base_download_folder, 'kegg_db_info')

    kegg_dir = os.path.join(sd_folder, 'KEGG')
    check_create_folder(kegg_dir)

    # Each comma-separated entry is appended to the KEGG root URL.
    ks_urls = species_file.get('KEGG', 'SETS_TO_DOWNLOAD')
    kegg_urls = [kegg_root_url + url.strip() for url in ks_urls.split(',')]

    for kegg_url in kegg_urls:
        download_from_url(kegg_url, kegg_dir)


def _download_do_files(species_file, sd_folder, secrets_location):
    """Download the Disease Ontology files, including the keyed genemap."""
    do_dir = os.path.join(sd_folder, 'DO')
    check_create_folder(do_dir)

    obo_url = species_file.get('DO', 'DO_OBO_URL')
    download_from_url(obo_url, do_dir)

    mim2gene_url = species_file.get('DO', 'MIM2GENE_URL')
    download_from_url(mim2gene_url, do_dir)

    # The genemap_file needs a special Secret Key, which must be
    # retrieved from the secrets file if the user wishes to download
    # the genemap_file
    genemap_url = species_file.get('DO', 'GENEMAP_URL')

    if not secrets_location:
        logger.error('Secrets file was not passed to '
                     'download_all_files() function. A secrets file '
                     'containing an OMIM API secret key is required to'
                     ' download the genemap file to process Disease '
                     'Ontology.')
        sys.exit(1)

    secrets_file = SafeConfigParser()
    secrets_file.read(secrets_location)

    if not secrets_file.has_section('OMIM API secrets'):
        # Literal boundaries fixed: the message used to read
        # "section,which" and "file  to".
        logger.error('Secrets file has no "OMIM API secrets" section, '
                     'which is required to download the genemap file '
                     'to process Disease Ontology.')
        sys.exit(1)

    omim_secret_key = secrets_file.get('OMIM API secrets', 'SECRET_KEY')
    genemap_url = genemap_url.replace('<SecretKey>', omim_secret_key)

    download_from_url(genemap_url, do_dir)