Example #1
class DataFromUSGS(Runnable):
    def __init__(self):
        self.crawler = USGSCrawler()
        self.extractor = TiffExtractor()
        self.dumper = PRISMDumper()
        self.buffer: List[bytes] = list()

    def run(self, end_clause: int = 210):
        """
        Crawling routine.
        :param end_clause: number of days back to crawl, default=210
        :return: None
        """

        current_date = datetime.now(timezone.utc).date()
        end_date = current_date - timedelta(days=end_clause)

        # TODO: stop and continue
        with Connection() as conn:
            cur = conn.cursor()
            cur.execute('select date from usgs_info')
            exist_list = cur.fetchall()
            cur.close()

        date = current_date - timedelta(days=7)  # the website updates weekly
        # snap the start date onto the weekly release schedule, anchored at 2019-07-30
        diff_date = date - date_parser.parse('20190730').date()
        date = date - timedelta(days=diff_date.days % 7)

        while date >= end_date:
            logger.info(f'[fetch]{date}')
            # skip dates that are already in the database
            if (date,) in exist_list:
                logger.info(f'skip: {date}')
                date = date - timedelta(days=7)
                continue
            saved_zip_path = self.crawler.crawl(date)
            if saved_zip_path is None:
                logger.info(f'{date} not found, skipped')

            else:
                zf = zipfile.ZipFile(saved_zip_path)
                tif_file_name = None  # guard against archives with no matching tif
                for file in zf.namelist():
                    if file.split('.')[-4] == 'VI_NDVI' and file.split('.')[-1] == 'tif':
                        zf.extract(file, os.path.split(saved_zip_path)[0])
                        tif_file_name = file
                zf.close()
                if tif_file_name is not None:
                    tif_path = os.path.join(os.path.split(saved_zip_path)[0], tif_file_name)
                    unflattened = self.extractor.extract(tif_path)
                    if unflattened is not None:
                        self.dumper.insert(date, unflattened, 'usgs')

                    # clean up
                    os.remove(saved_zip_path)
                    os.remove(tif_path)

            # move on to the previous weekly release
            date = date - timedelta(days=7)
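
A side note on the date arithmetic near the top of run: the two lines around date_parser snap the start date onto the site's weekly release schedule, anchored at the known release day 2019-07-30. A minimal standalone sketch of that alignment (the anchor date and 7-day period come from the example; the function name is illustrative):

from datetime import date, timedelta

ANCHOR = date(2019, 7, 30)  # known weekly release day, from the example above

def align_to_release(d: date, period_days: int = 7) -> date:
    # snap d back to the most recent date on the release grid
    return d - timedelta(days=(d - ANCHOR).days % period_days)

# 2019-08-08 is 9 days past the anchor, so it snaps back 2 days to 2019-08-06
assert align_to_release(date(2019, 8, 8)) == date(2019, 8, 6)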
Example #2
                cur.execute(PRISMDumper.INSERT_INFOS[var_type], (date, 1))
            conn.commit()
            cur.close()

    @staticmethod
    def record_generator(date: datetime.date, _data):
        """Yield a (date, gid, value) tuple for each cell of a flat grid array."""
        for gid, val in enumerate(_data.tolist()):
            yield (date, gid, val)


if __name__ == '__main__':
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler())

    crawler = USGSCrawler()
    extractor = TiffExtractor()
    dumper = PRISMDumper()
    target_time = "20190806"

    zip_file_path = crawler.crawl(datetime.datetime.strptime(target_time, '%Y%m%d'))
    zf = zipfile.ZipFile(zip_file_path)
    tif_file_name = None  # guard against archives with no matching tif
    for file in zf.namelist():
        if file.split('.')[-4] == 'VI_NDVI' and file.split('.')[-1] == 'tif':
            zf.extract(file, os.path.split(zip_file_path)[0])
            tif_file_name = file
    zf.close()

    if tif_file_name is not None:
        tif_path = os.path.join(os.path.split(zip_file_path)[0], tif_file_name)
        data = extractor.extract(tif_path)
        dumper.insert(datetime.datetime.strptime(target_time, '%Y%m%d'), data, var_type='usgs')
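
record_generator above turns a flat grid array into (date, gid, value) tuples lazily, which pairs naturally with a batched insert. A sketch of how it might be consumed with a psycopg2-style cursor; the table and column names here are assumptions for illustration (the real statements live in PRISMDumper.INSERT_INFOS):

import numpy as np

ndvi = np.array([0.12, 0.34, float('nan')])  # stand-in for TiffExtractor output
day = datetime.datetime.strptime('20190806', '%Y%m%d').date()

with Connection() as conn:  # the same Connection used in Example #1
    cur = conn.cursor()
    cur.executemany('INSERT INTO usgs_ndvi (date, gid, value) VALUES (%s, %s, %s)',
                    PRISMDumper.record_generator(day, ndvi))
    conn.commit()
    cur.close()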
Example #3
class DataFromNASAGrace(Runnable):
    """
    This class is responsible for collecting data from NASAGrace
    """

    def __init__(self):
        self.crawler = SoilMoisCrawler()
        self.extractor = TiffExtractor()
        self.dumper = SoilMoisDumper()
        self.end_time = datetime.strptime('20160104', '%Y%m%d')

    def run(self, begin_time_str: str = None) -> None:
        """
        The entry point referenced by the task manager.
        Crawl, extract and dump data from NASAGrace.
        :param begin_time_str: the most recent date to crawl back from,
            formatted %Y%m%d; defaults to today
        :return: None
        """
        # resolve the default here rather than in the signature: Python
        # evaluates default arguments once at definition time, so a long-lived
        # process would otherwise keep a stale date
        if begin_time_str is None:
            begin_time_str = datetime.today().strftime('%Y%m%d')
        begin_time = datetime.strptime(begin_time_str, '%Y%m%d')
        exists_set = self.crawler.get_exists()
        # crawl backwards, one release at a time, from begin_time to end_time
        current_time = begin_time
        found_week_start = False
        while current_time > self.end_time:
            formatted_date_stamp = current_time.strftime('%Y%m%d')
            logger.info(f'start crawling for date {formatted_date_stamp}')

            try:
                if not found_week_start:
                    # walk back one day at a time until the most recent day with data is found
                    file_path = self.crawler.crawl(current_time) if (current_time,) not in exists_set else None
                    if file_path is not None:
                        self.extract_and_dump(file_path)
                        found_week_start = True
                    else:
                        current_time -= timedelta(days=1)
                else:
                    # from the week start, data is released every 7 days
                    current_time -= timedelta(days=7)
                    if (current_time,) not in exists_set:
                        file_path = self.crawler.crawl(current_time)
                        if file_path is not None:
                            self.extract_and_dump(file_path)
                    else:
                        logger.info(f'{current_time.strftime("%Y%m%d")} existed, skipped')
            finally:
                # remove intermediate tif files whether or not the crawl succeeded
                for tif_file in glob.glob(os.path.join(SOIL_MOIS_DATA_DIR, "*.tif")):
                    if 'res' not in tif_file and 'masked' not in tif_file:
                        os.remove(tif_file)
                        logger.info(f"file: {tif_file} removed")

        # if there are no files left, delete the directory
        for root, dirs, files in os.walk(SOIL_MOIS_DATA_DIR, topdown=False):
            if not files and not dirs:
                os.rmdir(root)
        logger.info(f'processing of all data from {begin_time_str} back to {self.end_time.strftime("%Y%m%d")} finished')

    def extract_and_dump(self, file_path: str) -> None:
        """
        Using the file_path provided to extract the information needed and dump it into the database
        :param file_path: the data to be processed
        :return: None
        """
        data = self.extractor.extract(file_path)
        formatted_date_stamp = os.path.basename(file_path).split('.')[0]
        logger.info(f'{formatted_date_stamp} extraction finished')
        self.dumper.insert(formatted_date_stamp, data)
        logger.info(f'{formatted_date_stamp} dumping finished')
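
run resolves its begin_time_str default inside the body for a reason worth spelling out: Python evaluates a default expression once, at function definition time, not on every call. A minimal demonstration of the pitfall (function names are illustrative):

from datetime import datetime

def stale(ts=datetime.today().strftime('%Y%m%d')):
    return ts  # frozen when the module is imported; wrong after midnight in a long-lived process

def fresh(ts=None):
    return ts if ts is not None else datetime.today().strftime('%Y%m%d')  # evaluated per call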
Example #6
                # map the source's no-data sentinels to NaN
                val = float('NaN') if val in [-999, -9999] else val
                try:
                    cur.execute(
                        self.INSERT_SOIL_MOISTURE,
                        (gid, datetime.datetime.strptime(date_str,
                                                         "%Y%m%d"), val))
                    self.inserted_count += cur.rowcount
                    conn.commit()
                except Exception:
                    logger.error("error: " + traceback.format_exc())

            logger.info(
                f'{date_str} finished, total inserted {self.inserted_count}')
            cur.close()


if __name__ == '__main__':
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler())

    crawler = SoilMoisCrawler()
    extractor = TiffExtractor()
    dumper = SoilMoisDumper()
    target_time = "20131230"

    crawled_file_path = crawler.crawl(
        datetime.datetime.strptime(target_time, SoilMoisDumper.TIME_FORMAT))
    if crawled_file_path is not None:
        data = extractor.extract(crawled_file_path)
        dumper.insert(target_time, data)
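
The insert loop in the fragment above commits once per row, which is safe but slow for a full grid. A common alternative is to batch the rows and commit once; a sketch under the same names (data, date_str, cur, conn, self.INSERT_SOIL_MOISTURE, and logger are taken from the fragment, not redefined here):

rows = [(gid, datetime.datetime.strptime(date_str, "%Y%m%d"),
         float('NaN') if val in (-999, -9999) else val)
        for gid, val in enumerate(data.tolist())]
try:
    cur.executemany(self.INSERT_SOIL_MOISTURE, rows)
    conn.commit()
except Exception:
    conn.rollback()
    logger.error("error: " + traceback.format_exc())

The trade-off: one bad row rolls back the whole batch, whereas the per-row version keeps every row that succeeded.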