def download_m2m(directory, username=None, products='STANDARD',
                 dataset='ARD_TILE', N=50000, temporal=None, batch=1000,
                 threads=1, maxcloudcover=80, fields=None):
    """Search for and download Landsat Level-2 products to a local directory

    Args:
        directory: Relative path to local directory (will be created)
        username: ERS Username (with full M2M download access) [Optional]
        dataset: EarthExplorer Catalog datasetName [Default: ARD_TILE]
        N: Maximum number of search results to return
        products: Comma-delimited list of download products [Default: STANDARD]
        temporal: Search date image acquired [Format: %Y-%m-%d or %Y-%m-%d,%Y-%m-%d]
        batch: How many URLs to request before working on downloads
        threads: Number of download threads to launch in parallel
        maxcloudcover: Maximum percent cloud cover of search results [Default: 80]
        fields: JSON dataset-specific metadata fields (see #additionalCriteria)
    """
    # Example argument values:
    # username = '******'
    # dataset = 'ARD_TILE'
    # N = 50000
    # temporal = "2000-01-01,2020-12-31"
    # fields = {"Grid Region": "CU", "Horizontal": 11, "Vertical": 9}
    # directory = u'/Users/coloury/Landsat_test'
    # threads = 4
    # products = 'SR'
    api_key = EarthExplorer.login(**credentials(username))

    # create the target directory before the log file is opened inside it
    if not os.path.exists(directory):
        os.makedirs(directory)

    log_path = '%s/download.log' % directory
    if os.path.exists(log_path):
        os.remove(log_path)
    logging.basicConfig(filename=log_path,
                        filemode='w',
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info('Downloading starts')

    datasets = EarthExplorer.datasets(apiKey=api_key,
                                      datasetName=dataset,
                                      publicOnly=False)
    matches = [m['datasetName'] for m in datasets]
    if len(matches) > 1 and not any([m == dataset for m in matches]):
        message(['Multiple dataset matches found, please select only 1: '] +
                ['* [%s]: %s' % (m['datasetName'], m['datasetFullName'])
                 for m in datasets],
                stop=True)

    search = dict(apiKey=api_key, datasetName=dataset, maxResults=N)
    if fields:
        search.update(
            EarthExplorer.additionalCriteriaValues(api_key, dataset, fields))
    if temporal:
        search.update(EarthExplorer.temporalCriteria(temporal=temporal))
    search['maxCloudCover'] = maxcloudcover

    results = EarthExplorer.search(**search)
    n_results = results['totalHits']
    product_ids = results['results']
    message('Total search results: %d \n' % n_results)
    logger.info('Total search results: %d' % n_results)

    if len(product_ids) < 1:
        logger.error('No valid products returned')
        return

    # current users are only allowed to send 1 request at a time
    download_pool = FixedThreadPoolExecutor(threads)
    for pids in product_ids:
        entities = pids['entityId']
        download_pool.submit(download_executor, api_key, dataset, entities,
                             products, directory, logger)
    download_pool.drain()
    download_pool.close()
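

# A minimal usage sketch for download_m2m (hypothetical driver, not part of the
# module above). The directory, username, date range and tile-grid fields below
# are placeholder values; an ERS account with M2M access must be available to
# the credentials() helper for the call to succeed.
if __name__ == '__main__':
    download_m2m(directory='downloads/ard_tiles',   # created if missing
                 username='my_ers_user',            # placeholder ERS username
                 products='SR',                     # surface-reflectance bundle
                 dataset='ARD_TILE',
                 temporal='2019-01-01,2019-12-31',
                 threads=4,
                 maxcloudcover=60,
                 fields={'Grid Region': 'CU', 'Horizontal': 11, 'Vertical': 9})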


class PClientV1():
    def __init__(self, api_key, config=None):
        self.api_key = api_key

        if config is None:
            # built-in defaults, used when no config is supplied
            self.max_clouds_initial = 0.25
            self.max_clouds = 0.01
            self.max_shadows = 0.01
            self.max_bad_pixels = 0.25
            self.max_nodata = 0.25
            self.maximgs = 1
            self.catalog_path = "catalog/"
            self.s3_catalog_bucket = "azavea-africa-test"
            self.s3_catalog_prefix = "planet/images"
            self.output_filename = "output.csv"
            self.output_encoding = "utf-8"
            self.with_analytic = False
            self.with_analytic_xml = False
            self.with_visual = False
            self.local_mode = False
            self.s3_only = False
            self.with_immediate_cleanup = False
            threads_number = 5
        else:
            imagery_config = config['imagery']
            self.max_clouds_initial = float(
                imagery_config['max_clouds_initial']
            )  # max initial proportion of pixels that are clouds
            self.max_clouds = float(
                imagery_config['max_clouds']
            )  # max proportion of clouds detected by filter
            self.max_shadows = float(
                imagery_config['max_shadows']
            )  # max proportion of cloud shadows detected by filter
            self.max_bad_pixels = float(
                imagery_config['max_bad_pixels']
            )  # max proportion of bad pixels (transmission errors, etc.)
            self.max_nodata = float(
                imagery_config['max_nodata'])  # max nodata values per cellgrid
            self.maximgs = int(imagery_config['maximgs'])  # 15 #10 #20
            self.output_encoding = imagery_config['output_encoding']
            self.output_filename = imagery_config['output_filename']
            self.output_filename_csv = imagery_config['output_filename_csv']
            self.catalog_path = imagery_config['catalog_path']
            self.s3_catalog_bucket = imagery_config['s3_catalog_bucket']
            self.s3_catalog_prefix = imagery_config['s3_catalog_prefix']
            self.with_analytic = json.loads(
                imagery_config['with_analytic'].lower())
            self.with_analytic_xml = json.loads(
                imagery_config['with_analytic_xml'].lower())
            self.with_visual = json.loads(
                imagery_config['with_visual'].lower())
            self.with_immediate_cleanup = json.loads(
                imagery_config['with_immediate_cleanup'].lower())
            self.local_mode = json.loads(imagery_config['local_mode'].lower())
            self.s3_only = json.loads(imagery_config['s3_only'].lower())
            # planet has a limitation of 5 sec per key (search queries)
            threads_number = imagery_config['threads']
            if threads_number == 'default':
                threads_number = multiprocessing.cpu_count() * 2 + 1
            else:
                threads_number = int(threads_number)

        self.products = {
            'analytic_sr': {
                'item_type': 'PSScene4Band',
                'asset_type': 'analytic_sr',
                'ext': 'tif'
            },
            'analytic': {
                'item_type': 'PSScene4Band',
                'asset_type': 'analytic',
                'ext': 'tif'
            },
            'analytic_xml': {
                'item_type': 'PSScene4Band',
                'asset_type': 'analytic_xml',
                'ext': 'xml'
            },
            'visual': {
                'item_type': 'PSScene3Band',
                'asset_type': 'visual',
                'ext': 'tif'
            }
        }
        self.client = api.ClientV1(api_key=self.api_key)
        self.s3client = boto3.client('s3')
        self.transfer = S3Transfer(self.s3client,
                                   TransferConfig(use_threads=False))
        self.transfer_config = TransferConfig(use_threads=False)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        self.secondary_uploads_executor = FixedThreadPoolExecutor(
            size=threads_number)

    # start_date and end_date are present here because they are part of a row
    # retrieved from psql / a tiff file
    def set_filters_sr(self,
                       aoi,
                       start_date='2017-12-15T00:00:00.000Z',
                       end_date='2018-03-15T00:00:00.000Z',
                       id=''):
        # add an asset_filter for only those scenes that have an analytic_sr
        # asset available
        date_filter = {
            'type': 'DateRangeFilter',
            'field_name': 'acquired',
            'config': {
                'gte': start_date,
                'lte': end_date
            }
        }
        cloud_filter = {
            'type': 'RangeFilter',
            'field_name': 'cloud_cover',
            'config': {
                'lte': self.max_clouds_initial  # only for UDM-detected clouds
            }
        }
        bad_pixel_filter = {
            'type': 'RangeFilter',
            'field_name': 'anomalous_pixels',
            'config': {
                'lte': self.max_bad_pixels
            }
        }
        geometry_filter = {
            "type": "GeometryFilter",
            "field_name": "geometry",
            "config": aoi
        }
        asset_filter = {
            "type": "PermissionFilter",
            "config": ["assets.analytic_sr:download"]
        }
        string_filter = {
            "type": "StringInFilter",
            "field_name": "id",
            "config": [id]
        }
        filters_list = [
            date_filter, cloud_filter, geometry_filter, bad_pixel_filter,
            asset_filter
        ]
        if id != '':
            filters_list.append(string_filter)
        # combine filters:
        query = {'type': 'AndFilter', 'config': filters_list}
        return query

    def set_filters_id(self, id=''):
        asset_filter = {
            "type": "PermissionFilter",
            "config": ["assets.analytic_sr:download"]
        }
        string_filter = {
            "type": "StringInFilter",
            "field_name": "id",
            "config": [id]
        }
        filters_list = [asset_filter, string_filter]
        # combine filters:
        query = {'type': 'AndFilter', 'config': filters_list}
        return query

    @retry(tries=10, delay=2, backoff=2)
    def request_intersecting_scenes(self, query):
        # build the request
        item_types = ['PSScene4Band']  # params["lst_item_types"]
        request = api.filters.build_search_request(query, item_types)
        # post the request
        results = self.client.quick_search(request)
        return results

    # returns a full URI here
    def download_localfs_generic(self,
                                 scene_id,
                                 season='',
                                 asset_type='analytic_sr',
                                 ext='tif',
                                 item_type='PSScene4Band'):
        output_file = "{}{}/{}/{}.{}".format(self.catalog_path, asset_type,
                                             season, scene_id, ext)
        if not os.path.exists(output_file):
            # activation & download
            session = requests.Session()
            session.auth = (self.api_key, '')
            assets_uri = (
                "https://api.planet.com/data/v1/item-types/{}/items/{}/assets/"
            ).format(item_type, scene_id)
            assets_query_result = session.get(assets_uri)
            self.logger.info(assets_query_result.status_code)
            item_activation_json = assets_query_result.json()
            # self.logger.info(item_activation_json)
            item_activation_url = item_activation_json[asset_type]["_links"][
                "activate"]
            response = session.post(item_activation_url)
            self.logger.info(response.status_code)
            # poll until the asset is activated (HTTP 204)
            while response.status_code != 204:
                time.sleep(30)
                response = session.post(item_activation_url)
                self.logger.info(response.status_code)
            item_url = 'https://api.planet.com/data/v1/item-types/{}/items/{}/assets/'.format(
                item_type, scene_id)
            result = requests.get(item_url,
                                  auth=HTTPBasicAuth(self.api_key, ''))
            if result.status_code != 200:
                self.logger.info(result.content.decode('utf-8'))
            download_url = result.json()[asset_type]['location']
            # download
            with urllib.request.urlopen(download_url) as response, open(
                    output_file, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
        return output_file

    # TODO: lots of copy pasting happens here, abstract over it?

    # returns a full S3 URI here
    def download_s3_generic(self,
                            scene_id,
                            season='',
                            asset_type='analytic_sr',
                            ext='tif',
                            item_type='PSScene4Band'):
        output_key = "{}/{}/{}/{}.{}".format(self.s3_catalog_prefix,
                                             asset_type, season, scene_id, ext)
        result_path = 's3://{}/{}'.format(self.s3_catalog_bucket, output_key)
        try:
            # if the object already exists on S3, skip the download entirely
            self.s3client.head_object(Bucket=self.s3_catalog_bucket,
                                      Key=output_key)
        except botocore.exceptions.ClientError:
            self.logger.exception('Error Encountered')
            self.logger.info("Downloading {}...".format(scene_id))
            # activation & download
            session = requests.Session()
            session.auth = (self.api_key, '')
            assets_uri = (
                "https://api.planet.com/data/v1/item-types/{}/items/{}/assets/"
            ).format(item_type, scene_id)
            assets_query_result = session.get(assets_uri)
            self.logger.info(assets_query_result.status_code)
            item_activation_json = assets_query_result.json()
            # self.logger.info(item_activation_json)
            item_activation_url = item_activation_json[asset_type]["_links"][
                "activate"]
            response = session.post(item_activation_url)
            self.logger.info(response.status_code)
            # poll until the asset is activated (HTTP 204)
            while response.status_code != 204:
                time.sleep(30)
                response = session.post(item_activation_url)
                self.logger.info(response.status_code)
            item_url = 'https://api.planet.com/data/v1/item-types/{}/items/{}/assets/'.format(
                item_type, scene_id)
            result = requests.get(item_url,
                                  auth=HTTPBasicAuth(self.api_key, ''))
            if result.status_code != 200:
                self.logger.info(result.content.decode('utf-8'))
            download_url = result.json()[asset_type]['location']
            # upload to S3 directly from the response
            with urllib.request.urlopen(download_url) as response:
                self.s3client.put_object(Body=response.read(),
                                         Bucket=self.s3_catalog_bucket,
                                         Key=output_key)
            # finished
            self.logger.info("Downloaded {}".format(scene_id))
        return result_path

    # returns a full URI here
    def download_localfs_product(self, product_type, scene_id, season=''):
        cfg = self.products[product_type]
        return self.download_localfs_generic(scene_id=scene_id,
                                             season=season,
                                             asset_type=cfg['asset_type'],
                                             ext=cfg['ext'],
                                             item_type=cfg['item_type'])

    # returns a full S3 URI here
    def download_s3_product(self, product_type, scene_id, season=''):
        cfg = self.products[product_type]
        return self.download_s3_generic(scene_id=scene_id,
                                        season=season,
                                        asset_type=cfg['asset_type'],
                                        ext=cfg['ext'],
                                        item_type=cfg['item_type'])

    def download_localfs_analytic_sr(self, scene_id, season=''):
        return self.download_localfs_product('analytic_sr', scene_id, season)

    def download_s3_analytic_sr(self, scene_id, season=''):
        return self.download_s3_product('analytic_sr', scene_id, season)

    def download_localfs_analytic(self, scene_id, season=''):
        return self.download_localfs_product('analytic', scene_id, season)

    def download_s3_analytic(self, scene_id, season=''):
        return self.download_s3_product('analytic', scene_id, season)

    def download_localfs_analytic_xml(self, scene_id, season=''):
        return self.download_localfs_product('analytic_xml', scene_id, season)

    def download_s3_analytic_xml(self, scene_id, season=''):
        return self.download_s3_product('analytic_xml', scene_id, season)

    def download_localfs_visual(self, scene_id, season=''):
        return self.download_localfs_product('visual', scene_id, season)

    def download_s3_visual(self, scene_id, season=''):
        return self.download_s3_product('visual', scene_id, season)

    def upload_s3_csv(self):
        result = ''
        if not self.local_mode:
            output_key = "{}/{}".format(self.s3_catalog_prefix,
                                        self.output_filename.split('/')[-1])
            result = 's3://{}/{}'.format(self.s3_catalog_bucket, output_key)
            self.transfer.upload_file(self.output_filename,
                                      self.s3_catalog_bucket, output_key)
        else:
            result = self.output_filename
        return result

    def upload_s3_csv_csv(self):
        output_key = "{}/{}".format(self.s3_catalog_prefix,
                                    self.output_filename_csv.split('/')[-1])
        result = 's3://{}/{}'.format(self.s3_catalog_bucket, output_key)
        self.transfer.upload_file(self.output_filename_csv,
                                  self.s3_catalog_bucket, output_key)
        return result

    def download_localfs_s3_product(self,
                                    scene_id,
                                    season='',
                                    product_type='analytic_sr'):
        cfg = self.products[product_type]
        asset_type = cfg['asset_type']
        ext = cfg['ext']
        item_type = cfg['item_type']
        filepath = ''
        output_key = "{}/{}/{}/{}.{}".format(self.s3_catalog_prefix,
                                             asset_type, season, scene_id, ext)
        s3_result = 's3://{}/{}'.format(self.s3_catalog_bucket, output_key)
        local_result = "{}{}/{}/{}.{}".format(self.catalog_path, asset_type,
                                              season, scene_id, ext)
        if not self.s3_only:
            if not os.path.exists(local_result):
                if not self.local_mode:
                    try:
                        # if the file is already in our S3 bucket, point at the
                        # S3 copy (faster than re-downloading from Planet)
                        self.s3client.head_object(
                            Bucket=self.s3_catalog_bucket, Key=output_key)
                        filepath = s3_result
                        # self.logger.info("Downloading {} from the internal S3 storage...".format(scene_id))
                        # self.transfer.download_file(self.s3_catalog_bucket, output_key, local_result)
                        # filepath = local_result
                    except botocore.exceptions.ClientError:
                        self.logger.exception('Error Encountered')
                        filepath = self.download_localfs_product(
                            product_type, scene_id, season)
                        self.logger.info("Uploading {}...".format(scene_id))
                        self.s3client.put_object(
                            Bucket=self.s3_catalog_bucket,
                            Key=output_key,
                            Body=open(filepath, 'rb'))
                        # self.transfer.upload_file(filepath, self.s3_catalog_bucket, output_key)
                else:
                    filepath = self.download_localfs_product(
                        product_type, scene_id, season)
                    s3_result = local_result
            else:
                filepath = local_result
                if self.local_mode:
                    s3_result = local_result
                else:
                    try:
                        self.s3client.head_object(
                            Bucket=self.s3_catalog_bucket, Key=output_key)
                    except botocore.exceptions.ClientError:
                        self.logger.exception('Error Encountered')
                        self.logger.info("Uploading {}...".format(scene_id))
                        self.s3client.put_object(
                            Bucket=self.s3_catalog_bucket,
                            Key=output_key,
                            Body=open(filepath, 'rb'))
                        # self.transfer.upload_file(filepath, self.s3_catalog_bucket, output_key)
        else:
            s3_result = self.download_s3_product(product_type, scene_id,
                                                 season)
            filepath = s3_result
        return filepath, s3_result

    def download_localfs_s3(self, scene_id, season=''):
        sub_products = []
        if self.with_analytic:
            sub_products.append('analytic')
        if self.with_analytic_xml:
            sub_products.append('analytic_xml')
        if self.with_visual:
            sub_products.append('visual')
        for sub_product in sub_products:
            self.secondary_uploads_executor.submit(
                self.download_localfs_s3_product, scene_id, season,
                sub_product)
        return self.download_localfs_s3_product(scene_id, season)

    def drain(self):
        self.secondary_uploads_executor.drain()

    def cleanup_catalog(self):
        self.logger.info("Catalog cleanup...")
        if self.with_immediate_cleanup and not self.s3_only:
            for product_type in [
                    'analytic', 'analytic_sr', 'analytic_xml', 'visual'
            ]:
                for season in ['OS', 'GS']:
                    lpath = "{}{}/{}".format(self.catalog_path, product_type,
                                             season)
                    try:
                        shutil.rmtree(lpath, ignore_errors=False)
                        os.makedirs(lpath)
                    except Exception:
                        self.logger.exception('Error Encountered')
                        self.logger.info(
                            "Could not remove a folder: {}".format(lpath))

    def close(self):
        self.secondary_uploads_executor.close()
        self.cleanup_catalog()
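

# A minimal usage sketch for PClientV1 (hypothetical driver, not part of the
# class above). It assumes an INI-style config file, readable with configparser,
# that provides the [imagery] keys consumed in __init__, plus a valid Planet API
# key; the AOI polygon, dates and 'config.ini' path are placeholder values, and
# items_iter() is the paged result iterator of the planet v1 client.
if __name__ == '__main__':
    import configparser

    cfg = configparser.ConfigParser()
    cfg.read('config.ini')                     # placeholder config path
    client = PClientV1('PLANET_API_KEY', cfg)  # placeholder API key

    aoi = {                                    # small GeoJSON polygon (lon/lat)
        'type': 'Polygon',
        'coordinates': [[[33.00, -2.00], [33.05, -2.00], [33.05, -1.95],
                         [33.00, -1.95], [33.00, -2.00]]]
    }
    query = client.set_filters_sr(aoi,
                                  start_date='2018-01-01T00:00:00.000Z',
                                  end_date='2018-03-01T00:00:00.000Z')
    results = client.request_intersecting_scenes(query)
    scene_ids = [item['id'] for item in results.items_iter(client.maximgs)]
    if scene_ids:
        local_path, s3_path = client.download_localfs_s3(scene_ids[0],
                                                         season='GS')
        print(local_path, s3_path)
    client.drain()
    client.close()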


def main(source_dir, out_dir, threads_number, parallel_mode, clear_threshold):
    # source_dir = '/Users/coloury/Dropbox/transfer_landsat'
    # out_dir = '/Users/coloury/sccd_test'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if not os.path.exists(source_dir):
        print('Source directory does not exist!')
        return

    if parallel_mode == 'desktop':
        tz = timezone('US/Eastern')
        logging.basicConfig(filename=os.path.join(
            os.getcwd(), 'AutoPrepareDataARD_{}.log'.format(
                datetime.fromtimestamp(time.time()).strftime('%c').replace(
                    " ", "_").replace(":", "-"))),
                            filemode='w+',
                            level=logging.INFO)
        logger = logging.getLogger(__name__)

        tmp_path = os.path.join(out_dir, 'tmp')
        if os.path.exists(tmp_path) is False:
            os.mkdir(tmp_path)

        if threads_number == 0:
            threads_number = multiprocessing.cpu_count()
        else:
            threads_number = int(threads_number)

        print('Number of threads used for parallel processing: {}'.format(
            threads_number))

        # keep only *_SR.tar archives and strip the .tar extension
        folder_list = [
            f[0:len(f) - 4] for f in listdir(source_dir)
            if (isfile(join(source_dir, f)) and f.endswith('.tar')
                and f[len(f) - 6:len(f) - 4] == 'SR')
        ]

        width = 5000
        height = 5000
        band_count = 8

        prepare_executor = FixedThreadPoolExecutor(size=threads_number)

        for count, folder in enumerate(folder_list):
            print("Processing scene {} of {}".format(count + 1,
                                                     len(folder_list)))
            prepare_executor.submit(single_image_processing, tmp_path,
                                    source_dir, out_dir, folder,
                                    clear_threshold, width, height,
                                    band_count, count + 1, len(folder_list))

        # wait for all tiles to finish
        prepare_executor.drain()

        # wait for the thread pool to stop
        prepare_executor.close()

        logger.info("Final report: finished preparation task ({})".format(
            datetime.now(tz).strftime('%Y-%m-%d %H:%M:%S')))
        # count_valid = len(scene_list)
        # logger.warning("Total processing scene number is {}; valid scene number is {}".format(count, count_valid))

        # remove tmp folder
        shutil.rmtree(tmp_path, ignore_errors=True)
    else:  # HPC (MPI) mode
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()

        # query the available number of cores/processes
        n_process = comm.Get_size()
        print('Number of MPI processes: {}'.format(n_process))

        if rank == 0:
            tz = timezone('US/Eastern')
            # logger = logging.getLogger(__name__)
            # logger.info('AutoPrepareDataARD starts: {}'.format(datetime.now(tz).strftime('%Y-%m-%d %H:%M:%S')))
            print('AutoPrepareDataARD starts: {}'.format(
                datetime.now(tz).strftime('%Y-%m-%d %H:%M:%S')))

            tmp_path = os.path.join(out_dir, 'tmp')

            # select only _SR archives
            folder_list = [
                f[0:len(f) - 4] for f in listdir(source_dir)
                if (isfile(join(source_dir, f)) and f.endswith('.tar')
                    and f[len(f) - 6:len(f) - 4] == 'SR')
            ]

            width = 5000
            height = 5000
            band_count = 8

            scene_per_process = ceil(len(folder_list) / n_process)
            # the first scene_extra ranks get one scene fewer than the rest
            scene_extra = scene_per_process * n_process - len(folder_list)

            # logger.info('The total process number is : {}'.format(n_process))
            # logger.info('scene number per process is : {}'.format(scene_per_process))
            # logger.info('extra scene number is : {}'.format(scene_extra))
            print('The total process number is : {}'.format(n_process))
            print('scene number per process is : {}'.format(
                scene_per_process))
            print('extra scene number is : {}'.format(scene_extra))

            # create the tmp path if it does not exist yet
            if os.path.exists(tmp_path) is False:
                os.mkdir(tmp_path)
        else:
            # logger = None
            tmp_path = None
            folder_list = None
            width = None
            height = None
            band_count = None
            scene_per_process = None
            scene_extra = None

        # MPI broadcasting variables
        # comm.bcast(logger, root=0)
        tmp_path = comm.bcast(tmp_path, root=0)
        folder_list = comm.bcast(folder_list, root=0)
        width = int(comm.bcast(width, root=0))
        height = int(comm.bcast(height, root=0))
        band_count = int(comm.bcast(band_count, root=0))
        scene_per_process = int(comm.bcast(scene_per_process, root=0))
        scene_extra = int(comm.bcast(scene_extra, root=0))

        # ranks smaller than scene_extra are assigned scene_per_process - 1
        # scenes each
        if rank < scene_extra:
            for i in range((scene_per_process - 1) * rank,
                           (scene_per_process - 1) * rank +
                           scene_per_process - 1):
                folder = folder_list[i]
                single_image_processing(tmp_path, source_dir, out_dir, folder,
                                        clear_threshold, width, height,
                                        band_count, i + 1, len(folder_list))
        else:
            # the remaining ranks are assigned scene_per_process scenes each
            for i in range((scene_per_process - 1) * scene_extra +
                           (rank - scene_extra) * scene_per_process,
                           (scene_per_process - 1) * scene_extra +
                           (rank - scene_extra) * scene_per_process +
                           scene_per_process):
                folder = folder_list[i]
                single_image_processing(tmp_path, source_dir, out_dir, folder,
                                        clear_threshold, width, height,
                                        band_count, i + 1, len(folder_list))
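

# A minimal driver sketch for main() (hypothetical, not part of the function
# above). The paths are placeholders, threads_number=0 means "use all CPU cores"
# in desktop mode, and clear_threshold is assumed to be the clear-observation
# threshold passed through to single_image_processing. Any parallel_mode other
# than 'desktop' takes the MPI branch and must be launched under mpirun,
# e.g. `mpirun -n 8 python prepare_ard.py`.
if __name__ == '__main__':
    main(source_dir='/data/landsat_tars',   # folder of *_SR.tar archives
         out_dir='/data/ard_stacks',
         threads_number=0,
         parallel_mode='desktop',
         clear_threshold=0.5)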


class PSQLPClient():
    def __init__(self, config):
        db_config = config['database']
        imagery_config = config['imagery']
        self.host = db_config['host']
        self.dbname = db_config['dbname']
        self.user = db_config['user']
        self.password = db_config['password']
        self.master_grid_table = db_config['master_grid_table']
        self.scene_data_table = db_config['scene_data_table']
        self.enabled = db_config['enabled'].lower() == 'true'
        self.conn = None
        self.skip_existing = imagery_config['skip_existing'].lower() == 'true'
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        # planet has a limitation of 5 sec per key (search queries)
        threads_number = imagery_config['threads']
        if threads_number == 'default':
            threads_number = multiprocessing.cpu_count() * 2 + 1
        else:
            threads_number = int(threads_number)
        self.query_executor = FixedThreadPoolExecutor(size=threads_number)

    def connect(self):
        if self.enabled:
            self.conn = psycopg2.connect(
                'host={} dbname={} user={} password={}'.format(
                    self.host, self.dbname, self.user, self.password))

    def connection_close(self):
        if self.enabled:
            self.conn.close()

    def get_cursor(self):
        return self.conn.cursor()

    def query_by_extent(self, extent, limit=10):
        curs = self.get_cursor()
        if limit is None:
            query = """SELECT * FROM %s
                WHERE x >= %s AND x <= %s AND y >= %s AND y <= %s
                ORDER BY gid""" % (self.master_grid_table, extent['xmin'],
                                   extent['xmax'], extent['ymin'],
                                   extent['ymax'])
        else:
            query = """SELECT * FROM %s
                WHERE x >= %s AND x <= %s AND y >= %s AND y <= %s
                ORDER BY gid LIMIT %s""" % (
                self.master_grid_table, extent['xmin'], extent['xmax'],
                extent['ymin'], extent['ymax'], limit)
        curs.execute(query)
        return curs

    def insert_row_with_commit(self, row):
        if self.enabled:
            try:
                curs = self.conn.cursor()
                self.insert_row(row, curs)
                self.conn.commit()
            except Exception:
                self.conn.rollback()

    def insert_row(self, row, curs):
        if self.enabled:
            # [provider] | scene_id | [cell_id | season] | global_col | global_row | url | tms_url | date_time
            curs.execute(
                """INSERT INTO {} VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now())"""
                .format(self.scene_data_table),
                (row['provider'], row['scene_id'], row['cell_id'],
                 row['season'], row.get('global_col'), row.get('global_row'),
                 row.get('url'), row.get('tms_url')))

    def exists_row(self, cell_id, season, provider='planet'):
        if self.enabled and self.skip_existing:
            try:
                curs = self.conn.cursor()
                curs.execute(
                    """SELECT FROM %s WHERE provider = '%s' AND cell_id = %s AND season = '%s' and tms_url <> ''"""
                    % (self.scene_data_table, provider, cell_id, season))
                return curs.fetchone() is not None
            except Exception:
                self.logger.exception('Error Encountered')
                return False
        else:
            return False

    def insert_rows_by_one(self, rows):
        if self.enabled:
            # [provider] | scene_id | [cell_id | season] | global_col | global_row | url | tms_url | date_time
            curs = self.conn.cursor()
            for row in rows:
                try:
                    curs.execute(
                        """INSERT INTO {} VALUES (%s, %s, %s, %s, %s, %s, %s, %s, now())"""
                        .format(self.scene_data_table),
                        (row[0], row[1], row[2], row[3], row[4], row[5],
                         row[6], row[7]))
                except psycopg2.IntegrityError:
                    self.conn.rollback()
                    try:
                        curs.execute(
                            """
                            UPDATE {}
                            SET scene_id = %s, global_col = %s, global_row = %s, url = %s, tms_url = %s, date_time = now()
                            WHERE provider = %s AND cell_id = %s AND season = %s
                            """.format(self.scene_data_table),
                            (row[1], row[4], row[5], row[6], row[7], row[0],
                             row[2], row[3]))
                    except psycopg2.IntegrityError:
                        self.conn.rollback()
                    else:
                        self.conn.commit()
                else:
                    self.conn.commit()

    def insert_rows_by_one_async(self, rows):
        if self.enabled:
            self.query_executor.submit(self.insert_rows_by_one, rows)

    def insert_rows(self, rows):
        if self.enabled:
            try:
                curs = self.conn.cursor()
                # [provider] | scene_id | [cell_id | season] | global_col | global_row | url | tms_url | date_time
                args_str = ','.join(
                    curs.mogrify("%s", (row, )).decode('utf8')
                    for row in rows)
                curs.execute("INSERT INTO {} VALUES {}".format(
                    self.scene_data_table, args_str))
                # ON CONFLICT DO NOTHING is PSQL 9.5+ only
                self.conn.commit()
            except Exception:
                self.conn.rollback()

    def query_without_tms_url(self, limit=None):
        curs = self.get_cursor()
        if limit is None:
            query = """SELECT * FROM %s WHERE tms_url = ''""" % (
                self.master_grid_table)
        else:
            query = """SELECT * FROM %s WHERE tms_url = '' LIMIT %s""" % (
                self.master_grid_table, limit)
        curs.execute(query)
        return curs

    def drain(self):
        self.query_executor.drain()

    def close(self):
        self.query_executor.close()
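

# A minimal usage sketch for PSQLPClient (hypothetical driver, not part of the
# class above). It assumes the same INI-style config used elsewhere, with
# [database] (enabled = true) and [imagery] sections; the extent, scene id and
# row values below are placeholders that follow the column order expected by
# insert_row.
if __name__ == '__main__':
    import configparser

    cfg = configparser.ConfigParser()
    cfg.read('config.ini')                  # placeholder config path
    db = PSQLPClient(cfg)
    db.connect()

    curs = db.query_by_extent({'xmin': 32.9, 'xmax': 33.2,
                               'ymin': -2.1, 'ymax': -1.8}, limit=5)
    for record in curs.fetchall():
        print(record)

    db.insert_row_with_commit({
        'provider': 'planet',
        'scene_id': '20180101_000000_0f2a',     # placeholder Planet scene id
        'cell_id': 123456,
        'season': 'GS',
        'global_col': 100,
        'global_row': 200,
        'url': 's3://bucket/key.tif',
        'tms_url': ''
    })
    db.drain()
    db.close()
    db.connection_close()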