def import_from_file_s2(in_csv, block_size):
    """Register Sentinel-2 datasets from a CSV file in blocks of block_size rows."""
    import pprint

    api = Api()
    m = block_size
    for n in range(0, count_lines(in_csv), m):
        print 'Range: <%d:%d>' % (n, n + m)
        datasets = sentinel_harvester(in_csv, n, m)
        out = api.create_dataset(datasets)
        pprint.pprint(out)
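# import_from_file_s2() above relies on a count_lines() helper that is not shown in this
# section. A minimal sketch of what it could look like, assuming the CSV is a plain text
# file and every row counts as one line (name and behavior are assumptions):
def count_lines(in_csv):
    """Return the number of lines in a text file."""
    with open(in_csv) as f:
        return sum(1 for _ in f)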
def import_from_sentinel_catalog(sensor, start_date, api_url):
    """Harvest one day of Sentinel metadata in 9x9 degree tiles and register missing datasets."""
    import numpy

    api = Api(api_url)
    max_cloud_ratio = 1.0
    ag_season_start = dateutil.parser.parse(start_date)
    ag_season_end = ag_season_start + datetime.timedelta(days=1)

    for lon in numpy.arange(-180, 180, 9):
        for lat in numpy.arange(-90, 90, 9):
            # Closed 9x9 degree polygon (lon/lat corner points)
            lon_end = lon + 9
            lat_end = lat + 9
            aoi_se = (lon_end, lat)
            aoi_nw = (lon, lat_end)
            aoi_ne = (aoi_se[0], aoi_nw[1])
            aoi_sw = (aoi_nw[0], aoi_se[1])
            aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

            cat = SentinelCatalog()
            datasets = cat.find(sensor, aoi, ag_season_start, ag_season_end, max_cloud_ratio)
            if datasets is not None:
                ds_found = list()
                ds_missing = list()
                # Split catalog results into already-registered and missing datasets
                for counter, ds in enumerate(datasets):
                    catalog_ds = api.get_dataset(ds.entity_id)
                    if catalog_ds is None or len(catalog_ds) == 0:
                        ds_missing.append(ds)
                    elif len(catalog_ds) == 1:
                        ds_found.append(catalog_ds)
                    else:
                        print 'More in catalog found: %s (%d)' % (ds.entity_id, len(catalog_ds))
                    if (counter % 25) == 0:
                        print counter, len(datasets)

                print 'already registered: ', len(ds_found), len(datasets)
                print 'missing: ', len(ds_missing), len(datasets)

                # Register every dataset that is not yet in the catalog
                for counter, ds_obj in enumerate(ds_missing):
                    new_ds = api.create_dataset(ds_obj)
                    if new_ds is not None:
                        print new_ds
                    if (counter % 25) == 0:
                        print counter, len(ds_missing)
            else:
                print 'No data found in catalog for %s from %s to %s' % (
                    sensor, ag_season_start.strftime("%Y-%m-%d"),
                    ag_season_end.strftime("%Y-%m-%d"))
class ApiTest(unittest.TestCase):
    """Tests dataset retrieval and catalog search against the EOSS API."""

    def setUp(self):
        self.api = Api()  # url='http://api.eoss.cloud'

    def testGetDataset(self):
        """Retrieve registered datasets by entity id."""
        ds = self.api.get_dataset('LC81920272016240LGN00')
        self.assertEqual(len(ds), 1)
        ds = self.api.get_dataset('LE71010172003151EDC00')
        self.assertEqual(len(ds), 1)
        ds = self.api.get_dataset(
            'S2A_OPER_PRD_MSIL1C_PDMC_20160806T202847_R142_V20160805T192909_20160805T192909')
        self.assertEqual(len(ds), 21)

    def testCatalogSearch(self):
        aoi_nw = (-91.5175095, 16.8333384)
        aoi_se = (-91.3617268, 16.8135385)
        aoi_ne = (aoi_se[0], aoi_nw[1])
        aoi_sw = (aoi_nw[0], aoi_se[1])
        aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

        # Object representation
        results = self.api.search_dataset(aoi, 100, parse('2015-01-01'),
                                          parse('2015-03-01'), 'landsat8',
                                          full_objects=True)
        self.assertEqual(len(results), 3)
        for item in results:
            self.assertTrue(type(item).__name__ == 'Catalog_Dataset')

        # JSON representation
        results = self.api.search_dataset(aoi, 100, parse('2015-01-01'),
                                          parse('2015-03-01'), 'landsat8',
                                          full_objects=False)
        self.assertEqual(len(results), 3)
        for item in results:
            self.assertTrue(type(item) == dict)
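# The test class above can be run with the standard unittest runner. A minimal sketch of
# the module-level entry point, assuming Api and parse (dateutil.parser.parse) are
# imported at the top of the test module:
if __name__ == '__main__':
    unittest.main()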
def import_from_pipe_ls(lines):
    """Register Landsat datasets parsed from lines piped into the process."""
    datasets = landsat_harvester_line(lines)
    api = Api()
    skipped = list()
    registered = list()
    for c, ds in enumerate(datasets):
        try:
            out = api.create_dataset(ds)
            if 'already' not in str(out):
                registered.append(c)
            else:
                skipped.append(c)
        except Exception as e:
            print e
def import_from_landsat_catalog(sensor, start_date, api_url):
    """Harvest one day of Landsat metadata from the USGS catalog and register missing datasets."""
    api = Api(api_url)
    max_cloud_ratio = 1.0
    ag_season_start = dateutil.parser.parse(start_date)
    ag_season_end = ag_season_start + datetime.timedelta(days=1)

    # Global area of interest (closed polygon, lon/lat corner points)
    aoi_se = (180, -90)
    aoi_nw = (-180, 90)
    aoi_ne = (aoi_se[0], aoi_nw[1])
    aoi_sw = (aoi_nw[0], aoi_se[1])
    aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

    cat = USGSCatalog()
    # Supported sensors: "LANDSAT_8", "LANDSAT_ETM_SLC_OFF", "LANDSAT_ETM"
    datasets = cat.find(sensor, aoi, ag_season_start, ag_season_end, max_cloud_ratio)

    if datasets is not None:
        ds_found = list()
        ds_missing = list()
        # Split catalog results into already-registered and missing datasets
        for counter, ds in enumerate(datasets):
            catalog_ds = api.get_dataset(ds.entity_id)
            if catalog_ds is None or len(catalog_ds) == 0:
                ds_missing.append(ds)
            elif len(catalog_ds) == 1:
                ds_found.append(catalog_ds)
            else:
                print 'More in catalog found: %s (%d)' % (ds.entity_id, len(catalog_ds))
            if (counter % 25) == 0:
                print counter, len(datasets)

        print 'already registered: ', len(ds_found), len(datasets)
        print 'missing: ', len(ds_missing), len(datasets)

        # Register every dataset that is not yet in the catalog
        for counter, ds_obj in enumerate(ds_missing):
            new_ds = api.create_dataset(ds_obj)
            if new_ds is not None:
                print new_ds
            if (counter % 25) == 0:
                print counter, len(ds_missing)
    else:
        print 'No data found in catalog for %s from %s to %s' % (
            sensor, ag_season_start.strftime("%Y-%m-%d"),
            ag_season_end.strftime("%Y-%m-%d"))
def import_from_file_ls(in_csv):
    """Register Landsat datasets from a CSV file, reporting progress every 100 rows."""
    datasets = landsat_harvester(in_csv)
    api = Api()
    skipped = list()
    registered = list()
    for c, ds in enumerate(datasets):
        try:
            out = api.create_dataset(ds)
            if 'title' not in str(out):
                registered.append(c)
            else:
                skipped.append(c)
        except Exception as e:
            print e
        if c % 100 == 0:
            print c
            print 'skipped:', skipped
            print 'registered:', registered
            skipped = list()
            registered = list()
def update_catalog(queue_name, api_endpoint):
    """Consume SNS/SQS new-scene notifications and register the referenced Landsat datasets."""
    api = Api(api_endpoint)
    sqs = boto3.resource('sqs')

    if queue_name not in get_all_queues():
        raise Exception('Queue %s does not exist in %s' % (queue_name, get_all_queues()))
    queue = sqs.get_queue_by_name(QueueName=queue_name)

    should_break = False
    counter = 1
    while not should_break:
        # Poll slowly while the queue is empty, quickly while messages are waiting
        if int(queue.attributes.get('ApproximateNumberOfMessages')) == 0:
            time_interval = 60
        else:
            time_interval = 2
        logger.info('Getting messages from SQS: %s (%d sec. interval)' % (queue_name, time_interval))

        messages_to_delete = list()
        for message_obj in queue.receive_messages(MaxNumberOfMessages=MAX_MESSAGES,
                                                  WaitTimeSeconds=10,
                                                  VisibilityTimeout=60):
            messages_to_delete = list()
            notification = ujson.loads(message_obj.body)
            message = ujson.loads(notification[u'Message'])
            if get_message_type(message) == 'landsat':
                for rec in message[u'Records']:
                    # Derive metadata and quicklook locations from the S3 event record
                    s3 = extract_s3_structure(rec)
                    s3['metadata'] = os.path.join(s3['s3_http'], s3['s3_path'],
                                                  s3['entity_id'] + '_MTL.txt')
                    s3['metadata_json'] = os.path.join(s3['s3_http'], s3['s3_path'],
                                                       s3['entity_id'] + '_MTL.json')
                    s3['quicklook'] = os.path.join(s3['s3_http'], s3['s3_path'],
                                                   s3['entity_id'] + '_thumb_large.jpg')
                    req = requests.get(s3['metadata_json'])
                    try:
                        obj = parse_l1_metadata_file(req.json(), s3)
                        new_ds = api.create_dataset(obj)
                        if new_ds is not None:
                            print new_ds
                        counter += 1
                        messages_to_delete.append({
                            'Id': message_obj.message_id,
                            'ReceiptHandle': message_obj.receipt_handle
                        })
                    except ValueError:
                        logger.exception('ERROR: metadata location structure corrupted',
                                         extra={'metadata_response': req.text})
                    except Exception as e:
                        logging.exception('General Error occurred',
                                          extra={'request_url': req.url})
                        should_break = True
                    finally:
                        # Remove successfully processed messages from the queue
                        if len(messages_to_delete) > 0:
                            messages_to_delete = remove_messages_from_queue(queue, messages_to_delete)
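# update_catalog() above validates the queue name against get_all_queues(), which is not
# defined in this section. A minimal sketch of such a helper, assuming a queue name is
# simply the last path segment of each SQS queue URL visible to the caller:
def get_all_queues():
    """Return the names of all SQS queues accessible with the current credentials."""
    sqs = boto3.resource('sqs')
    return [q.url.rsplit('/', 1)[-1] for q in sqs.queues.all()]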
def setUp(self):
    self.api = Api()  # url='http://api.eoss.cloud'
def import_from_pipe_s2(lines):
    import pprint

    api = Api()
    datasets = sentinel_harvester_line(lines)
    out = api.create_dataset(datasets)
    pprint.pprint(out)
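# The *_from_pipe_* importers above expect already-read input lines. A minimal sketch of
# how import_from_pipe_s2() could be fed from stdin; the CLI wiring and the assumption
# that the harvester accepts a list of lines are not confirmed by this section:
if __name__ == '__main__':
    import sys
    import_from_pipe_s2(sys.stdin.readlines())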
def main(sensor, start_date, days, api_endpoint):
    """Check that every registered dataset resource (zip, metadata, quicklook) is reachable."""
    api = Api(api_endpoint)
    logger.info('Checking consistency for %s between %s + %s' % (sensor, start_date, days))

    # Global area of interest (closed polygon, lon/lat corner points)
    aoi_nw = (-180, 90)
    aoi_se = (180, -90)
    aoi_ne = (aoi_se[0], aoi_nw[1])
    aoi_sw = (aoi_nw[0], aoi_se[1])
    aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

    wrong_urls = list()
    for delta_day in range(1, days):
        start_time = time.time()
        start_date_date = parse(start_date) + datetime.timedelta(days=delta_day)
        end_date_date = start_date_date + datetime.timedelta(days=1)
        logger.info('Checking consistency for %s between %s and %s'
                    % (sensor, start_date_date.isoformat(), end_date_date.isoformat()))

        # JSON representation of the registered datasets
        results = api.search_dataset(aoi, 100, start_date_date, end_date_date, sensor,
                                     full_objects=False)

        # Collect resource URLs per dataset; record resources already missing in the catalog
        url_resources = list()
        missing_urls = list()
        missing_types = list()
        for r in results:
            if r['resources']['s3public']['zip'] is not None:
                url_resources.append(r['resources']['s3public']['zip'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('zip')
            if r['resources']['metadata'] is not None:
                url_resources.append(r['resources']['metadata'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('metadata')
            if r['resources']['quicklook'] is not None:
                url_resources.append(r['resources']['quicklook'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('quicklook')

        logger.info('total scans: %d' % len(url_resources))
        logger.info('already missed resources: %d' % len(missing_urls))

        if False:
            # Sequential HEAD checks (disabled, kept for debugging)
            for counter, res in enumerate(url_resources):
                req = requests.head(res)
                if req.status_code != requests.codes.ok:
                    print res, req.status_code
                    missing_urls.append(res)
                    print res
                if (counter % 25) == 0:
                    print counter
        else:
            # Parallel HEAD checks in chunks of 500 URLs
            counter = 0
            for url_parts in chunks(url_resources, 500):
                counter += 1
                rs = (grequests.head(u) for u in url_parts)
                res = grequests.map(rs)
                for req in res:
                    if req is not None:
                        if req.status_code != requests.codes.ok:
                            wrong_urls.append(res)
                            missing_types.append('zip_registered')
                        else:
                            print req.url, req

        if len(wrong_urls) > 0:
            for item in wrong_urls:
                print item
                for req in item:
                    if req is not None and req.status_code != requests.codes.ok:
                        append_data('/tmp/wrong_urls.txt', req.url)
        if len(missing_urls) > 0:
            append_data('/tmp/missing_urls.txt', missing_urls)
        if len(missing_types) > 0:
            for type in ['zip_registered', 'quicklook', 'metadata', 'zip']:
                logger.info('%d:%s' % (operator.countOf(missing_types, type), type))

        logger.info('wrong resources: %d' % len(wrong_urls))
        logger.info('Executed in %f secs.' % (time.time() - start_time))
    print 'Wrong URLs:', wrong_urls
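# main() above uses two small helpers, chunks() and append_data(), that are not part of
# this section. Minimal sketches of what they could look like, assuming chunks() yields
# consecutive slices and append_data() appends a string or a list of strings to a file:
def chunks(items, size):
    """Yield consecutive slices of at most `size` elements from `items`."""
    for i in range(0, len(items), size):
        yield items[i:i + size]


def append_data(file_name, data):
    """Append a string or a list of strings to a text file, one entry per line."""
    with open(file_name, 'a') as f:
        if isinstance(data, list):
            for entry in data:
                f.write('%s\n' % entry)
        else:
            f.write('%s\n' % data)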