Example #1
0
 def test_get_s3_public_url(self):
     """With no AWS credentials in the environment, a public object's
     'signed' URL is just (original_url, None) — no auth headers."""
     # snapshot the environment so it can be restored afterwards
     envs = dict(os.environ)
     if 'AWS_ACCESS_KEY_ID' in envs:
         del os.environ['AWS_ACCESS_KEY_ID']
     if 'AWS_BUCKET_ACCESS_KEY_ID' in envs:
         del os.environ['AWS_BUCKET_ACCESS_KEY_ID']
     try:
         url = utils.get_s3_signed_url(self.remote_url)
         assert (len(url) == 2)
         assert (url[0] == self.remote_url)
         assert (url[1] is None)
     finally:
         # restore the environment even if an assertion fails, so later
         # tests do not run with the AWS credentials stripped
         os.environ.clear()
         os.environ.update(envs)
Example #2
0
 def test_get_s3_signed_url(self):
     """Signing a remote URL yields a (url, headers) pair."""
     result = utils.get_s3_signed_url(self.remote_url)
     assert len(result) == 2
Example #3
0
def add_items(catalog,
              records,
              start_date=None,
              end_date=None,
              s3meta=False,
              prefix=None,
              publish=None):
    """ Stream records to a collection with a transform function

    Keyword arguments:
    start_date -- Process this date and after
    end_date -- Process this date and earlier
    s3meta -- Retrieve metadata from s3 rather than Sinergise URL (roda)
    prefix -- Only process records whose 'path' starts with this prefix
    publish -- SNS Topic ARN; each ingested Item is published to it
    """

    # use existing collection or create new one if it doesn't exist
    cols = {c.id: c for c in catalog.collections()}
    if 'sentinel-2-l1c' not in cols:
        catalog.add_catalog(_collection)
        cols = {c.id: c for c in catalog.collections()}
    collection = cols['sentinel-2-l1c']

    # optional SNS client for publishing ingested Items
    client = None
    if publish:
        # region is the 4th ARN component: arn:aws:sns:<region>:<acct>:<topic>
        parts = publish.split(':')
        client = boto3.client('sns', region_name=parts[3])

    duration = []
    scanned = 0  # records seen so far; safe even when `records` is empty
    # iterate through records
    for i, record in enumerate(records):
        scanned = i + 1
        start = datetime.now()
        if i % 50000 == 0:
            logger.info('%s: Scanned %s records' % (start, str(i)))
        dt = record['datetime'].date()
        # if path doesn't match provided prefix skip to next record
        if prefix is not None and not record['path'].startswith(prefix):
            continue
        if s3meta:
            url = op.join(SETTINGS['s3_url'], record['path'])
        else:
            url = op.join(SETTINGS['roda_url'], record['path'])
        # skip records outside the requested date window
        if (start_date is not None
                and dt < start_date) or (end_date is not None
                                         and dt > end_date):
            continue
        try:
            if s3meta:
                # metadata lives in a requester-pays bucket: sign the request
                signed_url, headers = utils.get_s3_signed_url(
                    url, requestor_pays=True)
                resp = requests.get(signed_url, headers=headers)
                metadata = json.loads(resp.text)
            else:
                metadata = read_remote(url)
            item = transform(metadata)
        except Exception as err:
            logger.error('Error creating STAC Item %s: %s' %
                         (record['path'], err))
            continue
        try:
            collection.add_item(item,
                                path=SETTINGS['path_pattern'],
                                filename=SETTINGS['fname_pattern'])
            if client:
                client.publish(TopicArn=publish, Message=json.dumps(item.data))
            duration.append((datetime.now() - start).total_seconds())
            logger.info('Ingested %s in %s' % (item.filename, duration[-1]))
        except Exception as err:
            logger.error('Error adding %s: %s' % (item.id, err))
    # original code crashed here with NameError on `i` (and nan-mean) when
    # `records` was empty, and reported the last index instead of the count
    if duration:
        logger.info('Read in %s records averaging %4.2f sec (%4.2f stddev)' %
                    (scanned, np.mean(duration), np.std(duration)))
    else:
        logger.info('Read in %s records; none ingested' % scanned)