Example #1
import json
import time

import boto3
from astroquery.mast import Observations


def find_and_process(obs_collection='HST',
                     dataproduct_type='image',
                     instrument_name='ACS/WFC',
                     filters='F814W',
                     N=100):
    # Use AWS S3 URLs for the MAST records (rather than the ones at http://mast.stsci.edu)
    Observations.enable_cloud_dataset(profile='ndmiles_admin')

    # Query MAST for some ACS/WFC data
    query_parameters = {
        'obs_collection': obs_collection,
        'dataproduct_type': dataproduct_type,
        'instrument_name': instrument_name,
        'filters': filters
    }
    obsTable = Observations.query_criteria(**query_parameters)

    # Grab the products for the first N observations:
    # http://astroquery.readthedocs.io/en/latest/mast/mast.html#getting-product-lists
    products = Observations.get_product_list(obsTable['obsid'][:N])

    # Keep only the calibrated (FLT) FITS files
    filtered_products = Observations.filter_products(
        products, mrp_only=False, productSubGroupDescription=['FLT'])

    # We want URLs like this: s3://stpubdata/hst/public/ibg7/ibg705080/ibg705081_drz.fits
    s3_urls = Observations.get_cloud_uris(filtered_products)

    # Auth to create a Lambda function
    session = boto3.Session(profile_name='ndmiles_admin')
    client = session.client('lambda', region_name='us-east-1')

    st = time.time()
    for url in s3_urls:
        fits_s3_key = url.replace("s3://stpubdata/", "")
        print(fits_s3_key)
        event = {
            'fits_s3_key': fits_s3_key,
            'fits_s3_bucket': 'stpubdata',
            's3_output_bucket': 'compute-sky-lambda'
        }
        payload = json.dumps(event)
        lambda_inputs = {
            'FunctionName': 'compute_sky',
            'InvocationType': 'Event',
            'LogType': 'Tail',
            'Payload': payload
        }
        response = client.invoke(**lambda_inputs)

    et = time.time()
    print(f"Duration: {et - st:0.2f} s")
Example #2
def aws_fullframe_fits():
    """
    Loop through full frame files, extract a subarray, and calculate mean.
    This must be done in a way that the file is deleted as soon as it is, no longer
    necessary to keep, so we do not use up all the disk space.
    """
    import boto3
    import os
    import typing

    import numpy as np

    from astropy.io import fits
    from astroquery.mast import Observations

    from tess_bert import shortcuts as tess_shortcuts

    from urllib.parse import urlparse, ParseResult

    # NOTE: `utils` and `constants` are expected to be provided by the
    # surrounding project (the framework that supplies comm_binders).
    work_queue, done_queue, ologger = utils.comm_binders(aws_fullframe_fits)

    OBS_ID: str = 'tess-s0001-1-1'
    DATA_DIR: str = os.path.join(tess_shortcuts.getcwd(), 'data')
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)

    obs_table = Observations.query_criteria(obs_id=OBS_ID)
    products = Observations.get_product_list(obs_table)
    filtered = Observations.filter_products(products,
                                            productSubGroupDescription="FFIC",
                                            mrp_only=False)
    Observations.enable_cloud_dataset()

    for idx, s3_url in enumerate(
            Observations.get_cloud_uris(filtered, includeBucket=True)):
        url_parts: ParseResult = urlparse(s3_url)
        filepath: str = os.path.join(DATA_DIR,
                                     os.path.basename(url_parts.path))
        done_queue.put({
            'bucket_path': url_parts.path.strip('/'),
            'filepath': filepath,
            'bucket': url_parts.netloc
        })
        if idx > 2 and constants.DEBUG:
            break
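Each entry placed on done_queue above carries what a downstream task needs to fetch the frame, take a subarray mean, and delete the file, as the docstring describes. The consumer itself is not shown in this example; the sketch below is one possible version, and the subarray slice and requester-pays argument are assumptions.

import os

import boto3
import numpy as np
from astropy.io import fits


def process_frame(entry):
    # `entry` is one of the dicts put on done_queue above:
    # {'bucket': ..., 'bucket_path': ..., 'filepath': ...}
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(entry['bucket'])
    bucket.download_file(entry['bucket_path'], entry['filepath'],
                         ExtraArgs={'RequestPayer': 'requester'})
    try:
        with fits.open(entry['filepath']) as hdul:
            # Arbitrary 100x100 corner subarray; a real cutout location would
            # come from the science case.
            return float(np.mean(hdul[1].data[:100, :100]))
    finally:
        # Delete the file as soon as it is no longer needed.
        os.remove(entry['filepath'])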
Example #3
from astroquery.mast import Observations


def download_wcs_file(filtered, local_dir, n=10, cloud=False):
    """
    filtered is the result of id_wcs_file.
    local_dir is the location to put the file.
    cloud determines whether the file should first be pulled from the cloud.

    For Lambda, local_dir should be /tmp.
    """

    if cloud:
        Observations.enable_cloud_dataset(provider='AWS')

    obsslice = slice(n, n + 1)
    manifest = Observations.download_products(filtered[obsslice],
                                              download_dir=local_dir,
                                              mrp_only=False)

    return manifest['Local Path'][0]
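In the original code, filtered comes from an id_wcs_file helper that is not shown here; the call pattern below rebuilds it with the same MAST query used in the other examples and is only a usage sketch.

from astroquery.mast import Observations

obs_table = Observations.query_criteria(obs_id='tess-s0001-1-1')
products = Observations.get_product_list(obs_table)
filtered = Observations.filter_products(products,
                                        productSubGroupDescription='FFIC',
                                        mrp_only=False)

# On AWS Lambda the only writable location is /tmp.
local_path = download_wcs_file(filtered, '/tmp', n=10, cloud=True)
print(local_path)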
Example #4
import os

import boto3
from astroquery.mast import Observations

# NOTE: Use your own key values here.
os.environ['AWS_ACCESS_KEY_ID'] = 'somekey'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'somesecret'

# NOTE: Change TESS observation ID as needed.
obs_id = 'tess-s0001-1-1'

# Find full frame dataset for the observation ID.
obs_table = Observations.query_criteria(obs_id=obs_id)
products = Observations.get_product_list(obs_table)
filtered = Observations.filter_products(products,
                                        productSubGroupDescription="FFIC",
                                        mrp_only=False)

# Set up AWS S3 bucket to pull data from.
Observations.enable_cloud_dataset()
s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False)
s3 = boto3.resource('s3')
bucket = s3.Bucket('stpubdata')
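# Illustration only (not in the original snippet): with the bucket handle and
# keys above, a single full frame can be pulled directly; stpubdata is a
# requester-pays bucket and /tmp is assumed to be writable (e.g. on Lambda).
frame_path = '/tmp/example_frame.fits'
bucket.download_file(s3_urls[0], frame_path,
                     ExtraArgs={"RequestPayer": "requester"})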


def time_mean():
    """Loop through full frame files, extract a subarray, and calculate mean.
    This must be done in a way that the file is deleted as soon as it is
    no longer necessary to keep, so we do not use up all the disk space.

    .. note:: Algorithm can also be modified to construct subarrays
              into a subcube.

    Returns
    -------
Example #5
def bert_tess_fullframe_main_2():
    """Continuation of main function to run it across different sectors."""

    import os
    import time

    import boto3
    from astropy.io import fits
    from astropy.wcs import WCS
    from astroquery.mast import Observations

    s3 = boto3.resource('s3')
    bucket = s3.Bucket(name=os.environ.get('AWSBUCKETNAME'))
    outbucket = s3.Bucket(name=os.environ.get('CACHEBUCKETNAME'))
    homedir = os.environ.get('HOME')

    # NOTE: `utils` is expected to be provided by the surrounding project
    # (the framework that supplies comm_binders).
    work_queue, done_queue, ologger = utils.comm_binders(
        bert_tess_fullframe_main_2)

    # Example event:
    # {
    #   "tic_id": "25155310",
    #   "sec_id": "tess-s0001-4-1",
    #   "ra": 63.3739396231274,
    #   "dec": -69.226822697583,
    #   "radius": 2.5,
    #   "cutout_width": 30,
    #   "use_cache": "true"
    # }
    #
    # work_queue populated by calling Lambda
    for event in work_queue:
        tic_id = event['tic_id']
        sec_id = event['sec_id']

        basename = f'{sec_id}_s3_uris.txt'  # noqa
        filename = os.path.join(homedir, basename)

        try:
            # Check if URI list already cached.
            # According to MAST, there is no need to invalidate cache here.
            ologger.info(f'Attempting to download {basename} from S3')
            outbucket.download_file(
                basename, filename,
                ExtraArgs={"RequestPayer": "requester"})
        except Exception:
            # Find full frame dataset for the observation ID.
            ologger.info('Started querying Observations...')
            obs_table = Observations.query_criteria(obs_id=sec_id)
            products = Observations.get_product_list(obs_table)
            filtered = Observations.filter_products(
                products, productSubGroupDescription="FFIC",
                mrp_only=False)

            # Use AWS S3 bucket to pull data from.
            Observations.enable_cloud_dataset(verbose=False)
            ologger.info('Started obtaining cloud URIs...')
            t_start = time.time()
            s3_urls = Observations.get_cloud_uris(
                filtered, include_bucket=False)
            t_end = time.time()
            ologger.info(f'Got {len(s3_urls)} URIs in {t_end - t_start} s')

            # Upload URI list to cache.
            with open(filename, 'w') as fout:
                for url in s3_urls:
                    fout.write(url + os.linesep)
            try:
                outbucket.upload_file(
                    filename, basename,
                    ExtraArgs={"RequestPayer": "requester"})
            except Exception as exc:
                ologger.error(str(exc))
            else:
                ologger.info(f'Uploaded {basename} to S3')
        else:
            # Use cache if it exists.
            with open(filename, 'r') as fin:
                s3_urls = [url.strip() for url in fin.readlines()]
            ologger.info(f'Read {len(s3_urls)} URIs from {basename}')
        finally:
            # Clean up
            if os.path.exists(filename):
                os.remove(filename)

        ra = float(event['ra'])
        dec = float(event['dec'])

        # TODO: Cache good WCS for a given sector/camera/ccd combo and use
        #       known good cache if available.
        # Find pixel coordinates from sky from first frame header.
        key = s3_urls[0]
        basename = key.split('/')[-1]
        filename = os.path.join(homedir, basename)
        ologger.info(f'Resolving WCS from {key}')
        bucket.download_file(
            key, filename, ExtraArgs={"RequestPayer": "requester"})
        hdr = fits.getheader(filename, ext=1)
        if hdr.get('WCSAXES', 0) != 2:  # WCSAXES == 2 is good WCS, per MIT
            ologger.error(f'{key} has invalid WCS')
            continue
        w = WCS(hdr)
        pix = w.all_world2pix(ra, dec, 0)
        xpos = round(float(pix[0]))  # float needed to get rid of 0-D array
        ypos = round(float(pix[1]))

        # Clean up
        os.remove(filename)

        # The star needs to be at least 2*radii pixels away in both X and Y.
        radius = float(event['radius'])
        edge_r = 2 * radius
        naxis1, naxis2 = w.pixel_shape  # X Y
        if (xpos < edge_r or xpos >= (naxis1 - edge_r) or
                ypos < edge_r or ypos >= (naxis2 - edge_r)):
            ologger.error(
                f'TIC {tic_id} in {sec_id}: X={xpos},Y={ypos} not at least '
                f'{edge_r} pixels away from the edge, skipping...')
            continue

        # Pass data into the next AWS Lambda function.
        ologger.info(f'TIC {tic_id} in {sec_id}: Started processing '
                     'full frame URIs...')
        for url in s3_urls:
            done_queue.put({
                'key': url,
                'tic_id': tic_id,
                'ra': ra,
                'dec': dec,
                'xpos': xpos,
                'ypos': ypos,
                'radius': radius,
                'cutout_width': event['cutout_width'],
                'use_cache': event['use_cache']})
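The Lambda function that consumes these done_queue entries is not included in this example. The sketch below only illustrates what that next stage might do with the payload, cutting a square of cutout_width pixels around (xpos, ypos) out of each full frame; apart from the payload keys, everything here is an assumption.

import os

import boto3
from astropy.io import fits


def cutout_worker(event, homedir='/tmp'):
    # `event` is one dict from done_queue above.
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(name=os.environ.get('AWSBUCKETNAME'))

    filename = os.path.join(homedir, event['key'].split('/')[-1])
    bucket.download_file(event['key'], filename,
                         ExtraArgs={"RequestPayer": "requester"})
    try:
        half = int(event['cutout_width']) // 2
        x, y = event['xpos'], event['ypos']
        with fits.open(filename) as hdul:
            # NumPy axis order is (row, column) = (Y, X).
            return hdul[1].data[y - half:y + half, x - half:x + half].copy()
    finally:
        os.remove(filename)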
Example #6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 19 19:31:11 2020

@author: smullally
"""

from astroquery.mast import Observations

Observations.enable_cloud_dataset(provider='AWS')

target = "Kepler-10"

#Do a cone search and find the Kepler long cadence data for your target
obs = Observations.query_object(target, radius="0s")
want = (obs['obs_collection'] == "Kepler") & (obs['t_exptime'] == 1800.0)

#Pick which data you want to retrieve
data_prod = Observations.get_product_list(obs[want])
filt_prod = Observations.filter_products(
    data_prod, description="Lightcurve Long Cadence (CLC) - Q4")

#Move data from the S3 bucket to the default astroquery location.
#cloud_only=True means that data will only be retrieved if available on AWS S3
manifest = Observations.download_products(filt_prod, cloud_only=True)
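#Quick check of the download (not part of the original script): the manifest
#is an astropy Table whose 'Local Path' column points at the retrieved files.
from astropy.io import fits

print(manifest['Local Path', 'Status'])
with fits.open(manifest['Local Path'][0]) as hdul:
    hdul.info()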

#%%
import pdb

from lightkurve import search_targetpixelfile
Example #7
import json
from multiprocessing import Pipe, Process

import numpy as np
from astroquery.mast import Observations


def lambda_handler(event, context):
    """Extract light curve data from one TESS full frame image.

    Parameters
    ----------
    event : dict
        API Gateway Lambda Proxy Input Format.
        Event doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format

    context : object
        Lambda Context runtime methods and attributes.
        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    result : dict
        API Gateway Lambda Proxy Output Format.
        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html

    """  # noqa
    obs_id = event['id']  # TESS observation ID; Example: 'tess-s0001-1-1'

    # TODO: Calculate some of these from the 10th frame?
    # For now, take these as given and pass them on to the worker:
    payload = {
        'xpos': event['xpos'],
        'ypos': event['ypos'],
        'radius': event['radius'],
        'bright_pixel_threshold': event['bright_pixel_threshold']
    }

    # Find full frame dataset for the observation ID.
    obs_table = Observations.query_criteria(obs_id=obs_id)
    products = Observations.get_product_list(obs_table)
    filtered = Observations.filter_products(products,
                                            productSubGroupDescription="FFIC",
                                            mrp_only=False)

    # Use AWS S3 bucket to pull data from.
    Observations.enable_cloud_dataset()  # TODO: verbose=False ?
    s3_urls = Observations.get_cloud_uris(filtered, include_bucket=False)

    # TODO: Timed out! Try https://docs.python.org/3/library/asyncio.html ?
    # TODO: Handle same Lambda call invoked multiple times by AWS?
    # Call tess_fullframe_worker AWS Lambda function in parallel
    # https://aws.amazon.com/blogs/compute/parallel-processing-in-python-with-aws-lambda/
    parent_connections = []
    processes = []
    data = []
    for url in s3_urls[:2]:  # TODO: Remove [:2] when done testing
        payload['key'] = url
        parent_conn, child_conn = Pipe()
        parent_connections.append(parent_conn)
        arg = json.dumps(payload)
        process = Process(target=_pipe_worker, args=(arg, child_conn))
        processes.append(process)

    for process in processes:
        process.start()

    for process in processes:
        process.join()

    for parent_connection in parent_connections:
        try:
            response = parent_connection.recv()[0]
        except EOFError:
            response = {}
        if 'body' not in response:  # Worker Lambda threw exception
            continue
        body = json.loads(response['body'])
        row = (body['midtime'], body['signal'], body['background'])
        if np.all(list(map(np.isfinite, row))):
            data.append(row)

    # TODO: Save data as table.
    # filename = f'/tmp/{obs_id}_lightcurve.csv'
    # with open(filename, 'w') as fout:
    #     for row in data:
    #         fout.write(f'{row[0]},{row[1]},{row[2]}{os.linesep}')

    # TODO: Upload table to S3 and then delete the table locally.
    # TODO: Return table S3 URL below.
    # TODO: Do we want to plot it and upload the plot too?
    #       If so, need to add matplotlib as dependency.

    return {
        "statusCode": 200,
        "body": json.dumps({
            'n_rows': len(data),
            'data_url': 'TODO'
        })
    }
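lambda_handler relies on a _pipe_worker helper that is not included in this example. Judging from the invocation pattern above (and the AWS parallel-processing blog post linked in the comments), it is presumably something like the sketch below, which invokes the tess_fullframe_worker Lambda synchronously and sends the decoded response back through the pipe; treat the details as assumptions.

import json

import boto3


def _pipe_worker(payload_json, conn):
    # Hypothetical reconstruction: call the worker Lambda and push its
    # response through the pipe for lambda_handler to collect.
    client = boto3.client('lambda')
    try:
        response = client.invoke(
            FunctionName='tess_fullframe_worker',
            InvocationType='RequestResponse',
            Payload=payload_json)
        result = json.loads(response['Payload'].read())
    except Exception:
        result = {}
    # lambda_handler does parent_connection.recv()[0], so send a list.
    conn.send([result])
    conn.close()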