Code example #1
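These snippets are load() functions from the ariel cost-reporting project and assume module-level imports roughly like the following (a sketch; `from ariel import utils` is an assumption about the project layout):

    import datetime
    import logging
    import os
    import time
    from time import sleep

    import boto3
    import pandas as pd
    import yaml
    from botocore.exceptions import ClientError

    from ariel import utils  # assumption: ariel's shared helper module

    LOGGER = logging.getLogger(__name__)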
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-account-names.yaml'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        retries = utils.get_config_value(config, 'ACCOUNT_NAMES', 'RETRIES', 5)
        file = utils.get_config_value(config, 'ACCOUNT_NAMES', 'FILE', '')

        account_names = {}
        if role != '':
            # Organizations should be queried, load that first
            session = utils.assume_role(boto3.Session(), role)
            org = session.client('organizations', region_name='us-east-1')

            rsp = org.list_accounts()
            while True:
                for acct in rsp['Accounts']:
                    account_names[acct['Id']] = acct['Name']

                if 'NextToken' in rsp:
                    for i in range(retries):
                        try:
                            rsp = org.list_accounts(NextToken=rsp['NextToken'])
                            break
                        except ClientError:
                            # i runs 0..retries-1, so re-raise once the final
                            # attempt fails.
                            if i == retries - 1:
                                raise
                            sleep(0.5 + 0.1 * i)
                    continue
                break

        if file != '':
            # Update account names with file contents
            with utils.get_read_handle(file) as f:
                account_names.update(yaml.load(f, Loader=yaml.FullLoader))

        with open(cache_file, 'w') as outfile:
            yaml.dump(account_names, outfile, default_flow_style=False)

        return account_names

    with utils.get_read_handle(cache_file) as infile:
        account_names = yaml.load(infile, Loader=yaml.FullLoader)
        return account_names
Code example #2
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-unlimited-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)
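        # e.g. 's3://bucket/ariel-cur-output/'.split('/', 3)
        #   -> ['s3:', '', 'bucket', 'ariel-cur-output/']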

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')

        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)
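        # With the defaults (DAYS=28, OFFSET=1) this is a 28-day window of
        # complete days ending at yesterday's midnight.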

        # Download Instance and RI usage
        query = ' '.join("""
            SELECT line_item_usage_account_id AS accountid,
                   product_region AS region,
                   lower(product_instance) AS instancetypefamily,
                   sum(line_item_usage_amount) AS unlimitedusageamount,
                   sum(line_item_unblended_cost) AS unlimitedusagecost
              FROM {database}.cur
             WHERE line_item_usage_type like '%CPUCredits:%'
               AND line_item_usage_start_date >= cast('{start}' as timestamp)
               AND line_item_usage_start_date < cast('{end}' as timestamp)
             GROUP BY line_item_usage_account_id, product_region, lower(product_instance)
             ORDER BY line_item_usage_account_id, product_region, lower(product_instance)
        """.format(database=database,
                   start=starttime.isoformat(' '),
                   end=endtime.isoformat(' ')).split())
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file)
    if len(result) == 0:
        result = pd.DataFrame(columns=[
            'accountid', 'region', 'instancetypefamily',
            'unlimitedusageamount', 'unlimitedusagecost'
        ])

    result['accountid'] = result['accountid'].map('{:012}'.format)
    result['unlimitedusageamount'] = result['unlimitedusageamount'].map(
        '{:.2f}'.format)
    result['unlimitedusagecost'] = result['unlimitedusagecost'].map(
        '${:,.2f}'.format)
    LOGGER.info("Loaded {} unlimited rows".format(len(result)))
    return result
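For reference, the final formatting pass turns the numeric CSV columns back into display strings using standard str.format specs, e.g.:

    >>> '{:012}'.format(123456789)   # zero-pad account ids to 12 digits
    '000123456789'
    >>> '${:,.2f}'.format(1234.5)    # currency with thousands separator
    '$1,234.50'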
Code example #3
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-reserved-instances.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
        ris = pd.read_csv(cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'RESERVED_INSTANCES', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        # start date cannot be after 2 days ago for GetReservationUtilization
        monthend = datetime.date.today()
        monthstart = (datetime.date.today() - datetime.timedelta(days=31))

        ris = []
        if role != '':
            session = utils.assume_role(boto3.Session(), role)
            ce = session.client('ce', region_name=region)

            rsp = ce.get_reservation_utilization(
                TimePeriod={"Start": str(monthstart), "End": str(monthend)},
                GroupBy=[{"Type": "DIMENSION", "Key": "SUBSCRIPTION_ID"}])

            while True:
                groups = rsp['UtilizationsByTime'][0]['Groups']
                for row in groups:
                    # Make sure to only capture active RIs
                    endDate = datetime.datetime.strptime(
                        row['Attributes']['endDateTime'],
                        "%Y-%m-%dT%H:%M:%S.000Z")
                    if endDate.date() > datetime.date.today():
                        # Normalize platform naming for CUR compatibility
                        platform = row['Attributes']['platform']
                        operatingSystem = ('Linux' if platform == 'Linux/UNIX'
                                           else platform)
                        ris.append({
                            'accountid': int(row['Attributes']['accountId']),
                            'accountname': row['Attributes']['accountName'],
                            'reservationid': row['Attributes']['leaseId'],
                            'subscriptionid': row['Attributes']['subscriptionId'],
                            'startdate': row['Attributes']['startDateTime'],
                            'enddate': row['Attributes']['endDateTime'],
                            'state': row['Attributes']['subscriptionStatus'],
                            'quantity': int(row['Attributes']['numberOfInstances']),
                            'availabilityzone': row['Attributes']['availabilityZone'],
                            'region': row['Attributes']['region'],
                            'instancetype': row['Attributes']['instanceType'],
                            'paymentoption': row['Attributes']['subscriptionType'],
                            'tenancy': row['Attributes']['tenancy'],
                            'operatingsystem': operatingSystem,
                            'amortizedhours': int(row['Utilization']['PurchasedHours']),
                            'amortizedupfrontprice': float(row['Utilization']['AmortizedUpfrontFee']),
                            'amortizedrecurringfee': float(row['Utilization']['AmortizedRecurringFee']),
                            'offeringclass': row['Attributes']['offeringType'],
                            'scope': row['Attributes']['scope'],
                        })

                if 'NextToken' in rsp:
                    # TimePeriod and GroupBy must be passed again on each
                    # paginated call; only the token changes.
                    rsp = ce.get_reservation_utilization(
                        TimePeriod={"Start": str(monthstart),
                                    "End": str(monthend)},
                        GroupBy=[{"Type": "DIMENSION",
                                  "Key": "SUBSCRIPTION_ID"}],
                        NextToken=rsp['NextToken'])
                    continue
                break

        ris = pd.DataFrame.from_records(ris)
        ris.to_csv(cache_file, index=False)

    LOGGER.info("Loaded {} reserved instances".format(len(ris)))
    return ris
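A typical call site is simply (sketch; `config` is the parsed ariel configuration object handed in by the caller):

    ris = load(config)
    print(ris.head())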
Code example #4
File: get_locations.py Project: sqlheisenberg/ariel
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-locations.yaml'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            pass

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')

        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Retrieve location to region mapping for use with ec2 pricing data
        query = ' '.join("""
            SELECT DISTINCT product_location, product_region
              FROM {database}.cur
             WHERE line_item_usage_start_date >= cast('{start}' as timestamp)
               AND line_item_usage_start_date < cast('{end}' as timestamp)
               AND product_operation = 'RunInstances'
        """.format(database=database,
                   start=starttime.isoformat(' '),
                   end=endtime.isoformat(' ')).split())
        map_id = utils.execute_athena_query(athena, staging, query)
        map_result = athena.get_query_results(
            QueryExecutionId=map_id)['ResultSet']['Rows']
        locations = {}
        for entry in map_result[1:]:  # row 0 is the header
            row = entry['Data']
            locations[row[0]['VarCharValue']] = row[1]['VarCharValue']

        with open(cache_file, 'w') as outfile:
            yaml.dump(locations, outfile, default_flow_style=False)
        return locations

    with utils.get_read_handle(cache_file) as infile:
        locations = yaml.load(infile, Loader=yaml.FullLoader)
        return locations
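The returned mapping pairs the human-readable `product_location` from the CUR with its region code; entries look roughly like this (illustrative values):

    {'US East (N. Virginia)': 'us-east-1',
     'EU (Ireland)': 'eu-west-1'}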
Code example #5
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-account-instance-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        table_name = utils.get_config_value(config, 'ATHENA', 'CUR_TABLE_NAME',
                                            'cur')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')

        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Download Instance and RI usage
        # line_item_availability_zone NOT LIKE '%-wlz-%' filters out Wavelength
        # instances; they are not available for RIs.
        query = ' '.join("""
              WITH preprocess AS (
                   SELECT line_item_usage_start_date AS usagestartdate,
                          line_item_usage_account_id AS usageaccountid,
                          line_item_availability_zone AS availabilityzone,
                          CASE WHEN line_item_usage_type LIKE '%:%' THEN SPLIT(line_item_usage_type, ':')[2]
                               WHEN line_item_line_item_description LIKE '%m1.small%' THEN 'm1.small'
                               WHEN line_item_line_item_description LIKE '%m1.medium%' THEN 'm1.medium'
                               WHEN line_item_line_item_description LIKE '%m1.large%' THEN 'm1.large'
                               WHEN line_item_line_item_description LIKE '%m1.xlarge%' THEN 'm1.xlarge'
                               ELSE 'm1.error'
                          END AS instancetype,
                          product_tenancy AS tenancy,
                          product_operating_system AS operatingsystem,
                          CAST(line_item_usage_amount AS double) AS usageamount,
                          CASE WHEN line_item_line_item_type = 'DiscountedUsage'
                               THEN CAST(line_item_usage_amount AS double) ELSE 0
                          END AS reservedamount
                     FROM {database}.{table}
                    WHERE product_operation = 'RunInstances'
                      AND line_item_availability_zone != ''
                      AND line_item_availability_zone NOT LIKE '%-wlz-%'
                      AND product_tenancy = 'Shared'
                   )
            SELECT usagestartdate, usageaccountid, availabilityzone, instancetype,
                   tenancy, operatingsystem,
                   SUM(usageamount) AS instances, SUM(reservedamount) AS reserved
              FROM preprocess
             WHERE usagestartdate >= cast('{start}' as timestamp)
               AND usagestartdate < cast('{end}' as timestamp)
             GROUP BY usagestartdate, usageaccountid, availabilityzone, instancetype, tenancy, operatingsystem
             ORDER BY usagestartdate, usageaccountid, availabilityzone, instancetype, tenancy, operatingsystem
        """.format(database=database, table=table_name,
                   start=starttime.isoformat(' '),
                   end=endtime.isoformat(' ')).split())
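        # Note: Presto/Athena arrays are 1-indexed, so SPLIT(usage_type, ':')[2]
        # above maps e.g. 'BoxUsage:m5.large' -> 'm5.large'.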
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file, parse_dates=['usagestartdate'])

    LOGGER.info("Loaded {} instance summary rows".format(len(result)))
    return result
Code example #6
def load(config):

    # If a local cache file exists and is less than a day old, just use it.
    cache_file = '/tmp/cached-unlimited-summary.csv'
    caching = utils.get_config_value(config, 'DEFAULTS', 'CACHING', False)
    mtime = 0
    if caching:
        try:
            mtime = os.stat(cache_file).st_mtime
        except FileNotFoundError:
            mtime = 0

    if mtime > time.time() - 86400:
        LOGGER.info("Using existing cache file: " + cache_file)
    else:
        account, role = utils.get_master(config)
        region = utils.get_config_value(
            config, 'ATHENA', 'AWS_REGION',
            utils.get_config_value(config, 'DEFAULTS', 'AWS_REGION',
                                   os.environ.get('AWS_DEFAULT_REGION')))
        database = utils.get_config_value(config, 'ATHENA', 'CUR_DATABASE')
        table_name = utils.get_config_value(config, 'ATHENA', 'CUR_TABLE_NAME',
                                            'cur')
        staging = utils.get_config_value(
            config, 'ATHENA', 'STAGING',
            's3://aws-athena-query-results-{0}-{1}/ariel-cur-output/'.format(
                account, region))
        days = utils.get_config_value(config, 'ATHENA', 'DAYS', 28)
        offset = utils.get_config_value(config, 'ATHENA', 'OFFSET', 1)

        session = boto3.Session()
        proto, empty, staging_bucket, staging_prefix = staging.split('/', 3)

        # Assume role if needed
        if role is not None:
            session = utils.assume_role(session, role)

        # Connect to Athena
        athena = session.client('athena', region_name=region)

        # Validate database is usable
        status_id = utils.execute_athena_query(
            athena, staging,
            'SELECT status FROM ' + database + '.cost_and_usage_data_status')

        # Row 0 is header
        status = athena.get_query_results(
            QueryExecutionId=status_id
        )['ResultSet']['Rows'][1]['Data'][0]['VarCharValue']
        if status != 'READY':
            raise Exception('Athena database not in READY status')

        # Identify start to end range query
        today = datetime.datetime.combine(datetime.datetime.today(),
                                          datetime.time.min)
        endtime = today - datetime.timedelta(days=offset)
        starttime = endtime - datetime.timedelta(days=days)

        # Download Instance and RI usage
        # meckstmd: 07/30/2019 - Something must have changed with the way AWS exposes CPU Credits.
        #  Each account with CPU Credits now gets rows with line_item_line_item_type = 'Tax' that
        #  have no product_region or product_instance.  Because those fields are empty, Ariel
        #  fails when inserting this report into the unlimited_usage DB table, which does not
        #  allow nulls.  The rows with line_item_line_item_type = 'Usage' carry the per-instance
        #  CPU credits for unlimited usage and do have product_region and product_instance.  I am
        #  guessing the Tax rows were recently added to this report, and that is what broke Ariel.
        #  See https://github.com/yahoo/ariel/issues/5
        query = ' '.join("""
            SELECT line_item_usage_account_id AS accountid,
                   product_region AS region,
                   lower(product_instance) AS instancetypefamily,
                   sum(line_item_usage_amount) AS unlimitedusageamount,
                   sum(line_item_unblended_cost) AS unlimitedusagecost
              FROM {database}.{table}
             WHERE line_item_usage_type like '%CPUCredits:%'
               AND line_item_usage_start_date >= cast('{start}' as timestamp)
               AND line_item_usage_start_date < cast('{end}' as timestamp)
               AND product_region <> '' AND product_instance <> ''
             GROUP BY line_item_usage_account_id, product_region, lower(product_instance)
             ORDER BY line_item_usage_account_id, product_region, lower(product_instance)
        """.format(database=database, table=table_name,
                   start=starttime.isoformat(' '),
                   end=endtime.isoformat(' ')).split())
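        # Typical matching usage types look like 'CPUCredits:t3' or
        # 'USE2-CPUCredits:t3' (illustrative values; exact strings vary).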
        query_id = utils.execute_athena_query(athena, staging, query)
        session.client('s3').download_file(
            staging_bucket, '{0}{1}.csv'.format(staging_prefix, query_id),
            cache_file)

    result = pd.read_csv(cache_file)
    if len(result) == 0:
        result = pd.DataFrame(columns=[
            'accountid', 'region', 'instancetypefamily',
            'unlimitedusageamount', 'unlimitedusagecost'
        ])

    result['accountid'] = result['accountid'].map('{:012}'.format)
    result['unlimitedusageamount'] = result['unlimitedusageamount'].map(
        '{:.2f}'.format)
    result['unlimitedusagecost'] = result['unlimitedusagecost'].map(
        '${:,.2f}'.format)
    LOGGER.info("Loaded {} unlimited rows".format(len(result)))
    return result