Esempio n. 1
0
    def process_envs(self, envs, dtime_from, dtime_to):
        """Bill Azure usage for a group of environments over one interval.

        The first environment in ``envs`` is passed to
        ``self.load_access_token`` and then supplies the subscription id and
        access token used for every call.  Usage already accumulated per
        meter between the first day of ``dtime_from``'s month (hour reset to
        0) and ``dtime_from`` is fetched up front; each record is costed via
        ``self.set_cost`` with the running total for its meter and month, and
        the total is then increased by the record's quantity.

        Records are inserted asynchronously through ``self.pool`` in chunks
        of ``insert_chunk_size``.  Any exception raised inside the processing
        is handed to ``helper.handle_error`` with a message naming the
        environment ids.

        :param envs: environment dicts; each has an ``'id'`` key, the first
            one also carries the ``'azure.*'`` settings.
        :param dtime_from: start of the billing interval.
        :param dtime_to: end of the billing interval.
        """
        envs_ids = list(set(env['id'] for env in envs))
        try:
            first_env = envs[0]
            self.load_access_token(first_env)

            subscription_id = first_env['azure.subscription_id']
            access_token = first_env['azure.access_token']

            month_start = dtime_from.replace(day=1, hour=0)
            usage_by_meter = self.get_meters_ids_usage(
                subscription_id, access_token, month_start, dtime_from)

            for rows in self.get_usage(subscription_id, access_token, dtime_from, dtime_to):
                # Keep records with no env_id yet, or with one of ours;
                # records tied to a foreign environment are dropped early.
                candidates = [
                    rec for rec in self.get_records(rows)
                    if not rec.get('env_id') or rec['env_id'] in envs_ids
                ]
                self.load_records_data(candidates)
                # After loading, every kept record must belong to our envs.
                billable = [rec for rec in candidates if rec['env_id'] in envs_ids]

                for rec in billable:
                    by_month = usage_by_meter.setdefault(rec['meter_id'], {})
                    month = rec['dtime'].month
                    by_month.setdefault(month, 0.0)
                    # Cost is computed against the usage seen *before* this record.
                    self.set_cost(rec, subscription_id, access_token, by_month[month])
                    by_month[month] += rec['quantity']

                for batch in helper.chunks(billable, insert_chunk_size):
                    self.pool.wait()
                    self.pool.apply_async(
                        self.analytics.insert_records,
                        (batch,),
                        {'callback': self.on_insert_records},
                    )
                    gevent.sleep(0)  # force switch
        except:
            failure = 'Azure billing for environments {} failed'.format(envs_ids)
            helper.handle_error(message=failure)
Esempio n. 2
0
    def __call__(self):
        """Run one AWS billing pass over the current billing interval.

        Two groups of environments are scheduled on ``self.pool`` via
        ``self.process_envs``:

        1. environments loaded by AWS account id, with EC2 enabled, detailed
           billing enabled and no payer account configured;
        2. environments loaded by payer account, with EC2 enabled and
           detailed billing enabled.

        Ids are handled in chunks of 100.  On any exception the pool is
        killed, the error is reported through ``helper.handle_error`` and the
        exception is re-raised.  In every case ``self.downloading_locks`` is
        reset and removal of the cache directory is attempted.
        """
        try:
            dtime_from, dtime_to = self.get_billing_interval()
            LOG.info('AWS billing interval: {} - {}'.format(dtime_from, dtime_to))

            self._create_cache_dir()

            def flag_on(env, key):
                # Settings are stored as the strings '0' / '1'.
                return env.get(key, '0') == '1'

            def submit(envs_group):
                self._wait_pool()
                self.pool.apply_async(self.process_envs,
                                      args=(envs_group, dtime_from, dtime_to))

            # Group 1: environments without a payer account.
            accounts_ids = self.analytics.load_aws_accounts_ids()
            for ids_chunk in helper.chunks(accounts_ids, 100):
                loaded = self.analytics.load_aws_accounts_ids_envs(ids_chunk)
                enabled = [env for env in loaded if flag_on(env, 'ec2.is_enabled')]
                # Credentials are loaded before the detailed-billing filter,
                # which reads the 'ec2.detailed_billing.*' keys.
                self.analytics.load_env_credentials(enabled, platform='ec2')
                standalone = [
                    env for env in enabled
                    if flag_on(env, 'ec2.detailed_billing.enabled') and
                    env.get('ec2.detailed_billing.payer_account') in (None, '')
                ]
                if standalone:
                    submit(standalone)

            # Group 2: environments loaded through payer accounts.
            payers_accounts = self.analytics.load_aws_payers_accounts()
            for payers_chunk in helper.chunks(payers_accounts, 100):
                loaded = self.analytics.load_aws_payers_accounts_envs(payers_chunk)
                enabled = [env for env in loaded if flag_on(env, 'ec2.is_enabled')]
                self.analytics.load_env_credentials(enabled, platform='ec2')
                billed = [env for env in enabled
                          if flag_on(env, 'ec2.detailed_billing.enabled')]
                if billed:
                    submit(billed)

            self.pool.join()
        except:
            self.pool.kill()
            helper.handle_error(message='AWS billing failed')
            raise
        finally:
            self.downloading_locks = {}
            try:
                self._remove_cache_dir()
            except:
                # Cleanup failure is reported but never masks the outcome above.
                helper.handle_error(
                    message='Unable to remove cache dir {}'.format(self.cache_dir),
                    level='error')
Esempio n. 3
0
def sort_nodes(cloud_data, cred, envs_ids):
    """Split every region's nodes into ``managed`` and ``not_managed`` lists.

    Each element of ``cloud_data`` is modified in place: its ``'nodes'`` key
    is removed and ``'managed'`` / ``'not_managed'`` lists are added.

    * ``gce``: a node is managed when it has a truthy ``server_id`` that is
      returned by the ``servers_history`` lookup and its ``env_id`` is in
      ``envs_ids``.
    * any other platform: ``app.analytics.get_server_id_by_instance_id`` is
      called on the nodes in chunks of 200, after which a node is managed
      when it has a ``'server_id'`` key (presumably set by that helper --
      TODO confirm).

    :param cloud_data: iterable of per-region dicts holding ``'nodes'`` (and
        ``'region'`` for non-gce platforms).
    :param cred: credentials object; ``cred.platform`` selects the strategy
        and, for non-gce platforms, ``cred[url_key]`` supplies the url.
    :param envs_ids: container of environment ids the nodes may belong to.
    :returns: the same ``cloud_data`` object that was passed in.
    """
    platform = cred.platform

    # gce
    if platform == 'gce':
        query = ("SELECT server_id "
                 "FROM servers_history "
                 "WHERE server_id IN ({})")
        for region_data in cloud_data:
            region_data['managed'] = list()
            region_data['not_managed'] = list()
            servers_ids = [
                str(node['server_id']) for node in region_data['nodes']
                if node.get('server_id')
            ]
            if servers_ids:
                # NOTE(review): ids are interpolated into the SQL text via the
                # list repr; switch to bound parameters if scalr_db.execute
                # supports them -- confirm against its API.
                # A set makes the per-node membership test below O(1)
                # instead of scanning the whole result list for every node.
                known_ids = set(
                    result['server_id'] for result in app.scalr_db.execute(
                        query.format(str(servers_ids)[1:-1])))
            else:
                # Nothing to look up: skip the query entirely.
                known_ids = set()
            for node in region_data['nodes']:
                server_id = node.get('server_id')
                if server_id and server_id in known_ids and node[
                        'env_id'] in envs_ids:
                    region_data['managed'].append(node)
                else:
                    region_data['not_managed'].append(node)
            del region_data['nodes']
        return cloud_data

    # all platforms exclude gce
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(chunk,
                                                       platform,
                                                       cloud_location,
                                                       envs_ids=envs_ids,
                                                       url=url)
        region_data['managed'] = list()
        region_data['not_managed'] = list()
        for node in region_data['nodes']:
            if 'server_id' in node:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']

    return cloud_data
Esempio n. 4
0
def sort_nodes(cloud_data, cred, envs_ids):
    """Partition the nodes of each region into managed / not managed.

    Every dict in ``cloud_data`` is updated in place: ``'nodes'`` is deleted
    and replaced by the two lists ``'managed'`` and ``'not_managed'``.

    For ``gce`` a node counts as managed when its ``server_id`` is truthy,
    is found in ``servers_history`` and its ``env_id`` is in ``envs_ids``.
    For all other platforms the nodes are first passed, 200 at a time, to
    ``app.analytics.get_server_id_by_instance_id``; a node is then managed
    when it contains a ``'server_id'`` key (presumably added by that call --
    TODO confirm).

    :param cloud_data: iterable of per-region dicts with a ``'nodes'`` list
        (plus ``'region'`` for non-gce platforms).
    :param cred: credentials object exposing ``platform`` and item access.
    :param envs_ids: container of accepted environment ids.
    :returns: ``cloud_data`` itself.
    """
    platform = cred.platform

    # gce
    if platform == 'gce':
        query = (
            "SELECT server_id "
            "FROM servers_history "
            "WHERE server_id IN ({})"
        )
        for region_data in cloud_data:
            region_data['managed'] = list()
            region_data['not_managed'] = list()
            servers_ids = [str(node['server_id']) for node in
                           region_data['nodes'] if node.get('server_id')]
            if servers_ids:
                # NOTE(review): the id list is formatted straight into the
                # SQL string; prefer bound parameters if scalr_db.execute
                # accepts them -- confirm.
                rows = app.scalr_db.execute(query.format(str(servers_ids)[1:-1]))
                # Build the lookup once as a set so each node check is O(1)
                # rather than a linear scan of the result list.
                known_ids = set(row['server_id'] for row in rows)
            else:
                # No candidate ids, so no query is issued.
                known_ids = set()
            for node in region_data['nodes']:
                server_id = node.get('server_id')
                if server_id and server_id in known_ids and node['env_id'] in envs_ids:
                    region_data['managed'].append(node)
                else:
                    region_data['not_managed'].append(node)
            del region_data['nodes']
        return cloud_data

    # all platforms exclude gce
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(chunk, platform, cloud_location,
                                                       envs_ids=envs_ids, url=url)
        region_data['managed'] = list()
        region_data['not_managed'] = list()
        for node in region_data['nodes']:
            if 'server_id' in node:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']

    return cloud_data
Esempio n. 5
0
def sort_nodes(cloud_data, cred, envs_ids):
    """Split every region's nodes into ``managed`` and ``not_managed`` lists.

    Each dict in ``cloud_data`` is changed in place: ``'nodes'`` is removed
    and the lists ``'managed'`` and ``'not_managed'`` are added.

    * ``gce``: one EXISTS query per node with a truthy ``server_id``; the
      node is managed when the query reports a match and the node's
      ``env_id`` is in ``envs_ids``.
    * other platforms: nodes are passed in chunks of 200 to
      ``app.analytics.get_server_id_by_instance_id``; afterwards a node is
      managed when it has a ``'server_id'`` key.

    :param cloud_data: iterable of per-region dicts.
    :param cred: credentials object exposing ``platform`` and item access.
    :param envs_ids: container of accepted environment ids.
    :returns: ``cloud_data`` itself.
    """
    platform = cred.platform

    if platform == 'gce':
        exists_query = ("SELECT EXISTS "
                        "(SELECT 1 FROM servers s "
                        "JOIN servers_history h "
                        "ON s.server_id=h.server_id "
                        "WHERE s.server_id='{server_id}') AS value")

        def is_managed(node):
            # Checks run cheapest-first; the query is only issued for nodes
            # that actually carry a server_id.
            if not node.get('server_id', ''):
                return False
            if not app.scalr_db.execute(exists_query.format(**node))[0]['value']:
                return False
            return node['env_id'] in envs_ids

        for region_data in cloud_data:
            region_data['managed'] = []
            region_data['not_managed'] = []
            for node in region_data['nodes']:
                bucket = 'managed' if is_managed(node) else 'not_managed'
                region_data[bucket].append(node)
            del region_data['nodes']
        return cloud_data

    # Every platform other than gce.
    url_key = analytics.url_key_map[platform]
    url = cred[url_key] if url_key else ''
    for region_data in cloud_data:
        cloud_location = region_data['region']
        for chunk in helper.chunks(region_data['nodes'], 200):
            app.analytics.get_server_id_by_instance_id(
                chunk, platform, cloud_location, envs_ids=envs_ids, url=url)
        managed, not_managed = [], []
        region_data['managed'] = managed
        region_data['not_managed'] = not_managed
        for node in region_data['nodes']:
            target = managed if 'server_id' in node else not_managed
            target.append(node)
        del region_data['nodes']

    return cloud_data
Esempio n. 6
0
def sort_nodes(cloud_data, cred, envs_ids):
    """Classify the nodes of each region as managed or not managed.

    The region dicts in ``cloud_data`` are rewritten in place: the
    ``'nodes'`` entry is deleted and ``'managed'`` / ``'not_managed'`` lists
    take its place.

    On ``gce`` a node is managed when it has a truthy ``server_id``, the
    per-node EXISTS query reports a match, and its ``env_id`` is in
    ``envs_ids``.  On any other platform the nodes are handed in chunks of
    200 to ``app.analytics.get_server_id_by_instance_id`` and a node is
    managed when it ends up with a ``'server_id'`` key.

    :param cloud_data: iterable of per-region dicts.
    :param cred: credentials object exposing ``platform`` and item access.
    :param envs_ids: container of accepted environment ids.
    :returns: ``cloud_data`` itself.
    """
    platform = cred.platform

    if platform != 'gce':
        # Every platform other than gce.
        url_key = analytics.url_key_map[platform]
        url = cred[url_key] if url_key else ''
        for region_data in cloud_data:
            cloud_location = region_data['region']
            for chunk in helper.chunks(region_data['nodes'], 200):
                app.analytics.get_server_id_by_instance_id(
                    chunk, envs_ids, platform, cloud_location, url)
            region_data['managed'] = []
            region_data['not_managed'] = []
            for node in region_data['nodes']:
                key = 'managed' if 'server_id' in node else 'not_managed'
                region_data[key].append(node)
            del region_data['nodes']
        return cloud_data

    # gce
    exists_query = (
        "SELECT EXISTS "
        "(SELECT 1 FROM servers s "
        "JOIN servers_history h "
        "ON s.server_id=h.server_id "
        "WHERE s.server_id='{server_id}') AS value"
    )
    for region_data in cloud_data:
        region_data['managed'] = []
        region_data['not_managed'] = []
        for node in region_data['nodes']:
            # Evaluated left to right: the query only runs for nodes that
            # carry a server_id, the env check only for nodes found in the db.
            managed = bool(
                node.get('server_id', '')
                and app.scalr_db.execute(exists_query.format(**node))[0]['value']
                and node['env_id'] in envs_ids
            )
            if managed:
                region_data['managed'].append(node)
            else:
                region_data['not_managed'].append(node)
        del region_data['nodes']
    return cloud_data
Esempio n. 7
0
    def process_csv_file(self, csv_file, envs, dtime_from=None, dtime_to=None):
        """Turn the rows of a billing CSV file into inserted usage records.

        For every batch of rows yielded by ``self.csv_reader`` the records
        are built, repaired by ``self.fix_records_with_missing_server_id``,
        filtered, enriched by ``self.load_records_data``, de-duplicated by
        ``record_id`` and finally inserted asynchronously through
        ``self.pool`` in chunks of ``insert_chunk_size``.

        A record is dropped before enrichment when it has no ``server_id``,
        when it already names an environment outside ``envs``, or when its
        ``cost_distr_type`` is 1 and ``self.analytics.record_exists`` reports
        it as already stored.  After enrichment only records whose
        ``env_id`` belongs to ``envs`` are kept.

        :param csv_file: passed through to ``self.csv_reader``.
        :param envs: environment dicts; ``int(env['id'])`` gives the id.
        :param dtime_from: optional interval start, passed to the reader.
        :param dtime_to: optional interval end, passed to the reader.
        """
        # Membership is tested once or twice per record, so keep ids in a set.
        envs_ids = set(int(env['id']) for env in envs)
        for rows in self.csv_reader(csv_file, envs, dtime_from=dtime_from, dtime_to=dtime_to):
            records = self.get_records(rows)
            self.fix_records_with_missing_server_id(records)
            records = [r for r in records if
                       r.get('server_id') and
                       not (r.get('env_id') and r['env_id'] not in envs_ids) and
                       not (r['cost_distr_type'] == 1 and self.analytics.record_exists(r))]
            self.load_records_data(records)
            records = [record for record in records if record['env_id'] in envs_ids]

            # Remove duplicate records sharing a record_id (the last one wins).
            # Materialise as a list: on Python 3 dict.values() is a view that
            # supports neither indexing nor slicing, while every other caller
            # hands helper.chunks a real list.
            records = list({record['record_id']: record for record in records}.values())

            for chunk in helper.chunks(records, insert_chunk_size):
                self.pool.wait()
                self.pool.apply_async(self.analytics.insert_records, (chunk,),
                                      {'callback': self.on_insert_records})
                gevent.sleep(0)  # force switch
Esempio n. 8
0
    def __call__(self):
        """Run one Azure billing pass over the current billing interval.

        Subscription ids are loaded and handled in chunks of 100; for each
        chunk the matching environments are loaded, their Azure credentials
        are loaded, and non-empty groups are scheduled on ``self.pool`` via
        ``self.process_envs``.  The call returns once the pool has been
        joined.

        On any exception the pool is killed, the error is reported through
        ``helper.handle_error`` and the exception is re-raised.
        """
        try:
            period_start, period_end = self.get_billing_interval()
            LOG.info('Azure billing interval: {} - {}'.format(period_start, period_end))

            subscriptions_ids = self.analytics.load_azure_subscriptions_ids()
            for ids_chunk in helper.chunks(subscriptions_ids, 100):
                envs = self.analytics.load_azure_subscriptions_ids_envs(ids_chunk)
                self.analytics.load_env_credentials(envs, platform='azure')
                if envs:
                    self._wait_pool()
                    self.pool.apply_async(self.process_envs,
                                          args=(envs, period_start, period_end))

            self.pool.join()
        except:
            self.pool.kill()
            helper.handle_error(message='Azure billing failed')
            raise
Esempio n. 9
0
    def __call__(self):
        """Run Scalr Poller billing hour by hour over the billing interval.

        Starting at the interval start and stepping one hour at a time up to
        and including the interval end, every platform in
        ``self.config['platform']`` is processed: poller server records are
        fetched in batches, each record gets ``cost`` (0 when no cost is
        returned), ``num`` = 1.0 and ``cost_distr_type`` = 1, and the records
        are inserted asynchronously through ``self.pool`` in chunks of
        ``insert_chunk_size``.  The pool is joined after each hour.

        A failure for one date/hour/platform is reported through
        ``helper.handle_error`` and processing continues.  Any other
        exception kills the pool, is reported and re-raised.
        """
        try:
            dtime_from, dtime_to = self.get_billing_interval()
            LOG.info('Scalr Poller billing interval: {} - {}'.format(dtime_from, dtime_to))

            one_hour = datetime.timedelta(hours=1)
            current = dtime_from
            while current <= dtime_to:
                date = current.date()
                hour = current.hour
                for platform in self.config['platform']:
                    try:
                        LOG.debug(
                            "Process Scalr Poller data, date {}, hour {}, platform '{}'".format(
                                date, hour, platform))
                        batches = self.analytics.get_poller_servers(date, hour, platform=platform)
                        for records in batches:
                            LOG.debug('Scalr Poller records for processing: {}'.format(len(records)))
                            prices = self.analytics.get_prices(records)
                            for record in records:
                                # A falsy cost (e.g. None) is stored as 0.
                                record['cost'] = self.analytics.get_cost_from_prices(record, prices) or 0
                                record['num'] = 1.0
                                record['cost_distr_type'] = 1
                            for batch in helper.chunks(records, insert_chunk_size):
                                self.pool.wait()
                                self.pool.apply_async(
                                    self.analytics.insert_records,
                                    (batch,),
                                    {'callback': self.on_insert_records},
                                )
                                gevent.sleep(0)  # force switch
                    except:
                        helper.handle_error(
                            message="Scalr Poller billing unable to process date {}, hour {}, platform '{}'".format(
                                date, hour, platform))
                # Finish all inserts for this hour before moving on.
                self.pool.join()
                current += one_hour
        except:
            self.pool.kill()
            helper.handle_error(message='Scalr Poller billing failed')
            raise
Esempio n. 10
0
    def delete_data(self, csv_file, envs, period):
        """Delete stored AWS detailed billing data for ``envs`` and ``period``.

        While holding ``self.analytics.lock`` and with autocommit switched
        off on ``self.analytics.analytics_db``:

        1. rows of ``aws_billing_records`` whose ``record_id`` appears as
           ``RecordId`` in the CSV rows yielded by ``self.csv_reader`` are
           deleted, 1000 ids per statement;
        2. the period is walked in 15-day windows and, for each window, the
           ``ec2`` rows of ``usage_h`` (with joined ``usage_servers_h``),
           ``usage_d`` and ``farm_usage_d`` for the given environments are
           deleted.

        Everything is committed at the end; on any exception the work is
        rolled back and the exception re-raised.  Autocommit is switched
        back on in every case.

        :param csv_file: passed through to ``self.csv_reader``.
        :param envs: environment dicts; ``int(env['id'])`` gives the id.
        :param period: ``(dtime_from, dtime_to)`` pair.
        """
        envs_ids = list(set(int(env['id']) for env in envs))
        dtime_from, dtime_to = period

        LOG.info(
            'Deleting AWS detailed billing data for environments: {}, period: {} - {}'.format(
                envs_ids, dtime_from, dtime_to))

        # Comma-separated id list for the IN (...) clauses: list repr minus brackets.
        env_ids_sql = str(envs_ids)[1:-1]
        window = datetime.timedelta(days=15)

        records_sql = (
            "DELETE FROM aws_billing_records "
            "WHERE record_id IN ({record_id})"
        )
        hourly_sql = (
            "DELETE uh, us "
            "FROM usage_h uh "
            "LEFT JOIN usage_servers_h us ON uh.usage_id=us.usage_id "
            "WHERE uh.platform='ec2' "
            "AND uh.dtime BETWEEN '{dtime_from}' AND '{dtime_to}' "
            "AND uh.env_id IN ({env_id})"
        )
        daily_sql = (
            "DELETE FROM {table} "
            "WHERE platform='ec2' "
            "AND date BETWEEN '{date_from}' AND '{date_to}' "
            "AND env_id IN ({env_id})"
        )

        with self.analytics.lock:
            self.analytics.analytics_db.autocommit(False)
            try:
                # aws_billing_records
                for rows in self.csv_reader(csv_file, envs, dtime_from=dtime_from, dtime_to=dtime_to):
                    records_ids = [row['RecordId'] for row in rows]
                    for ids_chunk in helper.chunks(records_ids, 1000):
                        if not ids_chunk:
                            continue
                        self.analytics.analytics_db.execute(
                            records_sql.format(record_id=str(ids_chunk)[1:-1]))

                window_start = dtime_from
                while window_start < dtime_to:
                    # The last window is clipped to the end of the period.
                    window_end = min(window_start + window, dtime_to)

                    # usage_servers_h, usage_h
                    self.analytics.analytics_db.execute(hourly_sql.format(
                        env_id=env_ids_sql, dtime_from=window_start, dtime_to=window_end))

                    # usage_d, then farm_usage_d
                    date_from, date_to = window_start.date(), window_end.date()
                    for table in ('usage_d', 'farm_usage_d'):
                        self.analytics.analytics_db.execute(daily_sql.format(
                            table=table, env_id=env_ids_sql,
                            date_from=date_from, date_to=date_to))

                    window_start += window

                self.analytics.analytics_db.commit()
            except:
                self.analytics.analytics_db.rollback()
                raise
            finally:
                self.analytics.analytics_db.autocommit(True)