Example #1
0
def get_report_details(report_directory):
    """
    Get OCP usage report details from manifest file.

    Date range is aligned on the first day of the current
    month and ends on the first day of the next month from the
    specified date.

    Args:
        report_directory (String): base directory for report.

    Returns:
        (Dict): keys: value
            file: String,
            cluster_id: String,
            date: DateTime,
            uuid: String,
            manifest_path: String

    """
    manifest_path = "{}/{}".format(report_directory, "manifest.json")

    payload_dict = {}
    try:
        with open(manifest_path) as file:
            payload_dict = json.load(file)
            payload_dict["date"] = parser.parse(payload_dict["date"])
            payload_dict["manifest_path"] = manifest_path
    except (OSError, IOError, KeyError):
        LOG.error("Unable to extract manifest data")

    return payload_dict
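
A minimal usage sketch (assumption: the module-level json, dateutil parser, and LOG objects this snippet relies on are in place; the payload directory path is hypothetical):

# Hypothetical usage: read the manifest from an extracted payload directory.
details = get_report_details("/tmp/payload")
if details:
    print(details["cluster_id"], details["date"].isoformat())
else:
    print("manifest could not be read")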
Example #2
0
def upload_to_azure_container(storage_file_name, local_path,
                              storage_file_path):
    """Upload data to a storage account.

    Args:
        storage_file_name (String): The name of the container to upload to
        local_path  (String): The full local file system path of the file
        storage_file_path (String): The file path to upload to within the container

    Returns:
        (Boolean): True if file was uploaded

    """
    try:
        # Retrieve the connection string for use with the application.
        connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        blob_service_client = BlobServiceClient.from_connection_string(
            connect_str)
        blob_client = blob_service_client.get_blob_client(
            container=storage_file_name, blob=storage_file_path)
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data=data)
        LOG.info(f"uploaded {storage_file_name} to {storage_file_path}")
    except (CloudError, ClientException, IOError) as error:
        LOG.error(error)
        traceback.print_exc(file=sys.stderr)
        return False
    return True
Example #3
0
def copy_to_local_dir(local_dir_home, local_path, local_file_path=None):
    """Upload data to an local directory.

    Args:
        local_dir_home (String): Local file path representing the bucket
        local_path  (String): The local file system path of the file
        local_file_path (String): The destination path within the local directory
    Returns:
        (Boolean): True if the file was copied

    """
    if not os.path.isdir(local_dir_home):
        LOG.info(
            f"Path does not exist for the local directory: {local_dir_home}")
        return False
    full_bucket_path = local_dir_home
    outpath = local_path
    if local_file_path:
        full_bucket_path = f"{local_dir_home}/{local_file_path}"
        outpath = local_file_path
    os.makedirs(os.path.dirname(full_bucket_path), exist_ok=True)
    shutil.copyfile(local_path, full_bucket_path)
    msg = f"Copied {outpath} to local directory {local_dir_home}."
    LOG.info(msg)
    return True
Example #4
0
def _write_csv(output_file, data, header):
    """Output csv file data."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        for row in data:
            writer.writerow(row)
Example #5
0
def _write_jsonl(output_file, data):
    """Output JSON Lines file data for bigquery."""
    LOG.info(f"Writing to {output_file.split('/')[-1]}")
    with open(output_file, "w") as file:
        for row in data:
            json.dump(row, file)
            # each dictionary "row" is its own line in a JSONL file
            file.write("\n")
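
A short sketch exercising both writers with the same rows; the field names and output paths below are illustrative and not taken from the surrounding code:

# Illustrative rows and paths, not from the nise codebase.
rows = [
    {"usage_start": "2023-06-01T00:00:00Z", "cost": 0.42},
    {"usage_start": "2023-06-01T01:00:00Z", "cost": 0.37},
]
_write_csv("/tmp/example_report.csv", rows, header=["usage_start", "cost"])
_write_jsonl("/tmp/example_report.jsonl", rows)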
Example #6
0
def _remove_files(file_list):
    """Remove files."""
    for file_path in file_list:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            LOG.error(f"File {file_path} was not found.")
            raise
Example #7
0
    def __init__(self, start_date, end_date, user_config=None):
        """Initialize the generator."""
        if not self.TEMPLATE:
            raise AttributeError("Class attribute 'TEMPLATE' must be defined.")

        if not self.TEMPLATE_KWARGS:
            raise AttributeError(
                "Class attribute 'TEMPLATE_KWARGS' must be defined.")

        LOADER_LIST.append(PackageLoader("nise"))
        if user_config:
            LOADER_LIST.append(
                FileSystemLoader(os.path.abspath(
                    os.path.dirname(user_config))))

        env = Environment(loader=ChoiceLoader(LOADER_LIST))
        env.globals["faker"] = faker_passthrough

        default_template = env.get_template(self.TEMPLATE)
        if user_config:
            user_template = env.get_template(os.path.basename(user_config))
            user_yaml = load_yaml(user_template.render(**self.TEMPLATE_KWARGS))

            # sort lists of dicts so that generator class names align.
            generators = user_yaml.get("generators")
            user_yaml["generators"] = sorted(generators,
                                             key=lambda d: list(d.keys()))

            default_yaml = load_yaml(
                default_template.render(**self.TEMPLATE_KWARGS))
            config = deepupdate(
                default_yaml, user_yaml
            )  # merge user-supplied static file with base template
        else:
            config = load_yaml(default_template.render(**self.TEMPLATE_KWARGS))

        # handle special-cases in YAML config syntax
        config = self._format_config(config)

        # remove top-level class name
        self.config = []
        for generators in config.get("generators"):
            for key, val in generators.items():
                if key == type(self).__name__:
                    self.config.append(val)

        self.start_date = self._set_date_config(start_date, "start")
        self.end_date = self._set_date_config(end_date, "end")
        self.hours = self._set_hours()
        self.days = self._set_days()

        LOG.debug("Current config: %s", pformat(self.config))

        super().__init__()
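
The constructor merges the rendered user YAML over the rendered default template with deepupdate, which is not shown in these examples. A minimal sketch of the kind of recursive dict merge such a helper presumably performs (an assumption, not the actual nise implementation):

def deepupdate_sketch(base, overrides):
    """Recursively merge overrides into base (hypothetical stand-in)."""
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            deepupdate_sketch(base[key], value)
        else:
            # Non-dict values (and new keys) from overrides win outright.
            base[key] = value
    return base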
Example #8
0
def run(provider_type, options):
    """Run nise."""
    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
Example #9
0
def load_static_report_data(options):
    """Load and set start and end dates if static file is provided."""
    if not options.get("static_report_file"):
        options["start_date"] = datetime.now().replace(day=1,
                                                       hour=0,
                                                       minute=0,
                                                       second=0,
                                                       microsecond=0)
        options["end_date"] = datetime.now().replace(hour=0,
                                                     minute=0,
                                                     second=0,
                                                     microsecond=0)
        return options

    LOG.info("Loading static data...")
    start_dates = {}
    end_dates = {}
    static_report_data = load_yaml(options.get("static_report_file"))
    for generator_dict in static_report_data.get("generators"):
        for genname, attributes in generator_dict.items():

            generated_start_date = calculate_start_date(
                attributes.get("start_date"))
            start_dates[genname] = generated_start_date

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(
                    generated_start_date, attributes.get("end_date"))
            else:
                generated_end_date = today()

            if options.get("provider") == "azure":
                generated_end_date += timedelta(hours=24)
            else:
                generated_end_date = generated_end_date.replace(hour=23,
                                                                minute=59)
            end_dates[genname] = generated_end_date

    options["gen_starts"] = start_dates
    options["gen_ends"] = end_dates

    options["start_date"] = min(start_dates.values())
    latest_date = max(end_dates.values())
    last_day_of_month = calendar.monthrange(year=latest_date.year,
                                            month=latest_date.month)[1]
    now = datetime.now()
    if latest_date.month == now.month and latest_date.year == now.year:
        last_day_of_month = now.day  # don't generate date into the future.
    options["end_date"] = latest_date.replace(day=last_day_of_month,
                                              hour=0,
                                              minute=0)

    return options
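
The month-end clamp relies on calendar.monthrange, which returns the weekday of the month's first day and the number of days in the month. A quick worked example with an arbitrary date:

import calendar
from datetime import datetime

latest = datetime(2021, 2, 15)
# monthrange()[1] is the day count for that month: 28 for February 2021.
last_day = calendar.monthrange(year=latest.year, month=latest.month)[1]
print(latest.replace(day=last_day, hour=0, minute=0))  # 2021-02-28 00:00:00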
Example #10
0
def _load_static_report_data(options):
    """Validate/load and set start_date if static file is provided."""
    if not options.get("static_report_file"):
        return

    static_file = options.get("static_report_file")
    if not os.path.exists(static_file):
        LOG.error(f"file does not exist: '{static_file}'")
        sys.exit()

    LOG.info("Loading static data...")
    aws_tags = set()
    start_dates = []
    end_dates = []
    static_report_data = load_yaml(static_file)
    for generator_dict in static_report_data.get("generators"):
        for _, attributes in generator_dict.items():
            start_date = get_start_date(attributes, options)
            generated_start_date = calculate_start_date(start_date)
            start_dates.append(generated_start_date)

            if attributes.get("end_date"):
                generated_end_date = calculate_end_date(
                    generated_start_date, attributes.get("end_date"))
            elif options.get("end_date") and options.get(
                    "end_date").date() != today().date():
                generated_end_date = calculate_end_date(
                    generated_start_date, options.get("end_date"))
            else:
                generated_end_date = today()
            if options.get("provider") == "azure":
                generated_end_date += datetime.timedelta(hours=24)
            end_dates.append(generated_end_date)

            attributes["start_date"] = str(generated_start_date)
            attributes["end_date"] = str(generated_end_date)

            if options.get("provider") == "aws":
                aws_tags.update(attributes.get("tags", {}).keys())

    options["start_date"] = min(start_dates)
    latest_date = max(end_dates)
    last_day_of_month = calendar.monthrange(year=latest_date.year,
                                            month=latest_date.month)[1]
    options["end_date"] = latest_date.replace(day=last_day_of_month,
                                              hour=0,
                                              minute=0)
    options["static_report_data"] = static_report_data

    if options.get("provider") == "aws" and aws_tags:
        options["aws_tags"] = aws_tags

    return True
Example #11
0
def run(provider_type, options):
    """Run nise."""
    static_data_bool = _load_static_report_data(options)
    if not options.get("start_date"):
        raise NiseError("'start_date' is required in static files.")
    if not static_data_bool:
        fix_dates(options, provider_type)

    LOG.info("Creating reports...")
    if provider_type == "aws":
        aws_create_report(options)
    elif provider_type == "azure":
        azure_create_report(options)
    elif provider_type == "ocp":
        ocp_create_report(options)
    elif provider_type == "gcp":
        gcp_create_report(options)
Example #12
0
def post_payload_to_ingest_service(insights_upload, local_path):
    """POST the payload to Insights via header or basic auth."""
    insights_account_id = os.environ.get("INSIGHTS_ACCOUNT_ID")
    insights_org_id = os.environ.get("INSIGHTS_ORG_ID")
    insights_user = os.environ.get("INSIGHTS_USER")
    insights_password = os.environ.get("INSIGHTS_PASSWORD")
    # Default so the log line below cannot raise if local_path is missing.
    filesize = "unknown"
    if os.path.isfile(local_path):
        file_info = os.stat(local_path)
        filesize = _convert_bytes(file_info.st_size)
    LOG.info(f"Upload File: ({local_path}) filesize is {filesize}.")
    with open(local_path, "rb") as upload_file:
        if insights_account_id and insights_org_id:
            header = {
                "identity": {
                    "account_number": insights_account_id,
                    "internal": {
                        "org_id": insights_org_id
                    }
                }
            }
            headers = {
                "x-rh-identity":
                base64.b64encode(json.dumps(header).encode("UTF-8"))
            }
            return requests.post(
                insights_upload,
                data={},
                files={
                    "file": ("payload.tar.gz", upload_file,
                             "application/vnd.redhat.hccm.tar+tgz")
                },
                headers=headers,
            )

        return requests.post(
            insights_upload,
            data={},
            files={
                "file": ("payload.tar.gz", upload_file,
                         "application/vnd.redhat.hccm.tar+tgz")
            },
            auth=(insights_user, insights_password),
            verify=False,
        )
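
For context, the x-rh-identity value is simply a base64-encoded JSON identity document; a small sketch of building and decoding one (the account and org values are made up):

import base64
import json

identity = {"identity": {"account_number": "12345", "internal": {"org_id": "54321"}}}
encoded = base64.b64encode(json.dumps(identity).encode("UTF-8"))

# Decoding recovers the same structure the ingest service would see.
assert json.loads(base64.b64decode(encoded)) == identity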
Example #13
0
def main():
    """Run data generation program."""
    parser = create_parser()
    args = parser.parse_args()
    if args.log_level:
        LOG.setLevel(LOG_VERBOSITY[args.log_level])
    if not args.command:
        parser.error('"yaml" or "report" argument must be specified')
    elif args.command == "yaml":
        yaml_main(args)
        return
    options = vars(args)
    LOG.debug("Options are: %s", pformat(options))

    if not (options.get("start_date") or options.get("static_report_file")):
        parser.error("the following arguments are required: -s, --start-date")

    _, provider_type = _validate_provider_inputs(parser, options)

    run(provider_type, options)
Example #14
0
def upload_to_s3(bucket_name, bucket_file_path, local_path):
    """Upload data to an S3 bucket.

    Args:
        bucket_name (String): The name of the S3 bucket
        bucket_file_path (String): The path to store the file to
        local_path  (String): The local file system path of the file
    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True
    try:
        s3_client = boto3.resource("s3")
        s3_client.Bucket(bucket_name).upload_file(local_path, bucket_file_path)
        msg = f"Uploaded {bucket_file_path} to s3 bucket {bucket_name}."
        LOG.info(msg)
    except (ClientError, BotoConnectionError,
            boto3.exceptions.S3UploadFailedError) as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
Example #15
0
def oci_create_report(options):
    """Create cost and usage report files."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    fake = Faker()
    attributes = {}
    attributes["tenant_id"] = f"ocid1.tenancy.oc1..{fake.pystr(min_chars=20, max_chars=50)}"
    generators = [
        {"generator": OCIComputeGenerator, "attributes": attributes},
        {"generator": OCINetworkGenerator, "attributes": attributes},
        {"generator": OCIBlockStorageGenerator, "attributes": attributes},
        {"generator": OCIDatabaseGenerator, "attributes": attributes},
    ]
    months = _create_month_list(start_date, end_date)
    currency = default_currency(options.get("currency"), static_currency=None)
    # write_monthly = options.get("write_monthly", False)
    file_number = 0

    for month in months:
        data = {OCI_COST_REPORT: [], OCI_USAGE_REPORT: []}
        monthly_files = []

        for report_type in OCI_REPORT_TYPE_TO_COLS:
            LOG.info(f"Generating data for OCI for {month.get('name')}")

            for generator in generators:
                generator_cls = generator.get("generator")
                attributes = generator.get("attributes")
                gen_start_date = month.get("start")
                gen_end_date = month.get("end")
                gen = generator_cls(gen_start_date, gen_end_date, currency, report_type, attributes)

                for hour in gen.generate_data(report_type=report_type):
                    data[report_type] += [hour]

            month_output_file = write_oci_file(report_type, file_number, data[report_type], options)
            monthly_files.append(month_output_file)
        file_number += 1
Example #16
0
def _gcp_bigquery_process(
    start_date, end_date, currency, projects, generators, options, gcp_bucket_name, gcp_dataset_name, gcp_table_name
):
    """Generate GCP JSONL report data and load it into a BigQuery dataset."""
    data = []
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for start: {start_date} and end: {end_date}.")
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")

            generator_cls = generator.get("generator")
            gen = generator_cls(start_date, end_date, currency, project, attributes=attributes)
            for hour in gen.generate_data():
                data += [hour]
            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    local_file_path, output_file_name = write_gcp_file_jsonl(start_date, end_date, data, options)
    monthly_files.append(local_file_path)

    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, local_file_path, output_file_name)

    if not gcp_table_name:
        etag = options.get("gcp_etag") if options.get("gcp_etag") else str(uuid4())
        gcp_table_name = f"gcp_billing_export_{etag}"
    gcp_bucket_to_dataset(gcp_bucket_name, output_file_name, gcp_dataset_name, gcp_table_name)

    return monthly_files
Example #17
0
def ocp_route_file(insights_upload, local_path):
    """Route file to either Upload Service or local filesystem."""
    if os.path.isdir(insights_upload):
        extract_payload(insights_upload, local_path)
    else:
        response = post_payload_to_ingest_service(insights_upload, local_path)
        if response.status_code == 202:
            LOG.info("File uploaded successfully.")
        else:
            LOG.error(f"{response.status_code} File upload failed.")

        LOG.info(response.text)
Example #18
0
def replace_args(args, yaml, provider, ocp_on_cloud):
    """Replace appropriate file paths in args."""
    if not yaml:
        raise KeyError(
            f"Options YAML error: {provider} is not defined under {ocp_on_cloud}"
        )
    from nise.yaml_gen import STATIC_DIR

    args.provider = provider

    if yaml.get(f"{provider}-output-filename"):
        args.output_file_name = yaml.get(f"{provider}-output-filename")
    else:
        LOG.info(
            f"Output file not defined for {provider} under {ocp_on_cloud}. Writing to '{ocp_on_cloud}_{provider}.yml'."
        )
        args.output_file_name = f"{ocp_on_cloud}_{provider}.yml"

    if args.default:
        template_file_name = os.path.join(STATIC_DIR,
                                          yaml.get(f"{provider}-template"))
        config_file_name = os.path.join(STATIC_DIR,
                                        yaml.get(f"{provider}-gen-config"))
    else:
        template_file_name = yaml.get(f"{provider}-template")
        config_file_name = yaml.get(f"{provider}-gen-config")

    if template_file_name:
        args.template_file_name = template_file_name
    else:
        LOG.info(
            f"Template not defined for {provider} under {ocp_on_cloud}. Using default template."
        )
        args.template_file_name = os.path.join(
            STATIC_DIR, f"{provider}_static_data.yml.j2")
    if config_file_name:
        args.config_file_name = config_file_name
    else:
        LOG.info(
            f"Configuration not defined for {provider} under {ocp_on_cloud}. Using default configuration."
        )
        args.config_file_name = None
Example #19
0
def upload_to_gcp_storage(bucket_name, source_file_name,
                          destination_blob_name):
    """
    Upload data to a GCP Storage Bucket.

    Args:
        bucket_name (String): The name of the bucket to upload to
        source_file_name  (String): The full local file system path of the file
        destination_blob_name (String): Destination blob name to store in GCP.

    Returns:
        (Boolean): True if file was uploaded

    """
    uploaded = True

    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        LOG.warning("Please set your GOOGLE_APPLICATION_CREDENTIALS "
                    "environment variable before attempting to load file into"
                    "GCP Storage.")
        return False
    try:
        storage_client = storage.Client()

        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(source_file_name)

        LOG.info(
            f"File {source_file_name} uploaded to GCP Storage {destination_blob_name}."
        )
    except GoogleCloudError as upload_err:
        LOG.error(upload_err)
        uploaded = False
    return uploaded
Example #20
0
    def build_data(self, config, _random=False):  # noqa: C901
        """

        """
        LOG.info("Data build starting")

        data = dicta(
            payer=config.payer_account,
            bandwidth_gens=[],
            sql_gens=[],
            storage_gens=[],
            vmachine_gens=[],
            vnetwork_gens=[],
        )

        max_bandwidth_gens = FAKER.random_int(
            0, config.max_bandwidth_gens
        ) if _random else config.max_bandwidth_gens
        max_sql_gens = FAKER.random_int(
            0, config.max_sql_gens) if _random else config.max_sql_gens
        max_storage_gens = FAKER.random_int(
            0, config.max_storage_gens) if _random else config.max_storage_gens
        max_vmachine_gens = FAKER.random_int(
            0,
            config.max_vmachine_gens) if _random else config.max_vmachine_gens
        max_vnetwork_gens = FAKER.random_int(
            0,
            config.max_vnetwork_gens) if _random else config.max_vnetwork_gens

        LOG.info(f"Building {max_bandwidth_gens} Bandwidth generators ...")
        for _ in range(max_bandwidth_gens):
            data.bandwidth_gens.append(
                generate_azure_dicta(config, "bandwidth"))

        LOG.info(f"Building {max_sql_gens} SQL generators ...")
        for _ in range(max_sql_gens):
            data.sql_gens.append(generate_azure_dicta(config, "sql"))

        LOG.info(f"Building {max_storage_gens} Storage generators ...")
        for _ in range(max_storage_gens):
            data.storage_gens.append(generate_azure_dicta(config, "storage"))

        LOG.info(
            f"Building {max_vmachine_gens} Virtual Machine generators ...")
        for _ in range(max_vmachine_gens):
            data.vmachine_gens.append(generate_azure_dicta(config, "vmachine"))

        LOG.info(
            f"Building {max_vnetwork_gens} Virtual Network generators ...")
        for _ in range(max_vnetwork_gens):
            data.vnetwork_gens.append(generate_azure_dicta(config, "vnetwork"))

        return data
Example #21
0
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()

    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")

    else:
        generators = [
            {
                "generator": CloudStorageGenerator,
                "attributes": None
            },
            {
                "generator": ComputeEngineGenerator,
                "attributes": None
            },
        ]
        account = "{}-{}".format(fake.word(), fake.word())

        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    data = {}
    for project in projects:
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for start: {start_date} and end: {end_date}."
        )
        for count, generator in enumerate(generators):
            attributes = generator.get("attributes", {})
            if attributes:
                start_date = attributes.get("start_date")
                end_date = attributes.get("end_date")

            generator_cls = generator.get("generator")
            gen = generator_cls(start_date,
                                end_date,
                                project,
                                attributes=attributes)
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item

            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix,
                                              day.strftime("%Y-%m-%d"))

        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)

    if gcp_bucket_name:
        gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
Example #22
0
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    gcp_bucket_name = options.get("gcp_bucket_name")
    gcp_dataset_name = options.get("gcp_dataset_name")
    gcp_table_name = options.get("gcp_table_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    static_report_data = options.get("static_report_data")

    if gcp_dataset_name:
        # if the file is supposed to be uploaded to a bigquery table, it needs the JSONL version of everything
        if static_report_data:
            generators = _get_jsonl_generators(static_report_data.get("generators"))
            static_projects = static_report_data.get("projects", {})
            projects = []
            for static_dict in static_projects:
                # this lets the format of the YAML remain the same whether using the upload or local
                project = {}
                project["name"] = static_dict.get("project.name", "")
                project["id"] = static_dict.get("project.id", "")
                # the k:v pairs are split by ; and the keys and values split by :
                static_labels = static_dict.get("project.labels", [])
                labels = []
                if static_labels:
                    for pair in static_labels.split(";"):
                        key = pair.split(":")[0]
                        value = pair.split(":")[1]
                        labels.append({"key": key, "value": value})

                project["labels"] = labels
                location = {}
                location["location"] = static_dict.get("location.location", "")
                location["country"] = static_dict.get("location.country", "")
                location["region"] = static_dict.get("location.region", "")
                location["zone"] = static_dict.get("location.zone", "")
                row = {
                    "billing_account_id": static_dict.get("billing_account_id", ""),
                    "project": project,
                    "location": location,
                }
                projects.append(row)
                currency = default_currency(options.get("currency"), get_gcp_static_currency(generators))
        else:
            generators = [
                {"generator": JSONLCloudStorageGenerator, "attributes": {}},
                {"generator": JSONLComputeEngineGenerator, "attributes": {}},
                {"generator": JSONLGCPNetworkGenerator, "attributes": {}},
                {"generator": JSONLGCPDatabaseGenerator, "attributes": {}},
            ]
            account = fake.word()
            project_generator = JSONLProjectGenerator(account)
            projects = project_generator.generate_projects()
            currency = default_currency(options.get("currency"), None)

    elif static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        projects = static_report_data.get("projects")
        processed_projects = copy.deepcopy(projects)
        for i, project in enumerate(projects):
            labels = []
            static_labels = project.get("project.labels", [])
            if static_labels:
                for pair in static_labels.split(";"):
                    key = pair.split(":")[0]
                    value = pair.split(":")[1]
                    labels.append({"key": key, "value": value})
                processed_projects[i]["project.labels"] = json.dumps(labels)
        projects = processed_projects

    else:
        generators = [
            {"generator": CloudStorageGenerator, "attributes": {}},
            {"generator": ComputeEngineGenerator, "attributes": {}},
            {"generator": GCPNetworkGenerator, "attributes": {}},
            {"generator": GCPDatabaseGenerator, "attributes": {}},
        ]
        account = fake.word()

        project_generator = ProjectGenerator(account)
        projects = project_generator.generate_projects()

    if gcp_dataset_name:
        monthly_files = _gcp_bigquery_process(
            start_date,
            end_date,
            currency,
            projects,
            generators,
            options,
            gcp_bucket_name,
            gcp_dataset_name,
            gcp_table_name,
        )
    else:
        months = _create_month_list(start_date, end_date)
        monthly_files = []
        output_files = []
        for month in months:
            data = []
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            for project in projects:
                num_gens = len(generators)
                ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
                LOG.info(
                    f"Producing data for {num_gens} generators for start: {gen_start_date} and end: {gen_end_date}."
                )
                for count, generator in enumerate(generators):
                    attributes = generator.get("attributes", {})
                    if attributes:
                        start_date = attributes.get("start_date")
                        end_date = attributes.get("end_date")
                        currency = default_currency(options.get("currency"), attributes.get("currency"))
                    else:
                        currency = default_currency(options.get("currency"), None)
                    if gen_end_date > end_date:
                        gen_end_date = end_date

                    generator_cls = generator.get("generator")
                    gen = generator_cls(gen_start_date, gen_end_date, currency, project, attributes=attributes)
                    for hour in gen.generate_data():
                        data += [hour]
                    count += 1
                    if count % ten_percent == 0:
                        LOG.info(f"Done with {count} of {num_gens} generators.")

            local_file_path, output_file_name = write_gcp_file(gen_start_date, gen_end_date, data, options)
            output_files.append(output_file_name)
            monthly_files.append(local_file_path)

        for index, month_file in enumerate(monthly_files):
            if gcp_bucket_name:
                gcp_route_file(gcp_bucket_name, month_file, output_files[index])

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
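
The project labels in the static YAML arrive as a single "key:value;key:value" string and are split inline above. A hypothetical helper capturing that parsing, useful mainly to make the expected format explicit:

def parse_static_labels(static_labels):
    """Split a "key:value;key:value" string into label dicts (hypothetical helper)."""
    labels = []
    if static_labels:
        for pair in static_labels.split(";"):
            key, _, value = pair.partition(":")
            labels.append({"key": key, "value": value})
    return labels


# parse_static_labels("env:prod;team:cost-mgmt")
# -> [{"key": "env", "value": "prod"}, {"key": "team", "value": "cost-mgmt"}]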
Example #23
0
    def build_data(self, config, _random=False):  # noqa: C901
        """
        Build a structure to fill out a nise yaml template.

        Structure has the form of:
            {start_date: date,    (config.start_date)
            end_date: date,      (config.end_date)
            nodes: [             (number of nodes controlled by config.max_nodes)
                {node_name: str,     (dynamic)
                cpu_cores: int,     (config.max_node_cpu_cores)
                memory_gig: int,    (config.max_node_memory_gig)
                resource_id: str,   (dynamic)
                namespaces: [     (number of namespaces controlled by config.max_node_namespaces)
                    {namespace: str,   (dynamic)
                    pods: [           (number of pods controlled by config.max_node_namespace_pods)
                        pod_name: str,        (dynamic)
                        cpu_request: int,     (config.max_node_namespace_pod_cpu_request)
                        mem_request_gig: int, (config.max_node_namespace_pod_mem_request_gig)
                        cpu_limit: int,       (config.max_node_namespace_pod_cpu_limit)
                        mem_limit_gig: int,   (config.max_node_namespace_pod_mem_limit_gig)
                        pod_seconds: int,     (config.max_node_namespace_pod_seconds)
                        labels: str           (dynamic)
                    ],
                    volumes: [
                        volume_name: str,
                        storage_class: str,
                        volume_request_gig: int,
                        labels: str,
                        volume_claims: [
                            volume_claim_name: str,
                            pod_name: str,
                            labels: str,
                            capacity_gig: int
                        ]
                    ]}
                ]}
            ]}

        Parameters:
            config : dicta

        Returns:
            dicta
        """
        LOG.info("Data build starting")

        data = dicta(start_date=str(config.start_date),
                     end_date=str(config.end_date),
                     nodes=[],
                     resourceid_labels=None)
        resourceid_labels = {}

        if _random:
            max_nodes = FAKER.random_int(1, config.max_nodes)
        else:
            max_nodes = config.max_nodes

        for node_ix in range(max_nodes):
            LOG.info(f"Building node {node_ix + 1}/{max_nodes}...")
            if _random:
                cores = FAKER.random_int(1, config.max_node_cpu_cores)
                memory = FAKER.random_int(1, config.max_node_memory_gig)
            else:
                cores = config.max_node_cpu_cores
                memory = config.max_node_memory_gig

            resource_id = generate_resource_id(config)
            node_name = generate_name(config)
            id_label_key = (resource_id, node_name)
            resourceid_labels[id_label_key] = []
            node = dicta(name=node_name,
                         cpu_cores=cores,
                         memory_gig=memory,
                         resource_id=resource_id,
                         namespaces=[])
            data.nodes.append(node)

            if _random:
                max_namespaces = FAKER.random_int(1,
                                                  config.max_node_namespaces)
            else:
                max_namespaces = config.max_node_namespaces

            for namespace_ix in range(max_namespaces):
                LOG.info(
                    f"Building node {node_ix + 1}/{max_nodes}; namespace {namespace_ix + 1}/{max_namespaces}..."
                )

                namespace = dicta(name=generate_name(config, prefix=node.name),
                                  pods=[],
                                  volumes=[])
                node.namespaces.append(namespace)

                if _random:
                    max_pods = FAKER.random_int(1,
                                                config.max_node_namespace_pods)
                else:
                    max_pods = config.max_node_namespace_pods

                LOG.info(f"Building {max_pods} pods...")
                for pod_ix in range(max_pods):
                    if _random:
                        cpu_req = FAKER.random_int(1, node.cpu_cores)
                        mem_req = FAKER.random_int(1, node.memory_gig)
                        cpu_lim = FAKER.random_int(1, node.cpu_cores)
                        mem_lim = FAKER.random_int(1, node.memory_gig)
                        pod_sec = FAKER.random_int(
                            config.min_node_namespace_pod_seconds,
                            config.max_node_namespace_pod_seconds,
                            step=(config.max_node_namespace_pod_seconds // 10)
                            or 1800,
                        )
                    else:
                        cpu_lim = cpu_req = node.cpu_cores
                        mem_lim = mem_req = node.memory_gig
                        pod_sec = config.max_node_namespace_pod_seconds

                    pod_labels = generate_labels(
                        config.max_node_namespace_pod_labels)
                    resourceid_labels[id_label_key].append(pod_labels)
                    pod = dicta(
                        name=generate_name(config,
                                           prefix=namespace.name + "-pod",
                                           suffix=str(pod_ix),
                                           dynamic=False),
                        cpu_request=cpu_req,
                        mem_request_gig=mem_req,
                        cpu_limit=cpu_lim,
                        mem_limit_gig=mem_lim,
                        pod_seconds=pod_sec,
                        labels=pod_labels,
                    )
                    namespace.pods.append(pod)

                if _random:
                    max_volumes = FAKER.random_int(
                        1, config.max_node_namespace_volumes)
                else:
                    max_volumes = config.max_node_namespace_volumes

                LOG.info(f"Building {max_volumes} volumes...")
                for volume_ix in range(max_volumes):
                    if _random:
                        storage_cls = config.storage_classes[FAKER.random_int(
                            0,
                            len(config.storage_classes) - 1)]
                        vol_req = FAKER.random_int(
                            1, config.max_node_namespace_volume_request_gig)
                    else:
                        storage_cls = config.storage_classes[0]
                        vol_req = config.max_node_namespace_volume_request_gig

                    volume_labels = generate_labels(
                        config.max_node_namespace_volume_labels)
                    resourceid_labels[id_label_key].append(volume_labels)
                    volume = dicta(
                        name=generate_name(config,
                                           prefix=namespace.name + "-vol",
                                           suffix=str(volume_ix),
                                           dynamic=False),
                        storage_class=storage_cls,
                        volume_request_gig=vol_req,
                        labels=volume_labels,
                        volume_claims=[],
                    )
                    namespace.volumes.append(volume)

                    if _random:
                        max_volume_claims = FAKER.random_int(
                            1, config.max_node_namespace_volume_volume_claims)
                    else:
                        max_volume_claims = config.max_node_namespace_volume_volume_claims

                    for volume_claim_ix in range(max_volume_claims):
                        if _random:
                            cap = FAKER.random_int(
                                1, config.
                                max_node_namespace_volume_volume_claim_capacity_gig
                            )
                        else:
                            cap = config.max_node_namespace_volume_volume_claim_capacity_gig

                        pod_ix_for_claim = min(volume_claim_ix,
                                               len(namespace.pods) - 1)
                        pod_name = namespace.pods[pod_ix_for_claim].name
                        volume_claim_labels = generate_labels(
                            config.
                            max_node_namespace_volume_volume_claim_labels)
                        resourceid_labels[id_label_key].append(
                            volume_claim_labels)
                        volume_claim = dicta(
                            name=generate_name(
                                config,
                                prefix=namespace.name + "-vol-claim",
                                suffix=str(volume_claim_ix),
                                dynamic=False,
                            ),
                            pod_name=pod_name,
                            labels=volume_claim_labels,
                            capacity_gig=cap,
                        )
                        volume.volume_claims.append(volume_claim)
        data.resourceid_labels = resourceid_labels
        return data
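
dicta is used throughout as a dict with attribute access (for example node.namespaces); it is not defined in these snippets. A minimal stand-in illustrating the assumed behavior:

class DictaSketch(dict):
    """Dict with attribute access; a hypothetical stand-in for nise's dicta."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as err:
            raise AttributeError(name) from err

    def __setattr__(self, name, value):
        self[name] = value


node = DictaSketch(name="node-1", namespaces=[])
node.namespaces.append("example-namespace")
print(node.name, node["namespaces"])  # node-1 ['example-namespace']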
Example #24
0
def azure_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": BandwidthGenerator, "attributes": {}},
            {"generator": SQLGenerator, "attributes": {}},
            {"generator": StorageGenerator, "attributes": {}},
            {"generator": VMGenerator, "attributes": {}},
            {"generator": VNGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)

    account_info = _generate_azure_account_info(accounts_list)
    currency = default_currency(options.get("currency"), account_info["currency_code"])

    meter_cache = {}
    # The options params are not going to change so we don't
    # have to keep resetting the var inside of the for loop
    azure_container_name = options.get("azure_container_name")
    storage_account_name = options.get("azure_account_name")
    azure_prefix_name = options.get("azure_prefix_name")
    azure_report_name = options.get("azure_report_name")
    version_two = options.get("version_two", False)
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        monthly_files = []
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes", {})
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue
            else:
                attributes = {"end_date": end_date, "start_date": start_date}

            gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            if attributes.get("meter_cache"):
                meter_cache.update(attributes.get("meter_cache"))  # needed so that meter_cache can be defined in yaml
            attributes["meter_cache"] = meter_cache
            attributes["version_two"] = version_two
            gen = generator_cls(gen_start_date, gen_end_date, currency, account_info, attributes)
            azure_columns = gen.azure_columns
            data += gen.generate_data()
            meter_cache = gen.get_meter_cache()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        local_path, output_file_name = _generate_azure_filename()
        date_range = _generate_azure_date_range(month)

        _write_csv(local_path, data, azure_columns)
        monthly_files.append(local_path)

        if azure_container_name:
            file_path = ""
            if azure_prefix_name:
                file_path += azure_prefix_name + "/"
            file_path += azure_report_name + "/"
            file_path += date_range + "/"
            file_path += output_file_name

            # azure blob upload
            storage_account_name = options.get("azure_account_name", None)
            if storage_account_name:
                azure_route_file(storage_account_name, azure_container_name, local_path, file_path)
            # local dir upload
            else:
                azure_route_file(azure_container_name, file_path, local_path)
        if not write_monthly:
            _remove_files(monthly_files)
Example #25
0
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": {}}]

    months = _create_month_list(start_date, end_date)
    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: [], OCP_NAMESPACE_LABEL: []}
        file_numbers = {OCP_POD_USAGE: 0, OCP_STORAGE_USAGE: 0, OCP_NODE_LABEL: 0, OCP_NAMESPACE_LABEL: 0}
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)
            for report_type in gen.ocp_report_generation.keys():
                LOG.info(f"Generating data for {report_type} for {month.get('name')}")
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1

            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file in range(len(monthly_files)):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(monthly_files[num_file], temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            cr_status = {
                "clusterID": "4e009161-4f40-42c8-877c-3e59f6baea3d",
                "clusterVersion": "stable-4.6",
                "api_url": "https://console.redhat.com",
                "authentication": {"type": "token"},
                "packaging": {"max_reports_to_store": 30, "max_size_MB": 100},
                "upload": {
                    "ingress_path": "/api/ingress/v1/upload",
                    "upload": "True",
                    "upload_wait": 27,
                    "upload_cycle": 360,
                },
                "operator_commit": __version__,
                "prometheus": {
                    "prometheus_configured": "True",
                    "prometheus_connected": "True",
                    "last_query_start_time": "2021-07-28T12:22:37Z",
                    "last_query_success_time": "2021-07-28T12:22:37Z",
                    "service_address": "https://thanos-querier.openshift-monitoring.svc:9091",
                },
                "reports": {
                    "report_month": "07",
                    "last_hour_queried": "2021-07-28 11:00:00 - 2021-07-28 11:59:59",
                    "data_collected": "True",
                },
                "source": {
                    "sources_path": "/api/sources/v1.0/",
                    "name": "INSERT-SOURCE-NAME",
                    "create_source": "False",
                    "check_cycle": 1440,
                },
            }
            cr_status = json.dumps(cr_status)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
                "start": gen_start_date,
                "end": gen_end_date,
                "version": __version__,
                "certified": False,
                "cr_status": cr_status,
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest, "manifest.json", "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)
        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
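
_tar_gzip_report_files is referenced above but not shown. A sketch of how such a payload tarball could be assembled with the standard library (the flat layout and temp-file handling are assumptions, not necessarily how nise packages it):

import os
import tarfile
import tempfile


def tar_gzip_report_files_sketch(report_files):
    """Bundle report files into a .tar.gz payload (hypothetical helper)."""
    handle, tarball_path = tempfile.mkstemp(suffix=".tar.gz")
    os.close(handle)
    with tarfile.open(tarball_path, "w:gz") as tarball:
        for report_file in report_files:
            # Store each file flat at the archive root.
            tarball.add(report_file, arcname=os.path.basename(report_file))
    return tarball_path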
Example #26
0
def aws_create_report(options):  # noqa: C901
    """Create a cost usage report file."""
    data = []
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    aws_finalize_report = options.get("aws_finalize_report")
    static_report_data = options.get("static_report_data")
    manifest_gen = True if options.get("manifest_generation") is None else options.get("manifest_generation")

    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
        accounts_list = static_report_data.get("accounts")
    else:
        generators = [
            {"generator": DataTransferGenerator, "attributes": {}},
            {"generator": EBSGenerator, "attributes": {}},
            {"generator": EC2Generator, "attributes": {}},
            {"generator": S3Generator, "attributes": {}},
            {"generator": RDSGenerator, "attributes": {}},
            {"generator": Route53Generator, "attributes": {}},
            {"generator": VPCGenerator, "attributes": {}},
            {"generator": MarketplaceGenerator, "attributes": {}},
        ]
        accounts_list = None

    months = _create_month_list(start_date, end_date)

    payer_account, usage_accounts, currency_code = _generate_accounts(accounts_list)
    currency_code = default_currency(options.get("currency"), currency_code)

    aws_bucket_name = options.get("aws_bucket_name")
    aws_report_name = options.get("aws_report_name")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = []
        file_number = 0
        monthly_files = []
        fake = Faker()
        num_gens = len(generators)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(f"Producing data for {num_gens} generators for {month.get('start').strftime('%Y-%m')}.")
        for count, generator in enumerate(generators):
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(attributes, month)

            gen = generator_cls(
                gen_start_date,
                gen_end_date,
                currency_code,
                payer_account,
                usage_accounts,
                attributes,
                options.get("aws_tags"),
            )
            num_instances = 1 if attributes else randint(2, 60)
            for _ in range(num_instances):
                for hour in gen.generate_data():
                    data += [hour]
                    if len(data) == options.get("row_limit"):
                        file_number += 1
                        month_output_file = write_aws_file(
                            file_number,
                            aws_report_name,
                            month.get("name"),
                            gen_start_date.year,
                            data,
                            aws_finalize_report,
                            static_report_data,
                            gen.AWS_COLUMNS,
                        )
                        monthly_files.append(month_output_file)
                        data.clear()

            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

        if file_number != 0:
            file_number += 1
        month_output_file = write_aws_file(
            file_number,
            aws_report_name,
            month.get("name"),
            gen_start_date.year,
            data,
            aws_finalize_report,
            static_report_data,
            gen.AWS_COLUMNS,
        )
        monthly_files.append(month_output_file)

        if aws_bucket_name:
            manifest_values = {"account": payer_account}
            manifest_values.update(options)
            manifest_values["start_date"] = gen_start_date
            manifest_values["end_date"] = gen_end_date
            manifest_values["file_names"] = monthly_files

            if not manifest_gen:
                s3_cur_path, _ = aws_generate_manifest(fake, manifest_values)
                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)
            else:
                s3_cur_path, manifest_data = aws_generate_manifest(fake, manifest_values)
                s3_month_path = os.path.dirname(s3_cur_path)
                s3_month_manifest_path = s3_month_path + "/" + aws_report_name + "-Manifest.json"
                s3_assembly_manifest_path = s3_cur_path + "/" + aws_report_name + "-Manifest.json"

                temp_manifest = _write_manifest(manifest_data)
                aws_route_file(aws_bucket_name, s3_month_manifest_path, temp_manifest)
                aws_route_file(aws_bucket_name, s3_assembly_manifest_path, temp_manifest)

                for monthly_file in monthly_files:
                    temp_cur_zip = _gzip_report(monthly_file)
                    destination_file = "{}/{}.gz".format(s3_cur_path, os.path.basename(monthly_file))
                    aws_route_file(aws_bucket_name, destination_file, temp_cur_zip)
                    os.remove(temp_cur_zip)

                os.remove(temp_manifest)

        if not write_monthly:
            _remove_files(monthly_files)
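
# Usage sketch (assumption): a minimal ``options`` dict for the report-creation
# flow above. The keys mirror the option lookups in the example; the values and
# the variable name are illustrative only.
from datetime import datetime

example_aws_options = {
    "start_date": datetime.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0),
    "end_date": datetime.now(),
    "aws_report_name": "example-cur-report",
    "aws_bucket_name": None,            # set to a bucket name to upload the gzipped CSVs
    "row_limit": 100000,                # rows per CSV before a new numbered file is started
    "currency": None,                   # falls back to the generated accounts' currency
    "aws_tags": None,
    "static_report_data": None,         # parsed YAML pinning generators and accounts
    "write_monthly": True,              # keep the monthly CSV files on disk
}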
Example #27
0
def extract_payload(base_path, payload_file):
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            file - .csv usage report file name
            files - list of .csv usage report file names
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        base_path (String): base local directory path.
        payload_file (String): path to payload.tar.gz file containing report and manifest.

    Returns:
        None

    """
    # Create temporary directory for initial file staging and verification
    temp_dir = tempfile.mkdtemp()

    # Extract tarball into temp directory
    try:
        with TarFile.open(payload_file) as mytar:
            mytar.extractall(path=temp_dir)
            files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except ReadError as error:
        LOG.error("Unable to untar file. Reason: {}".format(str(error)))
        shutil.rmtree(temp_dir)
        return

    if not manifest_path:
        LOG.error("Payload is missing manifest.json.")
        shutil.rmtree(temp_dir)
        return

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(base_path, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get("files", []):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError:
            # Skip files listed in the manifest that are absent from the payload.
            pass

    LOG.info("Successfully extracted OCP for {}/{}".format(report_meta.get("cluster_id"), usage_month))
    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
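
# Usage sketch (assumption): building a payload tarball shaped the way the
# docstring above describes -- a manifest.json plus the CSV report it lists --
# so it can be handed to extract_payload(). Names and paths are illustrative.
import json
import os
import tarfile
import tempfile
from datetime import datetime
from uuid import uuid4

def build_example_payload(report_csv_path):
    """Bundle one report CSV and a matching manifest into payload.tar.gz."""
    staging_dir = tempfile.mkdtemp()
    report_name = os.path.basename(report_csv_path)
    manifest = {
        "file": report_name,               # single-file field from the docstring
        "files": [report_name],            # list iterated by extract_payload()
        "date": datetime.utcnow().isoformat(),
        "uuid": str(uuid4()),
        "cluster_id": "my-ocp-cluster-1",
    }
    manifest_path = os.path.join(staging_dir, "manifest.json")
    with open(manifest_path, "w") as manifest_file:
        json.dump(manifest, manifest_file)

    payload_path = os.path.join(staging_dir, "payload.tar.gz")
    with tarfile.open(payload_path, "w:gz") as tarball:
        tarball.add(manifest_path, arcname="manifest.json")
        tarball.add(report_csv_path, arcname=report_name)
    return payload_path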
Example #28
0
    def build_data(self, config, _random=False):  # noqa: C901
        """Build the data."""
        LOG.info("Data build starting")

        data = dicta(
            payer=config.payer_account,
            data_transfer_gens=[],
            ebs_gens=[],
            ec2_gens=[],
            rds_gens=[],
            route53_gens=[],
            s3_gens=[],
            vpc_gens=[],
            users=[],
        )

        def _max(configured_max):
            # Cap at a random value when randomizing; otherwise use the configured maximum.
            return FAKER.random_int(0, configured_max) if _random else configured_max

        max_data_transfer_gens = _max(config.max_data_transfer_gens)
        max_ebs_gens = _max(config.max_ebs_gens)
        max_ec2_gens = _max(config.max_ec2_gens)
        max_rds_gens = _max(config.max_rds_gens)
        max_route53_gens = _max(config.max_route53_gens)
        max_s3_gens = _max(config.max_s3_gens)
        max_vpc_gens = _max(config.max_vpc_gens)
        max_users = _max(config.max_users)

        LOG.info(
            f"Building {max_data_transfer_gens} data transfer generators ...")
        for _ in range(max_data_transfer_gens):
            _rate, _amount = RATE_AMT.get("DTG")
            data_transfer_gen = initialize_dicta("DTG", config)
            data_transfer_gen.update(amount=round(next(_amount), 5),
                                     rate=round(next(_rate), 5))
            data.data_transfer_gens.append(data_transfer_gen)

        LOG.info(f"Building {max_ebs_gens} EBS generators ...")
        for _ in range(max_ebs_gens):
            _rate, _amount = RATE_AMT.get("EBS")
            ebs_gen = initialize_dicta("EBS", config)
            ebs_gen.update(amount=round(next(_amount), 5),
                           rate=round(next(_rate), 5))
            data.ebs_gens.append(ebs_gen)

        LOG.info(f"Building {max_ec2_gens} EC2 generators ...")
        for _ in range(max_ec2_gens):
            instance_type = random.choice(EC2_INSTANCES)
            ec2_gen = initialize_dicta("EC2", config)
            ec2_gen.update(
                processor_arch=instance_type.get("processor_arch"),
                region=random.choice(REGIONS),
                instance_type=instance_type,
            )
            data.ec2_gens.append(ec2_gen)

        LOG.info(f"Building {max_rds_gens} RDS generators ...")
        for _ in range(max_rds_gens):
            instance_type = random.choice(RDS_INSTANCES)
            rds_gen = initialize_dicta("RDS", config)
            rds_gen.update(
                processor_arch=instance_type.get("processor_arch"),
                region=random.choice(REGIONS),
                instance_type=instance_type,
            )
            data.rds_gens.append(rds_gen)

        LOG.info(f"Building {max_route53_gens} Route 53 generators ...")
        for _ in range(max_route53_gens):
            route53_gen = initialize_dicta("R53", config)
            route53_gen.update(product_family=random.choices(
                ("DNS Zone", "DNS Query"), weights=[1, 10])[0])
            data.route53_gens.append(route53_gen)

        LOG.info(f"Building {max_s3_gens} S3 generators ...")
        for _ in range(max_s3_gens):
            _rate, _amount = RATE_AMT.get("S3")
            s3_gen = initialize_dicta("S3", config)
            s3_gen.update(amount=round(next(_amount), 5),
                          rate=round(next(_rate), 5))
            data.s3_gens.append(s3_gen)

        LOG.info(f"Building {max_vpc_gens} VPC generators ...")
        for _ in range(max_vpc_gens):
            vpc_gen = initialize_dicta("VPC", config)
            data.vpc_gens.append(vpc_gen)

        LOG.info(f"Adding {max_users} users.")
        for _ in range(max_users):
            data.users.append(generate_account_id(config))

        return data
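
# Usage sketch (assumption): a stand-in ``config`` object carrying the attributes
# that build_data() reads above. The real project presumably supplies its own
# config class; SimpleNamespace and the values below are purely illustrative.
from types import SimpleNamespace

example_config = SimpleNamespace(
    payer_account="999999999999",
    max_data_transfer_gens=1,
    max_ebs_gens=2,
    max_ec2_gens=2,
    max_rds_gens=1,
    max_route53_gens=1,
    max_s3_gens=2,
    max_vpc_gens=1,
    max_users=3,
)
# Passing _random=True caps each generator count at a random value between 0 and
# the configured maximum instead of using the maximum directly.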
Example #29
0
def ocp_create_report(options):  # noqa: C901
    """Create a usage report file."""
    start_date = options.get("start_date")
    end_date = options.get("end_date")
    cluster_id = options.get("ocp_cluster_id")
    static_report_data = options.get("static_report_data")
    if static_report_data:
        generators = _get_generators(static_report_data.get("generators"))
    else:
        generators = [{"generator": OCPGenerator, "attributes": None}]

    months = _create_month_list(start_date, end_date)
    insights_upload = options.get("insights_upload")
    write_monthly = options.get("write_monthly", False)
    for month in months:
        data = {OCP_POD_USAGE: [], OCP_STORAGE_USAGE: [], OCP_NODE_LABEL: []}
        file_numbers = {
            OCP_POD_USAGE: 0,
            OCP_STORAGE_USAGE: 0,
            OCP_NODE_LABEL: 0
        }
        monthly_files = []
        for generator in generators:
            generator_cls = generator.get("generator")
            attributes = generator.get("attributes")
            gen_start_date = month.get("start")
            gen_end_date = month.get("end")
            if attributes:
                # Skip if generator usage is outside of current month
                if attributes.get("end_date") < month.get("start"):
                    continue
                if attributes.get("start_date") > month.get("end"):
                    continue

                gen_start_date, gen_end_date = _create_generator_dates_from_yaml(
                    attributes, month)

            gen = generator_cls(gen_start_date, gen_end_date, attributes)
            for report_type in gen.ocp_report_generation.keys():
                LOG.info(
                    f"Generating data for {report_type} for {month.get('name')}"
                )
                for hour in gen.generate_data(report_type):
                    data[report_type] += [hour]
                    if len(data[report_type]) == options.get("row_limit"):
                        file_numbers[report_type] += 1
                        month_output_file = write_ocp_file(
                            file_numbers[report_type],
                            cluster_id,
                            month.get("name"),
                            gen_start_date.year,
                            report_type,
                            data[report_type],
                        )
                        monthly_files.append(month_output_file)
                        data[report_type].clear()

        for report_type in gen.ocp_report_generation.keys():
            if file_numbers[report_type] != 0:
                file_numbers[report_type] += 1

            month_output_file = write_ocp_file(
                file_numbers[report_type],
                cluster_id,
                month.get("name"),
                gen_start_date.year,
                report_type,
                data[report_type],
            )
            monthly_files.append(month_output_file)

        if insights_upload:
            # Generate manifest for all files
            ocp_assembly_id = uuid4()
            report_datetime = gen_start_date
            temp_files = {}
            for num_file, monthly_file in enumerate(monthly_files):
                temp_filename = f"{ocp_assembly_id}_openshift_report.{num_file}.csv"
                temp_usage_file = create_temporary_copy(monthly_file, temp_filename, "payload")
                temp_files[temp_filename] = temp_usage_file

            manifest_file_names = ", ".join(f'"{w}"' for w in temp_files)
            manifest_values = {
                "ocp_cluster_id": cluster_id,
                "ocp_assembly_id": ocp_assembly_id,
                "report_datetime": report_datetime,
                "files": manifest_file_names[1:-1],
            }
            manifest_data = ocp_generate_manifest(manifest_values)
            temp_manifest = _write_manifest(manifest_data)
            temp_manifest_name = create_temporary_copy(temp_manifest,
                                                       "manifest.json",
                                                       "payload")

            # Tarball and upload files individually
            for temp_usage_file in temp_files.values():
                report_files = [temp_usage_file, temp_manifest_name]
                temp_usage_zip = _tar_gzip_report_files(report_files)
                ocp_route_file(insights_upload, temp_usage_zip)
                os.remove(temp_usage_file)
                os.remove(temp_usage_zip)

            os.remove(temp_manifest)
            os.remove(temp_manifest_name)
        if not write_monthly:
            LOG.info("Cleaning up local directory")
            _remove_files(monthly_files)
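
# Illustration (assumption about intent): what the quoted file-name handling in
# the manifest block above evaluates to. Slicing off the first and last characters
# leaves an inner fragment, presumably so a manifest template that supplies its
# own surrounding quotes can embed it directly.
names = ["report.0.csv", "report.1.csv"]
joined = ", ".join(f'"{w}"' for w in names)   # '"report.0.csv", "report.1.csv"'
inner = joined[1:-1]                          # 'report.0.csv", "report.1.csv'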
Example #30
0
def gcp_create_report(options):  # noqa: C901
    """Create a GCP cost usage report file."""
    fake = Faker()
    report_prefix = options.get("gcp_report_prefix") or fake.word()
    gcp_bucket_name = options.get("gcp_bucket_name")

    start_date = options.get("start_date")
    end_date = options.get("end_date")

    projects = []
    if options.get("static_report_file"):
        config = load_yaml(options.get("static_report_file"))
        project_gens = list(filter(lambda x: "ProjectGenerator" in x, config.get("generators")))
        for gen in project_gens:
            project_generator = ProjectGenerator(gen.get("ProjectGenerator", {}).get("Account ID"))
            projects.extend(project_generator.generate_projects())
    else:
        account = "{}-{}".format(fake.word(), fake.word())
        project_generator = ProjectGenerator(account)
        projects.extend(project_generator.generate_projects())

    data = {}
    for project in projects:
        num_gens = len(GCP_GENERATORS)
        ten_percent = int(num_gens * 0.1) if num_gens > 50 else 5
        LOG.info(
            f"Producing data for {num_gens} generators for GCP Project '{project}'."
        )
        for count, generator in enumerate(GCP_GENERATORS):
            gen = generator(start_date,
                            end_date,
                            project,
                            user_config=options.get("static_report_file"))
            generated_data = gen.generate_data()
            for key, item in generated_data.items():
                if key in data:
                    data[key] += item
                else:
                    data[key] = item

            count += 1
            if count % ten_percent == 0:
                LOG.info(f"Done with {count} of {num_gens} generators.")

    monthly_files = []
    for day, daily_data in data.items():
        output_file_name = "{}-{}.csv".format(report_prefix, day.strftime("%Y-%m-%d"))
        output_file_path = os.path.join(os.getcwd(), output_file_name)
        monthly_files.append(output_file_path)
        _write_csv(output_file_path, daily_data, GCP_REPORT_COLUMNS)

        # Route each daily file as it is written, not just the last one generated.
        if gcp_bucket_name:
            gcp_route_file(gcp_bucket_name, output_file_path, output_file_name)

    write_monthly = options.get("write_monthly", False)
    if not write_monthly:
        _remove_files(monthly_files)
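
# Usage sketch (assumption): a minimal ``options`` dict for the GCP flow above.
# Keys follow the option lookups in the example; values are illustrative.
from datetime import date, timedelta

example_gcp_options = {
    "start_date": date.today() - timedelta(days=2),
    "end_date": date.today(),
    "gcp_report_prefix": "example-gcp-report",  # defaults to a random word when omitted
    "gcp_bucket_name": None,                    # set to route each daily CSV to a bucket
    "static_report_file": None,                 # YAML file pinning projects and generators
    "write_monthly": False,                     # False removes the local CSVs after routing
}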