def load_supported_services(context: LoggingContext, selected_services: List[str]) -> List[GCPService]:
    working_directory = os.path.dirname(os.path.realpath(__file__))
    config_directory = os.path.join(working_directory, "config")
    config_files = [
        file for file
        in listdir(config_directory)
        if isfile(os.path.join(config_directory, file)) and is_yaml_file(file)
    ]

    services = []
    for file in config_files:
        config_file_path = os.path.join(config_directory, file)
        try:
            with open(config_file_path, encoding="utf-8") as config_file:
                config_yaml = yaml.safe_load(config_file)
                technology_name = extract_technology_name(config_yaml)

                for service_yaml in config_yaml.get("gcp", {}):
                    # If whitelist of services exists and current service is not present in it, skip
                    should_skip = selected_services and \
                                  (service_yaml.get("service", "None") not in selected_services)
                    if should_skip:
                        continue
                    services.append(GCPService(tech_name=technology_name, **service_yaml))
        except Exception as error:
            context.log(f"Failed to load configuration file: '{config_file_path}'. Error details: {error}")
            continue
    services_names = [service.name for service in services]
    context.log("Selected services: " + ",".join(services_names))
    return services
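
# A minimal usage sketch of load_supported_services above. Passing an empty list loads every
# configured service; passing service names restricts loading to that whitelist.
# The service name "gce_instance" below is illustrative only.
all_services = load_supported_services(LoggingContext(None), [])
only_gce = load_supported_services(LoggingContext(None), ["gce_instance"])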
    def _load_configs(self):
        context = LoggingContext("ME startup")
        working_directory = os.path.dirname(os.path.realpath(__file__))
        config_directory = os.path.join(working_directory, "../../config_logs")
        config_files = [
            file for file in listdir(config_directory)
            if isfile(os.path.join(config_directory, file)) and _is_json_file(file)
        ]
        for file in config_files:
            config_file_path = os.path.join(config_directory, file)
            try:
                with open(config_file_path) as config_file:
                    config_json = json.load(config_file)
                    if config_json.get("name", "") == DEFAULT_RULE_NAME:
                        self.default_rule = _create_config_rules(context, config_json)[0]
                    elif config_json.get("name", "") == COMMON_RULE_NAME:
                        self.common_rule = _create_config_rules(context, config_json)[0]
                    elif config_json.get("name", "").startswith(AUDIT_LOGS_RULE):
                        self.audit_logs_rules = _create_config_rules(context, config_json)
                    else:
                        self.rules.extend(_create_config_rules(context, config_json))
            except Exception:
                context.exception(f"Failed to load configuration file: '{config_file_path}'")
async def get_dynatrace_token_metadata(dt_session: ClientSession,
                                       context: LoggingContext,
                                       dynatrace_url: str,
                                       dynatrace_api_key: str,
                                       timeout: Optional[int] = 2) -> dict:
    try:
        response = await dt_session.post(
            url=f"{dynatrace_url.rstrip('/')}/api/v1/tokens/lookup",
            headers={
                "Authorization": f"Api-Token {dynatrace_api_key}",
                "Content-Type": "application/json; charset=utf-8"
            },
            json={"token": dynatrace_api_key},
            verify_ssl=get_should_require_valid_certificate(),
            timeout=timeout)
        if response.status != 200:
            context.log(
                f'Unable to get Dynatrace token metadata: {response.status}, url: {response.url}, reason: {response.reason}'
            )
            return {}

        return await response.json()
    except Exception as e:
        context.log(
            f'Unable to get Dynatrace token metadata. Error details: {e}')
        return {}
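
# A minimal sketch of calling get_dynatrace_token_metadata above from an aiohttp session.
# The Dynatrace URL and API token values are placeholders, not real credentials.
import asyncio
from aiohttp import ClientSession

async def _token_metadata_example():
    async with ClientSession() as session:
        metadata = await get_dynatrace_token_metadata(
            session, LoggingContext("startup"),
            "https://example.live.dynatrace.com", "dt0c01.EXAMPLE")
        return metadata

# asyncio.run(_token_metadata_example())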
def _apply_rule(context: LoggingContext, rule: ConfigRule, record: Dict, parsed_record: Dict):
    for attribute in rule.attributes:
        try:
            value = jmespath.search(attribute.pattern, record, JMESPATH_OPTIONS)
            if value:
                parsed_record[attribute.key] = value
        except Exception:
            context.exception(f"Encountered exception when evaluating attribute {attribute} of rule for {rule.entity_type_name}")
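
# A standalone sketch of the jmespath lookup that _apply_rule performs for a single attribute,
# using plain jmespath defaults instead of the project's JMESPATH_OPTIONS. The record shape and
# the attribute key/pattern are illustrative.
import jmespath

record = {"resource": {"labels": {"project_id": "my-project"}}}
parsed_record = {}
key, pattern = "gcp.project.id", "resource.labels.project_id"
value = jmespath.search(pattern, record)
if value:
    parsed_record[key] = value
# parsed_record == {"gcp.project.id": "my-project"}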
def _check_configuration_flags(logging_context: LoggingContext,
                               flags_to_check: List[str]):
    configuration_flag_values = []
    for key in flags_to_check:
        value = os.environ.get(key, None)
        if value is None:
            configuration_flag_values.append(f"{key} is None")
        else:
            configuration_flag_values.append(f"{key} = '{value}'")
    logging_context.log(
        f"Found configuration flags: {', '.join(configuration_flag_values)}")
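
# A minimal usage sketch of _check_configuration_flags above. The flag names are hypothetical;
# any environment variable names can be passed in.
os.environ["EXAMPLE_FLAG"] = "false"
_check_configuration_flags(LoggingContext("startup"), ["EXAMPLE_FLAG", "MISSING_FLAG"])
# logs something like: Found configuration flags: EXAMPLE_FLAG = 'false', MISSING_FLAG is None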
def _create_config_rule(context: LoggingContext, entity_name: str, rule_json: Dict) -> Optional[ConfigRule]:
    sources_json = rule_json.get("sources", [])
    if entity_name not in SPECIAL_RULE_NAMES and not sources_json:
        context.log(f"Encountered invalid rule with missing sources for config entry named {entity_name}")
        return None
    sources = _create_sources(context, sources_json)
    if entity_name not in SPECIAL_RULE_NAMES and not sources:
        context.log(f"Encountered invalid rule with invalid sources for config entry named {entity_name}: {sources_json}")
        return None
    attributes = _create_attributes(context, rule_json.get("attributes", []))
    return ConfigRule(entity_type_name=entity_name, source_matchers=sources, attributes=attributes)
async def get_all_accessible_projects(context: LoggingContext,
                                      session: ClientSession, token: str):
    url = "https://cloudresourcemanager.googleapis.com/v1/projects"
    headers = {"Authorization": "Bearer {token}".format(token=token)}
    response = await session.get(url, headers=headers)
    response_json = await response.json()
    all_projects = [
        project["projectId"] for project in response_json.get("projects", [])
    ]
    context.log("Access to following projects: " + ", ".join(all_projects))
    return all_projects
async def create_sfm_worker_loop(sfm_queue: Queue,
                                 logging_context: LoggingContext,
                                 instance_metadata: InstanceMetadata):
    while True:
        try:
            await asyncio.sleep(SFM_WORKER_EXECUTION_PERIOD_SECONDS)
            self_monitoring = LogSelfMonitoring()
            asyncio.get_event_loop().create_task(
                _loop_single_period(self_monitoring, sfm_queue,
                                    logging_context, instance_metadata))
        except Exception:
            logging_context.exception(
                "Logs Self Monitoring Worker Loop Exception:")
    def apply(self, context: LoggingContext, record: Dict, parsed_record: Dict):
        try:
            if self.common_rule:
                _apply_rule(context, self.common_rule, record, parsed_record)
            for rule in self.rules:
                if _check_if_rule_applies(rule, record, parsed_record):
                    _apply_rule(context, rule, record, parsed_record)
                    return
            # No matching rule has been found, applying the default rule
            if self.default_rule:
                _apply_rule(context, self.default_rule, record, parsed_record)
        except Exception:
            context.exception("Encountered exception when running Rule Engine")
def _create_attributes(context: LoggingContext, attributes_json: List[Dict]) -> List[Attribute]:
    result = []

    for source_json in attributes_json:
        key = source_json.get("key", None)
        pattern = source_json.get("pattern", None)

        if key and pattern:
            result.append(Attribute(key, pattern))
        else:
            context.log(f"Encountered invalid rule attribute with missing parameter, parameters were: key = {key}, pattern = {pattern}")

    return result
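
# A minimal sketch of the attribute JSON that _create_attributes above consumes; the second entry
# is missing its pattern, so it is logged and skipped. Key/pattern values are illustrative.
attributes_json = [
    {"key": "gcp.project.id", "pattern": "resource.labels.project_id"},
    {"key": "broken.attribute"},
]
attributes = _create_attributes(LoggingContext("config"), attributes_json)
# -> [Attribute("gcp.project.id", "resource.labels.project_id")]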
def load_activated_feature_sets(logging_context: LoggingContext,
                                activation_yaml) -> List[str]:
    services_whitelist = []
    for service in activation_yaml.get("services", []):
        feature_sets = service.get("featureSets", [])
        for feature_set in feature_sets:
            services_whitelist.append(
                f"{service.get('service')}/{feature_set}")
        if not feature_sets:
            logging_context.error(
                f"No feature set in given {service} service.")

    return services_whitelist
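
# A minimal usage sketch of load_activated_feature_sets above, with an illustrative activation
# config; it returns "<service>/<featureSet>" entries.
activation_yaml = {
    "services": [
        {"service": "gce_instance", "featureSets": ["default_metrics", "agent"]},
    ]
}
load_activated_feature_sets(LoggingContext("startup"), activation_yaml)
# -> ["gce_instance/default_metrics", "gce_instance/agent"]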
async def get_all_accessible_projects(context: LoggingContext,
                                      session: ClientSession, token: str):
    url = _CLOUD_RESOURCE_MANAGER_ROOT + "/projects?filter=lifecycleState%3AACTIVE"
    headers = {"Authorization": "Bearer {token}".format(token=token)}
    response = await session.get(url, headers=headers)
    response_json = await response.json()
    all_projects = [
        project["projectId"] for project in response_json.get("projects", [])
    ]
    if all_projects:
        context.log("Access to following projects: " + ", ".join(all_projects))
    else:
        context.log(
            "There is no access to any projects. Check service account configuration."
        )
    return all_projects
def generate_metadata():
    toc = []
    units = set()
    unmapped_units = set()
    # some metrics are used by multiple services, so the script will encounter them multiple times
    visited_metric_keys = set()
    supported_services = load_supported_services(LoggingContext(None), [])

    prepare_metric_metadata_dir()

    for supported_service in supported_services:
        print(f"\n => {supported_service.name}")
        for metric in supported_service.metrics:
            print(f"{metric.dynatrace_name}")
            if metric.dynatrace_name in visited_metric_keys:
                print(" - Already mapped, skipping")
                continue
            else:
                visited_metric_keys.add(metric.dynatrace_name)

            metadata = create_metadata(metric, unmapped_units)

            if not metadata:
                continue

            filename = write_metadata(metadata, metric)

            units.add(metric.unit)
            toc.append(filename)

    write_toc(toc)

    print(f"\nFound units: {units}")
    print(f"\nFailed to map units: {unmapped_units}")
async def create_token(context: LoggingContext, session: ClientSession):
    credentials_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', "")

    if credentials_path:
        context.log(f"Using credentials from {credentials_path}")
        with open(credentials_path) as key_file:
            credentials_data = json.load(key_file)

        return await get_token(key=credentials_data['private_key'],
                               service=credentials_data['client_email'],
                               uri=credentials_data['token_uri'],
                               session=session)
    else:
        context.log("Trying to use default service account")
        return await create_default_service_account_token(context, session)
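
# A minimal sketch tying create_token above to get_all_accessible_projects (shown earlier),
# assuming aiohttp/asyncio are available in the module.
import asyncio
from aiohttp import ClientSession

async def _list_projects_example():
    async with ClientSession() as session:
        token = await create_token(LoggingContext("auth"), session)
        return await get_all_accessible_projects(LoggingContext("auth"), session, token)

# asyncio.run(_list_projects_example())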
def create_dimension(
    name: str, value: Any,
    context: LoggingContext = LoggingContext(None)) -> DimensionValue:
    string_value = str(value)

    if len(name) > MAX_DIMENSION_NAME_LENGTH:
        context.log(
            f'MINT rejects dimension names longer than {MAX_DIMENSION_NAME_LENGTH} chars. Dimension name "{name}" has been truncated'
        )
        name = name[:MAX_DIMENSION_NAME_LENGTH]
    if len(string_value) > MAX_DIMENSION_VALUE_LENGTH:
        context.log(
            f'MINT rejects dimension values longer than {MAX_DIMENSION_VALUE_LENGTH} chars. Dimension value "{string_value}" has been truncated'
        )
        string_value = string_value[:MAX_DIMENSION_VALUE_LENGTH]

    return DimensionValue(name, string_value)
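
# A minimal usage sketch of create_dimension above: values that fit within
# MAX_DIMENSION_NAME_LENGTH / MAX_DIMENSION_VALUE_LENGTH pass through unchanged,
# longer ones are truncated and a warning is logged.
dimension = create_dimension("region", "us-central1", LoggingContext(None))
# dimension == DimensionValue("region", "us-central1")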
    def __init__(self, context: LoggingContext, source: str, condition: str):
        self.source = source
        self.condition = condition
        # Start optimistic and make sure the fields checked below are always defined
        self.valid = True
        self._evaluator = None
        for key in _CONDITION_COMPARATOR_MAP:
            if condition.startswith(key):
                self._evaluator = _CONDITION_COMPARATOR_MAP[key]
                break
        operands = re.findall(r"'(.*?)'", condition, re.DOTALL)
        self._operand = operands[0] if operands else None
        self._source_value_extractor = _SOURCE_VALUE_EXTRACTOR_MAP.get(source.casefold(), None)

        if not self._source_value_extractor:
            context.log(f"Unsupported source type: '{source}'")
            self.valid = False
        if not self._evaluator or not self._operand:
            context.log(f"Failed to parse condition macro for expression: '{condition}'")
            self.valid = False
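
# A standalone sketch of how the condition macro above is split into comparator and operand.
# The "$eq(...)" comparator name is illustrative; the real keys come from _CONDITION_COMPARATOR_MAP.
import re

condition = "$eq('gce_instance')"
operands = re.findall(r"'(.*?)'", condition, re.DOTALL)
# operands[0] == "gce_instance"; condition.startswith(<comparator key>) selects the evaluator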
    def apply(self, context: LoggingContext, record: Dict, parsed_record: Dict):
        try:
            if self.common_rule:
                _apply_rule(context, self.common_rule, record, parsed_record)
            any_rule_applied = self._apply_rules(context, self.rules, record, parsed_record)
            any_audit_rule_applied = self._apply_rules(context, self.audit_logs_rules, record, parsed_record)
            # No matching rule has been found, applying the default rule
            no_rule_applied = not (any_rule_applied or any_audit_rule_applied)
            if no_rule_applied and self.default_rule:
                _apply_rule(context, self.default_rule, record, parsed_record)
        except Exception as e:
            context.t_exception(f"Encountered exception when running Rule Engine. {e}")
def _create_sources(context: LoggingContext, sources_json: List[Dict]) -> List[SourceMatcher]:
    result = []

    for source_json in sources_json:
        source = source_json.get("source", None)
        condition = source_json.get("condition", None)
        source_matcher = None

        if source and condition:
            source_matcher = SourceMatcher(context, source, condition)

        if source_matcher and source_matcher.valid:
            result.append(source_matcher)
        else:
            context.log(f"Encountered invalid rule source, parameters were: source= {source}, condition = {condition}")
            return []

    return result
async def async_dynatrace_gcp_extension(
        project_ids: Optional[List[str]] = None,
        services: Optional[List[GCPService]] = None):
    """
    Used in docker or for tests
    """
    timestamp_utc = datetime.utcnow()
    timestamp_utc_iso = timestamp_utc.isoformat()
    execution_identifier = hashlib.md5(
        timestamp_utc_iso.encode("UTF-8")).hexdigest()
    logging_context = LoggingContext(execution_identifier)
    logging_context.log(f'Starting execution for project(s): {project_ids}'
                        if project_ids else "Starting execution")
    event_context = {
        'timestamp': timestamp_utc_iso,
        'event_id': timestamp_utc.timestamp(),
        'event_type': 'test',
        'execution_id': execution_identifier
    }
    data = {'data': '', 'publishTime': timestamp_utc_iso}

    start_time = time.time()
    await handle_event(data, event_context, project_ids, services)
    elapsed_time = time.time() - start_time
    logging_context.log(f"Execution took {elapsed_time}\n")
async def _loop_single_period(self_monitoring: LogSelfMonitoring,
                              sfm_queue: Queue, context: LoggingContext,
                              instance_metadata: InstanceMetadata):
    try:
        sfm_list = _pull_sfm(sfm_queue)
        if sfm_list:
            async with init_gcp_client_session() as gcp_session:
                context = await _create_sfm_logs_context(
                    sfm_queue, context, gcp_session, instance_metadata)
                self_monitoring = aggregate_self_monitoring_metrics(
                    self_monitoring, sfm_list)
                _log_self_monitoring_data(self_monitoring, context)
                if context.self_monitoring_enabled:
                    if context.token is None:
                        context.log(
                            "Cannot proceed without authorization token, failed to send log self monitoring"
                        )
                        return
                    if not isinstance(context.token, str):
                        context.log(
                            f"Failed to fetch access token, got non string value: {context.token}"
                        )
                        return
                    time_series = create_self_monitoring_time_series(
                        self_monitoring, context)
                    await push_self_monitoring_time_series(
                        context, time_series)
                for _ in sfm_list:
                    sfm_queue.task_done()
    except Exception:
        context.exception("Log SFM Loop Exception:")
def generate_ddu_estimation():
    supported_services = load_supported_services(LoggingContext(None), [])
    print("|| name || data points rate (/min) || estimated DDU rate (/min) (1 data point = 0.001 DDU)||")
    for supported_service in supported_services:
        data_points_per_minute = 0

        for metric in supported_service.metrics:
            dimensions_multiplier = (ASSUMED_AVG_DIMENSION_VALUES ** len(metric.dimensions))
            rate_per_minute = (metric.sample_period_seconds.seconds / 60.0)
            data_points_per_minute += rate_per_minute * dimensions_multiplier

        ddu_estimation = round(data_points_per_minute * DATA_POINT_WEIGHT, DECIMAL_PLACES)
        data_points_rate_estimation = round(data_points_per_minute, 0)
        print(f"| {supported_service.name} | {data_points_rate_estimation} | {ddu_estimation} |")
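
# A worked example of the estimation above, assuming a metric with a 60 s sample period,
# 2 dimensions, and ASSUMED_AVG_DIMENSION_VALUES = 10 (illustrative value):
#   rate_per_minute        = 60 / 60.0  = 1.0
#   dimensions_multiplier  = 10 ** 2    = 100
#   data_points_per_minute = 1.0 * 100  = 100
#   ddu_estimation         = 100 * 0.001 DDU per data point = 0.1 DDU/min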
async def test_execution_expired_token():
    expected_cluster_response_code = 401
    expected_sent_requests = 3

    response(expected_cluster_response_code, "Expired token")

    ack_queue = Queue()
    sfm_queue = Queue()
    mock_subscriber_client = MockSubscriberClient(ack_queue)

    expected_ack_ids = [f"ACK_ID_{i}" for i in range(0, 10)]

    message_data_json = json.loads(LOG_MESSAGE_DATA)
    message_data_json["timestamp"] = datetime.utcnow().isoformat() + "Z"
    fresh_message_data = json.dumps(message_data_json)

    for ack_id in expected_ack_ids:
        message = create_fake_message(ack_id=ack_id, message_data=fresh_message_data)
        mock_subscriber_client.add_message(message)

    worker_state = WorkerState("TEST")
    perform_pull(worker_state, sfm_queue, mock_subscriber_client, "")
    # Flush the remaining messages
    perform_flush(worker_state, sfm_queue, mock_subscriber_client, "")

    metadata = InstanceMetadata(
        project_id="",
        container_name="",
        token_scopes="",
        service_account="",
        audience="",
        hostname="local deployment 2",
        zone="us-east1"
    )

    self_monitoring = LogSelfMonitoring()
    await log_self_monitoring._loop_single_period(self_monitoring, sfm_queue, LoggingContext("TEST"), metadata)
    sfm_queue.join()

    assert ack_queue.qsize() == 0

    verify_requests(expected_cluster_response_code, expected_sent_requests)

    assert self_monitoring.too_old_records == 0
    assert self_monitoring.parsing_errors == 0
    assert self_monitoring.records_with_too_long_content == 0
    assert Counter(self_monitoring.dynatrace_connectivity) == {DynatraceConnectivity.ExpiredToken: 3}
    assert self_monitoring.processing_time > 0
    assert self_monitoring.sending_time > 0
async def create_default_service_account_token(context: LoggingContext,
                                               session: ClientSession):
    """
    For reference check out https://github.com/googleapis/google-auth-library-python/tree/master/google/auth/compute_engine
    :param session:
    :return:
    """
    url = _METADATA_ROOT + "/instance/service-accounts/{0}/token".format(
        "default")
    try:
        response = await session.get(url, headers=_METADATA_HEADERS)
        if response.status >= 300:
            body = await response.text()
            context.log(
                f"Failed to authorize with Service Account from Metadata Service, response is {response.status} => {body}"
            )
            return None
        response_json = await response.json()
        return response_json["access_token"]
    except Exception as e:
        context.log(
            f"Failed to authorize with Service Account from Metadata Service due to '{e}'"
        )
        return None
async def test_empty_activation_config(mocker: MockerFixture,
                                       monkeypatch: MonkeyPatchFixture):
    # NO filestore/default configured
    monkeypatch.setenv("ACTIVATION_CONFIG", "{services: []}")

    dt_session = ClientSession()
    mocker.patch.object(dt_session, 'get', side_effect=mocked_get)

    extensions_fetcher = ExtensionsFetcher(dt_session, "", "",
                                           LoggingContext("TEST"))
    result = await extensions_fetcher.execute()
    assert_that(result).is_not_none()
    feature_sets_to_filter_conditions = {
        f"{gcp_service_config.name}/{gcp_service_config.feature_set}":
        gcp_service_config.monitoring_filter
        for gcp_service_config in result.services
    }
    assert_that(feature_sets_to_filter_conditions).is_equal_to({})
def load_supported_services(context: LoggingContext) -> List[GCPService]:
    activation_yaml = read_activation_yaml()
    activation_config_per_service = get_activation_config_per_service(
        activation_yaml)
    feature_sets_from_activation_config = load_activated_feature_sets(
        context, activation_yaml)

    working_directory = os.path.dirname(os.path.realpath(__file__))
    config_directory = os.path.join(working_directory, "config")
    config_files = [
        file for file in listdir(config_directory)
        if isfile(os.path.join(config_directory, file)) and is_yaml_file(file)
    ]

    services = []
    for file in config_files:
        config_file_path = os.path.join(config_directory, file)
        try:
            with open(config_file_path, encoding="utf-8") as config_file:
                config_yaml = yaml.safe_load(config_file)
                technology_name = extract_technology_name(config_yaml)

                for service_yaml in config_yaml.get("gcp", {}):
                    service_name = service_yaml.get("service", "None")
                    featureSet = service_yaml.get("featureSet",
                                                  "default_metrics")
                    # If whitelist of services exists and current service is not present in it, skip
                    # If whitelist is empty - no services explicitly selected - load all available
                    whitelist_exists = len(feature_sets_from_activation_config) > 0
                    if f'{service_name}/{featureSet}' in feature_sets_from_activation_config or not whitelist_exists:
                        activation = activation_config_per_service.get(
                            service_name, {})
                        services.append(
                            GCPService(tech_name=technology_name,
                                       **service_yaml,
                                       activation=activation))

        except Exception as error:
            context.log(
                f"Failed to load configuration file: '{config_file_path}'. Error details: {error}"
            )
            continue
    featureSets = [
        f"{service.name}/{service.feature_set}" for service in services
    ]
    if featureSets:
        context.log("Selected feature sets: " + ", ".join(featureSets))
    else:
        context.log("Empty feature sets. GCP services not monitored.")
    return services
def run_ack_logs(worker_name: str, sfm_queue: Queue):
    logging_context = LoggingContext(worker_name)
    subscriber_client = pubsub.SubscriberClient()
    subscription_path = subscriber_client.subscription_path(
        LOGS_SUBSCRIPTION_PROJECT, LOGS_SUBSCRIPTION_ID)
    logging_context.log(f"Starting processing")

    worker_state = WorkerState(worker_name)
    while True:
        try:
            perform_pull(worker_state, sfm_queue, subscriber_client,
                         subscription_path)
        except Exception as e:
            if isinstance(e, Forbidden):
                logging_context.error(
                    f"{e} Please check whether assigned service account has permission to fetch Pub/Sub messages."
                )
            else:
                logging_context.exception("Failed to pull messages")
            # Backoff for 1 minute to avoid spamming requests and logs
            time.sleep(60)
def run_ack_logs(worker_name: str, sfm_queue: Queue):
    logging_context = LoggingContext(worker_name)
    subscriber_client = pubsub.SubscriberClient()
    subscription_path = subscriber_client.subscription_path(
        LOGS_SUBSCRIPTION_PROJECT, LOGS_SUBSCRIPTION_ID)
    logging_context.log(f"Starting processing")

    worker_state = WorkerState(worker_name)
    while True:
        try:
            perform_pull(worker_state, sfm_queue, subscriber_client,
                         subscription_path)
        except Exception as e:
            logging_context.exception("Failed to pull messages")
def generate_ddu_estimation():
    supported_services = load_supported_services(LoggingContext(None), [])

    print("|| Service name || Configuration || DDU per minute per instance ||")

    for supported_service in supported_services:
        data_points_per_minute = 0

        for metric in supported_service.metrics:
            dimensions_multiplier = (ASSUMED_AVG_DIMENSION_VALUES**len(
                metric.dimensions))
            rate_per_minute = (metric.sample_period_seconds.seconds / 60.0)
            data_points_per_minute += rate_per_minute * dimensions_multiplier

        ddu_estimation = round(data_points_per_minute * DATA_POINT_WEIGHT,
                               DECIMAL_PLACES)
        data_points_rate_estimation = round(data_points_per_minute, 0)
        feature_set = "/" + supported_service.feature_set

        print(
            f"| {supported_service.technology_name} | {supported_service.name}{feature_set} | {ddu_estimation} |"
        )
async def test_execute(mocker: MockerFixture, monkeypatch: MonkeyPatchFixture):
    # NO filestore/default configured
    monkeypatch.setenv("ACTIVATION_CONFIG", ACTIVATION_CONFIG)
    dt_session = ClientSession()
    mocker.patch.object(dt_session, 'get', side_effect=mocked_get)

    extensions_fetcher = ExtensionsFetcher(dt_session, "", "",
                                           LoggingContext("TEST"))
    result = await extensions_fetcher.execute()
    assert_that(result).is_not_none()
    feature_sets_to_filter_conditions = {
        f"{gcp_service_config.name}/{gcp_service_config.feature_set}":
        gcp_service_config.monitoring_filter
        for gcp_service_config in result.services
    }
    assert_that(feature_sets_to_filter_conditions).is_equal_to({
        "cloudsql_database/default_metrics":
        "",
        "gce_instance/default_metrics":
        "resource.labels.instance_name=starts_with(\"test\")",
        "gce_instance/agent":
        "resource.labels.instance_name=starts_with(\"test\")"
    })
async def async_dynatrace_gcp_extension():
    timestamp_utc = datetime.utcnow()
    timestamp_utc_iso = timestamp_utc.isoformat()
    execution_identifier = hashlib.md5(timestamp_utc_iso.encode("UTF-8")).hexdigest()
    logging_context = LoggingContext(execution_identifier)
    logging_context.log(f"Starting execution")
    event_context = {
        'timestamp': timestamp_utc_iso,
        'event_id': timestamp_utc.timestamp(),
        'event_type': 'test',
        'execution_id': execution_identifier
    }
    data = {'data': '', 'publishTime': timestamp_utc_iso}

    start_time = time.time()
    await handle_event(data, event_context, "dynatrace-gcp-extension")
    elapsed_time = time.time() - start_time
    logging_context.log(f"Execution took {elapsed_time}\n")