def _calculate_metric_interval(metric_variant: dict, samples: int) -> int:
    """
    Calculate the index interval for a metric variant so that the index
    range is split into about ``samples`` intervals.

    Returns 1 when "count/value" is missing/zero or smaller than ``samples``.
    Otherwise returns ``(max_index - min_index + 1) // samples`` (integer
    floor division), but never less than 1. "min_index/value" defaults to 0
    and "max_index/value" defaults to the min index.
    """
    total = safe_get(metric_variant, "count/value")
    not_enough_points = not total or total < samples
    if not_enough_points:
        return 1

    first_index = safe_get(metric_variant, "min_index/value", default=0)
    last_index = safe_get(metric_variant, "max_index/value", default=first_index)
    index_span = int(last_index - first_index + 1)
    return max(1, index_span // samples)
Example #2
0
 def _get_metric_fields(metrics: Sequence[dict]) -> dict:
     """
     Map known metric buckets to their min/max/avg values.

     Only buckets whose "key" is one of the recognised metric names are
     kept; each is stored under its output field name with the values read
     from "min/value", "max/value" and "avg/value".
     """
     field_by_metric = {
         "cpu_usage": "cpu_usage",
         "memory_used": "mem_used_gb",
         "memory_free": "mem_free_gb",
     }
     fields = {}
     for bucket in metrics:
         if bucket["key"] not in field_by_metric:
             continue
         stats = {}
         stats["min"] = safe_get(bucket, "min/value")
         stats["max"] = safe_get(bucket, "max/value")
         stats["avg"] = safe_get(bucket, "avg/value")
         fields[field_by_metric[bucket["key"]]] = stats
     return fields
    def _get_task_metrics(self, task_id, es_index, event_type: EventType) -> Sequence:
        """
        Return the keys of the "metrics" terms-aggregation buckets for the
        events of the given task and event type.

        The search is limited to ``self.MAX_METRICS_COUNT`` buckets, requests
        no hits (size 0) and is routed by ``task_id``. An empty list is
        returned when the response holds no buckets.
        """
        must_match = [
            {"term": {"task": task_id}},
            {"term": {"type": event_type.value}},
        ]
        metrics_agg = {"terms": {"field": "metric", "size": self.MAX_METRICS_COUNT}}
        es_req = {
            "size": 0,
            "query": {"bool": {"must": must_match}},
            "aggs": {"metrics": metrics_agg},
        }

        with translate_errors_context(), TimingContext("es", "_get_task_metrics"):
            es_res = self.es.search(index=es_index, body=es_req, routing=task_id)

        buckets = safe_get(es_res, "aggregations/metrics/buckets", default=[])
        return [bucket["key"] for bucket in buckets]
Example #4
0
 def _get_active_workers(cls, company_id, from_timestamp: int,
                         to_timestamp: int) -> dict:
     """
     Return the workers found in the given timestamp range together with
     the maximum "timestamp" value aggregated for each of them.

     The result maps every worker bucket key to
     ``{"last_activity_time": <max timestamp value>}``; it is empty when the
     response contains no worker buckets.
     """
     last_activity_agg = {"max": {"field": "timestamp"}}
     workers_agg = {
         "terms": {"field": "worker"},
         "aggs": {"last_activity_time": last_activity_agg},
     }
     es_req = {
         "size": 0,
         "query": QueryBuilder.dates_range(from_timestamp, to_timestamp),
         "aggs": {"workers": workers_agg},
     }
     res = cls._run_worker_stats_query(company_id, es_req)

     active_workers = {}
     for bucket in safe_get(res, "aggregations/workers/buckets", default=[]):
         active_workers[bucket["key"]] = {
             "last_activity_time": bucket["last_activity_time"]["value"]
         }
     return active_workers
Example #5
0
 def _get_cardinality_fields(categories: Sequence[dict]) -> dict:
     """
     Map known category buckets to their "count/value".

     Only the "cpu" category is recognised; it is reported under the
     "num_cores" field. Other categories are skipped.
     """
     field_by_category = {"cpu": "num_cores"}
     fields = {}
     for bucket in categories:
         if bucket["key"] not in field_by_category:
             continue
         fields[field_by_category[bucket["key"]]] = safe_get(bucket, "count/value")
     return fields
Example #6
0
def get_page(page_number):
    """Return the HTML text of the given list page, or None if no response was obtained."""
    # list_id is read from the enclosing (module) scope.
    list_url = "https://www.imdb.com/list/" + list_id
    query = "/?sort=list_order,asc&st_dt=&mode=detail&page=" + str(page_number)

    response = safe_get(list_url + query)
    return None if response is None else response.text
def get_movie_synopsis(movie_id):
    """Return the first synopsis found on a title's plot-summary page.

    The page ``https://www.imdb.com/title/<movie_id>/plotsummary`` is
    requested and parsed; the first ``<li>`` whose id starts with
    ``synopsis-`` is passed through ``remove_spaces`` and returned.

    Returns None when:
      * the request produced no response,
      * the page contains the ``#no-synopsis-content`` marker, or
      * no ``synopsis-*`` list item exists on the page.
    """
    response = safe_get('https://www.imdb.com/title/' + movie_id +
                        '/plotsummary')
    if response is None:
        return None

    movie_page_html = BeautifulSoup(response.content, 'html.parser')
    if movie_page_html.select('#no-synopsis-content'):
        return None

    # find() yields the first match or None, so a page lacking both the
    # "no synopsis" marker and a synopsis item no longer raises IndexError.
    synopsis_html = movie_page_html.find(
        'li', id=lambda x: x and x.startswith('synopsis-'))
    if synopsis_html is None:
        return None
    return remove_spaces(synopsis_html)
Example #8
0
def unprepare_from_saved(call: APICall, tasks_data: Union[Sequence[dict], dict]):
    """
    Post-process task data in place before it is returned to the caller.

    Accepts a single task dict or a sequence of them. Output tags are
    conformed for the call, and the keys of each task's
    "execution/parameters" (when present) are unescaped.
    """
    tasks = [tasks_data] if isinstance(tasks_data, dict) else tasks_data

    conform_output_tags(call, tasks)

    for task in tasks:
        stored_params = safe_get(task, "execution/parameters")
        if stored_params is None:
            continue
        # Unescape the stored parameter keys back to their original form
        restored = {}
        for name, value in stored_params.items():
            restored[ParameterKeyEscaper.unescape(name)] = value
        dpath.set(task, "execution/parameters", restored)
Example #9
0
def prepare_create_fields(call: APICall,
                          valid_fields=None,
                          output=None,
                          previous_task: Task = None):
    """
    Parse and normalise the task fields supplied in ``call.data``.

    :param call: API call whose ``data`` holds the raw fields.
    :param valid_fields: fields accepted from the call; defaults to
        ``create_fields`` when None.
    :param output: existing output object; when "output_dest" is supplied
        its ``destination`` is updated, otherwise a new ``Output`` is created.
    :param previous_task: accepted for interface compatibility; not used in
        this function.
    :return: the parsed fields dict with "output_dest" moved to "output",
        "script/requirements" removed, tags de-duplicated, string script
        fields stripped and parameter keys stripped.
    """
    valid_fields = valid_fields if valid_fields is not None else create_fields
    # Work on a copy: adding "output_dest" directly to the shared
    # task_fields collection would permanently alter it for all other users.
    t_fields = set(task_fields)
    t_fields.add("output_dest")

    fields = parse_from_call(call.data, valid_fields, t_fields)

    # Move output_dest to output.destination
    output_dest = fields.get("output_dest")
    if output_dest is not None:
        fields.pop("output_dest")
        if output:
            output.destination = output_dest
        else:
            output = Output(destination=output_dest)
        fields["output"] = output

    try:
        dpath.delete(fields, "script/requirements")
    except dpath.exceptions.PathNotFound:
        pass

    # Make sure there are no duplicate tags
    tags = fields.get("tags")
    if tags:
        fields["tags"] = list(set(tags))

    # Strip all script fields (remove leading and trailing whitespace chars) to avoid unusable names and paths
    for field in task_script_fields:
        try:
            path = "script/%s" % field
            value = dpath.get(fields, path)
            if isinstance(value, six.string_types):
                value = value.strip()
            dpath.set(fields, path, value)
        except KeyError:
            # Script field not supplied in this call - nothing to strip
            pass

    parameters = safe_get(fields, "execution/parameters")
    if parameters is not None:
        # Parameter names are stripped of surrounding whitespace as well
        parameters = {k.strip(): v for k, v in parameters.items()}
        dpath.set(fields, "execution/parameters", parameters)

    return fields
Example #10
0
def prepare_for_save(call: APICall, fields: dict):
    """
    Normalise task fields in place before they are saved and return them.

    Tag fields are conformed for the call, string script fields are
    stripped of surrounding whitespace, and the keys of
    "execution/parameters" (when present) are escaped.
    """
    conform_tag_fields(call, fields)

    # Trim whitespace around script fields so that names and paths stay usable
    for script_field in task_script_fields:
        script_path = f"script/{script_field}"
        try:
            current = dpath.get(fields, script_path)
            cleaned = current.strip() if isinstance(current, str) else current
            dpath.set(fields, script_path, cleaned)
        except KeyError:
            # Field is absent from the supplied data - skip it
            pass

    raw_params = safe_get(fields, "execution/parameters")
    if raw_params is not None:
        # Escape keys to make them mongo-safe
        escaped = {}
        for name, value in raw_params.items():
            escaped[ParameterKeyEscaper.escape(name)] = value
        dpath.set(fields, "execution/parameters", escaped)

    return fields
Example #11
0
    def _get_resource_stats_per_agent(cls, company_id: str, key: str) -> dict:
        """
        Collect per-worker resource statistics over the most recent
        reporting interval.

        The interval length comes from the
        "apiserver.statistics.report_interval_hours" setting (24 when not
        configured) and ends at the current time. For every worker bucket
        the result holds ``{key: {...}}`` with "interval_sec", the
        cardinality fields taken from the category buckets and the
        min/max/avg fields taken from the metric buckets.
        """
        report_hours = config.get("apiserver.statistics.report_interval_hours", 24)
        agent_resource_threshold_sec = timedelta(hours=report_hours).total_seconds()
        to_timestamp = int(time.time())
        from_timestamp = to_timestamp - int(agent_resource_threshold_sec)

        # Count of distinct variants per category
        categories_agg = {
            "terms": {"field": "category"},
            "aggs": {"count": {"cardinality": {"field": "variant"}}},
        }
        # Min / max / avg of the reported value per metric
        metrics_agg = {
            "terms": {"field": "metric"},
            "aggs": {
                "min": {"min": {"field": "value"}},
                "max": {"max": {"field": "value"}},
                "avg": {"avg": {"field": "value"}},
            },
        }
        es_req = {
            "size": 0,
            "query": QueryBuilder.dates_range(from_timestamp, to_timestamp),
            "aggs": {
                "workers": {
                    "terms": {"field": "worker"},
                    "aggs": {
                        "categories": categories_agg,
                        "metrics": metrics_agg,
                    },
                }
            },
        }
        res = cls._run_worker_stats_query(company_id, es_req)

        def _get_cardinality_fields(categories: Sequence[dict]) -> dict:
            # Only the "cpu" category is reported, under "num_cores"
            field_by_category = {"cpu": "num_cores"}
            fields = {}
            for category in categories:
                if category["key"] in field_by_category:
                    fields[field_by_category[category["key"]]] = safe_get(
                        category, "count/value")
            return fields

        def _get_metric_fields(metrics: Sequence[dict]) -> dict:
            # Known metric names and the output fields they are reported under
            field_by_metric = {
                "cpu_usage": "cpu_usage",
                "memory_used": "mem_used_gb",
                "memory_free": "mem_free_gb",
            }
            fields = {}
            for metric in metrics:
                if metric["key"] in field_by_metric:
                    fields[field_by_metric[metric["key"]]] = {
                        "min": safe_get(metric, "min/value"),
                        "max": safe_get(metric, "max/value"),
                        "avg": safe_get(metric, "avg/value"),
                    }
            return fields

        stats_by_worker = {}
        for bucket in safe_get(res, "aggregations/workers/buckets", default=[]):
            worker = bucket["key"]
            worker_stats = {"interval_sec": agent_resource_threshold_sec}
            worker_stats.update(
                _get_cardinality_fields(safe_get(bucket, "categories/buckets", [])))
            worker_stats.update(
                _get_metric_fields(safe_get(bucket, "metrics/buckets", [])))
            stats_by_worker[worker] = {key: worker_stats}
        return stats_by_worker