Example #1
    def start_sender(cls):
        if not cls.supported:
            return

        url = config.get("apiserver.statistics.url")

        retries = config.get("apiserver.statistics.max_retries", 5)
        max_backoff = config.get("apiserver.statistics.max_backoff_sec", 5)
        session = requests.Session()
        adapter = HTTPAdapter(max_retries=Retry(retries))
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        session.headers["Content-type"] = "application/json"

        WarningFilter.attach()

        while not ThreadsManager.terminating:
            try:
                report = cls.send_queue.get()

                # Set a random backoff factor each time we send a report
                adapter.max_retries.backoff_factor = random.random() * max_backoff

                session.post(url, data=dumps(report))

            except Exception:
                # Keep the sender loop alive even if a report fails to send
                pass
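
The session setup above is a generic requests retry pattern and can be reproduced standalone. A minimal sketch, assuming only requests and urllib3; the helper name and the URL passed to get_adapter are illustrative:

import random

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_retrying_session(retries: int = 5) -> requests.Session:
    session = requests.Session()
    adapter = HTTPAdapter(max_retries=Retry(retries))
    # Route both schemes through the retrying adapter
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    session.headers["Content-Type"] = "application/json"
    return session

session = make_retrying_session()
# Randomize the backoff factor, as the example does before each report
adapter = session.get_adapter("https://example.com")
adapter.max_retries.backoff_factor = random.random() * 5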
Example #2
    def generate():
        scroll_id = None
        batch_size = 1000
        while True:
            log_events, scroll_id, _ = event_bll.scroll_task_events(
                task.company,
                task_id,
                order="asc",
                event_type="log",
                batch_size=batch_size,
                scroll_id=scroll_id,
            )
            if not log_events:
                break
            for ev in log_events:
                ev["asctime"] = ev.pop("timestamp")
                if is_json:
                    ev.pop("type")
                    ev.pop("task")
                    yield json.dumps(ev) + "\n"
                else:
                    try:
                        yield line_format.format(**ev)
                    except KeyError as ex:
                        raise errors.bad_request.FieldsValueError(
                            "undefined placeholders in line format",
                            placeholders=[str(ex)],
                        )

            if len(log_events) < batch_size:
                break
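
The loop above is a standard cursor-pagination generator: fetch a page, yield its lines, and stop on an empty or short page. A simplified self-contained sketch, where fetch_page is a hypothetical stand-in for event_bll.scroll_task_events:

import json
from typing import Iterator, List, Optional, Tuple

def fetch_page(
    scroll_id: Optional[str], batch_size: int
) -> Tuple[List[dict], Optional[str]]:
    # Stub: one page of two events, then an empty page
    if scroll_id is None:
        return [{"timestamp": 1, "msg": "a"}, {"timestamp": 2, "msg": "b"}], "s1"
    return [], "s1"

def generate(batch_size: int = 1000) -> Iterator[str]:
    scroll_id = None
    while True:
        events, scroll_id = fetch_page(scroll_id, batch_size)
        if not events:
            break
        for ev in events:
            ev["asctime"] = ev.pop("timestamp")  # rename, as the example does
            yield json.dumps(ev) + "\n"
        if len(events) < batch_size:
            break  # a short page means the scroll is exhausted

print("".join(generate()))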
Example #3
    def generate():
        scroll_id = None
        batch_size = 1000
        while True:
            log_events, scroll_id, _ = event_bll.scroll_task_events(
                company_id,
                task_id,
                order="asc",
                event_type="log",
                batch_size=batch_size,
                scroll_id=scroll_id)
            if not log_events:
                break
            for ev in log_events:
                ev['asctime'] = ev.pop('@timestamp')
                if is_json:
                    ev.pop('type')
                    ev.pop('task')
                    yield json.dumps(ev) + '\n'
                else:
                    try:
                        yield line_format.format(**ev)
                    except KeyError as ex:
                        raise errors.bad_request.FieldsValueError(
                            'undefined placeholders in line format',
                            placeholders=[str(ex)])

            if len(log_events) < batch_size:
                break
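
This appears to be an earlier revision of the generator in Example #2: it reads the raw Elasticsearch `@timestamp` field before renaming it to `asctime`, and it receives `company_id` directly rather than taking it from the task; the scroll-and-batch logic is otherwise identical.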
Example #4
    def get_tags(self,
                 company,
                 include_system: bool = False,
                 filter_: Sequence[str] = None) -> dict:
        """
        Get tags and optionally system tags for the company
        Return the dictionary of tags per tags field name
        The function retrieves both cached values from Redis in one call
        and re calculates any of them if missing in Redis
        """
        fields = [
            self._tags_field,
            *([self._system_tags_field] if include_system else []),
        ]
        redis_keys = [
            self._get_tags_cache_key(company, f, filter_) for f in fields
        ]
        cached = self.redis.mget(redis_keys)
        ret = {}
        for field, tag_data, key in zip(fields, cached, redis_keys):
            if tag_data is not None:
                tags = json.loads(tag_data)
            else:
                tags = list(self._get_tags_from_db(company, field, filter_))
                self.redis.setex(
                    key,
                    time=self._tags_cache_expiration_seconds,
                    value=json.dumps(tags),
                )
            ret[field] = tags

        return ret
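
Distilled, the method is a read-through cache: one mget covers all keys, and each miss is recomputed and written back with setex. A sketch assuming a local Redis on the default port; the key prefix, TTL, and loader are illustrative:

import json

import redis

r = redis.Redis()

def load_from_db(field: str) -> list:
    # Stand-in for the real database query
    return [f"{field}-tag-1", f"{field}-tag-2"]

def get_cached(fields: list, ttl: int = 3600) -> dict:
    keys = [f"tags/{f}" for f in fields]
    cached = r.mget(keys)  # one round trip for all keys
    ret = {}
    for field, key, raw in zip(fields, keys, cached):
        if raw is not None:
            ret[field] = json.loads(raw)
        else:
            value = load_from_db(field)
            r.setex(key, time=ttl, value=json.dumps(value))  # refill on miss
            ret[field] = value
    return ret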
Example #5
    def _export_task_events(
        cls, task: Task, base_filename: str, writer: ZipFile, hash_
    ) -> Sequence[str]:
        artifacts = []
        filename = f"{base_filename}_{task.id}{cls.events_file_suffix}.json"
        print(f"Writing task events into {writer.filename}:{filename}")
        with BytesIO() as f:
            with cls.JsonLinesWriter(f) as w:
                scroll_id = None
                while True:
                    res = cls.event_bll.get_task_events(
                        task.company, task.id, scroll_id=scroll_id
                    )
                    if not res.events:
                        break
                    scroll_id = res.next_scroll_id
                    for event in res.events:
                        if event.get("type") == "training_debug_image":
                            url = cls._get_fixed_url(event.get("url"))
                            if url:
                                event["url"] = url
                                artifacts.append(url)
                        w.write(json.dumps(event))
            data = f.getvalue()
            hash_.update(data)
            writer.writestr(filename, data)

        return artifacts
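
The key detail is that the bytes fed to hash_.update are exactly the bytes stored in the archive, so the hash always matches the written entry. That sequence in isolation, with made-up file and entry names:

import hashlib
import json
from io import BytesIO
from zipfile import ZipFile

hash_ = hashlib.md5()
with ZipFile("export.zip", "w") as writer:
    with BytesIO() as f:
        for event in [{"type": "log", "msg": "hello"}]:
            f.write((json.dumps(event) + "\n").encode())
        data = f.getvalue()
        hash_.update(data)  # hash exactly what goes into the archive
        writer.writestr("events.json", data)
print(hash_.hexdigest())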
Example #6
    def _write_update_file(
        cls, map_file: Path, entities: dict, created_files: Sequence[str]
    ):
        map_data = cls._MapData(
            files=created_files,
            entities={
                entity.id: cls._get_last_update_time(entity)
                for entity in chain.from_iterable(entities.values())
            },
        )
        map_file.write_text(json.dumps(attr.asdict(map_data)))
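
The attr.asdict call suggests _MapData is an attrs class. A self-contained round-trip sketch under that assumption; the class body and values are illustrative:

import json
from pathlib import Path

import attr

@attr.s(auto_attribs=True)
class MapData:
    files: list
    entities: dict

map_data = MapData(files=["export_abc.zip"], entities={"task-1": "2021-01-01"})
Path("export.map").write_text(json.dumps(attr.asdict(map_data)))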
Example #7
    def get_response(self):
        def make_version_number(version):
            """
            Client versions <=2.0 expect expect endpoint versions in float format, otherwise throwing an exception
            """
            if version is None:
                return None
            if self.requested_endpoint_version < PartialVersion("2.1"):
                return float(str(version))
            return str(version)

        if self.result.raw_data and not self.failed:
            # endpoint returned raw data and no error was detected, return raw data, no fancy dicts
            return self.result.raw_data, self.result.content_type

        else:
            res = {
                "meta": {
                    "id": self.id,
                    "trx": self.trx,
                    "endpoint": {
                        "name":
                        self.endpoint_name,
                        "requested_version":
                        make_version_number(self.requested_endpoint_version),
                        "actual_version":
                        make_version_number(self.actual_endpoint_version),
                    },
                    "result_code": self.result.code,
                    "result_subcode": self.result.subcode,
                    "result_msg": self.result.msg,
                    "error_stack": self.result.traceback,
                },
                "data": self.result.data,
            }
            if self.content_type.lower() == JSON_CONTENT_TYPE:
                with TimingContext("json", "serialization"):
                    try:
                        res = json.dumps(res)
                    except Exception as ex:
                        # JSON serialization may fail, most likely due to a
                        # problem with the data, so drop it and try again
                        if not self.result.data:
                            raise
                        self.result.data = None
                        msg = "Error serializing response data: " + str(ex)
                        self.set_error_result(
                            code=500, subcode=0, msg=msg, include_stack=False
                        )
                        return self.get_response()

            return res, self.content_type
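
The notable part is the serialization fallback: if json.dumps fails, the payload is dropped and the method calls itself again so an error envelope is still serialized. A distilled sketch of that degrade-and-retry shape, with illustrative names:

import json

def build_response(data) -> str:
    res = {"meta": {"result_code": 200, "result_msg": "OK"}, "data": data}
    try:
        return json.dumps(res)
    except Exception as ex:
        if not data:
            raise  # nothing to drop, so the failure is not in the payload
        res["meta"].update(
            result_code=500, result_msg=f"Error serializing response data: {ex}"
        )
        res["data"] = None
        return json.dumps(res)

print(build_response({"ok": True}))
print(build_response({"bad": object()}))  # degrades to the error envelope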
Example #8
def set_preferences(call, company_id, req_model):
    # type: (APICall, str, SetPreferencesRequest) -> Dict
    assert isinstance(call, APICall)
    changes = req_model.preferences

    def invalid_key(_, key, __):
        if not isinstance(key, str):
            return True
        elif key.startswith("$") or "." in key:
            raise errors.bad_request.FieldsValueError(
                f"Key {key} is invalid. Keys cannot start with '$' or contain '.'."
            )
        return True

    remap(changes, visit=invalid_key)

    base_preferences = get_user_preferences(call)
    new_preferences = deepcopy(base_preferences)
    for key, value in changes.items():
        try:
            dpath.new(new_preferences, key, value, separator=".")
        except Exception:
            log.exception(
                'invalid preferences update for user "%s": key=`%s`, value=`%s`',
                call.identity.user,
                key,
                value,
            )
            raise errors.bad_request.InvalidPreferencesUpdate(key=key, value=value)

    if new_preferences == base_preferences:
        updated = 0
    else:
        with translate_errors_context("updating user preferences"):
            updated = User.objects(id=call.identity.user, company=company_id).update(
                upsert=False, preferences=dumps(new_preferences)
            )

    return {
        "updated": updated,
        "fields": {"preferences": new_preferences} if updated else {},
    }
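
dpath.new is what expands the dotted keys into nested updates, which is also why keys containing '.' are rejected up front. A small sketch, assuming dpath 2.x (where new() is exposed at the top level); the keys and values are illustrative:

from copy import deepcopy

import dpath

base = {"ui": {"theme": "light"}}
new_prefs = deepcopy(base)
for key, value in {"ui.theme": "dark", "ui.rows": 50}.items():
    dpath.new(new_prefs, key, value, separator=".")
print(new_prefs)  # {'ui': {'theme': 'dark', 'rows': 50}}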
Example #9
    def _write_update_file(
        cls,
        map_file: Path,
        entities: dict,
        created_files: Sequence[str],
        metadata_hash: str,
    ):
        map_file.write_text(
            json.dumps(
                dict(
                    files=created_files,
                    entities={
                        entity.id: cls._get_last_update_time(entity)
                        for entity in chain.from_iterable(entities.values())
                    },
                    metadata_hash=metadata_hash,
                )
            )
        )
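
Compared with Example #6, this variant drops the attrs-based `_MapData` container and serializes a plain dict directly, adding a `metadata_hash` field, apparently so that metadata changes can be detected as well (Example #11 passes that hash in).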
Example #10
    def to_json(self: ModelBase):
        return dumps(self.to_struct())
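
to_struct() suggests a jsonmodels-style model: the model is first reduced to plain dicts and lists that dumps can handle. A sketch against the public jsonmodels API; the Person model is illustrative:

from json import dumps

from jsonmodels import fields, models

class Person(models.Base):
    name = fields.StringField()

print(dumps(Person(name="Ada").to_struct()))  # {"name": "Ada"}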
Example #11
    def export_to_zip(
        cls,
        filename: str,
        experiments: Sequence[str] = None,
        projects: Sequence[str] = None,
        artifacts_path: str = None,
        task_statuses: Sequence[str] = None,
        tag_exported_entities: bool = False,
        metadata: Mapping[str, Any] = None,
    ) -> Sequence[str]:
        if task_statuses and not set(task_statuses).issubset(get_options(TaskStatus)):
            raise ValueError("Invalid task statuses")

        file = Path(filename)
        entities = cls._resolve_entities(
            experiments=experiments, projects=projects, task_statuses=task_statuses
        )

        hash_ = hashlib.md5()
        if metadata:
            meta_str = json.dumps(metadata)
            hash_.update(meta_str.encode())
            metadata_hash = hash_.hexdigest()
        else:
            meta_str, metadata_hash = "", ""

        map_file = file.with_suffix(".map")
        updated, old_files = cls._check_for_update(
            map_file, entities=entities, metadata_hash=metadata_hash
        )
        if not updated:
            print(f"There are no updates from the last export")
            return old_files

        for old in old_files:
            old_path = Path(old)
            if old_path.is_file():
                old_path.unlink()

        with ZipFile(file, **cls.zip_args) as zfile:
            if metadata:
                zfile.writestr(cls.metadata_filename, meta_str)
            artifacts = cls._export(
                zfile,
                entities=entities,
                hash_=hash_,
                tag_entities=tag_exported_entities,
            )

        file_with_hash = file.with_name(f"{file.stem}_{hash_.hexdigest()}{file.suffix}")
        file.replace(file_with_hash)
        created_files = [str(file_with_hash)]

        artifacts = cls._filter_artifacts(artifacts)
        if artifacts and artifacts_path and os.path.isdir(artifacts_path):
            artifacts_file = file_with_hash.with_suffix(cls.artifacts_ext)
            with ZipFile(artifacts_file, **cls.zip_args) as zfile:
                cls._export_artifacts(zfile, artifacts, artifacts_path)
            created_files.append(str(artifacts_file))

        cls._write_update_file(
            map_file,
            entities=entities,
            created_files=created_files,
            metadata_hash=metadata_hash,
        )

        return created_files
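
The rename near the end embeds the content hash in the final filename, presumably so that re-exports with identical content produce the same name. That step in isolation, with made-up file names:

import hashlib
from pathlib import Path

file = Path("export.zip")
file.write_bytes(b"fake archive contents")

hash_ = hashlib.md5(file.read_bytes())
file_with_hash = file.with_name(f"{file.stem}_{hash_.hexdigest()}{file.suffix}")
file.replace(file_with_hash)  # rename in place, keeping the directory
print(file_with_hash)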
Example #12
    def to_json(self):
        return dumps(self.to_struct())
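
A variant of Example #10 without the `self: ModelBase` annotation; the serialization itself is unchanged.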