Beispiel #1
0
    def get_jobs_queues(self):
        """Build the list of job queue locations this client reads from.

        The first entry is the client's own queue (the "jobs" object below
        the client id). It is followed by one queue per entry in
        self.labels, located at "labels/<label>/jobs". Sharing a queue per
        label means a single job written to a label queue is seen by every
        client carrying that label.

        Returns:
          A list of cloud.GCSUnauthenticatedLocation objects, all in the
          bucket of self.manifest_location.
        """
        def _queue(queue_path):
            # Job queues are world readable, hence the unauthenticated
            # location type.
            return cloud.GCSUnauthenticatedLocation.from_keywords(
                session=self._session,
                bucket=self.manifest_location.bucket,
                path=queue_path)

        queues = [_queue(utils.join_path(self.client_id, "jobs"))]
        queues.extend(
            _queue(utils.join_path("labels", label, "jobs"))
            for label in self.labels)

        return queues
Beispiel #2
0
 def flow_metadata_collection_for_server(self, client_id):
     """Return an OAuth location for the client's "flows.sqlite" object.

     Args:
       client_id: The client whose flow metadata collection is wanted.

     Raises:
       RuntimeError: if client_id is empty or None.
     """
     if not client_id:
         raise RuntimeError("client id expected")

     collection_path = utils.join_path(client_id, "flows.sqlite")
     return self.service_account.create_oauth_location(
         bucket=self.bucket, path=collection_path)
Beispiel #3
0
 def ticket_for_server(self, batch_name, *args):
     """Return the HTTP location of the ticket queue for a batch.

     The location is rooted at "tickets/<batch_name>/<args...>" and is
     created with READ and LIST access.
     """
     ticket_prefix = utils.join_path("tickets", batch_name, *args)
     return http.HTTPLocation.New(
         session=self._session,
         access=["READ", "LIST"],
         path_prefix=ticket_prefix,
         path_template="/")
Beispiel #4
0
    def _move_file_into_place(self, local_filename):
        """Register an uploaded file as the newest generation of base_path.

        When the request carries an "If-Match" header that differs from the
        generation currently recorded in the cache, the uploaded file is
        deleted and a 304 response carrying the current generation is sent.
        Otherwise the cache is updated with the new file and a 200 response
        carrying the new generation is sent.

        Args:
          local_filename: Path of the uploaded file on local disk.
        """
        # The new generation is derived from the current time.
        new_generation = self._get_generation_from_timestamp(time.time())

        # Public objects are kept below the ".public" prefix.
        target = self.base_path
        if self.public:
            target = utils.join_path(".public", target)

        # FIXME: This must be done under lock.
        expected = self.headers.get("If-Match")
        stored = self._cache.get_generation(target)
        if expected and stored != expected:
            # The caller's view is stale: drop the upload and report the
            # generation we currently hold.
            os.unlink(local_filename)
            status, etag = 304, stored
        else:
            self._cache.update_local_file_generation(
                target, new_generation, local_filename)
            status, etag = 200, new_generation

        self.send_response(status)
        self.send_header("Content-Length", 0)
        self.send_header("ETag", '"%s"' % etag)
        self.end_headers()
Beispiel #5
0
 def vfs_prefix_for_client(self, client_id, path="", expiration=None,
                           vfs_type="files"):
     """Return a signed policy location rooted in the client's VFS area.

     The prefix is "<client_id>/vfs/<vfs_type>/<path>" and the location is
     created with the path template "{subpath}/{nonce}". The expiration is
     handed to create_signed_policy_location unchanged.
     """
     vfs_prefix = utils.join_path(client_id, "vfs", vfs_type, path)
     return self.service_account.create_signed_policy_location(
         bucket=self.bucket,
         path_prefix=vfs_prefix,
         path_template="{subpath}/{nonce}",
         expiration=expiration)
Beispiel #6
0
    def vfs_path_for_server(self, client_id, path, vfs_type="analysis"):
        """Return an HTTP location for a path in the client's VFS area.

        The location's path prefix is "<client_id>/vfs/<vfs_type>/<path>".
        """
        vfs_path = utils.join_path(client_id, "vfs", vfs_type, path)
        return http.HTTPLocation.New(
            session=self._session, path_prefix=vfs_path)
Beispiel #7
0
    def vfs_path_for_server(self, client_id, path, vfs_type="analysis"):
        """Return an OAuth location for a path in the client's VFS area.

        The object path inside self.bucket is
        "<client_id>/vfs/<vfs_type>/<path>".
        """
        vfs_path = utils.join_path(client_id, "vfs", vfs_type, path)
        return self.service_account.create_oauth_location(
            bucket=self.bucket, path=vfs_path)
Beispiel #8
0
 def hunt_vfs_path_for_client(self, hunt_id, path_prefix="", expiration=None,
                              vfs_type="analysis",
                              path_template="{client_id}"):
     """Return a signed policy location inside a hunt's VFS area.

     The prefix is "hunts/<hunt_id>/vfs/<vfs_type>/<path_prefix>". The
     supplied path_template is always extended with "/{nonce}" before it is
     handed to create_signed_policy_location.
     """
     hunt_prefix = utils.join_path(
         "hunts", hunt_id, "vfs", vfs_type, path_prefix)
     nonce_template = path_template + "/{nonce}"
     return self.service_account.create_signed_policy_location(
         bucket=self.bucket,
         path_prefix=hunt_prefix,
         path_template=nonce_template,
         expiration=expiration)
Beispiel #9
0
    def vfs_path_for_client(self, client_id, path, mode="w", expiration=None,
                            vfs_type="analysis"):
        """Return a signed URL location for a path in the client's VFS area.

        The object path inside self.bucket is
        "<client_id>/vfs/<vfs_type>/<path>". The mode and expiration are
        forwarded to create_signed_url_location unchanged.
        """
        vfs_path = utils.join_path(client_id, "vfs", vfs_type, path)
        return self.service_account.create_signed_url_location(
            bucket=self.bucket,
            mode=mode,
            path=vfs_path,
            expiration=expiration)
Beispiel #10
0
    def get_jobs_queues(self):
        """Build the list of HTTP job queue locations this client reads from.

        The first entry is the client's own queue ("<client_id>/jobs"). It is
        followed by one queue per entry in self.labels, located at
        "labels/<label>/jobs/<secret>".

        Returns:
          A list of http.HTTPLocation objects sharing the base of
          self.manifest_location.
        """
        def _queue(prefix):
            return http.HTTPLocation.from_keywords(
                session=self._session,
                base=self.manifest_location.base,
                path_prefix=prefix)

        # The jobs queue is world readable.
        queues = [_queue(utils.join_path(self.client_id, "jobs"))]

        # Label queues are unauthenticated, so the secret is appended to keep
        # public (non deployment) readers out.
        queues.extend(
            _queue(utils.join_path("labels", label, "jobs", self.secret))
            for label in self.labels)

        return queues
Beispiel #11
0
 def vfs_prefix_for_client(self,
                           client_id,
                           path="",
                           expiration=None,
                           vfs_type="files"):
     """Return a write-only HTTP location rooted in the client's VFS area.

     The prefix is "<client_id>/vfs/<vfs_type>/<path>" and the location is
     created with WRITE access and the path template "{subpath}/{nonce}".
     """
     vfs_prefix = utils.join_path(client_id, "vfs", vfs_type, path)
     return http.HTTPLocation.New(
         session=self._session,
         access=["WRITE"],
         path_prefix=vfs_prefix,
         path_template="{subpath}/{nonce}",
         expiration=expiration)
Beispiel #12
0
 def hunt_vfs_path_for_client(self,
                              hunt_id,
                              path_prefix="",
                              expiration=None,
                              vfs_type="analysis",
                              path_template="{client_id}"):
     """Return a write-only HTTP location inside a hunt's VFS area.

     The prefix is "hunts/<hunt_id>/vfs/<vfs_type>/<path_prefix>". The
     supplied path_template is always extended with "/{nonce}".
     """
     hunt_prefix = utils.join_path(
         "hunts", hunt_id, "vfs", vfs_type, path_prefix)
     nonce_template = path_template + "/{nonce}"
     return http.HTTPLocation.New(
         session=self._session,
         access=["WRITE"],
         path_prefix=hunt_prefix,
         path_template=nonce_template,
         expiration=expiration)
Beispiel #13
0
    def location_from_path_for_server(self, path):
        """Construct a location from a simple string path.

        Path is just a reference into the bucket of the form:

        {bucket_name}/{object_path}

        Leading slashes are ignored. An empty path, or one consisting only
        of slashes, refers to the default bucket (self.bucket).

        Args:
          path: The string path, or an empty value for the default bucket.

        Returns:
          The location produced by service_account.create_oauth_location for
          the bucket named by the first path component and the object path
          formed from the remaining components.
        """
        # Strip before testing for emptiness: a path such as "/" would
        # otherwise yield no components at all and fail on parts[0].
        stripped = path.lstrip("/") if path else ""
        if not stripped:
            stripped = self.bucket.lstrip("/")

        parts = pathlib.PurePosixPath(stripped).parts
        return self.service_account.create_oauth_location(
            bucket=parts[0],
            path=utils.join_path(*parts[1:]))
Beispiel #14
0
    def _get_parameters(self, if_modified_since=None, **kwargs):
        """Calculate the URL, params and headers for a request.

        Args:
          if_modified_since: When truthy, it is formatted with
            handlers.format_date_time and sent as "If-Modified-Since".
          **kwargs: Forwarded to self.expand_path to produce the subpath.

        Returns:
          A tuple (url, params, headers, path). params is always an empty
          dict; path is self.path_prefix joined with the expanded subpath.
        """
        path = utils.join_path(self.path_prefix, self.expand_path(**kwargs))
        url = _join_url(self.base, path)

        headers = {"Cache-Control": "private"}
        # The policy and its signature travel base64 encoded in custom
        # headers.
        headers["x-rekall-policy"] = base64.b64encode(self.policy)
        headers["x-rekall-signature"] = base64.b64encode(self.signature)

        if if_modified_since:
            headers["If-Modified-Since"] = handlers.format_date_time(
                if_modified_since)

        return url, {}, headers, path
Beispiel #15
0
    def flow_ticket_for_client(self, batch_name, *ticket_names, **kw):
        """Returns a Location for the client to write tickets.

        When we issue requests to the client, we need to allow the client to
        report on the progress of the flow requests running on the client. We
        do this by instructing the client to write a "Flow Ticket" to the
        ticket location.

        Args:
          batch_name: Name of the ticket batch; the location is rooted at
            "tickets/<batch_name>/<ticket_names...>".
          *ticket_names: Further path components below the batch.
          **kw: Recognised keys are "expiration" and "path_template". Any
            other key is ignored.

        Returns:
          The location built by service_account.create_signed_policy_location.
        """
        expiration = kw.pop("expiration", None)
        path_template = kw.pop("path_template", None)

        # Without a caller supplied template only the nonce remains. The
        # previous code tried to concatenate None with a string here and
        # raised TypeError.
        if path_template is None:
            nonce_template = "{nonce}"
        else:
            nonce_template = path_template + "/{nonce}"

        return self.service_account.create_signed_policy_location(
            bucket=self.bucket,
            path_prefix=utils.join_path("tickets", batch_name, *ticket_names),
            path_template=nonce_template,
            expiration=expiration)
Beispiel #16
0
    def flow_ticket_for_client(self, batch_name, *ticket_names, **kw):
        """Returns a Location for the client to write tickets.

        When we issue requests to the client, we need to allow the client to
        report on the progress of the flow requests running on the client. We
        do this by instructing the client to write a "Flow Ticket" to the
        ticket location.

        Args:
          batch_name: Name of the ticket batch; the location is rooted at
            "tickets/<batch_name>/<ticket_names...>".
          *ticket_names: Further path components below the batch.
          **kw: Recognised keys are "expiration" and "path_template". Any
            other key is ignored.

        Returns:
          An http.HTTPLocation created with WRITE access.
        """
        expiration = kw.pop("expiration", None)
        path_template = kw.pop("path_template", None)

        # Without a caller supplied template only the nonce remains. The
        # previous code tried to concatenate None with a string here and
        # raised TypeError.
        if path_template is None:
            nonce_template = "{nonce}"
        else:
            nonce_template = path_template + "/{nonce}"

        return http.HTTPLocation.New(session=self._session,
                                     access=["WRITE"],
                                     path_prefix=utils.join_path(
                                         "tickets", batch_name, *ticket_names),
                                     path_template=nonce_template,
                                     expiration=expiration)
Beispiel #17
0
    def _get_parameters(self, **kwargs):
        """Calculate the endpoint, form params and headers for the request.

        Args:
          **kwargs: Forwarded to self.expand_path to produce the subpath.

        Returns:
          A tuple (url_endpoint, params, headers, key) where key is the
          bucket, the path prefix and the expanded subpath joined together.
        """
        key = utils.join_path(
            self.bucket, self.path_prefix, self.expand_path(**kwargs))

        # Signature and policy are sent base64 encoded. Gzip content
        # encoding is declared both as a parameter and as a header.
        params = {
            "GoogleAccessId": self.GoogleAccessId,
            "Signature": base64.b64encode(self.signature),
            "Policy": base64.b64encode(self.policy),
            "bucket": self.bucket,
            "key": key,
            "content-encoding": "gzip",
        }
        headers = {"content-encoding": "gzip"}

        return "https://storage.googleapis.com/", params, headers, key
Beispiel #18
0
    def create_signed_policy_location(self,
                                      expiration=None,
                                      path_prefix=None,
                                      bucket=None,
                                      path_template=None):
        """Generate a GCSSignedPolicyLocation object.

        The policy document restricts uploads to keys starting with
        "<bucket>/<path_prefix>", to the given bucket and to gzip content
        encoding. Note that GCSSignedPolicyLocation is only useful for
        writing.

        https://cloud.google.com/storage/docs/xml-api/post-object#policydocument

        Args:
          expiration: When the policy expires. Defaults to one hour from
            now.
          path_prefix: Prefix that every written key must start with.
          bucket: The bucket name. Defaults to the server's configured
            bucket.
          path_template: Stored on the returned location unchanged.

        Returns:
          A GCSSignedPolicyLocation carrying the JSON policy and its
          signature.
        """
        one_hour = 60 * 60
        if expiration is None:
            expiration = int(time.time()) + one_hour

        # If the bucket is not specified take it from the server's config.
        if bucket is None:
            bucket = self._config.server.bucket

        expires_at = arrow.get(expiration).isoformat()
        key_condition = [
            "starts-with", "$key", utils.join_path(bucket, path_prefix)]
        policy_document = {
            "expiration": expires_at,
            "conditions": [
                key_condition,
                {"bucket": bucket},
                {"Content-Encoding": "gzip"},
            ],
        }

        # The signature covers the base64 form of the serialized policy,
        # while the location keeps the plain JSON text.
        encoded_policy = json.dumps(policy_document, sort_keys=True)
        signature = self._sign_blob(base64.b64encode(encoded_policy))

        return GCSSignedPolicyLocation.from_keywords(
            session=self._session,
            policy=encoded_policy,
            signature=signature,
            bucket=bucket,
            path_prefix=path_prefix,
            path_template=path_template,
            GoogleAccessId=self.client_email,
            expiration=expiration)
Beispiel #19
0
    def jobs_queue_for_server(self, client_id=None, queue=None):
        """Return the OAuth location of a jobs queue for the server to edit.

        If a queue name is given, the location is the shared queue at
        "labels/<queue>/jobs". Otherwise it is the individual queue at
        "<client_id>/jobs". Both are created with public=True because the
        client reading them has no credentials.
        """
        if queue:
            jobs_path = "labels/%s/jobs" % queue
        else:
            jobs_path = utils.join_path(client_id, "jobs")

        return self.service_account.create_oauth_location(
            bucket=self.bucket, path=jobs_path, public=True)
Beispiel #20
0
    def list_files(self,
                   completion_routine=None,
                   paging=100,
                   max_results=100,
                   **kwargs):
        """A generator of LocationStat objects for entries below this one.

        Objects are listed page by page from the bucket, using self.path as
        the prefix. Listing stops when max_results entries have been
        yielded, when the server returns no further page token, or when a
        page comes back empty.

        Args:
          completion_routine: Handed to self._report_error together with
            the response when a request fails. The generator then stops.
          paging: Sent as the "maxResults" request parameter.
          max_results: Upper bound on the number of entries yielded.
          **kwargs: Forwarded to self._get_parameters.

        Yields:
          location.LocationStat objects. Each one wraps a copy of this
          location whose path and generation are set from the listed item.
        """
        _, params, headers, _ = self._get_parameters(**kwargs)
        url_endpoint = ("https://www.googleapis.com/storage/v1/b/%s/o" %
                        self.bucket)

        params["prefix"] = utils.join_path(self.path)
        params["maxResults"] = paging
        count = 0
        while count < max_results:
            resp = self.get_requests_session().get(url_endpoint,
                                                   params=params,
                                                   headers=headers)

            if not resp.ok:
                self._report_error(completion_routine, resp)
                return

            data = json.loads(resp.text)
            items = data.get("items", [])
            for item in items:
                # A page can hold more entries than we may still emit, so
                # the cap has to be enforced per item and not only between
                # pages.
                if count >= max_results:
                    return

                sublocation = self.copy()
                sublocation.path = item["name"]
                sublocation.generation = item["generation"]

                count += 1
                yield location.LocationStat.from_keywords(
                    session=self._session,
                    location=sublocation,
                    size=item["size"],
                    generation=item["generation"],
                    created=arrow.get(item["timeCreated"]).timestamp,
                    updated=arrow.get(item["updated"]).timestamp)

            next_page_token = data.get("nextPageToken")
            if not next_page_token or not items:
                break

            params["pageToken"] = next_page_token
Beispiel #21
0
    def jobs_queue_for_server(self, client_id=None, queue=None):
        """Return the HTTP location of a jobs queue for the server to edit.

        If a queue name is given, the location is the shared queue at
        "labels/<queue>/jobs/<client secret>". Otherwise it is the
        individual queue at "<client_id>/jobs". Both are created with
        public=True because the client reading them has no credentials.
        """
        if queue:
            jobs_prefix = "labels/%s/jobs/%s" % (
                queue, self._config.client.secret)
        else:
            jobs_prefix = utils.join_path(client_id, "jobs")

        return http.HTTPLocation.New(
            session=self._session, path_prefix=jobs_prefix, public=True)
Beispiel #22
0
    def do_GET(self):
        """Handle a GET request.

        Requests carrying an "action" parameter are dispatched to serve_api.
        Otherwise the base path is served when the request passes READ
        authentication. Failing that, the copy below ".public" is served
        when the cache has a generation for it. Everything else receives a
        403.
        """
        self._parse_qs()

        # This is an API call.
        if "action" in self.params:
            self.serve_api(self.base_path, self.params)
            return

        if self.authenticate("READ"):
            self.serve_static(self.base_path)
            return

        # Unauthenticated callers may still read published objects.
        public_path = utils.join_path(".public", self.base_path)
        if self._cache.get_generation(public_path):
            self.serve_static(public_path)
            return

        # Not authorized.
        self.send_error(403, "You are not authorized to view this location.")
Beispiel #23
0
    def do_GET(self):
        """Handle a GET request.

        Authenticated requests whose query string carries an "action"
        parameter are dispatched to serve_api; other authenticated requests
        are served statically. Unauthenticated requests are served from
        below ".public" when the cache has a generation for that path, and
        receive a 403 otherwise.
        """
        if not self.authenticate("GET"):
            # Unauthenticated callers may still read published objects.
            public_path = utils.join_path(".public", self.path)
            if self._cache.get_generation(public_path):
                self.serve_static(public_path)
            else:
                # Not authorized.
                self.send_error(
                    403, "You are not authorized to view this location.")
            return

        if "?" in self.path:
            path, _, qs = self.path.partition("?")
            params = urlparse.parse_qs(qs)
            if "action" in params:
                self.serve_api(path, params)
                return

        # NOTE(review): self.path still includes any query string here -
        # confirm that serve_static copes with that.
        self.serve_static(self.path)
Beispiel #24
0
    def vfs_path_for_client(self,
                            client_id,
                            path,
                            mode="w",
                            expiration=None,
                            vfs_type="analysis"):
        """Return an HTTP location for a path in the client's VFS area.

        The location's path prefix is "<client_id>/vfs/<vfs_type>/<path>".

        Args:
          client_id: The client owning the VFS area.
          path: Path below the VFS type directory.
          mode: "r" grants READ access, "w" grants WRITE access.
          expiration: Forwarded to the location unchanged.
          vfs_type: The VFS sub-area, "analysis" by default.

        Raises:
          ValueError: if mode is neither "r" nor "w".
        """
        if mode not in ("r", "w"):
            raise ValueError("Invalid mode")

        access = ["READ"] if mode == "r" else ["WRITE"]
        vfs_path = utils.join_path(client_id, "vfs", vfs_type, path)

        return http.HTTPLocation.New(
            session=self._session,
            access=access,
            path_prefix=vfs_path,
            expiration=expiration)
Beispiel #25
0
 def ticket_for_server(self, batch_name, *args):
     """Return the OAuth location of the ticket queue for a batch.

     The object path inside self.bucket is
     "tickets/<batch_name>/<args...>".
     """
     ticket_path = utils.join_path("tickets", batch_name, *args)
     return self.service_account.create_oauth_location(
         bucket=self.bucket, path=ticket_path)
Beispiel #26
0
 def flows_for_server(self, flow_id):
     """Return the OAuth location at "flows/<flow_id>" for flow objects."""
     flow_path = utils.join_path("flows", flow_id)
     return self.service_account.create_oauth_location(
         bucket=self.bucket, path=flow_path)
Beispiel #27
0
 def vfs_index_for_server(self, client_id=None):
     """Return the OAuth location of the client's "vfs.index" object."""
     index_path = utils.join_path(client_id, "vfs.index")
     return self.service_account.create_oauth_location(
         bucket=self.bucket, path=index_path)
Beispiel #28
0
 def get_canonical(self, **kwargs):
     """Return a GCSLocation for the fully expanded path of this location.

     The path is self.path_prefix joined with the result of expanding the
     path template with kwargs.
     """
     full_path = utils.join_path(
         self.path_prefix, self.expand_path(**kwargs))
     return GCSLocation.from_keywords(
         session=self._session, bucket=self.bucket, path=full_path)
Beispiel #29
0
 def to_path(self):
     """Return the bucket name and the object path joined into one path."""
     components = (self.bucket, self.path)
     return utils.join_path(*components)
Beispiel #30
0
    def create_signed_url_location(self,
                                   mode="r",
                                   expiration=None,
                                   path=None,
                                   bucket=None,
                                   upload="direct",
                                   headers=None,
                                   public=False):
        """A Factory for GCSSignedURLLocation() instances.

        Args:
          mode: Can be "r" for reading, "w" for writing.
          expiration: When this URL should expire. By default 1 hour.
          path: The path within the bucket for the object.
          bucket: The bucket name. Defaults to the server's configured
            bucket.
          upload: With "resumable" a write becomes a POST carrying the
            "x-goog-resumable: start" header. Any other value writes with
            PUT.
          headers: A GCSHeaders object, a dict converted with
            GCSHeaders.from_primitive, or None for a fresh GCSHeaders.
          public: Only consulted when headers is None. It then adds the
            "x-goog-acl: public-read" header.

        Raises:
          IOError: if mode is neither "r" nor "w".
        """
        if headers is None:
            headers = GCSHeaders(session=self._session)
            if public:
                headers.SetMember("x-goog-acl", "public-read")
        elif isinstance(headers, dict):
            headers = GCSHeaders.from_primitive(headers, self._session)

        if mode not in ("r", "w"):
            raise IOError("Mode not supported")

        if mode == "r":
            method = "GET"
        elif upload == "resumable":
            method = "POST"
            # Resumable uploads require these headers.
            headers.SetMember("x-goog-resumable", "start")
        else:
            method = "PUT"

        if expiration is None:
            # Default 1 hour from now.
            expiration = time.time() + 60 * 60

        # If the bucket is not specified take it from the server's config.
        if bucket is None:
            bucket = self._config.server.bucket

        # Build the signed string according to
        # https://cloud.google.com/storage/docs/access-control/signed-urls#string-components
        # The leading fields are: HTTP_Verb, Content_MD5, Content_Type and
        # Expiration.
        signed_fields = [method, "", "", str(int(expiration))]
        signed_fields.extend(
            "%s:%s" % (k, v)
            for k, v in sorted(headers.to_primitive(False).iteritems()))

        path = urllib.quote(path, safe="/:")

        # Canonicalized_Resource
        signed_fields.append("/" + utils.join_path(bucket, path))

        return GCSSignedURLLocation.from_keywords(
            session=self._session,
            signature=self._sign_blob("\n".join(signed_fields)),
            GoogleAccessId=self.client_email,
            expiration=expiration,
            bucket=bucket,
            path=path,
            method=method,
            headers=headers,
            upload=upload,
        )