def _retry_request(self, request, timeout=2, attempts=3):
        """Execute a Google API request, retrying when the pipe breaks.

        The Google Python API client frequently has BrokenPipe errors. The
        request is executed once and, each time it fails with a
        BrokenPipeError, executed again for up to ``attempts`` retries. The
        sleep before the first retry is ``timeout`` seconds and it doubles
        before every further retry.

        Parameters
        ==========
        request: the Google Cloud request that needs to be executed
        timeout: time to sleep (in seconds) before the first retry
        attempts: retries left after a failure, re-raise the error at 0

        Returns
        =======
        The value returned by ``request.execute()``.

        Raises
        ======
        BrokenPipeError: when the request still fails and no retry is left
        Exception: any other error is logged verbosely and re-raised as is
        """
        while True:
            try:
                return request.execute()
            except BrokenPipeError:
                # Out of retries: hand the last broken pipe to the caller
                if attempts <= 0:
                    raise
                time.sleep(timeout)
                timeout *= 2
                attempts -= 1
            except Exception as ex:
                # Covers googleapiclient's HttpError too (it derives from
                # Exception), so no lookup of googleapiclient.errors is
                # needed while an error is already being handled.
                log_verbose_traceback(ex)
                raise
    def _get_bucket(self):
        """get a connection to the storage bucket (self.bucket), creating
        the bucket when it does not exist yet. Any failure to get or create
        it (e.g. the name is taken or otherwise invalid) is logged and
        re-raised.

        The bucket name is the first "/"-separated component of
        self.workflow.default_remote_prefix. Besides self.bucket this sets
        self.gs_subdir (the prefix without its leading "<bucket>/") and
        self.gs_logs (the "google-lifesciences-logs" path below gs_subdir).
        """
        # Import the exception class itself: a bare "import google" does not
        # guarantee that the google.cloud.exceptions submodule is loaded.
        from google.cloud.exceptions import NotFound

        # Hold path to requested subdirectory and main bucket
        prefix = self.workflow.default_remote_prefix
        bucket_name = prefix.split("/")[0]
        # Escape the name so that it is matched literally by the pattern
        self.gs_subdir = re.sub("^{}/".format(re.escape(bucket_name)), "",
                                prefix)
        self.gs_logs = os.path.join(self.gs_subdir, "google-lifesciences-logs")

        try:
            # Case 1: The bucket already exists
            try:
                self.bucket = self._bucket_service.get_bucket(bucket_name)

            # Case 2: The bucket needs to be created
            except NotFound:
                self.bucket = self._bucket_service.create_bucket(bucket_name)

        # Case 3: Getting or creating the bucket failed, e.g. because the
        # name is already taken. The outer try also sees errors raised by
        # create_bucket, which a sibling except clause would not catch.
        except Exception as ex:
            # Format the exception itself: arbitrary exceptions carry no
            # returncode/output attributes to report.
            logger.error("Cannot get or create {}:\n{}".format(
                bucket_name, ex))
            log_verbose_traceback(ex)
            raise

        logger.debug("bucket=%s" % self.bucket.name)
        logger.debug("subdir=%s" % self.gs_subdir)
        logger.debug("logs=%s" % self.gs_logs)
    def _get_services(self):
        """create the API clients: Google Discovery Build clients for
        storage, compute and Life Sciences, plus the google storage python
        client that is kept as self._bucket_service.
        """
        from googleapiclient.discovery import build as discovery_build
        from oauth2client.client import (
            GoogleCredentials,
            ApplicationDefaultCredentialsError,
        )
        from google.cloud import storage

        # Credentials must be exported to environment
        try:
            credentials = GoogleCredentials.get_application_default()
        except ApplicationDefaultCredentialsError as err:
            log_verbose_traceback(err)
            raise err

        def discover(api, version):
            """Build one discovery client with the shared credentials."""
            return discovery_build(api,
                                   version,
                                   credentials=credentials,
                                   cache_discovery=False)

        # Discovery clients for Google Cloud Storage, Compute and the
        # Life Sciences API
        self._storage_cli = discover("storage", "v1")
        self._compute_cli = discover("compute", "v1")
        self._api = discover("lifesciences", "v2beta")

        # Bucket operations use the google storage python client
        self._bucket_service = storage.Client()
Esempio n. 4
0
    def _get_services(self):
        """
        Use the Google Discovery Build to generate API clients
        for Life Sciences, and use the google storage python client
        for storage.

        Sets self._storage_cli, self._compute_cli and self._api (discovery
        clients) and self._bucket_service (google storage client).

        Raises google.auth.exceptions.DefaultCredentialsError (after logging
        it) when google.auth.default cannot provide credentials.
        """
        from googleapiclient.discovery import build as discovery_build
        from google.cloud import storage
        import google.auth
        # The exception class lives in google.auth.exceptions; it is not
        # an attribute of the google.auth package itself.
        import google.auth.exceptions
        import google_auth_httplib2
        import httplib2
        import googleapiclient.http

        # Credentials must be exported to environment
        try:
            # oauth2client is deprecated, see: https://google-auth.readthedocs.io/en/master/oauth2client-deprecation.html
            # google.auth is replacement
            # not sure about scopes here. this cover all cloud services
            creds, _ = google.auth.default(
                scopes=["https://www.googleapis.com/auth/cloud-platform"])
        except google.auth.exceptions.DefaultCredentialsError as ex:
            log_verbose_traceback(ex)
            raise

        def new_authorized_http():
            """Return a fresh authorized transport around a new httplib2.Http."""
            return google_auth_httplib2.AuthorizedHttp(
                creds, http=httplib2.Http())

        def build_request(http, *args, **kwargs):
            """
            Give every request its own transport; the passed-in http is
            deliberately ignored.
            See https://googleapis.github.io/google-api-python-client/docs/thread_safety.html
            """
            return googleapiclient.http.HttpRequest(
                new_authorized_http(), *args, **kwargs)

        # Discovery clients for Google Cloud Storage and Life Sciences API
        # create authorized http for building services
        authorized_http = new_authorized_http()

        def build_service(name, version):
            """Build one discovery client on the shared authorized http."""
            return discovery_build(
                name,
                version,
                cache_discovery=False,
                requestBuilder=build_request,
                http=authorized_http,
            )

        self._storage_cli = build_service("storage", "v1")
        self._compute_cli = build_service("compute", "v1")
        self._api = build_service("lifesciences", "v2beta")
        self._bucket_service = storage.Client()
Esempio n. 5
0
def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
                version, benchmark, benchmark_repeats, rule, conda_env, linemaps, debug=False,
                shadow_dir=None):
    """
    Call the run method, convert its failures into a RuleException and,
    when benchmarking is requested, write the measured wall clock times.

    Arguments
    run               -- the run method
    input             -- list of input files
    output            -- list of output files
    params            -- handed to the run method unchanged
    wildcards         -- so far processed wildcards
    threads           -- usable threads
    resources         -- handed to the run method unchanged
    log               -- list of log files
    version           -- handed to the run method unchanged
    benchmark         -- file to write the timings to, None disables it
    benchmark_repeats -- number of executions when benchmark is given
    rule (str)        -- rule name
    conda_env         -- handed to the run method unchanged
    linemaps          -- used to locate and format the origin of an error
    debug             -- on posix, reopen stdin from /dev/stdin when set
    shadow_dir        -- optional shadow directory root
    """
    if debug and os.name == "posix":
        sys.stdin = open('/dev/stdin')

    try:
        wallclock = []
        # a single execution unless a benchmark was requested
        repeats = benchmark_repeats if benchmark is not None else 1
        for _ in range(repeats):
            started = time.time()
            # execute the actual run method.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources, log,
                    version, rule, conda_env)
            wallclock.append(time.time() - started)

    except (KeyboardInterrupt, SystemExit) as interrupt:
        # re-raise the keyboard interrupt in order to record an error in the scheduler but ignore it
        raise interrupt
    except BaseException as ex:
        log_verbose_traceback(ex)
        # this ensures that exception can be re-raised in the parent thread
        lineno, origin_file = get_exception_origin(ex, linemaps)
        message = format_error(ex, lineno,
                               linemaps=linemaps,
                               snakefile=origin_file,
                               show_traceback=True)
        raise RuleException(message)

    # nothing left to do without a benchmark file
    if benchmark is None:
        return

    try:
        with open(benchmark, "w") as bench_file:
            # header line, then one line per execution
            print("s", "h:m:s", sep="\t", file=bench_file)
            for seconds in wallclock:
                duration = str(datetime.timedelta(seconds=seconds))
                print(seconds, duration, sep="\t", file=bench_file)
    except BaseException as ex:
        raise WorkflowError(ex)
Esempio n. 6
0
def run_wrapper(run, input, output, params, wildcards, threads, resources, log,
                version, benchmark, benchmark_repeats, linemaps, debug=False,
                shadow_dir=None):
    """
    Run the given run method inside the shadow directory, turning any
    failure into a RuleException and optionally writing benchmark timings.

    Arguments
    run               -- the run method
    input             -- list of input files
    output            -- list of output files
    params            -- passed through to the run method
    wildcards         -- so far processed wildcards
    threads           -- usable threads
    resources         -- passed through to the run method
    log               -- list of log files
    version           -- passed through to the run method
    benchmark         -- benchmark file to write, or None for no benchmark
    benchmark_repeats -- how often to execute when benchmark is given
    linemaps          -- used to locate and format the origin of an error
    debug             -- on posix, reopen stdin from /dev/stdin when set
    shadow_dir        -- optional shadow directory root
    """
    if debug and os.name == "posix":
        sys.stdin = open('/dev/stdin')

    try:
        timings = []
        # without a benchmark the run method is executed exactly once
        n_runs = benchmark_repeats if benchmark is not None else 1
        for _ in range(n_runs):
            t_start = time.time()
            # execute the actual run method.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources, log,
                    version)
            timings.append(time.time() - t_start)

    except (KeyboardInterrupt, SystemExit) as stop:
        # re-raise the keyboard interrupt in order to record an error in the scheduler but ignore it
        raise stop
    except BaseException as ex:
        log_verbose_traceback(ex)
        # this ensures that exception can be re-raised in the parent thread
        lineno, snakefile = get_exception_origin(ex, linemaps)
        error_text = format_error(ex, lineno,
                                  linemaps=linemaps,
                                  snakefile=snakefile,
                                  show_traceback=True)
        raise RuleException(error_text)

    # the timings are only written when a benchmark file was requested
    if benchmark is None:
        return

    try:
        with open(benchmark, "w") as out:
            # header first, then seconds and h:m:s for every execution
            print("s", "h:m:s", sep="\t", file=out)
            for elapsed in timings:
                pretty = str(datetime.timedelta(seconds=elapsed))
                print(elapsed, pretty, sep="\t", file=out)
    except BaseException as ex:
        raise WorkflowError(ex)