Beispiel #1
0
def create_project_batch(  # pylint: disable=too-many-arguments
    project_id,
    batch_type,
    batch_name,
    sample_ids,
    host,
    email,
    password,
    api_key,
):
    """Create a batch in a project."""
    # sample_ids arrives as one comma-separated string (or a falsy value);
    # turn it into a list of whitespace-stripped ids.
    parsed_sample_ids = []
    if sample_ids:
        parsed_sample_ids = [
            raw_id.strip() for raw_id in sample_ids.split(",")
        ]
        echo_debug("Sample ids translation: {}".format(parsed_sample_ids))

    credentials = Credentials(email=email, password=password, api_key=api_key)
    optionals = Optionals(host=host)

    command = CreateBatch(
        project_id,
        batch_type,
        batch_name,
        parsed_sample_ids,
        credentials,
        optionals,
    )
    command.run()
Beispiel #2
0
def download_file(destination, download_url, no_progress=False):
    """Stream the content behind a URL into an already opened file object.

    Args:
        destination (File): writable object; every received chunk is passed
            to its ``write`` method
        download_url (str): url of the file to download
        no_progress (bool): don't show progress bar

    Returns:
        None. The downloaded bytes are only written to ``destination``.
    """
    show_progress = not no_progress
    request_options = {
        "stream": True,
        "allow_redirects": False,
        "headers": {},
        "timeout": 30,
    }

    with requests.get(download_url, **request_options) as response:
        response.raise_for_status()
        echo_debug("Starting download")

        if show_progress:
            # The progress bar is sized from the content-length header.
            expected_size = int(response.headers["content-length"])
            progress = get_progress_bar(expected_size, "Downloading: ")
            progress.start()

        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            destination.write(chunk)
            if show_progress:
                progress.update(progress.value + len(chunk))

        if show_progress:
            progress.finish()

        echo_debug("Finished downloading")
Beispiel #3
0
def validate_credentials(credentials):
    """Validate user credentials.

    Args:
        credentials: object exposing ``email``, ``password`` and ``api_key``
            attributes.

    Returns:
        bool: False when email, password and API key are all provided at
            the same time (a warning is printed), True otherwise.
    """
    if credentials.email and credentials.password and credentials.api_key:
        echo_debug("User provided 2 sets of credentials.")
        # The trailing space matters: adjacent string literals are joined
        # without any separator.
        echo_warning("Multiple sets of credentials provided. "
                     "Please provide either username/password or API key.")
        return False

    return True
Beispiel #4
0
def seek_files_to_upload(path, path_root=""):
    """Generate the paths of fastq files found under a directory tree.

    The tree is walked top-down with ``os.walk``; file names and directory
    names are sorted so the paths are produced in a stable order.

    Args:
        path (str): directory to search.
        path_root (str): optional prefix joined in front of every path.

    Yields:
        str: path of each file whose lower-cased path ends with one of
            FASTQ_EXTENSIONS.
    """
    for root, dirs, files in os.walk(path):
        # Sorting in place controls the order in which os.walk descends
        # into the sub-directories; os.walk handles the recursion itself,
        # so no explicit recursive call is needed.
        dirs.sort()

        for file_name in sorted(files):
            file_path = os.path.join(path_root, root, file_name)

            if file_path.lower().endswith(FASTQ_EXTENSIONS):
                echo_debug("Found file to upload: {}".format(file_path))
                yield file_path
Beispiel #5
0
def build_file_path(deliverable, file_with_prefix, download_to, filename=None):
    """Work out where on the file system a deliverable should be saved.

    Args:
        deliverable (SampleFile or dict): provides the download url and the
            file type, either as attributes or through ``get``
        file_with_prefix (str): template the destination path is built from
        download_to (str): general location where to download the file to
        filename (str, optional): source file name to use instead of the one
            deduced from the download url

    Returns:
         str : file path on current file system
    """
    prefix = _get_prefix_parts(file_with_prefix)

    if isinstance(deliverable, SampleFile):
        download_url = deliverable.download_url
        file_type = deliverable.file_type
    else:
        download_url = deliverable.get("download_url")
        file_type = deliverable.get("file_type")

    # An explicitly given filename wins over the one hidden in the url.
    source_filename = filename or get_filename_from_download_url(download_url)

    if prefix.file_extension:
        name_template = "{}.{}".format(prefix.filename, prefix.file_extension)
    else:
        name_template = prefix.filename

    # Values substituted into the placeholders of the name template.
    template_values = {
        DownloadTemplateParts.FILE_TYPE.value: (
            FILE_TYPES_MAPPER.get(file_type) or file_type
        ),
        DownloadTemplateParts.FILE_EXTENSION.value: (
            deliverable_type_from_filename(source_filename)
        ),
        DownloadTemplateParts.DEFAULT_FILENAME.value: source_filename,
    }
    destination_filename = name_template.format(**template_values)

    echo_debug(
        "Calculated destination filename: {}".format(destination_filename))
    return _create_filepath(download_to, prefix.dirs, destination_filename)
Beispiel #6
0
def delete_project_samples(  # pylint: disable=too-many-arguments
    project_id,
    sample_ids,
    host,
    email,
    password,
    api_key,
):
    """Delete samples in a project."""
    # sample_ids is a comma-separated string; a falsy value means no ids.
    if sample_ids:
        parsed_sample_ids = [
            raw_id.strip() for raw_id in sample_ids.split(",")
        ]
    else:
        parsed_sample_ids = []
    echo_debug("Sample ids translation: {}".format(parsed_sample_ids))

    credentials = Credentials(email=email, password=password, api_key=api_key)
    optionals = Optionals(host=host)

    command = DeleteSamples(
        project_id,
        parsed_sample_ids,
        credentials,
        optionals,
    )
    command.run()
Beispiel #7
0
def get_filename_from_download_url(url):
    """Work out the file name a download url points to.

    The name is taken from the ``response-content-disposition`` query
    argument when FILENAME_RE matches it; otherwise the last segment of the
    url path is used.

    Args:
        url (str): URL string

    Returns:
        str: filename
    """
    parsed_url = urlparse(url)
    try:
        query_args = parse_qs(parsed_url.query)
        disposition = query_args["response-content-disposition"][0]
        return re.findall(FILENAME_RE, disposition)[0]
    except (KeyError, IndexError):
        # Either the query argument is missing or the pattern did not match.
        echo_debug("URL didn't contain filename query argument. "
                   "Assume filename from url")

    return parsed_url.path.rpartition("/")[2]
Beispiel #8
0
def login(api_client, credentials):
    """Authenticate the user against the API.

    An API key, when present, is installed on the client and no login call
    is made. Otherwise missing email/password are prompted for and a login
    is attempted; when the error message mentions ``otp_token`` a one time
    password is prompted for and the login is retried.

    Returns:
        bool: True when the user is authenticated, False when login failed.
    """
    if credentials.api_key:
        echo_debug("User authorized via api key")
        api_client.set_api_key(credentials.api_key)
        return True

    if not (credentials.email and credentials.password):
        echo_info("Login required")
        if not credentials.email:
            credentials.email = click.prompt("Email", type=str, err=True)
        if not credentials.password:
            credentials.password = click.prompt(
                "Password", type=str, hide_input=True, err=True
            )

    try:
        api_client.login(
            credentials.email,
            credentials.password,
            credentials.otp_token,
        )
        echo_debug("User logged in successfully")
        return True
    except APIClientError as err:
        if "otp_token" not in err.message:
            echo_debug("Failed to login: {}".format(err))
            echo_error(
                "Failed to login. "
                "Please verify your credentials and try again")
            return False

        # The server asked for a second factor: collect it and try again.
        echo_info("One time password required")
        credentials.otp_token = click.prompt(
            "One time password", type=str, err=True
        )
        return login(api_client, credentials)
Beispiel #9
0
def basespace_import(  # pylint: disable=too-many-arguments
    basespace_project_ids,
    project_id,
    metadata_json,
    host,
    email,
    password,
    api_key,
):  # pylint: disable=line-too-long
    """Import all Biosamples from BaseSpace projects to a project. Optionally add
    metadata to the samples.

    Examples:

        Import Biosamples to a project:

            gencove basespace projects import 12345678 06a5d04b-526a-4471-83ba-fb54e0941758

        Import Biosamples from multiple BaseSpace projects to a project:

            gencove basespace projects import 12345678,87654321 06a5d04b-526a-4471-83ba-fb54e0941758

        Import Biosamples to a project with metadata:

            gencove basespace projects import 12345678 06a5d04b-526a-4471-83ba-fb54e0941758 --metadata-json='{"batch": "batch1"}'
    """  # noqa: E501
    # The BaseSpace project ids come in as a single comma-separated string.
    parsed_project_ids = [
        raw_id.strip() for raw_id in basespace_project_ids.split(",")
    ]
    echo_debug(
        "BaseSpace project ids translation: {}".format(parsed_project_ids))

    credentials = Credentials(email=email, password=password, api_key=api_key)
    optionals = BaseSpaceImportOptionals(host=host,
                                         metadata_json=metadata_json)

    command = BaseSpaceImport(
        parsed_project_ids,
        project_id,
        credentials,
        optionals,
    )
    command.run()
Beispiel #10
0
def _create_filepath(download_to, prefix_dirs, filename):
    """Build full file path and ensure that directory structure exists.

    Args:
        download_to (str): top level directory path
        prefix_dirs (str): subdirectories structure to create under
            download_to.
        filename (str): name of the file inside download_to/file_prefix
            structure.

    Returns:
        str: ``download_to/prefix_dirs/filename``

    Raises:
        OSError: when the directory structure cannot be created.
    """
    echo_debug("_create_filepath Downloading to: {}".format(download_to))
    echo_debug("_create_filepath file prefix is: {}".format(prefix_dirs))

    path = os.path.join(download_to, prefix_dirs)
    # Cross-platform cross-python-version directory creation
    if not os.path.exists(path):
        echo_debug("creating path: {}".format(path))
        try:
            os.makedirs(path)
        except OSError:
            # Something else may have created the directory between the
            # existence check and makedirs; that is fine. Any other failure
            # is re-raised.
            if not os.path.isdir(path):
                raise

    file_path = os.path.join(path, filename)
    echo_debug("Deduced full file path is {}".format(file_path))
    return file_path
Beispiel #11
0
 def echo_debug(msg, **kwargs):
     """Output debug message.

     Forwards ``msg`` and all keyword arguments unchanged to ``echo_debug``.
     """
     # NOTE(review): the enclosing scope is not visible here. If this def is
     # a class member, the call below resolves to a module-level echo_debug;
     # if it is itself module-level, it calls itself without end. Confirm.
     echo_debug(msg, **kwargs)
Beispiel #12
0
def deliverable_type_from_filename(filename):
    """Return everything after the first dot of a file name.

    A name without any dot gives an empty string.
    """
    _stem, _dot, filetype = filename.partition(".")
    message = "Deduced filetype to be: {} from filename: {}".format(
        filetype, filename)
    echo_debug(message)
    return filetype
Beispiel #13
0
def download_file(file_path,
                  download_url,
                  skip_existing=True,
                  no_progress=False):
    """Download a file to file system.

    Data is first written to ``<file_path>.tmp`` and renamed to
    ``file_path`` once the transfer is complete. When a temporary file is
    already present, the remaining bytes are requested with a ``Range``
    header and appended to it.

    Args:
        file_path (str): full file path, according to destination
            and download template
        download_url (str): url of the file to download
        skip_existing (bool): skip already downloaded files
        no_progress (bool): don't show progress bar

    Returns:
        str : file path
            location of the downloaded file
    """

    file_path_tmp = "{}.tmp".format(file_path)
    resuming = os.path.exists(file_path_tmp)
    if resuming:
        file_mode = "ab"
        headers = dict(
            Range="bytes={}-".format(os.path.getsize(file_path_tmp)))
        echo_info("Resuming previous download: {}".format(file_path))
    else:
        file_mode = "wb"
        headers = dict()
        echo_info("Downloading file to {}".format(file_path))

    stream_params = dict(stream=True,
                         allow_redirects=False,
                         headers=headers,
                         timeout=30)

    with requests.get(download_url, **stream_params) as req:
        req.raise_for_status()
        # NOTE(review): for a ranged (resumed) response content-length is
        # presumably the size of the remaining part, not of the whole file.
        total = int(req.headers["content-length"])
        # pylint: disable=E0012,C0330
        if (skip_existing and os.path.isfile(file_path)
                and os.path.getsize(file_path) == total):
            echo_info("Skipping existing file: {}".format(file_path))
            return file_path

        if resuming and req.status_code != 206:
            # Only "206 Partial Content" means the Range header was honoured.
            # Any other answer carries the body from its first byte, so
            # appending it to the partial file would corrupt the download.
            echo_debug("Server did not honour the Range header. "
                       "Restarting download from scratch.")
            file_mode = "wb"

        echo_debug("Starting to download file to: {}".format(file_path))

        with open(file_path_tmp, file_mode) as downloaded_file:
            if not no_progress:
                pbar = get_progress_bar(total, "Downloading: ")
                pbar.start()
            for chunk in req.iter_content(chunk_size=CHUNK_SIZE):
                downloaded_file.write(chunk)
                if not no_progress:
                    pbar.update(pbar.value + len(chunk))
            if not no_progress:
                pbar.finish()

        # Cross-platform cross-python-version file overwriting
        if os.path.exists(file_path):
            echo_debug("Found old file under same name: {}. "
                       "Removing it.".format(file_path))
            os.remove(file_path)
        os.rename(file_path_tmp, file_path)
        echo_info("Finished downloading a file: {}".format(file_path))
        return file_path
Beispiel #14
0
 def _refresh_authentication(self):
     """Exchange the stored refresh token for a new JWT and install it."""
     echo_debug("Refreshing authentication")
     # refresh_token returns an object whose ``access`` attribute is handed
     # to _set_jwt.
     refreshed = self.refresh_token(self._jwt_refresh_token)
     self._set_jwt(refreshed.access)
Beispiel #15
0
    def _request(
        self,
        endpoint="",
        params=None,
        method="get",
        custom_headers=None,
        timeout=60,
        sensitive=False,
    ):
        """Send a request to the API and return the decoded JSON response.

        Args:
            endpoint (str): path joined onto ``self.host`` to form the url.
            params (dict, optional): sent as query parameters for "get";
                serialized into the request body for every other method.
            method (str): "get" or "delete"; any other value is sent as a
                POST request.
            custom_headers (dict, optional): merged over the default
                headers.
            timeout (int): timeout handed to the underlying HTTP call.
            sensitive (bool): when true, payload and response content are
                masked in the debug output.

        Returns:
            The decoded JSON body of a 2xx response, or an empty dict when
            the body is empty.

        Raises:
            APIClientTooManyRequestsError: the server answered with 429.
            APIClientTimeout: the connection failed or the server did not
                answer in time.
            APIClientError: any other non-2xx response.
        """
        url = urljoin(text(self.host), text(endpoint))
        headers = {
            "content-type": "application/json",
            "date": None,
            "Gencove-cli-version": cli_version(),
        }
        if custom_headers:
            headers.update(custom_headers)

        if not params:
            params = {}

        echo_debug(
            "Contacting url: {} with payload: {}".format(
                url, "[SENSITIVE CONTENT]" if sensitive else params
            )
        )
        start = time.time()

        try:
            if method == "get":
                response = get(
                    url=url, params=params, headers=headers, timeout=timeout
                )
            elif method == "delete":
                post_payload = APIClient._serialize_post_payload(params)
                response = delete(
                    url=url,
                    data=post_payload,
                    headers=headers,
                    timeout=timeout,
                )
            else:
                post_payload = APIClient._serialize_post_payload(params)
                response = post(
                    url=url,
                    data=post_payload,
                    headers=headers,
                    timeout=timeout,
                )

            if response.status_code == 429:
                raise APIClientTooManyRequestsError("Too Many Requests")
        except (ConnectTimeout, ConnectionError):
            # If request timed out,
            # let upper level handle it the way it sees fit.
            # one place might want to retry another might not.
            raise APIClientTimeout(  # pylint: disable=W0707
                "Could not connect to the api server"
            )
        except ReadTimeout:
            raise APIClientTimeout(  # pylint: disable=W0707
                "API server did not respond in timely manner"
            )

        echo_debug(
            "API response is {} status is {} in {}ms".format(
                "[SENSITIVE CONTENT]" if sensitive else response.content,
                response.status_code,
                (time.time() - start) * 1000,
            )
        )

        # pylint: disable=no-member
        if 200 <= response.status_code < 300:
            return response.json() if response.text else {}

        http_error_msg = ""
        if 400 <= response.status_code < 500:
            http_error_msg = "API Client Error: {}".format(response.reason)
            if response.text:
                try:
                    response_json = response.json()
                except ValueError:
                    # The error body is not JSON (for instance an HTML page).
                    # Report the reason alone instead of letting the decode
                    # error replace the APIClientError callers expect.
                    response_json = None
                    echo_debug("API error response is not valid JSON")

                if response_json is not None:
                    if "detail" in response_json:
                        http_error_msg += ": {}".format(
                            response_json["detail"]
                        )
                    else:
                        try:
                            error_msg = "\n".join(
                                [
                                    # create-batch can return error details
                                    # that is a dict, not a list
                                    "  {}: {}".format(
                                        key,
                                        value[0]
                                        if isinstance(value, list)
                                        else str(value),
                                    )
                                    for key, value in response_json.items()
                                ]
                            )
                        except AttributeError:
                            # Not a mapping: join the entries directly.
                            error_msg = "\n".join(response_json)
                        http_error_msg += ":\n{}".format(error_msg)

        elif 500 <= response.status_code < 600:
            http_error_msg = "Server Error: {}".format(response.reason)

        raise APIClientError(http_error_msg, response.status_code)
0
def download(  # pylint: disable=E0012,C0330,R0913
    destination,
    project_id,
    sample_ids,
    file_types,
    skip_existing,
    download_urls,
    download_template,
    host,
    email,
    password,
    api_key,
    no_progress,
):  # noqa: D413,D301,D412 # pylint: disable=C0301
    """Download deliverables of a project.

    Must specify either project id or sample ids.

    Examples:

        Download all samples results:

            gencove download ./results --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db

        Download some samples:

            gencove download ./results --sample-ids 59f5c1fd-cce0-4c4c-90e2-0b6c6c525d71,7edee497-12b5-4a1d-951f-34dc8dce1c1d

        Download specific deliverables:

            gencove download ./results --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db --file-types alignment-bam,impute-vcf,fastq-r1,fastq-r2

        Skip download entirely and print out the deliverables as a JSON:

            gencove download - --project-id d9eaa54b-aaac-4b85-92b0-0b564be6d7db --download-urls

    \f

    Args:
        destination (str): path/to/save/deliverables/to.
        project_id (str): project id in Gencove's system.
        sample_ids (list(str), optional): specific samples for which
            to download the results. if not specified, download deliverables
            for all samples.
        file_types (list(str), optional): specific deliverables to download
            results for. if not specified, all file types will be downloaded.
        skip_existing (bool, optional, default True): skip downloading existing
            files.
        download_urls (bool, optional): output the files available for a
            download. if the destination parameter is "-", it goes to the
            stdout.
        no_progress (bool, optional, default False): do not show progress
            bar.
    """  # noqa: E501

    def _csv_to_tuple(raw_value, debug_template):
        # Split a comma-separated option into a tuple of stripped items;
        # a falsy option yields an empty tuple and logs nothing.
        if not raw_value:
            return tuple()
        items = tuple(item.strip() for item in raw_value.split(","))
        echo_debug(debug_template.format(items))
        return items

    wanted_samples = _csv_to_tuple(sample_ids,
                                   "Sample ids translation: {}")
    wanted_file_types = _csv_to_tuple(file_types,
                                      "File types translation: {}")

    filters = DownloadFilters(
        project_id=project_id,
        sample_ids=wanted_samples,
        file_types=wanted_file_types,
    )
    credentials = Credentials(email=email, password=password, api_key=api_key)
    options = DownloadOptions(
        host=host,
        skip_existing=skip_existing,
        download_template=download_template,
    )

    command = Download(
        destination,
        filters,
        credentials,
        options,
        download_urls,
        no_progress,
    )
    command.run()