Ejemplo n.º 1
0
def download_wgs_for_record(record, config):
    """Expand a WGS master record into the records it points to.

    When ``record`` carries a ``wgs_scafld`` or ``wgs`` annotation, the
    annotated range is converted to a list of IDs, the IDs are downloaded in
    batches of ``STEP_SIZE`` and the downloaded data is parsed with ``SeqIO``
    using ``config.format``. A record with neither annotation is handed back
    unchanged as a one-element list.
    """
    annotations = record.annotations
    if 'wgs_scafld' in annotations:
        # Biopython has already split the range on '-' without expanding it;
        # for WGS_SCAFLD the pieces are wrapped in an outer list.
        range_parts = annotations['wgs_scafld'][0]
    elif 'wgs' in annotations:
        # Same pre-split range, but stored as a flat list here.
        range_parts = annotations['wgs']
    else:
        # No WGS reference, nothing to expand
        return [record]

    wgs_range = WgsRange.from_string('-'.join(range_parts))

    buffer = StringIO()
    id_list = wgs_range.get_ids()

    # Request the IDs in comma-separated batches of STEP_SIZE
    for start in range(0, len(id_list), STEP_SIZE):
        batch = ",".join(id_list[start:start + STEP_SIZE])

        response = get_stream(get_url_by_format(config),
                              build_params(batch, config))
        config.emit("Downloading {}\n".format(response.url))

        write_stream(response, buffer, batch, config)

    # Back to the start of the buffer so the parser sees everything written
    buffer.seek(0)

    return list(SeqIO.parse(buffer, config.format))
Ejemplo n.º 2
0
def fix_supercontigs(record, config):
    """Fetch the assembled form of a record that only has a CONTIG entry.

    The record's ID is requested from NCBI in the configured format so that
    the server assembles the record. If the request raises
    ``TooManyRequests``, the function waits ``retry_after`` seconds and makes
    one more attempt.

    Returns the list of records ``SeqIO`` parses from the downloaded data.
    """
    accession = record.id
    url = get_url_by_format(config)
    params = build_params(accession, config)

    def _fetch():
        # One download attempt, announced through config.emit
        response = get_stream(url, params)
        config.emit("Downloading {}\n".format(response.url))
        return response

    try:
        response = _fetch()
    except TooManyRequests as err:
        # Back off for as long as the server asked, then try once more
        delay = err.retry_after
        config.emit(
            "Server requested us to slow down, waiting {} seconds\n".format(
                delay))
        time.sleep(int(delay))
        response = _fetch()

    buffer = StringIO()
    write_stream(response, buffer, accession, config)

    # Back to the start of the buffer so Biopython can read it
    buffer.seek(0)

    return list(SeqIO.parse(buffer, config.format))
Ejemplo n.º 3
0
def generate_url(dl_id, config):
    """Generate the Entrez URL to download a file using a separate tool.

    Args:
        dl_id: ID to download; passed on to ``build_params``.
        config: configuration passed to ``get_url_by_format`` and
            ``build_params``.

    Returns:
        The base URL and the URL-encoded query parameters joined by ``?``.
    """
    # types: string, Config -> string

    url = get_url_by_format(config)
    params = build_params(dl_id, config)

    # Remove the tool field, some other tool will do the download.
    # pop() with a default keeps this from raising KeyError when the
    # parameters carry no 'tool' entry.
    params.pop('tool', None)
    encoded_params = urlencode(params, doseq=True)
    return "?".join([url, encoded_params])
Ejemplo n.º 4
0
def download_to_file(dl_id, config, filename=None, append=False):
    """Fetch one ID from NCBI and write the response to a file.

    The output file is opened in append mode when ``append`` is true and in
    write mode otherwise. With ``config.keep_filename`` set, ``filename`` is
    used as given; otherwise the name comes from ``_generate_filename``.
    """
    # types: string, Config, string, bool -> None
    url = get_url_by_format(config)
    params = build_params(dl_id, config)

    response = get_stream(url, params)
    config.emit("Downloading {}\n".format(response.url))

    target = (filename if config.keep_filename
              else _generate_filename(params, filename))

    with open(target, 'a' if append else 'w') as out:
        _validate_and_write(response, out, dl_id, config)
Ejemplo n.º 5
0
def fix_supercontigs(record, config):
    """Replace a CONTIG-only record with the records NCBI assembles for it.

    The record's ID is downloaded in the configured format and the response
    is parsed with ``SeqIO`` using ``config.format``.

    Returns the parsed records as a list.
    """
    # Requesting the right format makes the NCBI assemble the record for us.
    accession = record.id
    response = get_stream(get_url_by_format(config),
                          build_params(accession, config))
    config.emit("Downloading {}\n".format(response.url))

    buffer = StringIO()
    write_stream(response, buffer, accession, config)

    # Back to the start of the buffer so Biopython can read it
    buffer.seek(0)

    return list(SeqIO.parse(buffer, config.format))
Ejemplo n.º 6
0
def download_to_file(dl_id, config, filename=None, append=False):
    """Download a single ID from NCBI and store it to a file.

    If the request raises ``TooManyRequests``, wait for the number of seconds
    given by the exception's ``retry_after`` and retry once.

    Args:
        dl_id: ID to download.
        config: provides ``emit`` for progress messages and the
            ``keep_filename`` switch; also passed on to the helpers.
        filename: output name. Used as given when ``config.keep_filename``
            is set, otherwise passed to ``_generate_filename``.
        append: open the output file in append mode instead of write mode.

    Raises:
        TooManyRequests: if raised again by the retry, which is not guarded.
    """
    # types: string, Config, string, bool -> None
    mode = 'a' if append else 'w'

    url = get_url_by_format(config)
    params = build_params(dl_id, config)

    # Only the request itself is guarded; reporting happens once afterwards.
    try:
        r = get_stream(url, params)
    except TooManyRequests as err:
        # Terminate the message with a newline like every other emitted
        # message, so it does not run into the "Downloading" line below.
        config.emit(
            "Server requested us to slow down, waiting {} seconds.\n".format(
                err.retry_after))
        time.sleep(int(err.retry_after))
        r = get_stream(url, params)
    config.emit("Downloading {}\n".format(r.url))

    if config.keep_filename:
        outfile_name = filename
    else:
        outfile_name = _generate_filename(params, filename)

    with open(outfile_name, mode) as fh:
        _validate_and_write(r, fh, dl_id, config)