예제 #1
0
def test_fetch_subdirs():
    """Fetching the same URL under two subdirs must give two distinct paths."""
    first_path = fetch_file(URL, decompress=True, subdir="datacache")
    # Same URL, different cache subdirectory.
    second_path = fetch_file(URL, decompress=True, subdir="datacache_test")

    # Both downloads resolve to the decompressed FASTA file name ...
    assert first_path.endswith(FASTA_FILENAME)
    assert second_path.endswith(FASTA_FILENAME)
    # ... but must not be the same path, since the subdir differs.
    assert second_path != first_path, second_path
예제 #2
0
def test_fetch_subdirs():
    """Check that the `subdir` argument changes where a fetched file lands."""
    cache_subdirs = ("datacache", "datacache_test")
    default_path, alternate_path = [
        fetch_file(URL, decompress=True, subdir=subdir)
        for subdir in cache_subdirs
    ]

    # The file name is the same in both locations.
    assert default_path.endswith(FASTA_FILENAME)
    assert alternate_path.endswith(FASTA_FILENAME)
    # Changing the subdir must change the resulting path.
    assert alternate_path != default_path, alternate_path
예제 #3
0
def test_fetch_decompress():
    """Fetch with decompression and check the result is readable FASTA text."""
    fasta_path = fetch_file(URL, decompress=True, subdir="datacache")
    assert fasta_path.endswith(FASTA_FILENAME)

    # Read the whole decompressed file and look for a known subsequence.
    with open(fasta_path, 'r') as handle:
        contents = handle.read()
    assert "TCAATTTCGTGCCAG" in contents
예제 #4
0
def test_fetch_decompress():
    """Fetch and decompress the test URL under every download configuration.

    Exercises each combination of `use_wget_if_available` and `timeout`
    and verifies, for every combination, that the returned path names the
    FASTA file and that the file contains a known subsequence.
    """
    for use_wget_if_available in [True, False]:
        for timeout in [None, 10**6]:
            path1 = fetch_file(URL,
                               decompress=True,
                               subdir="datacache",
                               use_wget_if_available=use_wget_if_available,
                               timeout=timeout)
            # The checks live inside the inner loop so that every
            # (use_wget_if_available, timeout) combination is verified,
            # not just the last timeout of each outer iteration.
            assert path1.endswith(FASTA_FILENAME)
            with open(path1, 'r') as f1:
                s1 = f1.read()
            assert "TCAATTTCGTGCCAG" in s1
예제 #5
0
def test_fetch_decompress():
    """Check decompressed fetches for all wget/timeout combinations.

    For each pairing of `use_wget_if_available` and `timeout` the file is
    fetched and then validated: its path must end with the FASTA file name
    and its contents must include a known subsequence.
    """
    for use_wget_if_available in [True, False]:
        for timeout in [None, 10**6]:
            path1 = fetch_file(
                URL,
                decompress=True,
                subdir="datacache",
                use_wget_if_available=use_wget_if_available,
                timeout=timeout)
            # Validate inside the inner loop; previously only the result of
            # the final timeout value was checked per outer iteration.
            assert path1.endswith(FASTA_FILENAME)
            with open(path1, 'r') as f1:
                s1 = f1.read()
            assert "TCAATTTCGTGCCAG" in s1
예제 #6
0
def transcript_id_to_transcript_name(transcript_id, _table_cache=[None]):
    """Return the transcript name associated with an Ensembl transcript ID.

    On the first call the BioMart table is fetched with
    ``datacache.fetch_file`` and loaded with pandas; the resulting
    ID -> name dict is stored in ``_table_cache`` so later calls reuse it.

    Parameters
    ----------
    transcript_id :
        Key to look up in the 'Ensembl Transcript ID' column.
    _table_cache : list
        One-element list holding the memoized mapping. The mutable default
        is intentional: it persists between calls. Not meant to be passed
        by normal callers.

    Raises
    ------
    KeyError
        If ``transcript_id`` is not present in the mapping.
    """
    if _table_cache[0] is None:
        # Format inside the call: `print(...) % x` fails on Python 3 because
        # print() returns None. This form works on Python 2 and 3.
        print("Fetching Ensembl ID mappings from BioMart %s"
              % _BIOMART_URL_TRANSCRIPT_ID_TO_TRANSCRIPT_NAME)
        biomart_filename = datacache.fetch_file(
            _BIOMART_URL_TRANSCRIPT_ID_TO_TRANSCRIPT_NAME,
            "biomart_transcript_name.tsv")
        df = pd.read_csv(biomart_filename, sep='\t')
        transcript_ids = df['Ensembl Transcript ID']
        transcript_names = df['Associated Transcript Name']
        _table_cache[0] = dict(zip(transcript_ids, transcript_names))
    return _table_cache[0][transcript_id]
예제 #7
0
def transcript_id_to_gene_id(transcript_id, _table_cache=[None]):
    """Return the Ensembl gene ID associated with an Ensembl transcript ID.

    On the first call the BioMart table is fetched with
    ``datacache.fetch_file`` and loaded with pandas; the resulting
    transcript ID -> gene ID dict is stored in ``_table_cache`` so later
    calls reuse it.

    Parameters
    ----------
    transcript_id :
        Key to look up in the 'Ensembl Transcript ID' column.
    _table_cache : list
        One-element list holding the memoized mapping. The mutable default
        is intentional: it persists between calls. Not meant to be passed
        by normal callers.

    Raises
    ------
    KeyError
        If ``transcript_id`` is not present in the mapping.
    """
    if _table_cache[0] is None:
        # Format inside the call: `print(...) % x` fails on Python 3 because
        # print() returns None. This form works on Python 2 and 3.
        print("Fetching Ensembl ID mappings from BioMart %s"
              % _BIOMART_URL_TRANSCRIPT_ID_TO_GENE_ID)
        biomart_filename = datacache.fetch_file(
            _BIOMART_URL_TRANSCRIPT_ID_TO_GENE_ID,
            "biomart_transcript_gene.tsv")
        df = pd.read_csv(biomart_filename, sep='\t')
        gene_ids = df['Ensembl Gene ID']
        transcript_ids = df['Ensembl Transcript ID']
        _table_cache[0] = dict(zip(transcript_ids, gene_ids))
    return _table_cache[0][transcript_id]
예제 #8
0
def make_blastdb(url, name=None, filename=None, overwrite=False):
    """Download protein sequences and make a BLAST database from them.

    The database is written to ``<datacache data dir>/<name>``. If a
    ``<name>.phr`` file already exists there and ``overwrite`` is false,
    the existing database path is returned without downloading anything.

    Parameters
    ----------
    url : str
        Location of the sequence file, passed to ``datacache.fetch_file``.
    name : str
        Base name of the BLAST database. Required despite the ``None``
        default in the signature.
    filename : str, optional
        Local file name forwarded to ``datacache.fetch_file``.
    overwrite : bool
        Rebuild the database even if one already exists.

    Returns
    -------
    str
        Path prefix of the BLAST database.

    Raises
    ------
    TypeError
        If ``name`` is None.
    subprocess.CalledProcessError
        If ``makeblastdb`` exits with a non-zero status.
    """
    if name is None:
        # os.path.join(cachedir, None) would otherwise fail later with an
        # opaque message; fail early with the same exception type.
        raise TypeError("make_blastdb() requires a database 'name'")

    import datacache
    cachedir = datacache.get_data_dir()
    blastdb = os.path.join(cachedir, name)
    if os.path.exists(blastdb + '.phr') and not overwrite:
        return blastdb

    filename = datacache.fetch_file(
        url, filename=filename, decompress=True, subdir=None)
    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters are handled safely. check_output is
    # kept so makeblastdb's stdout stays captured rather than printed.
    subprocess.check_output(
        ['makeblastdb', '-dbtype', 'prot', '-in', filename, '-out', blastdb])
    return blastdb
예제 #9
0
def fetch_fasta_dict(path_or_url):
    """Fetch a FASTA file and parse it into a dict of sequences.

    The file is obtained with ``fetch_file``; paths ending in ``.gz`` or
    ``.gzip`` are read through ``gzip``. Each record's key is the first
    whitespace-delimited token of its header line without the leading
    ``>``; the value is the record's sequence lines, stripped and joined.

    Parameters
    ----------
    path_or_url : str
        Passed unchanged to ``fetch_file``.

    Returns
    -------
    dict
        Mapping from record key to sequence string. A header with no
        sequence lines maps to the empty string, wherever it appears.
    """
    path = fetch_file(path_or_url)
    opener = gzip.open if path.endswith((".gz", ".gzip")) else open
    d = {}
    value_buffer = []
    key = None
    # `with` guarantees the handle is closed even if decoding/parsing fails.
    with opener(path, "r") as f:
        # Iterate lazily instead of loading every line with readlines().
        for line in f:
            if isinstance(line, bytes):
                # gzip.open(..., "r") yields bytes.
                line = line.decode("ascii")
            if line.startswith(">"):
                if key is not None:
                    d[key] = "".join(value_buffer)
                value_buffer = []
                key = line.split()[0][1:]
            elif key is not None:
                # Text before the first header belongs to no record and
                # must not leak into the first sequence.
                value_buffer.append(line.strip())
    # Flush the last record the same way as every earlier one, even when
    # its sequence is empty.
    if key is not None:
        d[key] = "".join(value_buffer)
    return d
예제 #10
0
def fetch_fasta_dict(path_or_url):
    """Download (or locate) a FASTA file and return ``{record key: sequence}``.

    ``fetch_file`` resolves ``path_or_url`` to a local path. Files whose
    path ends in ``.gz`` / ``.gzip`` are opened with ``gzip``. A record key
    is the first whitespace-separated token of the ``>`` header line with
    the ``>`` removed; the sequence is the concatenation of the record's
    stripped lines.

    Parameters
    ----------
    path_or_url : str
        Passed unchanged to ``fetch_file``.

    Returns
    -------
    dict
        Record key -> sequence string. Records without sequence lines map
        to ``""`` regardless of their position in the file.
    """
    path = fetch_file(path_or_url)
    is_gzipped = path.endswith((".gz", ".gzip"))
    d = {}
    value_buffer = []
    key = None
    # Context manager closes the file even when parsing raises.
    with (gzip.open(path, "r") if is_gzipped else open(path, "r")) as f:
        # Stream the file line by line rather than materializing it.
        for line in f:
            if isinstance(line, bytes):
                # Lines read through gzip in "r" mode arrive as bytes.
                line = line.decode("ascii")
            if line.startswith(">"):
                if key is not None:
                    d[key] = "".join(value_buffer)
                value_buffer = []
                key = line.split()[0][1:]
            elif key is not None:
                # Skip any content that precedes the first header so it is
                # not glued onto the first record.
                value_buffer.append(line.strip())
    # Store the trailing record consistently with the others, including
    # the case of an empty sequence.
    if key is not None:
        d[key] = "".join(value_buffer)
    return d
예제 #11
0
def fetch_file(url, decompress=True):
    """Fetch `url` through datacache using the "immuno" subdirectory.

    `decompress` is forwarded unchanged; the return value is whatever
    `datacache.fetch_file` returns.
    """
    fetch_options = {"decompress": decompress, "subdir": "immuno"}
    return datacache.fetch_file(url, **fetch_options)
예제 #12
0
def fetch_file(url, decompress=True):
    """Delegate to `datacache.fetch_file`, always using subdir "immuno".

    The `decompress` flag is passed straight through and the result of the
    underlying call is returned as-is.
    """
    result = datacache.fetch_file(
        url,
        decompress=decompress,
        subdir="immuno",
    )
    return result