def get_bloom_sequences():
    """Locate the bloom sequences FASTA file shipped with the repository

    The path is built as ``data/AG/BLOOM.fasta`` under the repository
    directory and handed to ``get_existing_path`` for validation.

    Returns
    -------
    str
        The filepath to the bloom sequences

    Raises
    ------
    IOError
        If the path does not exist
    """
    bloom_fp = os.path.join(get_repository_dir(), 'data/AG/BLOOM.fasta')
    return get_existing_path(bloom_fp)
def get_reference_set():
    """Resolve the reference sequences and taxonomy used for OTU picking

    When ``ag.is_test_env()`` is true, the small reference set under the
    repository's ``tests/data`` directory is used. Otherwise the paths
    reported by ``qdr`` are returned.

    Returns
    -------
    str
        The file path to the reference sequences.
    str
        The file path to the reference taxonomy.
    """
    # Guard clause: outside the test environment defer entirely to qdr.
    if not ag.is_test_env():
        return qdr.get_reference_sequences(), qdr.get_reference_taxonomy()

    root = get_repository_dir()
    seqs_fp = os.path.join(root, 'tests/data/otus.fna')
    tax_fp = os.path.join(root, 'tests/data/otus.txt')
    return seqs_fp, tax_fp
def _stage_test_accessions():
    """Stage test data

    Notes
    -----
    Staging copies each accession in ``_TEST_ACCESSIONS`` from the
    repository's ``tests/data`` directory into ``<ag.WORKING_DIR>/01``.
    This "tricks" the fetch_study mechanism as it'll appear that the data
    have already been sourced from EBI.

    The ``01`` directory is created inside ``ag.WORKING_DIR`` if it is
    missing, regardless of the process' current working directory.

    Raises
    ------
    OSError
        Propagated from ``shutil.copytree`` if a destination directory
        already exists or a source directory cannot be read.
    """
    repo = get_repository_dir()
    staging_dir = os.path.join(ag.WORKING_DIR, '01')

    for acc in _TEST_ACCESSIONS:
        src = os.path.join(repo, 'tests/data/%s' % acc)
        dst = os.path.join(ag.WORKING_DIR, '01/%s' % acc)

        # Create the same directory that is being checked; a bare
        # os.mkdir('01') would resolve against the current working
        # directory rather than ag.WORKING_DIR.
        if not os.path.exists(staging_dir):
            os.mkdir(staging_dir)

        shutil.copytree(src, dst)
def _get_data(data_dir, tag):
    """Get a non-AG table and mapping file

    Parameters
    ----------
    data_dir : str
        The base data path
    tag : str
        The filetag (e.g., HMPv35_100nt)

    Returns
    -------
    (str, str)
        The filepath to the table, and the filepath to the mapping file.

    Raises
    ------
    IOError
        If the filepaths are not accessible. The message names the
        specific file that could not be found.

    Notes
    -----
    When ``ag.is_test_env()`` is true, the paths are resolved under the
    repository's ``tests/data`` directory, otherwise under ``data``.
    """
    repo = get_repository_dir()
    data = 'tests/data' if ag.is_test_env() else 'data'
    base = os.path.join(repo, data)

    table = os.path.join(base, data_dir, '%s.biom' % tag)
    mapping = os.path.join(base, data_dir, '%s.txt' % tag)

    if not os.path.exists(table):
        raise IOError("Unable to access: %s" % table)
    if not os.path.exists(mapping):
        # Report the mapping file, which is the path that is missing here.
        raise IOError("Unable to access: %s" % mapping)

    return table, mapping