def test_not_valid_file():
    """Check the failure modes of opening ``SRAmetadb.sqlite`` with ``SRAdb``.

    The test accepts three outcomes of constructing ``SRAdb`` on the path:

    * the constructor succeeds -- nothing is asserted and the test passes;
    * ``SystemExit`` is raised -- accepted only if no file exists at the path;
    * ``OperationalError`` is raised -- always accepted (per the original
      docstring this is the "not a valid sqlite file" case).
    """
    path = "SRAmetadb.sqlite"
    try:
        # Only the side effect of opening matters; the handle itself is unused.
        SRAdb(path)
    except SystemExit:
        # Exiting is legitimate only when the database file is really absent.
        assert not os.path.isfile(path)
    except OperationalError:
        # An OperationalError is an expected outcome; nothing further to check.
        pass
def sradb_connection(conf_download_sradb_file):
    """Build an ``SRAdb`` object from the given database file and return it.

    ``conf_download_sradb_file`` is passed unchanged as the single argument to
    the ``SRAdb`` constructor (presumably the path of a downloaded
    SRAmetadb SQLite file -- confirm against the caller).
    """
    return SRAdb(conf_download_sradb_file)
"-o", "--output", type=argparse.FileType('w'), required=True, help= 'The .tsv file path that will stored the metadata for the given SRA Project ID.' ) args = parser.parse_args() # # Get the metadata # if args.sra_db is not None: db = SRAdb(args.sra_db.name) print(f"Using local SRA SQLite database to query...") else: print(f"Using NCBi's esearch and esummary interface to query...") db = SRAweb() metadata = db.sra_metadata(args.sra_project_id, detailed=True, expand_sample_attributes=True, sample_attribute=True) # Drop any None columns # pysradb does not lock the versions # pandas 0.25.3 generates an additional None column compared to pandas 0.25.0 # Bug in 0.25.3 ? metadata = metadata[metadata.columns.dropna()]
import csv import pandas from Bio import SeqIO from pysradb import SRAdb, download_sradb_file db = SRAdb('SRAmetadb.sqlite') def get_experiment_title(query_accession, db): """Retrieve experiment title (also title on biosample page) for the given query accession number (ID beginning with SRR, DRR, or ERR.) The 'db' input must be an SRAdb object (from the pysradb library) connected to a copy of the SRAmetadb.sqlite database.""" run_accession = query_accession.split(".")[0] df = db.query( 'select experiment_title from sra where run_accession="{run_accession_id}";' .format(run_accession_id=run_accession)) return df def get_biosample_attribute(query_accession, db): """Retrieve biosample record attributes for the given query accession number (ID beginning with SRR, DRR, or ERR.) The 'db' input must be an SRAdb object (from the pysradb library) connected to a copy of the SRAmetadb.sqlite database.""" run_accession = query_accession.split(".")[0] df = db.query( 'select sample.sample_attribute from sample INNER JOIN sra ON sra.sample_accession=sample.sample_accession WHERE sra.run_accession="{run_accession_id}";' .format(run_accession_id=run_accession)) return df def add_experiment_title(df, db): """The 'db' input must be an SRAdb object (from the pysradb library) connected to a copy of the SRAmetadb.sqlite database.""" df['experiment_title'] = df.apply(lambda x: get_experiment_title(