예제 #1
0
def test_not_valid_file():
    """Check how SRAdb reacts to a missing or non-SQLite database file.

    Two failure modes are accepted:

    * ``SystemExit`` -- only tolerated when the file really is absent.
    * ``OperationalError`` -- always tolerated.

    If the constructor succeeds, nothing is asserted.
    """
    sqlite_path = "SRAmetadb.sqlite"
    try:
        # The handle itself is never used; only the constructor's outcome
        # matters here.
        _handle = SRAdb(sqlite_path)
    except SystemExit:
        file_is_present = os.path.isfile(sqlite_path)
        assert not file_is_present
    except OperationalError:
        # Raising OperationalError is itself the expected outcome.
        pass
예제 #2
0
def sradb_connection(conf_download_sradb_file):
    """Build an SRAdb object from the given database file.

    Args:
        conf_download_sradb_file: Value passed straight to the ``SRAdb``
            constructor (presumably the path of a downloaded
            SRAmetadb.sqlite file -- confirm against the caller).

    Returns:
        The newly constructed ``SRAdb`` instance.
    """
    return SRAdb(conf_download_sradb_file)
예제 #3
0
    "-o",
    "--output",
    type=argparse.FileType('w'),
    required=True,
    help=
    'The .tsv file path that will stored the metadata for the given SRA Project ID.'
)

args = parser.parse_args()

# ---------------------------------------------------------------------------
# Fetch the metadata
# ---------------------------------------------------------------------------

# Pick the backend: a local SQLite database when one was supplied on the
# command line, otherwise the web interface.
if args.sra_db is None:
    print("Using NCBi's esearch and esummary interface to query...")
    db = SRAweb()
else:
    db = SRAdb(args.sra_db.name)
    print("Using local SRA SQLite database to query...")

metadata = db.sra_metadata(
    args.sra_project_id,
    detailed=True,
    expand_sample_attributes=True,
    sample_attribute=True,
)

# Keep only the columns whose label is not missing (None).
# pysradb does not pin its dependency versions, and pandas 0.25.3 was seen to
# produce one extra None column compared with pandas 0.25.0 (possibly a bug
# in 0.25.3).
metadata = metadata[metadata.columns.dropna()]
예제 #4
0
import csv
import pandas
from Bio import SeqIO
from pysradb import SRAdb, download_sradb_file

# Module-level SRAdb handle on the SRAmetadb.sqlite file. The path is
# relative, so it is resolved against the current working directory.
db = SRAdb('SRAmetadb.sqlite')


def get_experiment_title(query_accession, db):
    """Retrieve the experiment title for a run accession.

    The experiment title is also the title shown on the biosample page.

    Args:
        query_accession: Run accession (ID beginning with SRR, DRR, or ERR).
            Everything from the first "." onward is ignored.
        db: An SRAdb object (from the pysradb library) connected to a copy of
            the SRAmetadb.sqlite database.

    Returns:
        Whatever ``db.query`` returns for the ``experiment_title`` lookup in
        the ``sra`` table.
    """
    run_accession = query_accession.split(".")[0]
    # The query reaches db.query() as plain SQL text, so the value has to be
    # embedded safely here.  Use a standard single-quoted string literal with
    # embedded single quotes doubled: a double-quoted token is resolved by
    # SQLite as an identifier first, and an embedded '"' could otherwise
    # terminate the literal and rewrite the WHERE clause.
    accession_literal = "'" + run_accession.replace("'", "''") + "'"
    return db.query(
        "select experiment_title from sra where run_accession={accession};"
        .format(accession=accession_literal))


def get_biosample_attribute(query_accession, db):
    """Retrieve the biosample record attributes for a run accession.

    Args:
        query_accession: Run accession (ID beginning with SRR, DRR, or ERR).
            Everything from the first "." onward is ignored.
        db: An SRAdb object (from the pysradb library) connected to a copy of
            the SRAmetadb.sqlite database.

    Returns:
        Whatever ``db.query`` returns for the ``sample.sample_attribute``
        lookup, where ``sample`` is joined to ``sra`` on ``sample_accession``.
    """
    run_accession = query_accession.split(".")[0]
    # The query reaches db.query() as plain SQL text, so the value has to be
    # embedded safely here.  Use a standard single-quoted string literal with
    # embedded single quotes doubled: a double-quoted token is resolved by
    # SQLite as an identifier first, and an embedded '"' could otherwise
    # terminate the literal and rewrite the WHERE clause.
    accession_literal = "'" + run_accession.replace("'", "''") + "'"
    return db.query(
        "select sample.sample_attribute from sample INNER JOIN sra ON sra.sample_accession=sample.sample_accession WHERE sra.run_accession={accession};"
        .format(accession=accession_literal))


def add_experiment_title(df, db):
    """The 'db' input must be an SRAdb object (from the pysradb library) connected to a copy of the SRAmetadb.sqlite database."""
    df['experiment_title'] = df.apply(lambda x: get_experiment_title(