def test_1(self):
    """Check that every fusion parsed from the FusionCatcher output is expected.

    Each candidate yielded by the ``fusioncatcher`` parser is turned into an
    ``agfusion.Fusion`` and its ``name`` must be one of the known fusion
    names below.  Uses the module-level ``db`` and ``data`` objects.
    """
    # A set gives constant-time membership tests; contents are unchanged.
    expected_names = {
        'Adamts9-Ano2',
        'Trp53-Sat2',
        '1700112E06Rik-Runx1',
        'Runx1-1700112E06Rik',
        'Rell1-Lhfpl3',
        'Phc1-Smarca2',
        'Lrrc8d-Gbp11',
        'C920009B18Rik-H60b',
    }
    candidates_path = (
        './data/FusionsFindingAlgorithms/FusionCatcher/'
        'final-list_candidate-fusion-genes.txt'
    )

    # NOTE: the previous version also opened "agfusion.homo_sapiens.84.db"
    # into a local that was never used (and never closed); the test only
    # ever worked against the module-level ``db``, so that handle is gone.
    for candidate in agfusion.parsers['fusioncatcher'](candidates_path, db.logger):
        # Keep the parsed record and the Fusion object under separate names
        # instead of rebinding the loop variable.
        fusion = agfusion.Fusion(
            gene5prime=candidate['gene5prime'],
            gene5primejunction=candidate['gene5prime_junction'],
            gene3prime=candidate['gene3prime'],
            gene3primejunction=candidate['gene3prime_junction'],
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam'],
            noncanonical=False
        )
        assert fusion.name in expected_names, '%s not in list!' % fusion.name
def main():
    """
    Entry point for the AGFusion command line interface.

    Builds an argument parser with four sub-commands (``annotate``,
    ``batch``, ``download`` and ``build``), parses the command line and
    dispatches to the matching routine:

    * ``build``    -> ``builddb(args)``, then exit.
    * ``download`` -> ``list_available_databases()`` or ``downloaddb(args)``,
      then exit.
    * ``annotate`` -> ``annotate(...)`` for a single fusion.
    * ``batch``    -> ``batch_mode(...)`` for a fusion-finder output file.

    For ``annotate`` and ``batch`` the species and Ensembl release are taken
    from the database file name (second and third dot-separated fields,
    e.g. ``agfusion.mus_musculus.84.db``).

    Exits with a non-zero status when the command line or the environment is
    invalid (no sub-command, unrecognised database name, unsupported species,
    missing pyensembl data, malformed --rename/--recolor value, bad image
    type).
    """
    # Local import: sys.exit is always available, unlike the ``exit`` helper
    # that the ``site`` module injects for interactive use.
    import sys

    parser = argparse.ArgumentParser(
        description='Annotate Gene Fusion (AGFusion)')
    subparsers = parser.add_subparsers(
        help='AGFusion programs.', dest="subparser_name")

    # single fusion parser
    annotate_parser = subparsers.add_parser(
        'annotate', help='Annotate and visualize a single fusion.')
    annotate_parser.add_argument(
        '-g5', '--gene5prime', type=str, required=True,
        help='5\' gene partner')
    annotate_parser.add_argument(
        '-g3', '--gene3prime', type=str, required=True,
        help='3\' gene partner')
    annotate_parser.add_argument(
        '-j5', '--junction5prime', type=int, required=True,
        help='Genomic location of predicted fusion for the 5\' gene partner. ' +
             'The 1-based position that is the last nucleotide included in ' +
             'the fusion before the junction.')
    annotate_parser.add_argument(
        '-j3', '--junction3prime', type=int, required=True,
        help='Genomic location of predicted fusion for the 3\' gene partner. ' +
             'The 1-based position that is the first nucleotide included in ' +
             'the fusion after the junction.')
    add_common_flags(annotate_parser)
    annotate_parser.add_argument(
        '--scale', type=int, required=False, default=-1,
        help='(Optional) Set maximum width (in amino acids) of the ' +
             'figure to rescale the fusion (default: max length of ' +
             'fusion product)')

    # batch file parser
    batch_parser = subparsers.add_parser(
        'batch',
        help='Annotate fusions from an output file from a fusion ' +
             'finding algorithm.')
    batch_parser.add_argument(
        '-f', '--file', type=str, required=True,
        help='Output file from fusion-finding algorithm.')
    batch_parser.add_argument(
        '-a', '--algorithm', type=str, required=True,
        help='The fusion-finding algorithm. Can be one of the following: ' +
             ', '.join(agfusion.parsers.keys()) + '.')
    add_common_flags(batch_parser)

    # download database
    database_parser = subparsers.add_parser(
        'download', help='Download database for a reference genome.')
    database_parser.add_argument(
        '-d', '--dir', type=str, default='',
        help='(Optional) Directory the database will be downloaded ' +
             'to (defaults to current working directory).')
    database_parser.add_argument(
        '-g', '--genome', type=str, default=None,
        help='Specify the genome shortcut (e.g. hg19). To see all ' +
             'available shortcuts run \'agfusion download -a\'. Either ' +
             'specify this or --species and --release.')
    database_parser.add_argument(
        '-s', '--species', type=str, default=None,
        help='The species (e.g. homo_sapiens).')
    database_parser.add_argument(
        '-r', '--release', type=int, default=None,
        help='The ensembl release (e.g. 87).')
    database_parser.add_argument(
        '-a', '--available', action='store_true', required=False,
        help='List available species and ensembl releases.')

    # build database parser
    build_database_parser = subparsers.add_parser(
        'build', help='Build database for a reference genome.')
    build_database_parser.add_argument(
        '-d', '--dir', type=str, required=True,
        help='Directory to write database file to.')
    build_database_parser.add_argument(
        '-s', '--species', type=str, required=True,
        help='The species (e.g. homo_sapiens).')
    build_database_parser.add_argument(
        '-r', '--release', type=int, required=True,
        help='The ensembl release (e.g. 87).')
    build_database_parser.add_argument(
        '--pfam', type=str, required=True,
        help='File containing PFAM ID mappings.')
    build_database_parser.add_argument(
        '--server', type=str, required=False,
        default='ensembldb.ensembl.org',
        help='(optional) Ensembl server (default ensembldb.ensembl.org)')

    # agfusion version number
    parser.add_argument(
        '-v', '--version', action='version', version=agfusion.__version__)

    args = parser.parse_args()

    # On Python 3 sub-commands are optional by default, so a bare invocation
    # reaches this point with subparser_name=None and none of the
    # sub-command attributes (args.out, args.database, ...) defined.
    if args.subparser_name is None:
        parser.error(
            'no AGFusion program specified '
            '(choose from annotate, batch, download, build)')

    if args.subparser_name == 'build':
        builddb(args)
        sys.exit()
    elif args.subparser_name == 'download':
        if args.available:
            list_available_databases()
        else:
            downloaddb(args)
        sys.exit()

    # single or batch mode
    if not exists(args.out):
        mkdir(args.out)

    # Species and release are encoded in the database file name:
    # <prefix>.<species>.<release>...
    db_file = split(args.database)[1]
    name_fields = db_file.split('.')
    if len(name_fields) < 3:
        parser.error(
            'cannot infer species and release from database file name '
            '\'{}\''.format(db_file))
    species = name_fields[1]
    release = name_fields[2]  # kept as a string, exactly as before

    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if species not in AVAILABLE_ENSEMBL_SPECIES:
        parser.error('unsupported species!')

    agfusion_db = agfusion.AGFusionDB(args.database, debug=args.debug)
    agfusion_db.build = species + '_' + str(release)

    # get the pyensembl data
    pyensembl_data = pyensembl.EnsemblRelease(release, species)

    try:
        # Accessing .db is only done for its side effect: the original code
        # treats a ValueError here as "pyensembl data not installed".
        pyensembl_data.db
    except ValueError:
        agfusion_db.logger.error(
            "Missing pyensembl data. Run pyensembl install --release " +
            "{} --species {}".format(release, species))
        sys.exit(1)

    # parse the re-coloring and re-naming ("<domain>;<value>" pairs)
    colors = {}
    rename = {}

    if args.rename is not None:
        for spec in args.rename:
            pair = spec.split(';')
            if len(pair) != 2:
                agfusion_db.logger.error(
                    "ERROR - did not properly specify --rename: "
                    "{}".format(spec))
                sys.exit(1)
            old_name, new_name = pair
            if old_name in rename:
                # Logger.warn is a deprecated alias of Logger.warning
                # (assumes agfusion_db.logger is a standard logging.Logger).
                agfusion_db.logger.warning(
                    "WARNING - you rename {} twice.".format(old_name))
            rename[old_name] = new_name

    if args.recolor is not None:
        for spec in args.recolor:
            pair = spec.split(';')
            if len(pair) != 2:
                agfusion_db.logger.error(
                    "ERROR - did not properly specify --colors: "
                    "{}".format(spec))
                sys.exit(1)
            domain, color = pair
            if domain in colors:
                agfusion_db.logger.warning(
                    "You specified colors for {} twice.".format(domain))
            # Colors are keyed by the name the domain is rendered under,
            # i.e. its new name when it is also being renamed.
            colors[rename.get(domain, domain)] = color

    # check image file type is valid
    if args.type not in ('png', 'pdf', 'jpeg'):
        agfusion_db.logger.error(
            "ERROR - provided an incorrect image file type: {}.".format(
                args.type))
        sys.exit(1)

    if args.subparser_name == 'annotate':
        annotate(gene5prime=args.gene5prime,
                 junction5prime=args.junction5prime,
                 gene3prime=args.gene3prime,
                 junction3prime=args.junction3prime,
                 agfusion_db=agfusion_db,
                 pyensembl_data=pyensembl_data,
                 args=args,
                 outdir=args.out,
                 colors=colors,
                 rename=rename,
                 scale=args.scale)
    elif args.subparser_name == 'batch':
        batch_mode(args, agfusion_db, pyensembl_data, rename, colors)
from os.path import join, expanduser, curdir, abspath
import unittest
import agfusion
from agfusion import utils
import pyensembl
from Bio import SeqIO

# Shared fixtures, created once at import time: the pyensembl annotation for
# mouse release 84 and the AGFusion database file looked up relative to the
# current directory.
data = pyensembl.EnsemblRelease(84, 'mouse')
db = agfusion.AGFusionDB(
    abspath(join(curdir, 'agfusion.mus_musculus.84.db'))
)
db.build = 'mus_musculus_84'


class TestSequencePrediction(unittest.TestCase):
    """Sequence-prediction tests that run against the mouse fixtures."""

    def test_1(self):
        """
        Junction on exon boundaries producing an in-frame protein: the CDS
        and cDNA are expected to be correct.
        """

        # The DNA and protein coding sequences are meant to be compared
        # against manually generated sequences.
        fusion = agfusion.Fusion(
            gene5prime="ENSMUSG00000022770",
            gene5primejunction=31684294,
            gene3prime="ENSMUSG00000002413",
            gene3primejunction=39648486,
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=True,
        )
from os.path import join, expanduser, curdir, abspath import unittest import agfusion from agfusion import utils import pyensembl from Bio import SeqIO, Seq, Alphabet data = pyensembl.EnsemblRelease(84,'mouse') db = agfusion.AGFusionDB(abspath(join(curdir,'agfusion.mus_musculus.84.db'))) db.build = 'mus_musculus_84' data_human = pyensembl.EnsemblRelease(75,'human') db_human = agfusion.AGFusionDB(abspath(join(curdir,'agfusion.homo_sapiens.75.db'))) db_human.build = 'homo_sapiens_75' class TestSequencePrediction_human(unittest.TestCase): def test_1(self): """ test CDS and prortein correct for junction that is on exon boundaries and produces an out-of-frame protein. """ #test the dna and protein coding sequences are correct by comparing #with manually generally sequences fusion = agfusion.Fusion( gene5prime="TMEM87B", gene5primejunction=112843681, gene3prime="MERTK", gene3primejunction=112722768,
from flask_errormail import mail_on_500

# The database path is taken from the command line when this module is loaded.
parser = argparse.ArgumentParser(description='AGFusion web application')
parser.add_argument('--database', type=str, required=True,
                    help='Path to the database file (e.g. agfusion.db)')
args = parser.parse_args()

# Recipients for error mails.  The trailing comma matters: without it the
# parentheses are just grouping and the value is a plain string, not the
# collection of addresses that is handed to mail_on_500 below.
ADMINISTRATORS = ('*****@*****.**',)

app = Flask(__name__)
app.config.from_object(__name__)
mail_on_500(app, ADMINISTRATORS)

db = agfusion.AGFusionDB(args.database)

# NOTE(review): SECRET_KEY and the credentials are hard-coded here; they can
# be overridden through the file named by the FLASKR_SETTINGS environment
# variable (silently skipped when the variable is unset).
app.config.update(
    dict(DATABASE=None,
         SECRET_KEY='development key',
         USERNAME='******',
         PASSWORD='******'))
app.config.from_envvar('FLASKR_SETTINGS', silent=True)


def check_params(params):
    """Fill in defaults for request parameters, modifying ``params`` in place.

    :param params: mapping of parameter names to string values; must contain
        a ``'dpi'`` key.
    """
    # check parameters
    # An empty dpi field falls back to '100' (kept as a string).
    if params['dpi'] == '':
        params['dpi'] = '100'