def test_3(self):
    """
    Check the CDS and protein predicted for the human TMEM87B-MERTK fusion
    (case 3) against a manually generated reference.

    The reference CDS is read from ./data/test-human-case-3.txt, translated,
    and cut at the first stop codon before it is compared with the
    ENST00000283206-ENST00000295408 fusion transcript.
    """

    fusion = agfusion.Fusion(
        gene5prime="TMEM87B",
        gene5primejunction=112843681,
        gene3prime="MERTK",
        gene3primejunction=112722771,
        db=db_human,
        pyensembl_data=data_human,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=False
    )

    fusion.save_transcript_cdna('TMEM87B-MERTK-case3')
    fusion.save_transcript_cds('TMEM87B-MERTK-case3')
    fusion.save_proteins('TMEM87B-MERTK-case3')

    # Read the reference through a context manager so the file handle is
    # closed even when one of the assertions below fails.
    with open('./data/test-human-case-3.txt', 'r') as handle:
        test_cds = handle.read()

    test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()

    # find() yields -1 when the translation holds no stop codon; slicing with
    # -1 would silently drop the last residue, so only cut when a stop exists.
    stop = test_protein.find('*')
    if stop != -1:
        test_protein = test_protein[0:stop]

    trans = fusion.transcripts['ENST00000283206-ENST00000295408']

    assert test_cds == trans.cds.seq, "cds is wrongly predicted for human fusion (case 3)"
    assert test_protein == trans.protein.seq, "protein is wrongly predicted for human fusion (case 3)"
def test_mouse_4(self):
    """
    Verify the cDNA that is produced when the junctions lie in UTRs.

    Both fusion partners are the same gene (ENSMUSG00000022770), so the
    transcript under test is ENSMUST00000064477 fused with itself.
    """

    expected_cdna = (
        'GGGGGTGCGGCCGCCGAAGGGGGAGCTCCTCCCCCGTCCCCTCACCCCCTCAGCTGAGCT'
        'CGGGGCGGGGCGGGGTACGTGGAGCGGGGCCGGGCGGGGAAGCTGCTCCGAGTCCGGCCG'
        'GAGCGCACCCGGGGCGCCCGCGTACGCCGCTCGCGGGAACTTTGCGGCGGAGCCGCAGGT'
        'GTGGAGGCCGCGGAGGGGGGTGCATGAGCGGCGCGGAGAGCGGCGGCTGTCCGGTCCGGC'
        'CCCTGCTGGAGTCGCCGCCGGGAGGAGACGAACGAGGAACCAG'
        'GTGTGTGCCGCCTTCCTGATTCTGGAGAAAA'
        'AAAA'
    )

    self_fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664851,
        gene3prime="ENSMUSG00000022770",
        gene3primejunction=31873343,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    transcript = self_fusion.transcripts['ENSMUST00000064477-ENSMUST00000064477']
    observed_cdna = str(transcript.cdna.seq)

    assert observed_cdna == expected_cdna, "Test 4: cDNA wrong"
def annotate(gene5prime, junction5prime, gene3prime, junction3prime,
             agfusion_db, pyensembl_data, args, outdir=None, colors=None,
             rename=None, scale=None, batch_out_dir=None):
    """
    Build a single gene fusion and write every output for it.

    The fusion is constructed from the two genes and their junctions. If
    construction raises IndexError a message is printed and nothing is
    written. When batch_out_dir is given, outdir is replaced by a
    sub-directory of it named "<5' gene>-<5' junction>_<3' gene>-<3' junction>".
    The cDNA, CDS and protein sequences, the images and the tables are then
    saved to outdir.
    """

    try:
        fusion = agfusion.Fusion(
            gene5prime=gene5prime,
            gene5primejunction=junction5prime,
            gene3prime=gene3prime,
            gene3primejunction=junction3prime,
            db=agfusion_db,
            pyensembl_data=pyensembl_data,
            protein_databases=args.protein_databases,
            noncanonical=args.noncanonical
        )
    except IndexError:
        print('Problem for {} and {} fusion!'.format(gene5prime, gene3prime))
        return

    if batch_out_dir is not None:
        # One sub-directory per fusion, labelled with gene names and junctions.
        label_5prime = fusion.gene5prime.gene.name + '-' + str(junction5prime)
        label_3prime = fusion.gene3prime.gene.name + '-' + str(junction3prime)
        outdir = join(batch_out_dir, label_5prime + '_' + label_3prime)

    # The three sequence writers take the same two options.
    sequence_writers = (
        fusion.save_transcript_cdna,
        fusion.save_transcript_cds,
        fusion.save_proteins,
    )
    for write_sequences in sequence_writers:
        write_sequences(out_dir=outdir, middlestar=args.middlestar)

    image_options = {
        'out_dir': outdir,
        'file_type': args.type,
        'scale': scale,
        'colors': colors,
        'rename': rename,
        'fontsize': args.fontsize,
        'height': args.height,
        'width': args.width,
        'dpi': args.dpi,
        'no_domain_labels': args.no_domain_labels,
        'plot_WT': args.WT,
        'exclude': args.exclude_domain,
    }
    fusion.save_images(**image_options)

    fusion.save_tables(out_dir=outdir)
def test_mouse_3(self):
    """
    Check the 5' and 3' junction effects reported for the
    ENSMUST00000064477-ENSMUST00000002487 fusion transcript.
    """

    fusion_options = dict(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664850,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39610381,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True,
    )
    fusion = agfusion.Fusion(**fusion_options)

    transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']

    assert transcript.effect_5prime == '5UTR', "Test 7: Not found in 5'UTR"
    assert transcript.effect_3prime == '3UTR (start)', "Test 7: Not found in at 3'UTR beginning"
def test_mouse_7(self):
    """
    Check the 5' and 3' junction effects reported for the
    ENSMUST00000023454-ENSMUST00000002487 fusion transcript.
    """

    fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664820,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39610405,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    transcript = fusion.transcripts['ENSMUST00000023454-ENSMUST00000002487']

    # The observed value is appended to the message to ease debugging.
    assert transcript.effect_5prime == "intron (before cds)", \
        "Test 12: incorrect 5' effect: %s" % transcript.effect_5prime
    assert transcript.effect_3prime == "intron (cds)", \
        "Test 12: incorrect 3' effect: %s" % transcript.effect_3prime
def test_1(self):
    """
    Test that CDS and cDNA are correct for a junction that is on exon
    boundaries and produces an in-frame protein.

    The predicted sequences are compared with manually generated references
    stored in ./data/Dlg1-Braf_cdna_manual.fa and
    ./data/Dlg1-Braf_cds_manual.fa; each reference record id names the fusion
    transcript it belongs to.
    """

    fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31684294,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39648486,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    fusion.save_transcript_cdna('DLG1-BRAF_mouse')
    fusion.save_transcript_cds('DLG1-BRAF_mouse')
    fusion.save_proteins('DLG1-BRAF_mouse')

    # SeqIO.parse returns an iterator over the handle, so the records are
    # materialised while the file is still open; the context manager then
    # closes the handle instead of leaking it.
    with open('./data/Dlg1-Braf_cdna_manual.fa', 'r') as handle:
        test_cdna = list(SeqIO.parse(handle, 'fasta'))
    with open('./data/Dlg1-Braf_cds_manual.fa', 'r') as handle:
        test_cds = list(SeqIO.parse(handle, 'fasta'))

    expected_transcript_combinations = [
        'ENSMUST00000100001-ENSMUST00000002487',
        'ENSMUST00000064477-ENSMUST00000002487',
        'ENSMUST00000115205-ENSMUST00000002487',
        'ENSMUST00000023454-ENSMUST00000002487',
        'ENSMUST00000115201-ENSMUST00000002487',
        'ENSMUST00000132176-ENSMUST00000002487'
    ]

    # All six expected combinations must be present among the predictions.
    found = set(fusion.transcripts.keys()).intersection(
        expected_transcript_combinations)
    assert len(found) == 6, "Test 1: unexpected number protein coding transcripts."

    for seq in test_cdna:
        trans = fusion.transcripts[str(seq.id)]
        assert seq.seq == trans.cdna.seq, "cDNA is wrongly predicted: %s" % seq.id

    for seq in test_cds:
        trans = fusion.transcripts[str(seq.id)]
        assert seq.seq == trans.cds.seq, "cds is wrongly predicted: %s" % seq.id
def test_1(self):
    """
    Parse the FusionCatcher example output and check that every fusion built
    from it carries one of the expected names.
    """

    expected_names = [
        'Adamts9-Ano2',
        'Trp53-Sat2',
        '1700112E06Rik-Runx1',
        'Runx1-1700112E06Rik',
        'Rell1-Lhfpl3',
        'Phc1-Smarca2',
        'Lrrc8d-Gbp11',
        'C920009B18Rik-H60b',
    ]

    parse_fusioncatcher = agfusion.parsers['fusioncatcher']
    parsed_records = parse_fusioncatcher(
        './data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt',
        db.logger
    )

    for record in parsed_records:
        # Each parsed record supplies the genes and junctions of one fusion.
        built = agfusion.Fusion(
            gene5prime=record['gene5prime'],
            gene5primejunction=record['gene5prime_junction'],
            gene3prime=record['gene3prime'],
            gene3primejunction=record['gene3prime_junction'],
            db=db,
            pyensembl_data=data,
            protein_databases=['pfam'],
            noncanonical=False
        )

        assert built.name in expected_names, '%s not in list!' % built.name
def test_mouse_1(self):
    """
    Test that AGFusion reports the junction effect for an individual
    transcript: 'CDS (start)' on the 5' side and 'CDS' on the 3' side of
    ENSMUST00000064477-ENSMUST00000002487.
    """

    fusion_options = dict(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664852,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39651764,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True,
    )
    fusion = agfusion.Fusion(**fusion_options)

    transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']

    assert transcript.effect_5prime == 'CDS (start)', "Test 5: not CDS start"
    assert transcript.effect_3prime == 'CDS', "Test 5: not CDS"
def test_3(self):
    """
    Test that the CDS is correct for a junction within the exon (not on a
    boundary) with one gene on the forward and one gene on the reverse
    strand.
    """

    expected_cds = 'ATGCCGGTCCGGAAGCAAGAATTTGCAGCCTTCAAGTAG'

    fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664869,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39610402,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']
    observed_cds = str(transcript.cds.seq)

    assert observed_cds == expected_cds, "Test 3: CDS wrong"
def annotate(gene5prime, junction5prime, gene3prime, junction3prime,
             outdir, colors, rename, scale, db, pyensembl_data, args):
    """
    Build a single gene fusion and write all of its outputs to outdir.

    The fusion is constructed from the two genes and their junctions; its
    cDNA, CDS and protein sequences, its images and its tables are then
    saved. Plot and sequence options are taken from args.
    """

    fusion_options = {
        'gene5prime': gene5prime,
        'gene5primejunction': junction5prime,
        'gene3prime': gene3prime,
        'gene3primejunction': junction3prime,
        'db': db,
        'pyensembl_data': pyensembl_data,
        'protein_databases': args.protein_databases,
        'noncanonical': args.noncanonical,
    }
    fusion = agfusion.Fusion(**fusion_options)

    # Sequence outputs: cDNA, then CDS, then protein.
    fusion.save_transcript_cdna(out_dir=outdir, middlestar=args.middlestar)
    fusion.save_transcript_cds(out_dir=outdir, middlestar=args.middlestar)
    fusion.save_proteins(out_dir=outdir, middlestar=args.middlestar)

    image_options = {
        'out_dir': outdir,
        'scale': scale,
        'colors': colors,
        'rename': rename,
        'fontsize': args.fontsize,
        'height': args.height,
        'width': args.width,
        'dpi': args.dpi,
        'no_domain_labels': args.no_domain_labels,
        'plot_WT': args.WT,
        'exclude': args.exclude_domain,
    }
    fusion.save_images(**image_options)

    fusion.save_tables(out_dir=outdir)
def test_2(self):
    """
    Test that the CDS is correct for a junction within the exon (not on a
    boundary) for two genes on the reverse strand.

    Both fusion partners are the same gene (ENSMUSG00000002413).
    """

    expected_cds = (
        'ATGGCGGCGCTGAGTGGCGGCGGTGGCAGCAGCAGCGGTGGCGGCGGCGGCGGTGGCGGCGGCGG'
        'TGGCGGTGGCGACGGCGGCGGCGGCGCCGAGCAGGGCCAGGCTCTGTTCAATGGCGACATGGAGC'
        'CGGAGGCCGGCGCTGGCGCCGCGGCCTCTTCGGCTGCGGACCCGGCCATTCCTGAAGAATTTGCAGCCTTCAAGTAG'
    )

    fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000002413",
        gene5primejunction=39725110,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39610402,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    transcript = fusion.transcripts['ENSMUST00000002487-ENSMUST00000002487']
    observed_cds = str(transcript.cds.seq)

    assert observed_cds == expected_cds, "Test 2: CDS wrong"
def test_mouse_5(self):
    """
    Check the cDNA predicted for the
    ENSMUST00000064477-ENSMUST00000002487 fusion transcript against a
    hard-coded expected sequence.
    """

    expected_cdna = (
        'GGGGGTGCGGCCGCCGAAGGGGGAGCTCCTCCCCCGTCCCCTCACCCCCTCAGCTGAGCT'
        'CGGGGCGGGGCGGGGTACGTGGAGCGGGGCCGGGCGGGGAAGCTGCTCCGAGTCCGGCCG'
        'GAGCGCACCCGGGGCGCCCGCGTACGCCGCTCGCGGGAACTTTGCGGCGGAGCCGCAGGT'
        'GTGGAGGCCGCGGAGGGGGGTGCATGAGCGGCGCGGAGAGCGGCGGCTGTCCGGTCCGGC'
        'CCCTGCTGGAGTCGCCGCCGGGAGGAGACGAACGAGGAACCAG'
        'GTGTGTGCCGCCTTCCTGATTCTGGAGAAA'
        'GAAA'
    )

    fusion = agfusion.Fusion(
        gene5prime="ENSMUSG00000022770",
        gene5primejunction=31664850,
        gene3prime="ENSMUSG00000002413",
        gene3primejunction=39603240,
        db=db,
        pyensembl_data=data,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=True
    )

    transcript = fusion.transcripts['ENSMUST00000064477-ENSMUST00000002487']
    observed_cdna = str(transcript.cdna.seq)

    assert observed_cdna == expected_cdna, "Test 10: incorrect cDNA"
def index():
    """
    Serve the fusion form and dispatch on which form button was submitted.

    Three actions are recognised from ``request.form``:

    * ``submitfusion`` -- validate the input, build an ``agfusion.Fusion``,
      save its cDNA/CDS/protein sequences and a pickle of the fusion under
      the per-session directory, then render the plots and effects.
    * ``downloadseq`` -- send back one of the saved sequence files
      (``<name>_cdna.fa``, ``<name>_cds.fa`` or ``<name>_protein.fa``).
    * ``downloadimage`` -- reload the pickled fusion, save one transcript
      image with the requested plot settings and send it back.

    Any other request renders ``index.html`` with the default parameters.

    Returns:
        The result of ``render_template`` / ``send_file``, or the error value
        handed back by ``check_fusion_input`` / ``check_params``.
    """

    # Plot settings that are read from the form by more than one action.
    plot_fields = ('fontsize', 'dpi', 'imagewidth', 'imageheight', 'scale')

    # Defaults used to populate the form.
    params = {
        'gene5prime': "",
        'gene3prime': "",
        'loc5prime': "",
        'loc3prime': "",
        'grcm38_color': "white",
        'grch37_color': "white",
        'grch38_color': "white",
        'fontsize': "12",
        'dpi': "100",
        'imagewidth': "8",
        'imageheight': "2",
        'scale': "0",
        'genome': "GRCm38",
        'plotdisplay': 'none',
        'inputerror': 'hidden',
        'optionalparameterdisplay': 'none',
    }

    if 'id' not in session:
        session['id'] = str(uuid.uuid4())
        session['userdata'] = os.path.join(
            os.path.abspath(os.curdir), 'userdata', session['id'])

    # Checked on every request so the directory is recreated if it has been
    # removed while the session was still alive.
    if not os.path.exists(session['userdata']):
        os.mkdir(session['userdata'])

    if 'submitfusion' in request.form:

        fusion_fields = ('gene5prime', 'gene3prime', 'loc5prime', 'loc3prime')
        for field in fusion_fields + plot_fields + ('genome',):
            params[field] = str(request.form[field])
        params['inputerror'] = 'hidden'

        params, pyensembl_data = set_genome(params)

        params, error = check_fusion_input(params=params)
        if error is not None:
            return error

        params, error = check_params(params=params)
        if error is not None:
            return error

        # Every one of these is reported to the user in the same way, so they
        # share a single handler.
        input_errors = (
            agfusion.exceptions.GeneIDException5prime,
            agfusion.exceptions.GeneIDException3prime,
            agfusion.exceptions.JunctionException5prime,
            agfusion.exceptions.JunctionException3prime,
            agfusion.exceptions.TooManyGenesException,
        )

        # try to construct the fusion
        try:
            fusion = agfusion.Fusion(
                gene5prime=params['gene5prime'],
                gene5primejunction=int(params['loc5prime']),
                gene3prime=params['gene3prime'],
                gene3primejunction=int(params['loc3prime']),
                db=db,
                pyensembl_data=pyensembl_data)
        except input_errors as e:
            params['inputerrormsg'] = e
            params['inputerror'] = 'visible'
            return render_template('index.html', params=params)

        # save the fusion output and visualize
        middlestar = False

        fusion.save_transcript_cdna(out_dir=session['userdata'],
                                    middlestar=middlestar)
        fusion.save_transcript_cds(out_dir=session['userdata'],
                                   middlestar=middlestar)
        fusion.save_proteins(out_dir=session['userdata'],
                             middlestar=middlestar)

        # Keep the fusion on disk so a later image download can reuse it; the
        # context manager guarantees the pickle is flushed and closed.
        with open(session['userdata'] + '/fusion.pk', 'wb') as handle:
            pickle.dump(fusion, handle)

        session['name'] = fusion.name

        dict_of_plots, plot_key = fusion.output_to_html(
            fontsize=params['fontsize'],
            dpi=params['dpi'],
            width=params['imagewidth'],
            height=params['imageheight'],
            scale=params['scale'],
            mpld3=mpld3)

        effects = {
            name: {
                'effect': transcript.effect,
                '5prime_effect': transcript.effect_5prime,
                '3prime_effect': transcript.effect_3prime
            }
            for name, transcript in fusion.transcripts.items()
        }

        session['plot_key'] = plot_key

        params['plotdisplay'] = 'visible'
        params['name'] = fusion.name
        params['gene5prime'] = params['gene5prime'].upper()
        params['gene3prime'] = params['gene3prime'].upper()

        return render_template('index.html',
                               dict_of_plots=dict_of_plots,
                               effects=effects,
                               params=params)

    elif 'downloadseq' in request.form:

        # download sequence data; anything other than cdna/cds falls back to
        # the protein file
        suffixes = {'cdna': '_cdna.fa', 'cds': '_cds.fa'}
        suffix = suffixes.get(request.form['downloadseq'], '_protein.fa')

        return send_file(
            session['userdata'] + '/' + session['name'] + suffix,
            as_attachment=True)

    elif 'downloadimage' in request.form:

        for field in plot_fields:
            params[field] = str(request.form[field])
        params['inputerror'] = 'hidden'
        params['plotdisplay'] = 'visible'

        params, error = check_params(params=params)
        if error is not None:
            return error

        # download image data
        # NOTE(review): pickle.load can execute arbitrary code if the file is
        # tampered with. The file is the one written by the submitfusion
        # branch above -- confirm the userdata directory is not writable by
        # untrusted parties.
        with open(session['userdata'] + '/fusion.pk', 'rb') as handle:
            fusion = pickle.load(handle)

        fusion_name = str(request.form['image_key'])

        image_file = fusion.save_image(
            transcript=fusion_name,
            out_dir=session['userdata'],
            file_type=str(request.form['downloadimage']),
            fontsize=params['fontsize'],
            dpi=params['dpi'],
            width=params['imagewidth'],
            height=params['imageheight'],
            scale=params['scale'])

        return send_file(image_file, as_attachment=True)

    else:

        # default index load
        return render_template('index.html', params=params)