예제 #1
0
    def test_04(self):
        """Run the flaimapper CLI on the SRR207111 HeLa sample and check SNORD81.

        Steps:
          1. Extract ``<sampledir><samplename>.tar.gz`` into the working
             directory with ``tar``.
          2. Run ``flaimapper -o test.gtf -f 2 <samplename>.bam`` and require
             exit code 0.
          3. Parse the written file with ``parse_gff`` and require that the
             SNORD81 reference has entries whose second and third fields are
             (14, 36), (46, 67) and (54, 79), and that it has exactly six
             lines in total (every fragment generates two GTF lines).

        The output file is removed only when all three fragments were found,
        so it remains available for inspection when the test fails on them.
        """
        samplename = 'SRR207111_HeLa18-30'
        sampledir = '../share/small_RNA-seq_alignments/SRP006788/'
        snord81 = 'HGNC=10101&HUGO-Symbol=SNORD81&HUGO-Name=small_nucleolar_RNA,_C/D_box_81&LOCI=[chr1:173833274-173833370:strand=-]&SOURCE=RefSeq&SOURCE-ACCESSION=NR_003938&GENOME=hg19'

        subprocess.call(['tar', '-xzf', sampledir + samplename + '.tar.gz'])
        output_file = 'test.gtf'

        command = ['flaimapper', '-o', output_file, '-f', '2', samplename + '.bam']
        with subprocess.Popen(command) as pipe:
            # wait() already returns the exit status; no extra poll() needed.
            exit_code = pipe.wait()

        self.assertEqual(exit_code, 0)
        data = parse_gff(output_file)

        # Collect the (field 1, field 2) pairs of every SNORD81 line and count
        # those lines; iterate the records directly instead of re-indexing
        # ``data`` with a hand-maintained counter.
        found = set()
        k = 0
        for chunk in data:
            if chunk[0] == snord81:
                k += 1
                found.add((chunk[1], chunk[2]))

        u81_14 = (14, 36) in found
        u81_46 = (46, 67) in found
        u81_54 = (54, 79) in found

        # Keep the output around for debugging unless all fragments are there.
        if u81_14 and u81_46 and u81_54:
            os.remove(output_file)

        self.assertTrue(u81_14, "The first of the three SNORD81 fragments was not detected")
        self.assertTrue(u81_46, "The second of the three SNORD81 fragments was not detected")
        self.assertTrue(u81_54, "The third of the three SNORD81 fragments was not detected")
        # *2 because every entry generates two GTF lines. The message covers
        # both too many and too few lines and reports the actual line count.
        self.assertEqual(k, 3 * 2, "Expected 6 GTF lines (3 fragments, 2 lines each) for SNORD81, found %i" % k)
from flaimapper.ncRNA import ncRNA

from flaimapper.utils import fasta_entry_names
from flaimapper.utils import parse_gff
from flaimapper.utils import link_mirbase_to_ncrnadb09
from flaimapper.FlaiMapperObject import FlaiMapperObject

import sys
# Directory path taken from the first command-line argument, normalised so
# that it ends with exactly one "/". Raises IndexError when the script is
# started without an argument.
# NOTE(review): tmp_dir is not used in the visible part of this script;
# presumably it is a scratch/output directory -- confirm against the rest.
tmp_dir = sys.argv[1].rstrip("/") + "/"

# Verbosity setting passed to FlaiMapperObject further down.
verbosity = "quiet"

# Reference data, all given as paths relative to the working directory.
# miRNAs: object built by miRBase() from the miRBase 20 "miRNA.dat" file.
miRNAs = miRBase("../../../../share/annotations/miRBase_20/miRNA.dat")
# ncrna_library_names: result of fasta_entry_names() on the ncrnadb09 FASTA
# (presumably the entry names of that file -- verify in flaimapper.utils).
ncrna_library_names = fasta_entry_names(
    "../../../../share/annotations/ncRNA_annotation/ncrnadb09.fa")
# regions: result of parse_gff() on the matching ncrnadb09 GTF annotation.
regions = parse_gff(
    "../../../../share/annotations/ncRNA_annotation/ncrnadb09.gtf")
links = link_mirbase_to_ncrnadb09(
    miRNAs, ncrna_library_names)  # Crosslink miRBase with reference ncRNAs

# Name of the dataset sub-directory under share/small_RNA-seq_alignments/
# from which the alignments are read (presumably an SRA study accession).
dataset_id = "SRP041082"

experiments = ['SRR1232072', 'SRR1232073']
for experiment in experiments:
    alignments = [
        "../../../../share/small_RNA-seq_alignments/" + dataset_id + "/" +
        experiment
    ]

    # Load flaimapper
    flaimapper = FlaiMapperObject('sslm', verbosity)
    for alignment in alignments:
from flaimapper.miRBase import miRBase
from flaimapper.ncRNA import ncRNA

from flaimapper.utils import fasta_entry_names
from flaimapper.utils import parse_gff
from flaimapper.utils import link_mirbase_to_ncrnadb09
from flaimapper.FlaiMapperObject import FlaiMapperObject

import sys
# Directory path taken from the first command-line argument, normalised so
# that it ends with exactly one "/". Raises IndexError when the script is
# started without an argument.
# NOTE(review): tmp_dir is not used in the visible part of this script;
# presumably it is a scratch/output directory -- confirm against the rest.
tmp_dir = sys.argv[1].rstrip("/")+"/"

# Verbosity setting passed to FlaiMapperObject further down.
verbosity = "quiet"

# Reference data, all given as paths relative to the working directory.
# miRNAs: object built by miRBase() from the miRBase 20 "miRNA.dat" file.
miRNAs = miRBase("../../../../share/annotations/miRBase_20/miRNA.dat")
# ncrna_library_names: result of fasta_entry_names() on the ncrnadb09 FASTA
# (presumably the entry names of that file -- verify in flaimapper.utils).
ncrna_library_names = fasta_entry_names("../../../../share/annotations/ncRNA_annotation/ncrnadb09.fa")
# regions: result of parse_gff() on the matching ncrnadb09 GTF annotation.
regions = parse_gff("../../../../share/annotations/ncRNA_annotation/ncrnadb09.gtf")
links = link_mirbase_to_ncrnadb09(miRNAs,ncrna_library_names)			# Crosslink miRBase with reference ncRNAs

# Name of the dataset sub-directory under share/small_RNA-seq_alignments/
# from which the alignments are read (presumably an SRA study accession).
dataset_id = "SRP028959"

# Experiments of the dataset; each one is processed on its own below.
experiments = ['SRR954957', 'SRR954958', 'SRR954959']
for experiment in experiments:
	# Single alignment path per experiment:
	# ../../../../share/small_RNA-seq_alignments/<dataset_id>/<experiment>
	alignments = ["../../../../share/small_RNA-seq_alignments/"+dataset_id+"/"+experiment]
	
	# Load flaimapper: a fresh FlaiMapperObject is created for every
	# experiment and receives that experiment's alignment(s).
	flaimapper = FlaiMapperObject('sslm',verbosity)
	for alignment in alignments:
		flaimapper.add_alignment(alignment)
	# Count reads using the miRBase object, the miRBase<->ncRNA links and the
	# parsed regions.
	# NOTE(review): the meaning of the last argument (10) is not visible
	# here -- confirm against FlaiMapperObject.count_reads_per_region.
	results = flaimapper.count_reads_per_region(miRNAs,links,regions,10)
	
	# Keys of the results mapping in sorted order.
	keys = sorted(results.keys())