Beispiel #1
0
    def test_go_annotation_analysis():
        """Run the "annotate_gos" and "annotation_stats" analyses on a project.

        A throw-away backbone project is created with one 454 sequence and a
        precomputed blast result, the GO annotation is run (twice) and the
        generated pickle, b2g files and stats report are checked.

        The test is skipped when the blast2go jar directory can not be guessed.
        """
        # The guard has to look at the directory returned by guess_jar_dir:
        # once it is joined with the jar name the resulting path is never
        # falsy and the skip branch could not be reached.
        # NOTE(review): this assumes guess_jar_dir returns a falsy value when
        # the jar is not found -- confirm against its implementation.
        jar_dir = guess_jar_dir("blast2go.jar")
        if not jar_dir:
            print("Do not run b2gppe tests, blast2go jar file not found ")
            return
        b2gpipe_bin = os.path.join(jar_dir, "blast2go.jar")

        test_dir = NamedTemporaryDir()
        project_name = "backbone"
        nr_path = os.path.join(TEST_DATA_DIR, "blast", "arabidopsis_genes+")
        b2g = os.path.join(TEST_DATA_DIR, "b2gPipe.properties")
        config = {
            "blast": {"nr": {"path": nr_path, "species": "nr"}},
            "Annotation": {
                "go_annotation": {
                    "blast_database": "nr",
                    "create_dat_file": True,
                    "java_memory": 2048,
                    "b2g_properties_file": b2g,
                    "blast2go_path": b2gpipe_bin,
                }
            },
            "General_settings": {"threads": THREADS},
        }

        settings_path = create_project(directory=test_dir.name, name=project_name, configuration=config)
        project_dir = join(test_dir.name, project_name)
        seq = "CTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCCTGCTCAAGCT"
        seq += "AGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTNACTCGCANGACC"
        seq += "AACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAGGGCNTGAAGGTGTGCCCACCA"
        seq += "CTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGATATGAGTAACGAGCAATTGGGA"
        seq += "AAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCTGCATTGAATTCGACATTCACAGT"
        seq += "GGATTCGTTTACCGTGAGACCCACAGGTCACCAGG"

        annot_input_dir = join(project_dir, "annotations", "input")
        os.makedirs(annot_input_dir)

        # create some seqs to annotate
        fasta = ">seq1\n%s\n" % seq
        with open(os.path.join(annot_input_dir, "seqs.st_nucl.pl_454.fasta"), "w") as fhand:
            fhand.write(fasta)

        # A precomputed blast result is copied into the project.
        # NOTE(review): presumably so that the analysis does not run blast
        # itself -- confirm.
        bdir = join(project_dir, "annotations", "blast", "seqs.st_nucl.pl_454", "arabidopsis_genes+")
        os.makedirs(bdir)
        shutil.copy(join(TEST_DATA_DIR, "blastResult.xml"), join(bdir, "blast.tblastx.xml"))

        do_analysis(project_settings=settings_path, kind="annotate_gos", silent=True)
        repr_fpath = join(project_dir, BACKBONE_DIRECTORIES["annotation_dbs"], "seqs.st_nucl.pl_454.0.pickle")
        with open(repr_fpath) as repr_fhand:
            result = repr_fhand.read()
        assert "GO:0043094" in result
        assert os.path.exists(os.path.join(project_dir, "annotations", "features", "seqs.st_nucl.pl_454.b2g.dat"))
        assert os.path.exists(os.path.join(project_dir, "annotations", "features", "seqs.st_nucl.pl_454.b2g.annot"))

        # The annotation is run a second time on the already processed
        # project; it only has to finish without raising.
        do_analysis(project_settings=settings_path, kind="annotate_gos", silent=True)

        do_analysis(project_settings=settings_path, kind="annotation_stats", silent=True)
        stats_fpath = join(project_dir, "annotations", "features", "stats", "seqs.st_nucl.pl_454.txt")
        with open(stats_fpath) as stats_fhand:
            result = stats_fhand.read()
        expected = "Sequences with GOs: 1\nNumber of GOs: 10"
        assert expected in result
Beispiel #2
0
def bamsam_converter(input_fhand, output_fhand, java_conf=None):
    """Convert between sam and bam running picard's SamFormatConverter.

    input_fhand and output_fhand are concatenated to the 'INPUT=' and
    'OUTPUT=' picard arguments, so despite their names they have to be
    strings (file paths).
    java_conf is handed over to guess_jar_dir and to java_cmd.
    The command is run with raise_on_error=True.
    """
    # The converter jar is looked for in the directory where SortSam.jar is
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    converter_jar = os.path.join(jar_dir, 'SamFormatConverter.jar')

    command = java_cmd(java_conf)
    picard_args = ['-jar', converter_jar,
                   'INPUT=' + input_fhand,
                   'OUTPUT=' + output_fhand]
    command.extend(picard_args)
    call(command, raise_on_error=True, add_ext_dir=False)
Beispiel #3
0
def create_picard_dict(reference_fpath, java_conf=None):
    """Create the picard sequence dictionary of a reference if it is missing.

    The dictionary path is the reference path with its extension replaced
    by '.dict'. When that file already exists nothing is done.

    reference_fpath -- path to the reference sequence file.
    java_conf -- java configuration, used both to locate the picard jars and
                 to build the java command line.

    The command is run with raise_on_error=True.
    """
    dict_path = os.path.splitext(reference_fpath)[0] + '.dict'
    if os.path.exists(dict_path):
        return
    # The dictionary jar is looked for in the directory where SortSam.jar is
    picard_path = guess_jar_dir('SortSam.jar', java_conf)
    picard_jar = os.path.join(picard_path, 'CreateSequenceDictionary.jar')
    # The java command is built with java_cmd, like the rest of the picard
    # and GATK wrappers do, so the java settings found in java_conf are not
    # ignored here.
    cmd = java_cmd(java_conf)
    cmd.extend(['-jar', picard_jar,
                'R=%s' % reference_fpath,
                'O=%s' % dict_path])
    call(cmd, raise_on_error=True, add_ext_dir=False)
Beispiel #4
0
    def test_go_annotator():
        """Check the annotator built by create_go_annotator.

        b2gpipe_runner is run on a blast result to create an annot file, a
        go annotator is created from that file and applied to a sequence
        named 'seq1', that should end up with the 'GO:0009853' annotation.

        The test is skipped when the blast2go jar directory can not be guessed.
        """
        # The guard has to look at the directory returned by guess_jar_dir:
        # once it is joined with the jar name the resulting path is never
        # falsy and the skip branch could not be reached.
        # NOTE(review): this assumes guess_jar_dir returns a falsy value when
        # the jar is not found -- confirm against its implementation.
        jar_dir = guess_jar_dir('blast2go.jar')
        if not jar_dir:
            print("Do not run b2gppe tests, blast2go jar file not found ")
            return
        b2gpipe_bin = os.path.join(jar_dir, 'blast2go.jar')
        prop_fpath = os.path.join(TEST_DATA_DIR, 'b2gPipe.properties')

        # Only the path is needed, the descriptor is closed straight away
        fhand, annot_fpath = tempfile.mkstemp()
        os.close(fhand)
        blast = open(os.path.join(TEST_DATA_DIR, 'blastResult.xml'))
        try:
            b2gpipe_runner(blast, annot_fpath, b2gpipe_bin,
                           prop_fpath=prop_fpath)
            go_annotator = create_go_annotator(annot_fpath)
            seq = SeqWithQuality(name='seq1', seq=Seq('aaaa'))

            go_annotator(seq)
            assert 'GO:0009853' in seq.annotations['GOs']
        finally:
            # Clean up also when the runner or the assertion fails
            blast.close()
            if os.path.exists(annot_fpath):
                os.remove(annot_fpath)
    def test_run_b2g4pipe():
        """Check that b2gpipe_runner runs with an annot and a dat output path.

        The test is skipped when the blast2go jar directory can not be guessed.
        """
        # The guard has to look at the directory returned by guess_jar_dir:
        # once it is joined with the jar name the resulting path is never
        # falsy and the skip branch could not be reached.
        # NOTE(review): this assumes guess_jar_dir returns a falsy value when
        # the jar is not found -- confirm against its implementation.
        jar_dir = guess_jar_dir('blast2go.jar')
        if not jar_dir:
            print("Do not run b2gppe tests, blast2go jar file not found ")
            return
        b2gpipe_bin = os.path.join(jar_dir, 'blast2go.jar')
        prop_fpath = os.path.join(TEST_DATA_DIR, 'b2gPipe.properties')

        # Only the paths are needed, the descriptors are closed straight away
        fhand, annot_fpath = tempfile.mkstemp()
        os.close(fhand)
        fhand, dat_fpath = tempfile.mkstemp()
        os.close(fhand)
        blast = open(os.path.join(TEST_DATA_DIR, 'blast2.xml'))
        try:
            b2gpipe_runner(blast, annot_fpath, b2gpipe_bin, prop_fpath,
                           dat_fpath)

            # NOTE(review): mkstemp has already created both files, so these
            # checks would also pass if the runner wrote nothing -- consider
            # asserting on the file contents.
            assert os.path.exists(annot_fpath)
            assert os.path.exists(dat_fpath)
        finally:
            # Clean up also when the runner or an assertion fails
            blast.close()
            for fpath in (annot_fpath, dat_fpath):
                if os.path.exists(fpath):
                    os.remove(fpath)
Beispiel #6
0
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    """Realign a bam with the GATK local realignment around indels.

    The steps are: create the reference sam index, the reference picard
    dict and the bam index, run GATK RealignerTargetCreator to get the
    intervals, run GATK IndelRealigner on those intervals and finally sort
    the result into out_bam_fpath with sort_bam_sam.

    bam_fpath -- path to the bam to realign.
    reference_fpath -- path to the reference sequence.
    out_bam_fpath -- path where the sorted realigned bam is written.
    java_conf -- java configuration used to locate the jars and to build
                 the java commands.
    threads -- currently without effect, the multithreaded run is disabled
               inside the function.
    tmp_dir -- only handed over to the final sort; the IndelRealigner java
               tmpdir is taken from tempfile.gettempdir().
    """
    # Files required before running GATK: reference sam index, reference
    # picard dict and bam index
    create_sam_reference_index(reference_fpath)
    create_picard_dict(reference_fpath, java_conf=java_conf)
    create_bam_index(bam_fpath)

    gatk_dir = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_dir, 'GenomeAnalysisTK.jar')

    # First GATK run: the intervals to realign
    intervals_fhand = tempfile.NamedTemporaryFile(suffix='.intervals')
    target_cmd = java_cmd(java_conf=java_conf)
    target_cmd.extend(['-jar', gatk_jar,
                       '-T', 'RealignerTargetCreator',
                       '-I', bam_fpath,
                       '-R', reference_fpath,
                       '-o', intervals_fhand.name])

    # According to GATK running this in parallel is experimental, so it is
    # safer to use just one thread. The option was removed in version
    # 1.0.4498, hence parallel is fixed to False and -nt is never added.
    parallel = False
    use_threads = parallel and threads and threads > 1
    if use_threads:
        target_cmd.extend(['-nt', str(get_num_threads(threads))])
    call(target_cmd, raise_on_error=True, add_ext_dir=False)

    # Second GATK run: the realignment itself
    unsorted_bam = NamedTemporaryFile(suffix='.bam')
    realign_cmd = java_cmd(java_conf=java_conf)
    realign_cmd.extend(['-Djava.io.tmpdir=%s' % tempfile.gettempdir(),
                        '-jar', gatk_jar,
                        '-I', bam_fpath,
                        '-R', reference_fpath,
                        '-T', 'IndelRealigner',
                        '-targetIntervals', intervals_fhand.name,
                        '-o', unsorted_bam.name])
    if use_threads:
        realign_cmd.extend(['-nt', str(get_num_threads(threads))])
    call(realign_cmd, raise_on_error=True, add_ext_dir=False)

    # The realigned bam has to be sorted to get the final output
    sort_bam_sam(unsorted_bam.name, out_bam_fpath, java_conf=java_conf,
                 tmp_dir=tmp_dir)
Beispiel #7
0
def sort_bam_sam(in_fpath, out_fpath, sort_method='coordinate',
                 java_conf=None, tmp_dir=None, strict_validation=True):
    """Sort a bam or sam file running picard's SortSam.

    in_fpath -- path to the file to sort.
    out_fpath -- path to the sorted output.
    sort_method -- value given to picard's SORT_ORDER.
    java_conf -- java configuration used to locate the jar and to build the
                 java command.
    tmp_dir -- when given it is passed to picard as TMP_DIR.
    strict_validation -- when False picard is run with
                         VALIDATION_STRINGENCY=LENIENT.

    A RuntimeError is raised when picard returns a non-zero code, with a
    specific message when the device ran out of space.
    """
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    sort_jar = os.path.join(jar_dir, 'SortSam.jar')

    command = java_cmd(java_conf)
    command.extend(['-jar', sort_jar,
                    'INPUT=' + in_fpath,
                    'OUTPUT=' + out_fpath,
                    'SORT_ORDER=' + sort_method])
    if not strict_validation:
        command.append('VALIDATION_STRINGENCY=LENIENT')
    if tmp_dir:
        command.append('TMP_DIR=%s' % tmp_dir)

    # The return code is checked here, instead of letting call raise, to be
    # able to tell apart the lack of disk space from any other failure
    stdout, stderr, retcode = call(command, raise_on_error=False,
                                   add_ext_dir=False)
    if not retcode:
        return

    no_space_msg = 'No space left on device'
    if no_space_msg in stdout or no_space_msg in stderr:
        raise RuntimeError('Picard sort consumed all space in device.' + stderr)

    msg = 'Error running picard: %s\n stderr: %s\n stdout: %s'
    raise RuntimeError(msg % (' '.join(command), stderr, stdout))