def test_go_annotation_analysis():
    "We can annotate gos"
    # Look for the blast2go jar before any set-up. The directory itself has to
    # be checked: a path built with os.path.join(..., "blast2go.jar") is never
    # empty, so testing the joined path could never trigger the skip.
    b2g_jar_dir = guess_jar_dir("blast2go.jar")
    if not b2g_jar_dir:
        # The parenthesised single-string form prints the same text with the
        # Python 2 print statement and the Python 3 print function.
        print("Do not run b2gppe tests, blast2go jar file not found ")
        return
    b2gpipe_bin = os.path.join(b2g_jar_dir, "blast2go.jar")

    test_dir = NamedTemporaryDir()
    project_name = "backbone"
    nr_path = os.path.join(TEST_DATA_DIR, "blast", "arabidopsis_genes+")
    b2g = os.path.join(TEST_DATA_DIR, "b2gPipe.properties")

    config = {
        "blast": {"nr": {"path": nr_path, "species": "nr"}},
        "Annotation": {
            "go_annotation": {
                "blast_database": "nr",
                "create_dat_file": True,
                "java_memory": 2048,
                "b2g_properties_file": b2g,
                "blast2go_path": b2gpipe_bin,
            }
        },
        "General_settings": {"threads": THREADS},
    }
    settings_path = create_project(directory=test_dir.name,
                                   name=project_name,
                                   configuration=config)
    project_dir = join(test_dir.name, project_name)

    seq = "CTTCATCCATTCTCTCATCCGCCGNTGTGGCCTTTGNCAACAGGGCTTCCCCTGCTCAAGCT"
    seq += "AGCATGGGGGCACCATTCACTGGCCTAAAATCCGCCGCTGCTTTCCCNGTNACTCGCANGACC"
    seq += "AACGACATCACCACTTTGGTTAGCAATGGGGGAAGAGTTCAGGGCNTGAAGGTGTGCCCACCA"
    seq += "CTTGGATTGAAGAAGTTCGAGACTCTTTCTTACCTTCCTGATATGAGTAACGAGCAATTGGGA"
    seq += "AAGGAAGTTGACTACCTTCTCAGGAAGGGATGGATTCCCTGCATTGAATTCGACATTCACAGT"
    seq += "GGATTCGTTTACCGTGAGACCCACAGGTCACCAGG"

    annot_input_dir = join(project_dir, "annotations", "input")
    os.makedirs(annot_input_dir)

    # create some seqs to annotate
    fasta = ">seq1\n%s\n" % seq
    fasta_fpath = os.path.join(annot_input_dir, "seqs.st_nucl.pl_454.fasta")
    with open(fasta_fpath, "w") as fhand:
        fhand.write(fasta)

    # Put an already computed blast result where the project keeps the blasts
    # for these seqs against this database.
    bdir = join(project_dir, "annotations", "blast", "seqs.st_nucl.pl_454",
                "arabidopsis_genes+")
    os.makedirs(bdir)
    shutil.copy(join(TEST_DATA_DIR, "blastResult.xml"),
                join(bdir, "blast.tblastx.xml"))

    do_analysis(project_settings=settings_path, kind="annotate_gos",
                silent=True)

    # The GO term should be found in the pickled annotation db ...
    repr_fpath = join(project_dir, BACKBONE_DIRECTORIES["annotation_dbs"],
                      "seqs.st_nucl.pl_454.0.pickle")
    with open(repr_fpath) as fhand:
        result = fhand.read()
    assert "GO:0043094" in result

    # ... and the blast2go dat and annot files should have been written.
    features_dir = os.path.join(project_dir, "annotations", "features")
    assert os.path.exists(os.path.join(features_dir,
                                       "seqs.st_nucl.pl_454.b2g.dat"))
    assert os.path.exists(os.path.join(features_dir,
                                       "seqs.st_nucl.pl_454.b2g.annot"))

    # The annotation is launched a second time before asking for the stats.
    # NOTE(review): presumably this checks that a re-run over existing output
    # works -- confirm.
    do_analysis(project_settings=settings_path, kind="annotate_gos",
                silent=True)
    do_analysis(project_settings=settings_path, kind="annotation_stats",
                silent=True)

    stats_fpath = join(project_dir, "annotations", "features", "stats",
                       "seqs.st_nucl.pl_454.txt")
    with open(stats_fpath) as fhand:
        result = fhand.read()
    # NOTE(review): the literal is kept exactly as it appears in the file; if
    # the stats report writes these two figures on separate lines, confirm
    # that the separator here matches.
    expected = """Sequences with GOs: 1 Number of GOs: 10"""
    assert expected in result
def bamsam_converter(input_fhand, output_fhand, java_conf=None):
    'Converts between sam and bam'
    # The directory is found by looking for SortSam.jar; the converter jar is
    # expected to sit in that same directory.
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    converter_jar = os.path.join(jar_dir, 'SamFormatConverter.jar')

    # Despite their names, input_fhand and output_fhand are concatenated to
    # strings here, so they are used as paths.
    command = java_cmd(java_conf)
    picard_args = ['-jar', converter_jar,
                   'INPUT=' + input_fhand,
                   'OUTPUT=' + output_fhand]
    command.extend(picard_args)

    call(command, raise_on_error=True, add_ext_dir=False)
def create_picard_dict(reference_fpath, java_conf=None):
    '''It creates a picard dict if it does not exist.

    The dict is written next to the reference, with the reference extension
    replaced by ".dict". When that file is already there nothing is run.
    java_conf is used both to locate the picard jars and to build the java
    command, as the other picard/GATK runners in this file do.
    '''
    dict_path = os.path.splitext(reference_fpath)[0] + '.dict'
    if os.path.exists(dict_path):
        return

    # The picard directory is located through SortSam.jar; the dictionary jar
    # is expected to be in the same directory.
    picard_path = guess_jar_dir('SortSam.jar', java_conf)
    picard_jar = os.path.join(picard_path, 'CreateSequenceDictionary.jar')

    # Start from java_cmd(java_conf) instead of a bare ['java'] so that this
    # runner honours the java configuration it is given.
    cmd = java_cmd(java_conf)
    cmd.extend(['-jar', picard_jar,
                'R=%s' % reference_fpath,
                'O=%s' % dict_path])
    call(cmd, raise_on_error=True, add_ext_dir=False)
def test_go_annotator():
    'It test the go annotator'
    # Check the jar directory itself and do it before creating anything: the
    # joined path is never empty, so testing it could not skip the test, and
    # an early return after mkstemp would leave the temporary file behind.
    b2g_jar_dir = guess_jar_dir('blast2go.jar')
    if not b2g_jar_dir:
        # The parenthesised single-string form prints the same text with the
        # Python 2 print statement and the Python 3 print function.
        print("Do not run b2gppe tests, blast2go jar file not found ")
        return
    b2gpipe_bin = os.path.join(b2g_jar_dir, 'blast2go.jar')
    prop_fpath = os.path.join(TEST_DATA_DIR, 'b2gPipe.properties')

    # mkstemp is only used to get a free path; the descriptor is not needed.
    fhand, annot_fpath = tempfile.mkstemp()
    os.close(fhand)

    blast = open(os.path.join(TEST_DATA_DIR, 'blastResult.xml'))
    try:
        b2gpipe_runner(blast, annot_fpath, b2gpipe_bin, prop_fpath=prop_fpath)

        # The annotator is built from the annot file written by b2gpipe.
        go_annotator = create_go_annotator(annot_fpath)
        seq = SeqWithQuality(name='seq1', seq=Seq('aaaa'))
        go_annotator(seq)
        assert 'GO:0009853' in seq.annotations['GOs']
    finally:
        # Clean up even when the runner or the assertion fails.
        blast.close()
        if os.path.exists(annot_fpath):
            os.remove(annot_fpath)
def test_run_b2g4pipe():
    'It test the runner of b2g4pipe'
    # Check the jar directory itself and do it before creating anything: the
    # joined path is never empty, so testing it could not skip the test, and
    # an early return after mkstemp would leave the temporary files behind.
    b2g_jar_dir = guess_jar_dir('blast2go.jar')
    if not b2g_jar_dir:
        # The parenthesised single-string form prints the same text with the
        # Python 2 print statement and the Python 3 print function.
        print("Do not run b2gppe tests, blast2go jar file not found ")
        return
    b2gpipe_bin = os.path.join(b2g_jar_dir, 'blast2go.jar')
    prop_fpath = os.path.join(TEST_DATA_DIR, 'b2gPipe.properties')

    # mkstemp is only used to get free paths; the descriptors are not needed.
    fhand, annot_fpath = tempfile.mkstemp()
    os.close(fhand)
    fhand, dat_fpath = tempfile.mkstemp()
    os.close(fhand)

    blast = open(os.path.join(TEST_DATA_DIR, 'blast2.xml'))
    try:
        b2gpipe_runner(blast, annot_fpath, b2gpipe_bin, prop_fpath, dat_fpath)
        assert os.path.exists(annot_fpath)
        assert os.path.exists(dat_fpath)
    finally:
        # Clean up even when the runner or an assertion fails.
        blast.close()
        for fpath in (annot_fpath, dat_fpath):
            if os.path.exists(fpath):
                os.remove(fpath)
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    'It realigns the bam using GATK Local realignment around indels'
    # Files that must exist before GATK is run: the sam index and the picard
    # dict of the reference, and the index of the bam.
    create_sam_reference_index(reference_fpath)
    create_picard_dict(reference_fpath, java_conf=java_conf)
    create_bam_index(bam_fpath)

    gatk_dir = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_dir, 'GenomeAnalysisTK.jar')

    # According to GATK the multithreaded mode is experimental, so it might be
    # a good idea to run in just one thread. In version 1.0.4498 the option
    # was removed, so parallel is hard-wired to False and -nt is never added.
    parallel = False

    # Step 1: find the intervals to realign.
    intervals_fhand = tempfile.NamedTemporaryFile(suffix='.intervals')
    target_cmd = java_cmd(java_conf=java_conf)
    target_cmd.extend(['-jar', gatk_jar,
                       '-T', 'RealignerTargetCreator',
                       '-I', bam_fpath,
                       '-R', reference_fpath,
                       '-o', intervals_fhand.name])
    if parallel and threads and threads > 1:
        target_cmd.extend(['-nt', str(get_num_threads(threads))])
    call(target_cmd, raise_on_error=True, add_ext_dir=False)

    # Step 2: the realignment itself, written to a temporary bam.
    unsorted_bam = NamedTemporaryFile(suffix='.bam')
    realign_cmd = java_cmd(java_conf=java_conf)
    realign_cmd.extend(['-Djava.io.tmpdir=%s' % tempfile.gettempdir(),
                        '-jar', gatk_jar,
                        '-I', bam_fpath,
                        '-R', reference_fpath,
                        '-T', 'IndelRealigner',
                        '-targetIntervals', intervals_fhand.name,
                        '-o', unsorted_bam.name])
    if parallel and threads and threads > 1:
        realign_cmd.extend(['-nt', str(get_num_threads(threads))])
    call(realign_cmd, raise_on_error=True, add_ext_dir=False)

    # Step 3: the realigned bam is sorted into the requested output path.
    sort_bam_sam(unsorted_bam.name, out_bam_fpath, java_conf=java_conf,
                 tmp_dir=tmp_dir)
def sort_bam_sam(in_fpath, out_fpath, sort_method='coordinate',
                 java_conf=None, tmp_dir=None, strict_validation=True):
    'It sorts a bam file using picard'
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    sort_jar = os.path.join(jar_dir, 'SortSam.jar')

    command = java_cmd(java_conf)
    command.extend(['-jar', sort_jar,
                    'INPUT=' + in_fpath,
                    'OUTPUT=' + out_fpath,
                    'SORT_ORDER=' + sort_method])
    # Optional picard arguments.
    if not strict_validation:
        command.append('VALIDATION_STRINGENCY=LENIENT')
    if tmp_dir:
        command.append('TMP_DIR=%s' % tmp_dir)

    # Errors are not raised by call; the return code is inspected here so
    # that running out of disk space can be reported with its own message.
    stdout, stderr, retcode = call(command, raise_on_error=False,
                                   add_ext_dir=False)
    if not retcode:
        return

    no_space_msg = 'No space left on device'
    if no_space_msg in stdout or no_space_msg in stderr:
        raise RuntimeError('Picard sort consumed all space in device.' +
                           stderr)

    msg = 'Error running picard: %s\n stderr: %s\n stdout: %s' % (
        ' '.join(command), stderr, stdout)
    raise RuntimeError(msg)