def mark_duplicates(bam_path, output_bam_path, output_metrics_path):
    """Run the ``_MARK_DUPLICATES`` jar on a BAM file.

    The command is assembled from adjacent string literals so that it is a
    single line with exactly one space between arguments. Line continuations
    inside the string literal would otherwise leak source indentation (or
    stray backslashes) into the command and into the text handed to
    ``common.assert_file``.

    Args:
        bam_path: Path passed as ``INPUT=``.
        output_bam_path: Path passed as ``OUTPUT=``.
        output_metrics_path: Path passed as ``METRICS_FILE=``.

    Returns:
        ``output_bam_path``, after both output paths have been checked with
        ``common.assert_file``.
    """
    # NOTE(review): the INPUT=/OUTPUT=/METRICS_FILE= syntax looks like
    # Picard MarkDuplicates -- confirm against where _MARK_DUPLICATES is set.
    cmd = (
        "java -Xmx1g -jar {}"
        " INPUT={}"
        " OUTPUT={}"
        " METRICS_FILE={}"
    ).format(_MARK_DUPLICATES, bam_path, output_bam_path, output_metrics_path)
    common.system(cmd)
    common.assert_file(output_bam_path, cmd)
    common.assert_file(output_metrics_path, cmd)
    return output_bam_path
def do_errors(args):
    """Report runs that produced a log file but no ``output.gd``.

    Every ``<name>.log.txt`` in ``args.log_dir`` is expected to have a
    matching ``<name>/output/output.gd`` under ``args.output_dir``. A log
    without a matching output file is treated as a failed run and the tail
    of its log is shown to the user.

    Args:
        args: Object exposing ``output_dir`` and ``log_dir`` attributes.

    Returns:
        0 when every log has a matching output file, -1 otherwise.

    Raises:
        AssertionError: If either search pattern matches no files.
    """
    output_search = os.path.join(args.output_dir, "*/output/output.gd")
    log_search = os.path.join(args.log_dir, "*.log.txt")
    sys.stderr.write("GenomeDiff search: {}\n".format(output_search))
    sys.stderr.write("Log search: {}\n".format(log_search))

    # Raised explicitly rather than via `assert` so the checks still run
    # under `python -O`; the exception type and message are unchanged.
    output_paths = glob.glob(output_search)
    if not output_paths:
        raise AssertionError(
            "No output/output.gd's found for search: " + output_search)
    output_names = set(
        os.path.basename(path.replace("/output/output.gd", ""))
        for path in output_paths
    )

    log_paths = glob.glob(log_search)
    if not log_paths:
        raise AssertionError("No *.log.txt's found for search: " + log_search)
    log_names = set(
        os.path.basename(path).replace(".log.txt", "") for path in log_paths
    )

    # Sorted so that failures are reported in a stable order.
    error_names = sorted(log_names - output_names)
    if not error_names:
        sys.stderr.write("No errors!\n")
        return 0

    banner = 20 * "*"
    for name in error_names:
        log_path = os.path.join(args.log_dir, name) + ".log.txt"
        sys.stdout.write(banner + "\n")
        # Presumably common.system runs `tail` in a child process that writes
        # straight to stdout; flush first so the banner precedes its output.
        sys.stdout.flush()
        common.system("tail " + log_path)
        sys.stdout.write(banner + "\n\n")
    return -1
def view(bam_path, sam_path):
    """Run ``samtools view -hbS`` to write ``sam_path`` out as ``bam_path``.

    Args:
        bam_path: Destination handed to the ``-o`` option.
        sam_path: Input alignment file.

    Returns:
        ``bam_path``, after it has been checked with ``common.assert_file``.
    """
    arguments = " view -hbS -o {} {}".format(bam_path, sam_path)
    command = samtools_exe + arguments
    common.system(command)
    common.assert_file(bam_path, command)
    return bam_path
def faidx(fasta_path):
    """Run ``samtools faidx`` on a FASTA file.

    Args:
        fasta_path: FASTA file handed to ``samtools faidx``.

    Returns:
        ``fasta_path`` unchanged; no output file is checked here.
    """
    arguments = " faidx {}".format(fasta_path)
    command = samtools_exe + arguments
    common.system(command)
    return fasta_path
def mpileup(fasta_path, bam_sorted_path, out_vcf_path):
    """Pipe ``samtools mpileup -uf`` into ``bcftools view -vcg``.

    The shell pipeline redirects the bcftools output to ``out_vcf_path``.

    Args:
        fasta_path: Reference passed to ``-f``.
        bam_sorted_path: Alignment file passed to ``mpileup``.
        out_vcf_path: File the pipeline output is redirected into.

    Returns:
        ``out_vcf_path``, after it has been checked with
        ``common.assert_file``.
    """
    pipeline = " mpileup -uf {} {} | bcftools view -vcg - > {}".format(
        fasta_path, bam_sorted_path, out_vcf_path)
    command = samtools_exe + pipeline
    common.system(command)
    common.assert_file(out_vcf_path, command)
    return out_vcf_path
def index(bam_path):
    """Run ``samtools index`` on a BAM file and check that an index exists.

    Args:
        bam_path: BAM file to index.

    Returns:
        ``bam_path``, after both the BAM and its index have been checked
        with ``common.assert_file``.
    """
    cmd = samtools_exe + " index {}".format(bam_path)
    common.system(cmd)
    common.assert_file(bam_path, cmd)
    # The input BAM existed before the command ran, so checking it alone
    # cannot detect a failed run. `samtools index` writes its result next to
    # the input as <bam>.bai, so check that file too.
    common.assert_file(bam_path + ".bai", cmd)
    return bam_path
def merge(sam_paths, merged_bam_path, bam_paths):
    """Run ``samtools merge -r -n`` over several BAM files.

    Args:
        sam_paths: Paths supplied through ``-h``; they are joined so that
            each one is preceded by its own ``-h`` flag.
        merged_bam_path: Output path for the merged file.
        bam_paths: Input files, appended space-separated.

    Returns:
        ``merged_bam_path``, after it has been checked with
        ``common.assert_file``.
    """
    header_args = " -h ".join(sam_paths)
    input_args = " ".join(bam_paths)
    arguments = " merge -r -n -h {} {} {}".format(
        header_args, merged_bam_path, input_args)
    command = samtools_exe + arguments
    common.system(command)
    common.assert_file(merged_bam_path, command)
    return merged_bam_path
def sort(bam_path, bam_sorted_prefix):
    """Run ``samtools sort`` using the prefix-style output argument.

    Args:
        bam_path: BAM file to sort.
        bam_sorted_prefix: Output prefix; the file checked afterwards is
            this prefix with ``.bam`` appended.

    Returns:
        ``bam_sorted_prefix + ".bam"``, after it has been checked with
        ``common.assert_file``.
    """
    expected_output = bam_sorted_prefix + ".bam"
    arguments = " sort {} {}".format(bam_path, bam_sorted_prefix)
    command = samtools_exe + arguments
    common.system(command)
    common.assert_file(expected_output, command)
    return expected_output
def merge_sams(sam_paths, output, merge_seq_dicts = True):
    """Run the ``_MERGE_SAMS`` jar over several alignment files.

    Args:
        sam_paths: Input files; each becomes its own ``INPUT=`` argument.
        output: Path passed as ``OUTPUT=``.
        merge_seq_dicts: Truthy to pass ``MERGE_SEQUENCE_DICTIONARIES=true``,
            falsy to pass ``false``.

    Returns:
        ``output``, after it has been checked with ``common.assert_file``.
    """
    if merge_seq_dicts:
        merge_flag = "true"
    else:
        merge_flag = "false"
    # Joining on " INPUT=" gives every path after the first its own key; the
    # template supplies the key for the first one.
    input_args = " INPUT=".join(sam_paths)
    template = "java -jar {} INPUT={} OUTPUT={} MERGE_SEQUENCE_DICTIONARIES={}"
    command = template.format(_MERGE_SAMS, input_args, output, merge_flag)
    common.system(command)
    common.assert_file(output, command)
    return output
def sort_sam(aln_path, output_aln_path, sort_option = "coordinate"):
    """Run the ``_SORT_SAM`` jar on an alignment file.

    Args:
        aln_path: Path passed as ``INPUT=``.
        output_aln_path: Path passed as ``OUTPUT=``.
        sort_option: Value passed as ``SORT_ORDER=``.

    Returns:
        ``output_aln_path``, after it has been checked with
        ``common.assert_file``.
    """
    template = "java -jar {} INPUT={} OUTPUT={} SORT_ORDER={}"
    command = template.format(_SORT_SAM, aln_path, output_aln_path, sort_option)
    common.system(command)
    common.assert_file(output_aln_path, command)
    return output_aln_path
def create_sequence_dictionary(ref_path, output_path):
    """Run the ``_CREATE_SEQUENCE_DICTIONARY`` jar on a reference file.

    Args:
        ref_path: Path passed as ``R=``.
        output_path: Path passed as ``O=``.

    Returns:
        ``output_path``, after it has been checked with
        ``common.assert_file``.
    """
    template = "java -jar {} R={} O={}"
    command = template.format(_CREATE_SEQUENCE_DICTIONARY, ref_path, output_path)
    common.system(command)
    common.assert_file(output_path, command)
    return output_path
def add_or_replace_read_groups(input_path, output_path, library="FOO",
                               platform="ILLUMINA", platform_unit="BAR",
                               sample="NEE"):
    """Run the ``_ADD_OR_REPLACE_READ_GROUPS`` jar on an alignment file.

    The read-group values used to be fixed placeholders inside the command
    string. They are now keyword arguments whose defaults reproduce the
    original command exactly, so existing two-argument callers are
    unaffected.

    Args:
        input_path: Path passed as ``I=``.
        output_path: Path passed as ``O=``.
        library: Value passed as ``LB=``.
        platform: Value passed as ``PL=``.
        platform_unit: Value passed as ``PU=``.
        sample: Value passed as ``SM=``.

    Returns:
        ``output_path``, after it has been checked with
        ``common.assert_file``.
    """
    cmd = "java -jar {} I={} O={} LB={} PL={} PU={} SM={}".format(
        _ADD_OR_REPLACE_READ_GROUPS, input_path, output_path,
        library, platform, platform_unit, sample)
    common.system(cmd)
    common.assert_file(output_path, cmd)
    return output_path
def validate_alignment(aln_path):
    """Run the ``_VALIDATE_SAM_FILE`` jar on an alignment file.

    Args:
        aln_path: Path passed as ``I=``.

    Returns:
        ``aln_path`` unchanged; no output file is checked here.
    """
    template = "java -jar {} I={}"
    command = template.format(_VALIDATE_SAM_FILE, aln_path)
    common.system(command)
    return aln_path