def main(args):
    """Build GEMMA input files for each phenotype and run the association jobs.

    Reads sample IDs from the BCF, joins them with per-sample values from
    ``args.csv`` (keyed on its ``id`` column), writes one ``<pheno>.pheno``
    file per phenotype listed in ``args.phenos`` and one shell command line
    per phenotype into a commands file. Unless ``args.preprocess`` is set,
    the commands file is piped to ``parallel`` using ``args.threads`` jobs.

    Args:
        args: parsed command-line namespace providing ``bcf``, ``csv``,
            ``phenos``, ``preprocess`` and ``threads``.
    """
    bcf = ps.bcf(args.bcf)
    bcf.get_mean_genotype()
    bcf.get_genesum()
    geno_file = bcf.prefix + ".geno"
    genesum_file = bcf.prefix + ".genesum"

    # One (initially empty) phenotype dict per sample present in the BCF.
    meta = {s: {} for s in bcf.samples}
    with open(args.csv) as csv_handle:
        reader = csv.DictReader(csv_handle)
        # Take the column names from the header so the "is this phenotype in
        # the CSV" check below also works when the CSV has no data rows
        # (the original relied on the leaked loop variable `row`).
        csv_columns = set(reader.fieldnames or [])
        for row in reader:
            if row["id"] not in meta:
                # Sample is in the CSV but not in the BCF: ignore it.
                continue
            for pheno, value in row.items():
                if pheno == "id":
                    continue
                meta[row["id"]][pheno] = value

    with open(args.phenos) as phenos_handle:
        phenos = [line.rstrip() for line in phenos_handle]

    # Three chained GEMMA runs per phenotype: kinship matrix, SNP-level LMM,
    # then LMM on the gene-sum genotypes.
    cmd_template = (
        "gemma -p %s -g %s -gk 1 -o %s -maf 0.00005 -miss 0.99 && "
        "gemma -lmm 1 -p %s -g %s -k output/%s.cXX.txt -o %s -maf 0.00005 -miss 0.99 && "
        "gemma -lmm 1 -p %s -g %s -k output/%s.cXX.txt -o %s.genesum -notsnp\n"
    )

    cmd_file = ps.get_random_file()
    with open(cmd_file, "w") as cmd_handle:
        for pheno in phenos:
            pheno_file = "%s.pheno" % pheno
            if pheno not in csv_columns:
                # NOTE(review): the second positional argument is presumably
                # the "exit" flag of ps.log -- confirm against pathogenseq.
                ps.log("%s not in CSV file" % pheno, True)
            # One value per BCF sample, in BCF sample order; samples without
            # a value (absent from the CSV or a short row) are written as NA.
            values = []
            for s in bcf.samples:
                value = meta[s].get(pheno)
                values.append("NA" if value is None else value)
            with open(pheno_file, "w") as pheno_handle:
                pheno_handle.write("\n".join(values))
            cmd_handle.write(cmd_template % (
                pheno_file, geno_file, pheno,
                pheno_file, geno_file, pheno, pheno,
                pheno_file, genesum_file, pheno, pheno,
            ))

    if args.preprocess:
        ps.log("Preprocessing finished\n", True)
    else:
        ps.run_cmd("cat %s | parallel -j %s" % (cmd_file, args.threads))
def main(args):
    """Run the Illumina mapping step for one sample.

    Validates that an output prefix and a reference were supplied, builds a
    ``ps.fastq`` object from the prefix, reference and read files, and calls
    its ``illumina`` method with the requested mapper.

    Args:
        args: parsed command-line namespace providing ``prefix``, ``ref``,
            ``r1``, ``r2``, ``threads`` and ``mapper``.
    """
    if not args.prefix:
        ps.log("Please specify prefix with -p")
        quit(1)
    if not args.ref:
        # Fixed: the original passed the undefined name `T`, which raised
        # NameError instead of logging the message.
        # NOTE(review): ext=True presumably makes ps.log exit -- confirm.
        ps.log("Please use --ref to provide a reference... Exiting", ext=True)
    x = ps.fastq(args.prefix, args.ref, args.r1, args.r2, threads=args.threads)
    x.illumina(mapper=args.mapper)
def main(args):
    """Run the Illumina mapping step for one sample.

    Checks that both an output prefix and a reference were given, then
    creates a ``ps.fastq`` object from the prefix, reference and read files
    and invokes its ``illumina`` method with the selected mapper.

    Args:
        args: parsed command-line namespace providing ``prefix``, ``ref``,
            ``r1``, ``r2``, ``threads`` and ``mapper``.
    """
    if not args.prefix:
        ps.log("Please specify prefix with -p")
        quit(1)
    if not args.ref:
        # Fixed: `ext=T` referenced an undefined name and raised NameError
        # before the message could be logged; the intended value is True.
        # NOTE(review): ext=True presumably makes ps.log exit -- confirm.
        ps.log("Please use --ref to provide a reference... Exiting", ext=True)
    x = ps.fastq(args.prefix, args.ref, args.r1, args.r2, threads=args.threads)
    x.illumina(mapper=args.mapper)
def main(args):
    """Compute BAM QC statistics and write them out as JSON.

    Validates the inputs, plots coverage for every sequence in the reference
    to ``<prefix>.<seq>.cov.png``, saves coverage to ``<prefix>.cov.json``,
    and writes mapping statistics to ``<prefix>.bam_stats.json`` and
    per-region coverage (from a GFF or a BED file, if given) to
    ``<prefix>.regions.cov.json``.

    Args:
        args: parsed command-line namespace providing ``bam``, ``ref``,
            ``prefix``, ``gff``, ``gffkey`` and ``bed``.
    """
    # --- argument validation; quit() ends the run on the first problem ---
    if not args.bam:
        ps.log("Please provide bam file")
        quit()
    ps.filecheck(args.bam)

    if not args.ref:
        ps.log("Please provide reference")
        quit()
    ps.filecheck(args.ref)
    fasta = ps.fasta(args.ref)

    if not args.prefix:
        ps.log("Please provide prefix")
        quit()

    if args.gff and args.bed:
        ps.log(
            "Please provide either a GFF file or BED file but not both...Exiting!"
        )
        quit()
    if args.gff:
        if not args.gffkey:
            ps.log(
                "Please provide the key to look for in the GFF file...Exiting!"
            )
            quit()
        ps.filecheck(args.gff)
    if args.bed:
        ps.filecheck(args.bed)

    cov_json = "%s.cov.json" % args.prefix
    stats_json = "%s.bam_stats.json" % args.prefix
    region_json = "%s.regions.cov.json" % args.prefix

    bamqc = ps.qc_bam(args.bam, args.ref)
    # One coverage plot per reference sequence.
    for s in fasta.fa_dict:
        cov_plot = "%s.%s.cov.png" % (args.prefix, s)
        bamqc.plot_cov(s, cov_plot)
    # NOTE(review): cov_json does not depend on the sequence name, so the
    # coverage is saved once after the loop -- confirm this matches the
    # intended placement of save_cov.
    bamqc.save_cov(cov_json)

    stats = {
        "pct_reads_mapped": bamqc.pct_reads_mapped,
        "med_dp": bamqc.med_dp,
    }

    # Region coverage stays empty when neither a GFF nor a BED was supplied.
    region_cov = {}
    if args.gff:
        region_cov = bamqc.gff_cov(args.gff, args.gffkey)
    elif args.bed:
        region_cov = bamqc.bed_cov(args.bed)

    # Context managers guarantee the JSON files are flushed and closed.
    with open(stats_json, "w") as stats_handle:
        json.dump(stats, stats_handle)
    with open(region_json, "w") as region_handle:
        json.dump(region_cov, region_handle)
def main(args):
    """Run BAM quality control and dump the results to JSON files.

    After validating the arguments, a coverage plot
    (``<prefix>.<seq>.cov.png``) is produced for each reference sequence,
    coverage is saved to ``<prefix>.cov.json``, and two JSON files are
    written: ``<prefix>.bam_stats.json`` (percentage of reads mapped and
    median depth) and ``<prefix>.regions.cov.json`` (coverage per GFF/BED
    region, or an empty object when neither file was supplied).

    Args:
        args: parsed command-line namespace providing ``bam``, ``ref``,
            ``prefix``, ``gff``, ``gffkey`` and ``bed``.
    """
    # Each failed check logs a message and terminates via quit().
    if not args.bam:
        ps.log("Please provide bam file")
        quit()
    ps.filecheck(args.bam)

    if not args.ref:
        ps.log("Please provide reference")
        quit()
    ps.filecheck(args.ref)
    fasta = ps.fasta(args.ref)

    if not args.prefix:
        ps.log("Please provide prefix")
        quit()

    # GFF and BED are mutually exclusive sources of regions.
    if args.gff and args.bed:
        ps.log("Please provide either a GFF file or BED file but not both...Exiting!")
        quit()
    if args.gff:
        if not args.gffkey:
            ps.log("Please provide the key to look for in the GFF file...Exiting!")
            quit()
        ps.filecheck(args.gff)
    if args.bed:
        ps.filecheck(args.bed)

    cov_json = "%s.cov.json" % args.prefix
    stats_json = "%s.bam_stats.json" % args.prefix
    region_json = "%s.regions.cov.json" % args.prefix

    stats = {}
    bamqc = ps.qc_bam(args.bam, args.ref)
    for s in fasta.fa_dict:
        cov_plot = "%s.%s.cov.png" % (args.prefix, s)
        bamqc.plot_cov(s, cov_plot)
    # NOTE(review): saved once after the loop because the output path is the
    # same for every sequence -- confirm this is the intended placement.
    bamqc.save_cov(cov_json)
    stats["pct_reads_mapped"] = bamqc.pct_reads_mapped
    stats["med_dp"] = bamqc.med_dp

    region_cov = {}
    if args.gff:
        region_cov = bamqc.gff_cov(args.gff, args.gffkey)
    elif args.bed:
        region_cov = bamqc.bed_cov(args.bed)

    # Fixed: the original left both output handles open (never closed).
    with open(stats_json, "w") as stats_handle:
        json.dump(stats, stats_handle)
    with open(region_json, "w") as region_handle:
        json.dump(region_cov, region_handle)
def main(args):
    """Compute FASTQ QC statistics and write them to a JSON file.

    Validates the read files and output prefix, builds a ``ps.qc_fastq``
    object (paired when ``args.r2`` is given, single-end otherwise) and
    writes its mean read length and read count to
    ``<prefix>.fastq_stats.json``.

    Args:
        args: parsed command-line namespace providing ``r1``, ``r2`` and
            ``prefix``.
    """
    if not args.r1:
        ps.log("Please provide at least one fastq file with -1...Exiting")
        quit()
    ps.filecheck(args.r1)
    # The second read file is optional; only check it when supplied.
    if args.r2:
        ps.filecheck(args.r2)
    if not args.prefix:
        ps.log("Please provide a file output prefix...Exiting")
        quit()

    if args.r2:
        fastqqc = ps.qc_fastq(args.prefix, args.r1, args.r2)
    else:
        fastqqc = ps.qc_fastq(args.prefix, args.r1)

    stats = {
        "mean_read_len": fastqqc.mean_read_len,
        "read_num": fastqqc.read_num,
    }
    stats_json = "%s.fastq_stats.json" % args.prefix
    # Fixed: the original never closed the output handle.
    with open(stats_json, "w") as stats_handle:
        json.dump(stats, stats_handle)
def main(args):
    """Run FASTQ quality control and save the summary as JSON.

    Checks that at least one read file and an output prefix were given,
    creates a ``ps.qc_fastq`` object from one or two read files, and stores
    ``mean_read_len`` and ``read_num`` in ``<prefix>.fastq_stats.json``.

    Args:
        args: parsed command-line namespace providing ``r1``, ``r2`` and
            ``prefix``.
    """
    if not args.r1:
        ps.log("Please provide at least one fastq file with -1...Exiting")
        quit()
    ps.filecheck(args.r1)
    if args.r2:
        ps.filecheck(args.r2)
    if not args.prefix:
        ps.log("Please provide a file output prefix...Exiting")
        quit()

    # Pass the second read file only when one was supplied.
    reads = (args.r1, args.r2) if args.r2 else (args.r1,)
    fastqqc = ps.qc_fastq(args.prefix, *reads)

    stats = {}
    stats["mean_read_len"] = fastqqc.mean_read_len
    stats["read_num"] = fastqqc.read_num

    stats_json = "%s.fastq_stats.json" % args.prefix
    # Fixed: write through a context manager so the file is always closed.
    with open(stats_json, "w") as stats_handle:
        json.dump(stats, stats_handle)
def main(args):
    """Prepare per-phenotype GEMMA inputs and launch the association runs.

    Sample IDs come from the BCF; phenotype values come from ``args.csv``
    (rows matched on the ``id`` column). For every phenotype named in
    ``args.phenos`` a ``<pheno>.pheno`` file is written with one value per
    BCF sample, and a line of chained ``gemma`` commands is appended to a
    commands file. The commands file is run through ``parallel`` with
    ``args.threads`` jobs unless ``args.preprocess`` is set.

    Args:
        args: parsed command-line namespace providing ``bcf``, ``csv``,
            ``phenos``, ``preprocess`` and ``threads``.
    """
    bcf = ps.bcf(args.bcf)
    bcf.get_mean_genotype()
    bcf.get_genesum()
    geno_file = bcf.prefix + ".geno"
    genesum_file = bcf.prefix + ".genesum"

    # Phenotype values per BCF sample; CSV rows for unknown samples are dropped.
    meta = {s: {} for s in bcf.samples}
    with open(args.csv) as csv_handle:
        reader = csv.DictReader(csv_handle)
        # Fixed: the original tested `pheno not in row` using the variable
        # leaked from this loop, which is undefined when the CSV has no data
        # rows. The header columns carry the same information.
        csv_columns = set(reader.fieldnames or [])
        for row in reader:
            if row["id"] not in meta:
                continue
            for pheno, value in row.items():
                if pheno == "id":
                    continue
                meta[row["id"]][pheno] = value

    with open(args.phenos) as phenos_handle:
        phenos = [line.rstrip() for line in phenos_handle]

    # Kinship matrix, SNP-level LMM, then LMM on the gene-sum genotypes.
    cmd_template = (
        "gemma -p %s -g %s -gk 1 -o %s -maf 0.00005 -miss 0.99 && "
        "gemma -lmm 1 -p %s -g %s -k output/%s.cXX.txt -o %s -maf 0.00005 -miss 0.99 && "
        "gemma -lmm 1 -p %s -g %s -k output/%s.cXX.txt -o %s.genesum -notsnp\n"
    )

    cmd_file = ps.get_random_file()
    with open(cmd_file, "w") as cmd_handle:
        for pheno in phenos:
            pheno_file = "%s.pheno" % pheno
            if pheno not in csv_columns:
                # NOTE(review): the positional True is presumably the "exit"
                # flag of ps.log -- confirm against pathogenseq.
                ps.log("%s not in CSV file" % pheno, True)
            # Missing values (sample not in CSV, or a short CSV row) become
            # NA instead of crashing the join on a None entry.
            values = []
            for s in bcf.samples:
                value = meta[s].get(pheno)
                values.append("NA" if value is None else value)
            with open(pheno_file, "w") as pheno_handle:
                pheno_handle.write("\n".join(values))
            cmd_handle.write(cmd_template % (
                pheno_file, geno_file, pheno,
                pheno_file, geno_file, pheno, pheno,
                pheno_file, genesum_file, pheno, pheno,
            ))

    if args.preprocess:
        ps.log("Preprocessing finished\n", True)
    else:
        ps.run_cmd("cat %s | parallel -j %s" % (cmd_file, args.threads))
def main(args):
    """Merge data for a single sample or for every sample listed in a file.

    Exactly one of ``args.sample`` / ``args.samples_file`` must be given,
    and ``args.reference`` is required when ``args.cram`` is set. Each
    sample name is passed to ``merge_sample`` together with the cram flag
    and the reference.

    Args:
        args: parsed command-line namespace providing ``sample``,
            ``samples_file``, ``cram`` and ``reference``.
    """
    # NOTE(review): ext=True presumably makes ps.log exit -- confirm.
    if not args.samples_file and not args.sample:
        ps.log("Provide either --sample or --samples_file... Exiting", ext=True)
    if args.samples_file and args.sample:
        ps.log(
            "Provide either --sample or --samples_file but not both... Exiting",
            ext=True)
    if args.cram and not args.reference:
        ps.log("Provide reference for cram compression... Exiting", ext=True)

    if args.sample:
        merge_sample(args.sample, args.cram, args.reference)
        return

    # One sample name per line; the context manager closes the file, and
    # blank lines are skipped rather than merged as an empty sample name.
    with open(args.samples_file) as samples_handle:
        for line in samples_handle:
            sample = line.rstrip()
            if not sample:
                continue
            merge_sample(sample, args.cram, args.reference)
#! /usr/bin/env python
# Command-line wrapper: builds a pathogenseq bam object from the given
# reference/BAM/prefix/threads and calls its sambamba_depth method with
# "<prefix>.sambamba.depth" as the output file name.
import sys

import pathogenseq as ps

# Exactly four positional arguments are expected after the script name.
if not len(sys.argv) == 5:
    # NOTE(review): the positional True is presumably ps.log's "exit" flag.
    ps.log("sambamba_depth.py <ref> <bam> <prefix> <threads>", True)

ref_file = sys.argv[1]
bam_file = sys.argv[2]
prefix = sys.argv[3]
threads = sys.argv[4]  # passed through as a string, exactly as received

out_file = prefix + ".sambamba.depth"

bam = ps.bam(bam_file, prefix, ref_file, threads=threads)
bam.sambamba_depth(out_file)