Esempio n. 1
0
def main(args):
    """Prepare per-phenotype GEMMA inputs and run (or only stage) the GEMMA jobs.

    Args:
        args: Parsed command-line namespace. Reads ``bcf`` (passed to
            ``ps.bcf``), ``csv`` (metadata table that must contain an ``id``
            column), ``phenos`` (text file with one phenotype name per line),
            ``preprocess`` (truthy: stop after writing the input files) and
            ``threads`` (job count handed to ``parallel -j``).

    For every phenotype a ``<pheno>.pheno`` file is written with one value per
    BCF sample, in ``bcf.samples`` order; samples with no value get ``NA``.
    One chained GEMMA command line per phenotype is appended to a command file
    obtained from ``ps.get_random_file()``.

    Raises:
        KeyError: If a CSV data row has no ``id`` column.
    """
    bcf = ps.bcf(args.bcf)
    bcf.get_mean_genotype()
    bcf.get_genesum()
    geno_file = bcf.prefix + ".geno"
    genesum_file = bcf.prefix + ".genesum"

    # Phenotype values keyed by sample; CSV rows whose id is not a BCF sample
    # are ignored.
    meta = {s: {} for s in bcf.samples}
    with open(args.csv) as csv_handle:
        reader = csv.DictReader(csv_handle)
        # Take the column names from the header rather than from the last row
        # read, so the check below also works for a CSV without data rows.
        csv_columns = reader.fieldnames or []
        for row in reader:
            sample_id = row["id"]
            if sample_id not in meta:
                continue
            for column, value in row.items():
                if column != "id":
                    meta[sample_id][column] = value

    with open(args.phenos) as phenos_handle:
        phenos = [line.rstrip() for line in phenos_handle]

    # Three chained GEMMA calls per phenotype: the first uses -gk, the other
    # two use -lmm on the genotype file and on the genesum file (-notsnp).
    gemma_template = (
        "gemma -p %(pheno_file)s -g %(geno)s -gk 1 -o %(pheno)s -maf 0.00005 -miss 0.99"
        " && gemma  -lmm 1 -p %(pheno_file)s -g %(geno)s  -k output/%(pheno)s.cXX.txt  -o %(pheno)s -maf 0.00005 -miss 0.99"
        " && gemma  -lmm 1 -p %(pheno_file)s -g %(genesum)s  -k output/%(pheno)s.cXX.txt  -o %(pheno)s.genesum -notsnp\n"
    )

    cmd_file = ps.get_random_file()
    with open(cmd_file, "w") as cmds:
        for pheno in phenos:
            pheno_file = "%s.pheno" % pheno
            if pheno not in csv_columns:
                # NOTE(review): the second argument presumably makes ps.log
                # terminate the program - confirm against ps.log.
                ps.log("%s not in CSV file" % pheno, True)
            with open(pheno_file, "w") as pheno_out:
                pheno_out.write(
                    "\n".join(meta[s].get(pheno, "NA") for s in bcf.samples)
                )
            cmds.write(gemma_template % {
                "pheno_file": pheno_file,
                "geno": geno_file,
                "genesum": genesum_file,
                "pheno": pheno,
            })

    if args.preprocess:
        ps.log("Preprocessing finished\n", True)
    else:
        ps.run_cmd("cat %s | parallel -j %s" % (cmd_file, args.threads))
Esempio n. 2
0
def main(args):
    """Validate the arguments, then run the Illumina step on a ``ps.fastq`` object.

    Args:
        args: Parsed command-line namespace. Reads ``prefix`` and ``ref``
            (both required), ``r1``, ``r2``, ``threads`` and ``mapper``.
    """
    if not args.prefix:
        ps.log("Please specify prefix with -p")
        quit(1)
    if not args.ref:
        # The flag used to be written `ext=T`; `T` is not a Python name, so
        # this path raised NameError instead of reporting the missing
        # reference.
        ps.log("Please use --ref to provide a reference... Exiting", ext=True)
    x = ps.fastq(args.prefix, args.ref, args.r1, args.r2, threads=args.threads)
    x.illumina(mapper=args.mapper)
Esempio n. 3
0
def main(args):
    """Check the required options and launch the Illumina step via ``ps.fastq``.

    Args:
        args: Parsed command-line namespace. ``prefix`` and ``ref`` are
            required; ``r1``, ``r2``, ``threads`` and ``mapper`` are forwarded
            unchanged.
    """
    if not args.prefix:
        ps.log("Please specify prefix with -p")
        quit(1)
    if not args.ref:
        # `ext=T` referenced an undefined name and raised NameError before the
        # message could be logged; pass the boolean literal instead.
        ps.log("Please use --ref to provide a reference... Exiting", ext=True)
    x = ps.fastq(args.prefix, args.ref, args.r1, args.r2, threads=args.threads)
    x.illumina(mapper=args.mapper)
Esempio n. 4
0
def main(args):
    """Run BAM QC: coverage plots, coverage JSON, mapping stats and region coverage.

    Args:
        args: Parsed command-line namespace. ``bam``, ``ref`` and ``prefix``
            are required. ``gff`` (together with ``gffkey``) and ``bed`` are
            optional and mutually exclusive.

    Output names are all derived from ``args.prefix``:
    ``<prefix>.<seq>.cov.png`` (one per key of ``fasta.fa_dict``, passed to
    ``bamqc.plot_cov``), ``<prefix>.cov.json`` (passed to ``bamqc.save_cov``),
    ``<prefix>.bam_stats.json`` and ``<prefix>.regions.cov.json``. The region
    file holds ``{}`` when neither a GFF nor a BED file is given.

    NOTE(review): validation failures call ``quit()`` with no status, so the
    process exits with status 0 - confirm whether a non-zero status is wanted.
    """
    # --- argument validation (quit() raises SystemExit, so no else needed) ---
    if not args.bam:
        ps.log("Please provide bam file")
        quit()
    ps.filecheck(args.bam)

    if not args.ref:
        ps.log("Please provide reference")
        quit()
    ps.filecheck(args.ref)
    fasta = ps.fasta(args.ref)

    if not args.prefix:
        ps.log("Please provide prefix")
        quit()
    if args.gff and args.bed:
        ps.log(
            "Please provide either a GFF file or BED file but not both...Exiting!"
        )
        quit()
    if args.gff:
        if not args.gffkey:
            ps.log(
                "Please provide the key to look for in the GFF file...Exiting!"
            )
            quit()
        ps.filecheck(args.gff)
    if args.bed:
        ps.filecheck(args.bed)

    cov_json = "%s.cov.json" % args.prefix
    stats_json = "%s.bam_stats.json" % args.prefix
    region_json = "%s.regions.cov.json" % args.prefix

    # --- QC ---
    bamqc = ps.qc_bam(args.bam, args.ref)
    for s in fasta.fa_dict:
        cov_plot = "%s.%s.cov.png" % (args.prefix, s)
        bamqc.plot_cov(s, cov_plot)
    bamqc.save_cov(cov_json)

    stats = {
        "pct_reads_mapped": bamqc.pct_reads_mapped,
        "med_dp": bamqc.med_dp,
    }

    region_cov = {}
    if args.gff:
        region_cov = bamqc.gff_cov(args.gff, args.gffkey)
    elif args.bed:
        region_cov = bamqc.bed_cov(args.bed)

    # Context managers guarantee both JSON files are flushed and closed.
    with open(stats_json, "w") as stats_out:
        json.dump(stats, stats_out)
    with open(region_json, "w") as region_out:
        json.dump(region_cov, region_out)
Esempio n. 5
0
def main(args):
    """Validate inputs, then produce BAM coverage plots and QC JSON reports.

    Args:
        args: Parsed command-line namespace. Requires ``bam``, ``ref`` and
            ``prefix``; accepts either ``gff`` plus ``gffkey`` or ``bed``,
            never both.

    Writes ``<prefix>.bam_stats.json`` (``pct_reads_mapped`` and ``med_dp``
    taken from the ``ps.qc_bam`` object) and ``<prefix>.regions.cov.json``
    (result of ``gff_cov``/``bed_cov``, or ``{}`` when no region file is
    given). ``<prefix>.cov.json`` and the per-sequence
    ``<prefix>.<seq>.cov.png`` names are handed to ``save_cov``/``plot_cov``.

    NOTE(review): ``quit()`` is called without a status on validation errors,
    which yields exit status 0 - confirm this is intended.
    """
    # quit() raises SystemExit, so each check can be a plain guard clause.
    if not args.bam:
        ps.log("Please provide bam file")
        quit()
    ps.filecheck(args.bam)

    if not args.ref:
        ps.log("Please provide reference")
        quit()
    ps.filecheck(args.ref)
    fasta = ps.fasta(args.ref)

    if not args.prefix:
        ps.log("Please provide prefix")
        quit()
    if args.gff and args.bed:
        ps.log("Please provide either a GFF file or BED file but not both...Exiting!")
        quit()
    if args.gff:
        if not args.gffkey:
            ps.log("Please provide the key to look for in the GFF file...Exiting!")
            quit()
        ps.filecheck(args.gff)
    if args.bed:
        ps.filecheck(args.bed)

    cov_json = "%s.cov.json" % args.prefix
    stats_json = "%s.bam_stats.json" % args.prefix
    region_json = "%s.regions.cov.json" % args.prefix

    bamqc = ps.qc_bam(args.bam, args.ref)
    for s in fasta.fa_dict:
        cov_plot = "%s.%s.cov.png" % (args.prefix, s)
        bamqc.plot_cov(s, cov_plot)
    bamqc.save_cov(cov_json)

    stats = {
        "pct_reads_mapped": bamqc.pct_reads_mapped,
        "med_dp": bamqc.med_dp,
    }

    region_cov = {}
    if args.gff:
        region_cov = bamqc.gff_cov(args.gff, args.gffkey)
    elif args.bed:
        region_cov = bamqc.bed_cov(args.bed)

    # Close the report files deterministically instead of leaving the handles
    # to the garbage collector.
    with open(stats_json, "w") as stats_out:
        json.dump(stats, stats_out)
    with open(region_json, "w") as region_out:
        json.dump(region_cov, region_out)
Esempio n. 6
0
def main(args):
    """Collect fastq QC statistics and write them to ``<prefix>.fastq_stats.json``.

    Args:
        args: Parsed command-line namespace. ``r1`` and ``prefix`` are
            required; ``r2`` is optional and, when given, is passed to
            ``ps.qc_fastq`` as a third argument.

    The JSON file holds ``mean_read_len`` and ``read_num`` as reported by the
    ``ps.qc_fastq`` object.

    NOTE(review): ``quit()`` without a status exits with status 0 even on the
    validation errors - confirm this is intended.
    """
    if not args.r1:
        ps.log("Please provide at least one fastq file with -1...Exiting")
        quit()
    ps.filecheck(args.r1)
    if args.r2:
        ps.filecheck(args.r2)
    if not args.prefix:
        ps.log("Please provide a file output prefix...Exiting")
        quit()

    if args.r2:
        fastqqc = ps.qc_fastq(args.prefix, args.r1, args.r2)
    else:
        fastqqc = ps.qc_fastq(args.prefix, args.r1)

    stats = {
        "mean_read_len": fastqqc.mean_read_len,
        "read_num": fastqqc.read_num,
    }

    stats_json = "%s.fastq_stats.json" % args.prefix
    # Use a context manager so the report is flushed and the handle closed.
    with open(stats_json, "w") as stats_out:
        json.dump(stats, stats_out)
Esempio n. 7
0
def main(args):
    """Run fastq QC for one or two read files and save the summary as JSON.

    Args:
        args: Parsed command-line namespace. Reads ``r1`` (required), ``r2``
            (optional second read file) and ``prefix`` (required; used for the
            ``<prefix>.fastq_stats.json`` output name and passed to
            ``ps.qc_fastq``).

    NOTE(review): the validation failures exit through ``quit()`` with no
    status, i.e. exit status 0 - confirm whether that is wanted.
    """
    if not args.r1:
        ps.log("Please provide at least one fastq file with -1...Exiting")
        quit()
    ps.filecheck(args.r1)
    if args.r2:
        ps.filecheck(args.r2)
    if not args.prefix:
        ps.log("Please provide a file output prefix...Exiting")
        quit()

    # Only forward r2 when it was supplied, as the original call did.
    if args.r2:
        fastqqc = ps.qc_fastq(args.prefix, args.r1, args.r2)
    else:
        fastqqc = ps.qc_fastq(args.prefix, args.r1)

    stats = {
        "mean_read_len": fastqqc.mean_read_len,
        "read_num": fastqqc.read_num,
    }

    stats_json = "%s.fastq_stats.json" % args.prefix
    # The handle was previously opened inline and never closed.
    with open(stats_json, "w") as stats_out:
        json.dump(stats, stats_out)
Esempio n. 8
0
def main(args):
    """Stage GEMMA phenotype files and commands, then optionally run them.

    Args:
        args: Parsed command-line namespace. Reads ``bcf`` (passed to
            ``ps.bcf``), ``csv`` (metadata table with an ``id`` column),
            ``phenos`` (file listing one phenotype name per line),
            ``preprocess`` (truthy: do not launch the jobs) and ``threads``
            (passed to ``parallel -j``).

    For each phenotype, ``<pheno>.pheno`` is written with one line per entry
    of ``bcf.samples`` (``NA`` where the CSV has no value for that sample) and
    one chained GEMMA command line is added to the file returned by
    ``ps.get_random_file()``.

    Raises:
        KeyError: If a CSV data row has no ``id`` column.
    """
    bcf = ps.bcf(args.bcf)
    bcf.get_mean_genotype()
    bcf.get_genesum()
    geno_file = bcf.prefix + ".geno"
    genesum_file = bcf.prefix + ".genesum"

    # Only samples present in the BCF are tracked; other CSV rows are skipped.
    meta = {s: {} for s in bcf.samples}
    with open(args.csv) as csv_handle:
        reader = csv.DictReader(csv_handle)
        # The phenotype check below used to inspect `row`, the loop variable
        # left over from this loop: undefined for a CSV with no data rows.
        # The header is the authoritative list of columns.
        csv_columns = reader.fieldnames or []
        for row in reader:
            sample_id = row["id"]
            if sample_id not in meta:
                continue
            for column, value in row.items():
                if column != "id":
                    meta[sample_id][column] = value

    with open(args.phenos) as phenos_handle:
        phenos = [line.rstrip() for line in phenos_handle]

    # Named placeholders keep the three chained GEMMA calls readable; the
    # rendered command line is unchanged.
    gemma_template = (
        "gemma -p %(pheno_file)s -g %(geno)s -gk 1 -o %(pheno)s -maf 0.00005 -miss 0.99"
        " && gemma  -lmm 1 -p %(pheno_file)s -g %(geno)s  -k output/%(pheno)s.cXX.txt  -o %(pheno)s -maf 0.00005 -miss 0.99"
        " && gemma  -lmm 1 -p %(pheno_file)s -g %(genesum)s  -k output/%(pheno)s.cXX.txt  -o %(pheno)s.genesum -notsnp\n"
    )

    cmd_file = ps.get_random_file()
    with open(cmd_file, "w") as cmds:
        for pheno in phenos:
            pheno_file = "%s.pheno" % pheno
            if pheno not in csv_columns:
                # NOTE(review): passing True presumably makes ps.log exit -
                # confirm against ps.log.
                ps.log("%s not in CSV file" % pheno, True)
            with open(pheno_file, "w") as pheno_out:
                pheno_out.write(
                    "\n".join(meta[s].get(pheno, "NA") for s in bcf.samples)
                )
            cmds.write(gemma_template % {
                "pheno_file": pheno_file,
                "geno": geno_file,
                "genesum": genesum_file,
                "pheno": pheno,
            })

    if args.preprocess:
        ps.log("Preprocessing finished\n", True)
    else:
        ps.run_cmd("cat %s | parallel -j %s" % (cmd_file, args.threads))
Esempio n. 9
0
def main(args):
    """Merge a single sample, or every sample listed in a file.

    Args:
        args: Parsed command-line namespace. Exactly one of ``sample`` and
            ``samples_file`` (one sample name per line) must be set.
            ``cram`` and ``reference`` are forwarded to ``merge_sample``;
            ``reference`` is required when ``cram`` is set.
    """
    if not args.samples_file and not args.sample:
        ps.log("Provide either --sample or --samples_file... Exiting",
               ext=True)
    if args.samples_file and args.sample:
        ps.log(
            "Provide either --sample or --samples_file but not both... Exiting",
            ext=True)
    if args.cram and not args.reference:
        ps.log("Provide reference for cram compression... Exiting", ext=True)

    if args.sample:
        merge_sample(args.sample, args.cram, args.reference)
    else:
        # Close the sample list once iteration finishes (or fails) instead of
        # leaving the handle open.
        with open(args.samples_file) as samples_handle:
            for line in samples_handle:
                merge_sample(line.rstrip(), args.cram, args.reference)
Esempio n. 10
0
#! /usr/bin/env python
import pathogenseq as ps
import sys

# Usage: sambamba_depth.py <ref> <bam> <prefix> <threads>
# NOTE(review): the second argument to ps.log presumably ends the program on a
# wrong argument count - confirm against ps.log.
if len(sys.argv) != 5:
    ps.log("sambamba_depth.py <ref> <bam> <prefix> <threads>", True)

# Positional arguments; threads is forwarded as the raw command-line string.
ref_file, bam_file, prefix, threads = (
    sys.argv[1],
    sys.argv[2],
    sys.argv[3],
    sys.argv[4],
)

# Output file name handed to sambamba_depth below.
out_file = "%s.sambamba.depth" % prefix

bam = ps.bam(bam_file, prefix, ref_file, threads=threads)
bam.sambamba_depth(out_file)