import argparse
import sys
from collections import defaultdict

from ddb import configuration


def process_sample(parse_functions, sample, samples, config, amplicon_list):
    """Return per-SNP data for one sample as snp -> {'freq': ..., 'depth': ...}.

    The body was elided in the original excerpt; the writer loop below relies
    on the mapping described above.
    """
    raise NotImplementedError


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-l', '--list', help="List file of SNPs to process")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Assumed: one SNP identifier per line of the list file; the original
    # excerpt used `snps` without ever defining it.
    with open(args.list) as snp_file:
        snps = [line.strip() for line in snp_file if line.strip()]

    # `parse_functions` is used but never defined in this excerpt; it is
    # presumably built elsewhere in the original script.
    sample_snp_data = defaultdict(lambda: defaultdict())  # unified name: the original mixed sample_cov_data and sample_snp_data
    for sample in samples:
        sys.stdout.write("Processing sample {}\n".format(sample))
        sample_snp_data[sample] = process_sample(parse_functions, sample, samples, config, snps)

    sys.stdout.write("Writing out data\n")
    with open("glioma_snp_data.txt", 'w') as out:  # 'w', not 'wb': text strings are written
        out.write("SNP\tChr\tPos")
        for sample in samples:
            out.write("\t{} - AAF\t{} - Depth".format(sample, sample))
        out.write("\n")
        for snp in snps:
            out.write("{}".format(snp))
            for sample in samples:
                if sample_snp_data[sample][snp]:
                    out.write("\t{}\t{}".format(sample_snp_data[sample][snp]['freq'],
                                                sample_snp_data[sample][snp]['depth']))
                else:
                    out.write("\t-\t-")
            out.write("\n")
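# Hedged sketch of the contract process_sample must satisfy, inferred from the
# writer loop above. The parsing itself is elided in the original, so the body
# below is an assumption, not the author's implementation:
#
#     def process_sample(parse_functions, sample, samples, config, amplicon_list):
#         data = dict()
#         for snp in amplicon_list:
#             # ... parse this sample's variant/coverage output via parse_functions ...
#             data[snp] = {'freq': aaf, 'depth': depth}  # hypothetical locals
#         return data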
import argparse
import getpass
import sys

import argcomplete
from cassandra.auth import PlainTextAuthProvider
from toil.job import Job

from ddb import configuration
# `pipeline` and `subsample_bam` come from the surrounding package; their
# imports were not part of this excerpt.

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-a', '--address', help="IP Address for Cassandra connection", default='127.0.0.1')
    parser.add_argument('-u', '--username', help='Cassandra username for login', default=None)
    # Assumed: the loop below reads args.number, but the original excerpt never
    # adds this argument.
    parser.add_argument('-n', '--number', help="Number of subsampling iterations per fraction")
    argcomplete.autocomplete(parser)
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

    fractions = [50, 33, 25]

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Workflow graph definition. The following workflow definition should
    # create a valid Directed Acyclic Graph (DAG).
    root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)

    if args.username:
        password = getpass.getpass()
        auth_provider = PlainTextAuthProvider(username=args.username, password=password)
    else:
        auth_provider = None

    for sample in samples:
        for fraction in fractions:
            iteration = 0
            while iteration < int(args.number):
                job = Job.wrapJobFn(subsample_bam, [args.address], "coveragestore",
                                    auth_provider, sample, samples,
                                    fraction)  # the original excerpt truncates mid-call; trailing arguments are not shown
                iteration += 1  # added: without the increment the loop never terminates
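# The excerpt ends before the subsampling jobs are attached to the DAG and the
# workflow is launched. A hedged sketch of the usual Toil pattern (an
# assumption, not the original code):
#
#     root_job.addChild(job)                 # inside the while loop above
#     Job.Runner.startToil(root_job, args)   # after all jobs are attached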
import argparse
import sys

from toil.job import Job

from ddb import configuration
# `pipeline` and `hisat` are modules from the surrounding package; their
# imports were not part of this excerpt.

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
args.logLevel = "INFO"

sys.stdout.write("Parsing configuration data\n")
config = configuration.configure_runtime(args.configuration)

sys.stdout.write("Parsing sample data\n")
samples = configuration.configure_samples(args.samples_file, config)

# Workflow graph definition. The following workflow definition should create
# a valid Directed Acyclic Graph (DAG).
root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)

transcripts_list = list()
flags = ["keep_retained", "max_intron", "stranded"]

# Per-sample jobs
for sample in samples:
    # Alignment and refinement stages
    align_job = Job.wrapJobFn(hisat.hisat_unpaired, config, sample, samples, flags,
                              cores=int(config['hisat']['num_cores']))
    # The original excerpt truncates mid-call above; any further keyword
    # arguments (e.g. memory) are not shown.
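# Minimal self-contained sketch of the wrap/attach/start pattern both Toil
# scripts above rely on. The `stage` job function is a stand-in; only the Toil
# calls (wrapJobFn, addChildJobFn, Runner.addToilOptions, Runner.startToil)
# are the library's actual API.
import argparse

from toil.job import Job


def stage(job, name):
    job.fileStore.logToMaster("running stage {}".format(name))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    Job.Runner.addToilOptions(parser)
    options = parser.parse_args()

    root = Job.wrapJobFn(stage, "root")
    root.addChildJobFn(stage, "align")    # children run once the root finishes
    Job.Runner.startToil(root, options)   # executes the DAG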
import argparse
import fnmatch
import os
import sys

from ddb import configuration

if __name__ == "__main__":
    case_type = "colorectal"  # renamed from `type`, which shadowed the builtin

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    # type_cases = defaultdict(lambda: defaultdict(list))
    # counts = defaultdict(int)

    for root, dirs, files in os.walk("."):
        # `1*_M0373?.config`: `*` matches any run of characters, `?` exactly one
        for samples_file in fnmatch.filter(files, "1*_M0373?.config"):
            sys.stderr.write("Reading file: {}\n".format(os.path.join(root, samples_file)))
            sys.stdout.write("Parsing sample data\n")
            libraries = configuration.configure_samples(os.path.join(root, samples_file), config)
            samples = configuration.merge_library_configs_samples(libraries)
            for sample in samples:
                for library in samples[sample]:
                    if samples[sample][library]['report'].startswith(case_type):
                        print("Colorectal case found: {}".format(sample))

    # sys.stderr.write("Type\tCount\n")
    # for report_type in counts:
    #     sys.stdout.write("{}\t{}\n".format(report_type, counts[report_type]))
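# Hedged sketch: the commented-out lines above suggest a per-report-type tally
# was intended. One way to wire it in (an assumption, not the original code;
# requires `from collections import defaultdict`):
#
#     counts = defaultdict(int)                          # before the walk
#     counts[samples[sample][library]['report']] += 1    # inside the inner loop
#     for report_type in counts:                         # after the walk
#         sys.stdout.write("{}\t{}\n".format(report_type, counts[report_type]))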
import argparse
import sys
from collections import defaultdict

import HTSeq

from ddb import configuration

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Input config file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-o', '--output', help="Output file name for CSV file")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.input, config)

    transcript_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for sample in samples:
        sys.stderr.write("Processing sample {}\n".format(sample))
        gtf_file = HTSeq.GFF_Reader(samples[sample]['gtf'], end_included=True)
        for feature in gtf_file:
            # sys.stderr.write("Processing entry: {}\n".format(feature))
            if feature.type == 'transcript':  # `is` compared identity, not equality
                transcript_counts[feature.attr['transcript_id']][sample]['FPKM'] = feature.attr['FPKM']
                transcript_counts[feature.attr['transcript_id']][sample]['TPM'] = feature.attr['TPM']

    with open(args.output, 'w') as output:
        output.write("Transcript")
        for sample in samples:
            # Assumed completion: the original excerpt ends inside this loop;
            # two columns per sample match the values collected above.
            output.write(",{} FPKM,{} TPM".format(sample, sample))
        output.write("\n")
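# Hedged sketch of the CSV body, inferred from the transcript_counts structure
# (transcript -> sample -> FPKM/TPM). The original excerpt ends mid-header, so
# the row layout below is an assumption:
#
#     for transcript in transcript_counts:
#         output.write("{}".format(transcript))
#         for sample in samples:
#             output.write(",{},{}".format(
#                 transcript_counts[transcript][sample]['FPKM'],
#                 transcript_counts[transcript][sample]['TPM']))
#         output.write("\n")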