def preprocess_reads(args):
    """Preprocesses fastq files by removing UMIs from reads and appending them to the read names."""
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check that the PATHS in the config file exist
        config_validation(conf_paths=dict(config.items('PATHS')))
    else:
        config = None

    prepfile = handle_arg(args.prepfile,
                          config['PATHS']['prep_file'] if config else None,
                          'ERR: No prepfile provided in args or config.')
    output_path = handle_arg(args.output_path,
                             config['PATHS']['output_path'] if config else None,
                             'ERR: No output path provided in args or config.')

    # Check that the directories/files passed as arguments exist
    arg_exists(sys.argv)

    reheader_fastqs(r1_file=args.read1,
                    r2_file=args.read2,
                    r3_file=args.read3,
                    output_path=output_path,
                    prepname=args.prepname,
                    prepfile=prepfile)
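
# handle_arg() is defined elsewhere in the package. Below is a minimal sketch
# of the fallback semantics the calls above rely on (CLI value first, then the
# config value, otherwise exit with the given message); the implementation and
# the name handle_arg_sketch are assumptions for illustration, not the actual helper.
def handle_arg_sketch(arg_value, config_value, error_message):
    """Return arg_value if set, else config_value, else print the error and exit."""
    if arg_value is not None:
        return arg_value
    if config_value is not None:
        return config_value
    print(error_message, file=sys.stderr)
    sys.exit(1)
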
def run_scripts(args):
    """Creates and submits per-region subprocess jobs, then merges their output."""
    bamfile = args.bam_file
    bedfile = args.bed_file
    run_id = str(args.run_id)
    output_dir = os.path.join(args.output_path, run_id) + "/"

    if args.config:
        config_path = args.config
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check that the PATHS in the config file exist
        config_validation(conf_paths=dict(config.items('PATHS')))
    else:
        config_path = None
        config = None

    # Check that the directories/files passed as arguments exist
    arg_exists(sys.argv)

    # Make output subdirectories (exist_ok avoids a check-then-create race)
    for subdir in ("umifiles", "consfiles", "vcffiles"):
        os.makedirs(output_dir + subdir, exist_ok=True)

    debarcer_path = os.getcwd() + "/"

    # Read the bed file into a list of token lists
    with open(bedfile) as bed:
        lines = [line.split() for line in bed]
    index = find_pos(lines)

    # Create and run scripts for all subprocesses
    submit_jobs(bamfile, bedfile, output_dir, config_path, index, debarcer_path)

    # Wait for UMI jobs to finish before merging files
    print("Checking UMI job status...")
    umi_job_flag = False
    while not umi_job_flag:
        umi_job_flag = check_job_status(output_dir, flag='umi', file='temp_umi_jobs.txt')

    print("Merging UMI datafiles...")
    merge_umi_datafiles(output_dir, run_id)

    # Wait for consensus jobs to finish before merging cons files
    print("Checking CONS job status...")
    cons_job_flag = False
    while not cons_job_flag:
        cons_job_flag = check_job_status(output_dir, flag='cons', file='temp_cons_jobs.txt')

    print("Merging cons...")
    concat_cons(output_dir, config, run_id)

    print("Finished. Output written to: " + output_dir)
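
# find_pos() and submit_jobs() are package helpers. The bed file is assumed to
# be plain whitespace-separated BED with at least chrom/start/end columns, e.g.:
#
#   chr1    1200000    1250000
#   chr2    500000     550000
#
# so each entry of `lines` above is a token list such as ['chr1', '1200000', '1250000'].
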
def collapse(args):
    """Base collapses from given BAM and umi family file."""
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check that the PATHS in the config file exist
        config_validation(conf_paths=dict(config.items('PATHS')))
    else:
        config = None

    region = args.region
    if any(item not in region for item in ["chr", ":", "-"]):
        raise ValueError('ERR: Incorrect region string (should look like chr1:1200000-1250000).')

    contig = region.split(":")[0]
    region_start = int(region.split(":")[1].split("-")[0])
    region_end = int(region.split(":")[1].split("-")[1])

    bam_file = handle_arg(args.bam_file,
                          config['PATHS']['bam_file'] if config else None,
                          'ERR: No BAM file provided in args or config.')
    output_path = handle_arg(args.output_path,
                             config['PATHS']['output_path'] if config else None,
                             'ERR: No output path provided in args or config.')

    # Check that the directories/files passed as arguments exist
    arg_exists(sys.argv)

    if args.umi_file:
        umi_file = args.umi_file
    elif config:
        umi_file = config['PATHS'].get('umi_file')
    else:
        umi_file = None

    if umi_file:
        try:
            with open(umi_file, "rb") as umi_fh:
                umi_table = pickle.load(umi_fh)
        except IOError:
            print("ERR: Unable to load .umis file.", file=sys.stderr)
            sys.exit(1)
    else:
        umi_table = None

    print(timestamp() + "Generating consensus...")
    generate_consensus_output(contig=contig,
                              region_start=region_start,
                              region_end=region_end,
                              bam_file=bam_file,
                              umi_table=umi_table,
                              output_path=output_path,
                              config=config)
    print(timestamp() + "Consensus generated. Consensus file written to {}.".format(output_path))
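
# The substring check above only confirms that "chr", ":" and "-" occur
# somewhere in the region string, so a malformed value like "foo-chr:" would
# pass. A stricter regex-based parser is sketched below; it is an illustrative
# alternative (the name parse_region_sketch is hypothetical), not part of this module.
import re

def parse_region_sketch(region):
    """Parse 'chr1:1200000-1250000' into (contig, start, end)."""
    match = re.fullmatch(r'(chr\w+):(\d+)-(\d+)', region)
    if match is None:
        raise ValueError('ERR: Incorrect region string (should look like chr1:1200000-1250000).')
    contig, start, end = match.group(1), int(match.group(2)), int(match.group(3))
    if start >= end:
        raise ValueError('ERR: Region start must be smaller than region end.')
    return contig, start, end
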
def call_variants(args):
    """Generates VCF files from given cons file."""
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check that the PATHS in the config file exist
        config_validation(conf_paths=dict(config.items('PATHS')))
    else:
        config = None

    cons_file = args.cons_file
    f_sizes = args.f_sizes.split(',')
    output_path = handle_arg(args.output_path,
                             config['PATHS']['output_path'] if config else None,
                             'ERR: No output path provided in args or config.')
    region = args.region
    # Assumption: the run identifier comes from the parsed arguments
    # (the original code referenced run_id without assigning it).
    run_id = str(args.run_id)

    # Check that the directories/files passed as arguments exist
    arg_exists(sys.argv)

    cons_is_merged = check_consfile(cons_file)
    if cons_is_merged:
        region_start = region.split("_")[0]
        region_end = region.split("_")[1]
    else:
        if any(x not in region for x in ["chr", ":", "-"]):
            raise ValueError('ERR: Incorrect region string (should look like chr1:1200000-1250000).')
        contig = region.split(":")[0]
        region_start = int(region.split(":")[1].split("-")[0])
        region_end = int(region.split(":")[1].split("-")[1])

    print(timestamp() + "Generating VCFs...")
    get_vcf_output(cons_file=cons_file,
                   region_start=region_start,
                   region_end=region_end,
                   output_path=output_path,
                   config=config,
                   run_id=run_id)
    print(timestamp() + "VCFs generated. VCF files written to {}.".format(output_path))
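
# check_consfile() is a package helper. The branch above implies two region
# encodings: a merged cons file carries an underscore-separated region string
# whose first two fields are the start and end (kept as strings here), while
# an unmerged file uses the usual colon/dash form. For example:
#
#   "1200000_1250000"       -> region_start = "1200000", region_end = "1250000"
#   "chr1:1200000-1250000"  -> contig = "chr1", start = 1200000, end = 1250000
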
def group_umis(args):
    """Groups and error-corrects UMIs into families."""
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check that the PATHS in the config file exist
        config_validation(conf_paths=dict(config.items('PATHS')))
    else:
        config = None

    region = args.region
    if any(item not in region for item in ["chr", ":", "-"]):
        raise ValueError('ERR: Incorrect region string (should look like chr1:1200000-1250000).')

    contig = region.split(":")[0]
    region_start = int(region.split(":")[1].split("-")[0])
    region_end = int(region.split(":")[1].split("-")[1])

    bam_file = handle_arg(args.bam_file,
                          config['PATHS']['bam_file'] if config else None,
                          'ERR: No BAM file provided in args or config.')
    output_path = handle_arg(args.output_path,
                             config['PATHS']['output_path'] if config else None,
                             'ERR: No output path provided in args or config.')

    # Check that the directories/files passed as arguments exist
    arg_exists(sys.argv)

    print(timestamp() + "Grouping UMIs...")

    # Generate an error-corrected list of UMI families
    umi_families, umi_groups = get_umi_families(contig=contig,
                                                region_start=region_start,
                                                region_end=region_end,
                                                bam_file=bam_file,
                                                config=config)

    total_parent_umi_count, total_child_umi_count, num_of_children, freq_of_parent_umis = umi_datafile(umi_groups)

    # Write one tab-separated summary row for the region
    filename = "{}/datafile_{}.csv".format(output_path, region)
    headers = ['CHR', 'START', 'END', 'PTU', 'CTU', 'CHILD_NUMS', 'FREQ_PARENTS']
    csv.register_dialect('myDialect', delimiter='\t', quoting=csv.QUOTE_NONE)
    csvrow = {'CHR': contig,
              'START': str(region_start),
              'END': str(region_end),
              'PTU': str(total_parent_umi_count),
              'CTU': str(total_child_umi_count),
              'CHILD_NUMS': num_of_children,
              'FREQ_PARENTS': freq_of_parent_umis}

    with open(filename, "w") as datafile:
        writer = csv.DictWriter(datafile, dialect='myDialect', fieldnames=headers)
        writer.writeheader()
        writer.writerow(csvrow)

    # Pickle the UMI families for downstream collapsing
    umi_file = "{}/{}.umis".format(output_path, region)
    with open(umi_file, "wb") as umi_fh:
        pickle.dump(umi_families, umi_fh)

    print(timestamp() + "UMI grouping complete. Output written to {}.".format(output_path))
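
# The .umis file written above is a plain pickle of the umi_families mapping.
# A minimal sketch of reading it back (e.g. before running collapse());
# load_umi_table_sketch is a hypothetical helper, shown only for illustration:
def load_umi_table_sketch(umi_path):
    """Read a .umis pickle back into the umi_families mapping."""
    with open(umi_path, "rb") as umi_fh:
        return pickle.load(umi_fh)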