Example #1
0
def preprocess_reads(args):
    """
    Preprocesses fastq files by removing UMIs from reads and appending
    them to the read names.
    """

    config = None
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        # Check whether the paths listed under [PATHS] in the config exist
        config_validation(conf_paths=dict(config.items('PATHS')))

    # Fall back to config-provided defaults when CLI args are missing
    if config:
        default_prepfile = config['PATHS']['prep_file']
        default_output = config['PATHS']['output_path']
    else:
        default_prepfile = default_output = None

    prepfile = handle_arg(args.prepfile, default_prepfile,
                          'ERR: No prepfile provided in args or config.')
    output_path = handle_arg(args.output_path, default_output,
                             'ERR: No output path provided in args or config.')

    # Check whether directories/files named on the command line exist
    arg_exists(sys.argv)

    reheader_fastqs(r1_file=args.read1,
                    r2_file=args.read2,
                    r3_file=args.read3,
                    output_path=output_path,
                    prepname=args.prepname,
                    prepfile=prepfile)
Example #2
0
def run_scripts(args):
    """
    Sets up the run's output directory tree, submits UMI/consensus jobs,
    polls until they finish, and merges the resulting data files.
    """
    import time  # local import: file-level import block not visible here

    bamfile = args.bam_file
    bedfile = args.bed_file
    # Renamed from `dir`/`id` to avoid shadowing builtins
    out_path = args.output_path
    run_id = str(args.run_id)
    output_dir = out_path + run_id + "/"

    if args.config:
        config_path = args.config
        config = configparser.ConfigParser()
        config.read(args.config)
        config_validation(conf_paths=dict(
            config.items('PATHS')))  ##Check whether PATHS in config file exist
    else:
        config_path = None
        config = None

    arg_exists(sys.argv)  ##Check whether args directories/files exist

    # Make directories (exist_ok replaces the racy check-then-create pattern)
    for subdir in ("umifiles", "consfiles", "vcffiles"):
        os.makedirs(output_dir + subdir, exist_ok=True)

    debarcer_path = os.getcwd() + "/"

    # Read bedfile
    with open(bedfile) as bed:
        lines = [line.split() for line in bed]
    index = find_pos(lines)

    # Create and run scripts for all subprocesses
    submit_jobs(bamfile, bedfile, output_dir, config_path, index,
                debarcer_path)

    # Check UMI job status before merging files.  Sleep between polls so the
    # wait loop doesn't spin at 100% CPU.
    print("Checking UMI job status...")
    while not check_job_status(output_dir,
                               flag='umi',
                               file='temp_umi_jobs.txt'):
        time.sleep(10)

    print("Merging UMI datafiles...")
    merge_umi_datafiles(output_dir, run_id)

    print("Checking CONS job status...")
    while not check_job_status(output_dir,
                               flag='cons',
                               file='temp_cons_jobs.txt'):
        time.sleep(10)

    print("Merging cons...")
    concat_cons(output_dir, config, run_id)
    print("Finished. Output written to: " + output_dir)
Example #3
0
def collapse(args):
    """Base collapses from given BAM and umi family file."""

    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        config_validation(conf_paths=dict(
            config.items('PATHS')))  ##Check whether PATHS in config file exist
    else:
        config = None

    region = args.region

    if any(item not in region for item in ["chr", ":", "-"]):
        # NOTE: the sys.exit(1) that used to follow this raise was
        # unreachable and has been removed.
        raise ValueError(
            'ERR: Incorrect region string (should look like chr1:1200000-1250000).'
        )

    contig = region.split(":")[0]
    region_start = int(region.split(":")[1].split("-")[0])
    region_end = int(region.split(":")[1].split("-")[1])

    bam_file = handle_arg(args.bam_file,
                          config['PATHS']['bam_file'] if config else None,
                          'ERR: No BAM file provided in args or config.')
    output_path = handle_arg(
        args.output_path, config['PATHS']['output_path'] if config else None,
        'ERR: No output path provided in args or config.')

    arg_exists(sys.argv)  ##Check whether args directories/files exist

    # Fix: umi_file was previously left unbound (NameError) when neither
    # args.umi_file nor a config was supplied; default to None instead.
    umi_file = None
    if args.umi_file:
        umi_file = args.umi_file
    elif config and 'umi_file' in config['PATHS']:
        umi_file = config['PATHS']['umi_file']

    if umi_file:
        try:
            # `with` ensures the handle is closed (was leaked before)
            with open(umi_file, "rb") as umi_fh:
                umi_table = pickle.load(umi_fh)
        except IOError:
            print("ERR: Unable to load .umis file.", file=sys.stderr)
            sys.exit(1)
    else:
        umi_table = None

    print(timestamp() + "Generating consensus...")

    generate_consensus_output(contig=contig,
                              region_start=region_start,
                              region_end=region_end,
                              bam_file=bam_file,
                              umi_table=umi_table,
                              output_path=output_path,
                              config=config)

    print(timestamp() +
          "Consensus generated. Consensus file written to {}.".format(
              output_path))
Example #4
0
def call_variants(args):
    """Generates VCF files from given cons file."""

    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        config_validation(conf_paths=dict(
            config.items('PATHS')))  ##Check whether PATHS in config file exist
    else:
        config = None

    cons_file = args.cons_file
    f_sizes = args.f_sizes.split(',')

    output_path = handle_arg(
        args.output_path, config['PATHS']['output_path'] if config else None,
        'No output path provided in args or config.')

    # Fix: run_id was used below but never defined (guaranteed NameError).
    # Presumably it comes from the CLI args as in run_scripts — TODO confirm;
    # getattr keeps this backward-compatible if the arg is absent.
    run_id = getattr(args, 'run_id', None)

    region = args.region
    arg_exists(sys.argv)  ##Check whether args directories/files exist

    cons_is_merged = check_consfile(cons_file)

    if cons_is_merged:
        # Merged cons files encode the region as "<start>_<end>" (strings)
        region_start = region.split("_")[0]
        region_end = region.split("_")[1]
    else:
        if any(x not in region for x in ["chr", ":", "-"]):
            # NOTE: the sys.exit(1) that used to follow this raise was
            # unreachable and has been removed.
            raise ValueError(
                'Incorrect region string (should look like chr1:1200000-1250000).'
            )

        contig = region.split(":")[0]
        region_start = int(region.split(":")[1].split("-")[0])
        region_end = int(region.split(":")[1].split("-")[1])

    print(timestamp() + "Generating VCFs...")

    get_vcf_output(cons_file=cons_file,
                   region_start=region_start,
                   region_end=region_end,
                   output_path=output_path,
                   config=config,
                   run_id=run_id)

    print(timestamp() +
          "VCFs generated. VCF files written to {}.".format(output_path))
Example #5
0
def group_umis(args):
    """Groups and error-corrects UMIs into families."""

    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        config_validation(conf_paths=dict(
            config.items('PATHS')))  ##Check whether PATHS in config file exist
    else:
        config = None

    region = args.region
    if any(item not in region for item in ["chr", ":", "-"]):
        # NOTE: the sys.exit(1) that used to follow this raise was
        # unreachable and has been removed.
        raise ValueError(
            'ERR: Incorrect region string (should look like chr1:1200000-1250000).'
        )

    contig = region.split(":")[0]
    region_start = int(region.split(":")[1].split("-")[0])
    region_end = int(region.split(":")[1].split("-")[1])

    bam_file = handle_arg(args.bam_file,
                          config['PATHS']['bam_file'] if config else None,
                          'ERR: No BAM file provided in args or config.')
    output_path = handle_arg(
        args.output_path, config['PATHS']['output_path'] if config else None,
        'ERR: No output path provided in args or config.')

    arg_exists(sys.argv)  ##Check whether args directories/files exist

    print(timestamp() + "Grouping UMIs...")

    ## Generate an error-corrected list of UMI families
    umi_families, umi_groups = get_umi_families(contig=contig,
                                                region_start=region_start,
                                                region_end=region_end,
                                                bam_file=bam_file,
                                                config=config)

    total_parent_umi_count, total_child_umi_count, num_of_children, freq_of_parent_umis = umi_datafile(
        umi_groups)

    filename = "{}/datafile_{}.csv".format(output_path, region)
    headers = [
        'CHR', 'START', 'END', 'PTU', 'CTU', 'CHILD_NUMS', 'FREQ_PARENTS'
    ]
    csv.register_dialect('myDialect', delimiter='\t', quoting=csv.QUOTE_NONE)
    csvrow = {
        'CHR': contig,
        'START': str(region_start),
        'END': str(region_end),
        'PTU': str(total_parent_umi_count),
        'CTU': str(total_child_umi_count),
        'CHILD_NUMS': num_of_children,
        'FREQ_PARENTS': freq_of_parent_umis
    }

    # `with` ensures the datafile handle is closed/flushed (it was opened
    # via a `file` variable — shadowing the builtin — and never closed)
    with open(filename, "w") as datafile:
        writer = csv.DictWriter(datafile,
                                dialect='myDialect',
                                fieldnames=headers)
        writer.writeheader()
        writer.writerow(csvrow)

    umi_file = "{}/{}.umis".format(output_path, region)
    with open(umi_file, "wb") as umi_out:
        pickle.dump(umi_families, umi_out)

    print(timestamp() +
          "UMI grouping complete. Output written to {}.".format(output_path))