Exemple #1
0
def main():
    """
    Collapse beta file[s] to blocks binary file[s], of the same beta format.

    Each input beta file is handed to `collapse_process` in a pool of
    `args.threads` worker processes. Unless `args.force` is set, the list
    of inputs is first narrowed by `filter_existing_files`.
    """

    args = parse_args()
    files = args.input_files
    validate_file_list(files, '.beta')
    validate_out_dir(args.out_dir)

    if not args.force:
        files = filter_existing_files(files, args.out_dir, args.lbeta)

    # load blocks:
    df = load_blocks_file(args.blocks_file)
    is_nice, msg = is_block_file_nice(df)
    if not is_nice:
        # a blocks file that is not "nice" is only reported here;
        # the flag itself is forwarded to every worker
        b2b_log(msg)

    params = [(b, df, is_nice, args.lbeta, args.out_dir, args.bedGraph)
              for b in files]
    # The context manager terminates the pool if a worker raises, so no
    # worker processes are left behind; on success the pool is still
    # closed and joined gracefully, as before.
    with Pool(args.threads) as p:
        p.starmap(collapse_process, params)
        p.close()
        p.join()
Exemple #2
0
def main():
    """
    Generate homog files. Given a blocks file and pat[s],
    count the number of U,X,M reads for each block for each file

    Raises IllegalArgumentError if nr_bits, rlen, the thresholds string
    or the blocks file is invalid.
    """

    args = parse_args()
    if args.nr_bits not in (8, 16):
        raise IllegalArgumentError('nr_bits must be in {8, 16}')
    if args.rlen < 3:
        raise IllegalArgumentError('rlen must be >= 3')
    if args.thresholds is not None:
        # thresholds is a "low,high" string with 0 < low < high < 1
        th = args.thresholds.split(',')
        if len(th) != 2:
            raise IllegalArgumentError('Invalid thresholds')
        try:
            low, high = float(th[0]), float(th[1])
        except ValueError as err:
            # non-numeric input is reported like any other bad argument,
            # instead of leaking a raw ValueError
            raise IllegalArgumentError('Invalid thresholds') from err
        if not (1 > high > low > 0):
            raise IllegalArgumentError('Invalid thresholds')
    # make sure homog tool is valid:
    validate_local_exe(homog_tool)

    pats = args.input_files
    validate_file_list(pats, '.pat.gz')

    outdir, prefix = parse_outdir_prefix(args)

    # load blocks:
    blocks_df = load_blocks_file(args.blocks_file)
    is_nice, msg = is_block_file_nice(blocks_df)
    if not is_nice:
        homog_log(msg)
        raise IllegalArgumentError(f'Invalid blocks file: {args.blocks_file}')

    # pat files are processed one by one, in sorted order
    for pat in sorted(pats):
        homog_process(pat, blocks_df, args, outdir, prefix)
Exemple #3
0
def main():
    """
    Entry point: plot the histogram of read lengths (in sites) of pat
    file[s]. The histogram values are output to stdout if requested.
    """
    cli_args = parse_args()
    pat_paths = cli_args.pat_paths
    # every input path must carry the '.pat.gz' suffix
    validate_file_list(pat_paths, '.pat.gz')
    multi_FragLen(cli_args)
Exemple #4
0
def main():
    """
    Entry point: convert bed[.gz] file[s] to beta file[s].
    Each bed file should be tab-separated, with the columns:
    chr    start    end    #meth    #total
    """
    cli_args = parse_args()
    bed_files = cli_args.bed_paths
    # no suffix is enforced on the bed paths
    validate_file_list(bed_files)
    bed2betas(cli_args)
Exemple #5
0
def main():
    """
    Entry point: mix samples from K different pat files into a single
    mixed pat.gz[.csi] file - sorted, bgzipped and indexed -
    with an informative name.
    """
    cli_args = parse_args()
    pat_files = cli_args.pat_files
    # NOTE(review): the suffix is 'pat.gz', without the leading dot that
    # other validate_file_list calls for pat files use ('.pat.gz') -
    # confirm this is intended. The third argument (2) is presumably the
    # minimal number of files - confirm against validate_file_list.
    validate_file_list(pat_files, 'pat.gz', 2)
    mult_mix(cli_args)
Exemple #6
0
def main():
    """
    Entry point: compare pairs of beta files, plotting a 2d histogram
    for every pair.
    Sites with low coverage (< cov_thresh argument) are dropped,
    for performance and robustness.
    """
    cli_args = parse_args()
    beta_files = cli_args.betas
    # a comparison needs '.beta' files, and at least two of them
    validate_file_list(beta_files, '.beta', min_len=2)
    compare_all_paires(cli_args)
Exemple #7
0
def main(args):
    """
    Print the genomic region built from `args`, then, for every input
    pat file, print its file name followed by its PatVis results.
    """
    validate_file_list(args.input_files, '.pat.gz')

    # duplicated inputs are dropped; the original order is kept
    unique_pats = drop_dup_keep_order(args.input_files)

    region = GenomicRegion(args)
    print(region)
    for path in unique_pats:
        # file name: first element of splitextgz applied to the basename
        sample_name = splitextgz(op.basename(path))[0]
        print(sample_name)
        PatVis(args, path).print_results()
Exemple #8
0
def main():
    """
    Entry point: convert beta file[s] to Illumina-450K format.
    Output: a csv file with ~480K rows, for the ~480K Illumina sites,
            and with columns corresponding to the beta files.
            all values are in range [0, 1], or NA.
            Only works for hg19.
    """
    cli_args = parse_args()
    beta_files = cli_args.input_files
    # every input path must carry the '.beta' suffix
    validate_file_list(beta_files, '.beta')
    betas2csv(cli_args)
Exemple #9
0
    def validate_args(self):
        """
        Validate the parameters stored on this object.

        Checks the integer bounds, the [0.0, 1.0] fractions, the
        only_hyper / only_hypo exclusivity, the required input files
        (their paths are replaced by absolute paths) and the beta files,
        which come either from `betas` or from `beta_list_file` (one path
        per line; when used, `betas` is overwritten with its content).

        Raises IllegalArgumentError on the first invalid parameter.
        """

        # validate integers
        if self.min_cpg < 0:
            raise IllegalArgumentError('min_cpg must be non negative')
        if self.max_cpg < 1:
            raise IllegalArgumentError('max_cpg must larger than 0')
        if self.min_bp < 0:
            raise IllegalArgumentError('min_bp must be non negative')
        if self.max_bp < 2:
            raise IllegalArgumentError('max_bp must larger than 1')
        if self.chunk_size < 1:
            # the check rejects values below 1, so the bound reported is 0
            raise IllegalArgumentError('chunk_size must larger than 0')

        # validate the [0.0, 1.0] fractions
        for key in ('na_rate_tg', 'na_rate_bg', 'delta', 'tg_quant',
                    'bg_quant', 'unmeth_thresh', 'meth_thresh',
                    'unmeth_mean_thresh', 'meth_mean_thresh'):
            # bind the value before formatting it: the message must not
            # reference names that are undefined at this point
            val = getattr(self, key)
            if not (1.0 >= val >= 0):
                eprint(
                    f'Invalid value for {key} ({val}): must be in [0.0, 1.0]'
                )
                raise IllegalArgumentError()

        # validate hyper hypo:
        if self.only_hyper and self.only_hypo:
            eprint('at most one of (only_hyper, only_hypo) can be specified')
            raise IllegalArgumentError()

        # validate input files
        for key in ('blocks_path', 'groups_file'):
            val = getattr(self, key)
            if val is None:
                eprint(f'[wt fm] missing required parameter: {key}')
                raise IllegalArgumentError()
            validate_single_file(val)
            # change path to absolute path
            setattr(self, key, op.abspath(val))

        # validate betas: exactly one of the two sources must be given
        if (self.betas is None) == (self.beta_list_file is None):
            eprint(
                '[wt fm] Exactly one of the following must be specified: betas, beta_list_file'
            )
            raise IllegalArgumentError()

        # same `is None` test as the exclusivity check above, so an empty
        # string is validated as a path rather than silently ignored
        if self.beta_list_file is not None:
            validate_single_file(self.beta_list_file)
            with open(self.beta_list_file, 'r') as f:
                # skip blank lines, so e.g. a trailing empty line does not
                # turn into an empty path
                self.betas = [line.strip() for line in f if line.strip()]
        validate_file_list(self.betas)
Exemple #10
0
def main():
    """
    Entry point: convert beta file[s] to bigwig file[s].
    Assumes bedGraphToBigWig is installed and in PATH; when the
    check_executable test fails, nothing is converted.
    """
    cli_args = parse_args()
    validate_file_list(cli_args.beta_paths, '.beta')
    if check_executable('bedGraphToBigWig', verbose=True):
        converter = BetaToBigWig(cli_args)
        # one conversion per input beta file, in the given order
        for beta_path in cli_args.beta_paths:
            converter.run_beta_to_bw(beta_path)
Exemple #11
0
def groups_load_wrap(groups_file, betas):
    """
    Build the groups table for the given beta files.

    :param groups_file: path of a groups file, or None
    :param betas: list of beta files
    :return: the groups table, with a 'full_path' column added by
             match_prefix_to_bin. When groups_file is None, a dummy table
             is generated, in which every file is its own group.
    """
    if groups_file is None:
        # no groups file: generate a dummy one for all the input files.
        # duplicated files are dropped first (on a copy of the list),
        # while keeping the original order
        betas = drop_dup_keep_order(betas.copy())
        names = [op.splitext(op.basename(path))[0] for path in betas]
        gf = pd.DataFrame(data=names, columns=['fname'])
        gf['group'] = gf['fname']
    else:
        validate_single_file(groups_file)
        validate_file_list(betas)
        gf = load_gfile_helper(groups_file)
    gf['full_path'] = match_prefix_to_bin(gf['fname'], betas, '.beta')
    return gf
Exemple #12
0
def merge_betas(betas, opath):
    """
    Merge all betas by summing their values element-wise, while keeping the dimensions
    :param betas: list of beta files
    :param opath: merged beta file
    :return: the merged data, as written to opath
    """
    validate_file_list(betas, force_suff='.beta')
    # Accumulate in a wide integer type. `np.int` was only an alias of the
    # builtin `int`; it was deprecated in NumPy 1.20 and removed in 1.24,
    # so the builtin is used directly.
    data = load_beta_data(betas[0]).astype(int)
    for b in betas[1:]:
        data += load_beta_data(b)

    # Trim / normalize to range [0, 256)
    data = trim_to_uint8(data)
    # Dump
    data.tofile(opath)
    return data
Exemple #13
0
def parse_betas_input(args):
    """
    parse user input to get the list of beta files to segment
    Either args.betas is a list of beta files,
    or args.beta_file is a text file in which each line is a beta file
    (blank lines and lines starting with '#' are ignored)
    return: list of beta files
    raise: IllegalArgumentError if no beta files were specified or found
    """
    if args.betas:
        betas = args.betas
    elif args.beta_file:
        validate_single_file(args.beta_file)
        with open(args.beta_file, 'r') as f:
            # strip first, so indented comment lines are skipped as well
            stripped = (line.strip() for line in f)
            betas = [b for b in stripped if b and not b.startswith('#')]
        if not betas:
            raise IllegalArgumentError(
                f'no beta files found in file {args.beta_file}')
    else:
        # without this branch `betas` is unbound below, and the user gets
        # an UnboundLocalError instead of a meaningful message
        raise IllegalArgumentError(
            'no beta files specified: expected betas or beta_file')
    validate_file_list(betas)
    return betas
Exemple #14
0
 def __init__(self, pats, outpath, labels, args):
     """
     Store the parameters on the instance; the pat files are validated
     to carry the '.pat.gz' suffix.
     """
     self.args, self.pats = args, pats
     # validate before the remaining attributes are set
     validate_file_list(self.pats, force_suff='.pat.gz')
     self.outpath, self.labels = outpath, labels
Exemple #15
0
def main(args):
    """
    Validate the input files and run BetaVis on `args`.
    A BrokenPipeError raised while running it is delegated to
    catch_BrokenPipeError.
    """
    input_files = args.input_files
    # no suffix is enforced here ('.beta' is deliberately not passed)
    validate_file_list(input_files)
    try:
        BetaVis(args)
    except BrokenPipeError:
        catch_BrokenPipeError()
Exemple #16
0
def load_group_file(groups_file, betas):
    """
    Load the groups file and attach to it the paths of the beta files.

    :param groups_file: path of the groups file
    :param betas: list of beta files
    :return: the table loaded by load_gfile_helper, with a 'full_path'
             column computed by match_prefix_to_bin from its 'fname' column
    """
    # both inputs are validated before anything is loaded
    validate_single_file(groups_file)
    validate_file_list(betas)

    groups_df = load_gfile_helper(groups_file)
    fnames = groups_df['fname']
    groups_df['full_path'] = match_prefix_to_bin(fnames, betas, '.beta')
    return groups_df