Esempio n. 1
0
def parsingCLIP(args):
    """Parse the CLIP (BG) BAM files and apply the optional masking steps.

    The reads are loaded through ``LoadReads.load_data`` (with
    ``CLIP_exp=True``) into ``args.out_file``. That file is then reopened
    through ``LoadReads.get_data_handle`` in write mode so the masking
    helpers can work on it.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``db_file``, ``clip_libs``, ``genome_dir``, ``out_file``,
            ``collapsed``, ``mask_flank_variants``, ``max_mm``,
            ``ign_out_rds``, ``rev_strand``, ``min_coverage``, ``min_peak``,
            ``mask_miRNA`` and ``mask_ovrlp``.

    Returns:
        None.
    """
    annotation_db = gffutils.FeatureDB(args.db_file, keep_order=True)

    # Keyword arguments for the loader; coverage/peak thresholds are cast to
    # int before being handed over.
    load_options = {
        'bam_files': args.clip_libs,
        'genome_dir': args.genome_dir,
        'gene_annotation': annotation_db,
        'out_file': args.out_file,
        'Collapse': args.collapsed,
        'mask_flank_variants': args.mask_flank_variants,
        'max_mm': args.max_mm,
        'ign_out_rds': args.ign_out_rds,
        'rev_strand': args.rev_strand,
        'CLIP_exp': True,
        'min_coverage': int(args.min_coverage),
        'min_peak': int(args.min_peak),
    }
    LoadReads.load_data(**load_options)

    # Reopen the freshly written data file with write access for masking.
    clip_handle = LoadReads.get_data_handle(args.out_file, write=True)

    if args.mask_miRNA:
        print('Removing miRNA-coverage')
        ParsingPositions.mask_miRNA_positions(clip_handle, annotation_db)

    if args.mask_ovrlp:
        print('Masking overlapping positions')
        ParsingPositions.mask_overlapping_positions(clip_handle, annotation_db)
Esempio n. 2
0
def parsing_files(args, params):
    """Fill in the entries of ``params`` that are derived from the data files.

    Opens the CLIP and background data files named by
    ``params['dat_file_clip']`` and ``params['dat_file_bg']`` and stores in
    ``params``:

    * ``LibrarySize`` / ``BckLibrarySize`` -- results of
      ``tools.estimate_library_size`` for each data handle.
    * ``NrOfReplicates`` / ``NrOfBckReplicates`` -- number of entries under
      ``'Coverage'`` for the first gene key of the CLIP handle (the same gene
      key is used to index the background handle).
    * ``Diag_event_params['alpha'][state]`` -- for every state in
      ``range(params['NrOfStates'])``, a fresh uniform(0.9, 1.1) array of
      shape ``(alphashape, args.nr_mix_comp)``.

    Args:
        args: Parsed command-line namespace; only ``nr_mix_comp`` is read.
        params: Parameter dictionary; updated in place.

    Returns:
        The same ``params`` dictionary that was passed in.
    """
    # Open the CLIP (sequence) and background data files.
    clip_data = LoadReads.get_data_handle(params['dat_file_clip'], write=True)
    bg_data = LoadReads.get_data_handle(params['dat_file_bg'], write=True)

    # Library sizes of both data sets.
    params['LibrarySize'] = tools.estimate_library_size(clip_data)
    params['BckLibrarySize'] = tools.estimate_library_size(bg_data)

    # Replicate counts are read off the first gene of the CLIP data.
    gene = list(clip_data.keys())[0]
    params['NrOfReplicates'] = len(clip_data[gene]['Coverage'])
    params['NrOfBckReplicates'] = len(bg_data[gene]['Coverage'])

    # Row count of alpha: sum of the leading dimensions of the variant,
    # coverage and read-end tracks of replicate '0'.
    n_variant_rows = clip_data[gene]['Variants']['0']['shape'][0]
    n_coverage_rows = clip_data[gene]['Coverage']['0'][()].shape[0]
    n_read_end_rows = clip_data[gene]['Read-ends']['0'][()].shape[0]
    alphashape = n_variant_rows + n_coverage_rows + n_read_end_rows

    # Draw an independent random initialisation of alpha for every state.
    for state in range(params['NrOfStates']):
        initial_alpha = np.random.uniform(
            0.9, 1.1, size=(alphashape, args.nr_mix_comp))
        params['Diag_event_params']['alpha'][state] = initial_alpha

    return params