Beispiel #1
0
        # Load the GFF annotation and group its records by sequence name.
        log('-- Getting gff features')
        # Maps a record's seqname to the list of records that carry it.
        features = defaultdict(list)
        # NOTE(review): the path is resolved against the parent directory;
        # presumably the working directory was changed earlier -- confirm.
        z = GFFReader('../' + gff_file)
        for i in z.parse_gff():
            features[i.seqname].append(i)

    # Break chimeras if desired: everything in this branch runs only when
    # break_chimeras is truthy.
    if break_chimeras:
        # Record how many contigs are broken (both counters start at zero).
        total_inter_broken = 0
        total_intra_broken = 0

        # Re-filter the alignments and rebind `alns` to the result. The
        # filter settings are hard-coded at this call site.
        # NOTE(review): `l=10000` looks like a length cutoff -- confirm
        # against clean_alignments.
        alns = clean_alignments(alns, l=10000, in_exclude_file=exclude_file, uniq_anchor_filter=True)
        # Process contigs
        log('-- Getting contigs')
        # NOTE(review): the path is resolved against the parent directory;
        # presumably the working directory was changed earlier -- confirm.
        contigs_dict = read_contigs('../' + contigs_file)

        log('-- Finding interchromosomally chimeric contigs')
        # Keyed by the keys of `alns`; stores the get_ref_parts() result for
        # every key where that result has more than one element.
        all_chimeras = dict()
        for i in alns.keys():
            ref_parts = get_ref_parts(alns[i], min_len, min_break_pct, min_range)
            # A single part (or none) means the entry is not recorded.
            if len(ref_parts) > 1:
                all_chimeras[i] = ref_parts

        log('-- Finding break points and breaking interchromosomally chimeric contigs')
        # Keyed by the keys of `all_chimeras`; stores whatever
        # cluster_contig_alns() returns for that key.
        break_intervals = dict()
        for i in all_chimeras.keys():
            break_intervals[i] = cluster_contig_alns(i, alns, all_chimeras[i], min_len)

            # If its just going to break it into the same thing, skip it.
            if len(break_intervals[i]) <= 1:
Beispiel #2
0
        # Append every record yielded by parse_gff() to the list stored under
        # its seqname.
        # NOTE(review): `features` and `z` are set up before this point --
        # presumably a defaultdict(list) and a GFF reader; confirm.
        for i in z.parse_gff():
            features[i.seqname].append(i)

    # Break chimeras if desired: everything in this branch runs only when
    # break_chimeras is truthy.
    if break_chimeras:
        # Record how many contigs are broken (both counters start at zero).
        total_inter_broken = 0
        total_intra_broken = 0

        # Re-filter the alignments and rebind `alns` to the result. The
        # filter settings are hard-coded at this call site.
        # NOTE(review): `l=10000` looks like a length cutoff -- confirm
        # against clean_alignments.
        alns = clean_alignments(alns,
                                l=10000,
                                in_exclude_file=exclude_file,
                                uniq_anchor_filter=True)
        # Process contigs
        log('Getting contigs')
        # contigs_file is passed to read_contigs() unchanged.
        contigs_dict = read_contigs(contigs_file)

        log('Finding interchromosomally chimeric contigs')
        # Keyed by the keys of `alns`; stores the get_ref_parts() result for
        # every key where that result has more than one element.
        all_chimeras = dict()
        for i in alns.keys():
            ref_parts = get_ref_parts(alns[i], min_len, min_break_pct,
                                      min_range)
            # A single part (or none) means the entry is not recorded.
            if len(ref_parts) > 1:
                all_chimeras[i] = ref_parts

        log('Finding break points and breaking interchromosomally chimeric contigs'
            )
        # Keyed by the keys of `all_chimeras`; stores whatever
        # cluster_contig_alns() returns for that key.
        break_intervals = dict()
        for i in all_chimeras.keys():
            break_intervals[i] = cluster_contig_alns(i, alns, all_chimeras[i],
                                                     min_len)