# Script fragment: load GFF features, then (optionally) look for chimeric contigs.
#
# NOTE(review): the statements below have been collapsed onto ONE physical
# line. As written, everything after the first '#' is lexically a comment, so
# the original newlines/indentation must be restored before this can run.
#
# Reading the statements in order, the fragment:
#   1. Logs '-- Getting gff features', then groups every record yielded by
#      GFFReader('../' + gff_file).parse_gff() into `features`, a
#      defaultdict(list) keyed by the record's `seqname`.
#   2. If `break_chimeras` is set: zeroes the `total_inter_broken` and
#      `total_intra_broken` counters and rebinds `alns` to the result of
#      clean_alignments(alns, l=10000, in_exclude_file=exclude_file,
#      uniq_anchor_filter=True).
#   3. Reads contigs from '../' + contigs_file into `contigs_dict`.
#   4. For each key of `alns`, calls get_ref_parts(...) and records the result
#      in `all_chimeras` only when it has more than one element.
#   5. For each key of `all_chimeras`, stores cluster_contig_alns(...) in
#      `break_intervals`.
#
# NOTE(review): the trailing `if len(break_intervals[i]) <= 1:` has no visible
# body -- the fragment is cut off here; per the inline comment, the missing
# body presumably skips contigs that would not actually be split (confirm).
# NOTE(review): presumably steps 3-5 are nested under `if break_chimeras:`;
# the collapsed layout no longer shows this -- confirm against the original.
log('-- Getting gff features') features = defaultdict(list) z = GFFReader('../' + gff_file) for i in z.parse_gff(): features[i.seqname].append(i) # Break chimeras if desired if break_chimeras: # Record how many contigs are broken total_inter_broken = 0 total_intra_broken = 0 alns = clean_alignments(alns, l=10000, in_exclude_file=exclude_file, uniq_anchor_filter=True) # Process contigs log('-- Getting contigs') contigs_dict = read_contigs('../' + contigs_file) log('-- Finding interchromosomally chimeric contigs') all_chimeras = dict() for i in alns.keys(): ref_parts = get_ref_parts(alns[i], min_len, min_break_pct, min_range) if len(ref_parts) > 1: all_chimeras[i] = ref_parts log('-- Finding break points and breaking interchromosomally chimeric contigs') break_intervals = dict() for i in all_chimeras.keys(): break_intervals[i] = cluster_contig_alns(i, alns, all_chimeras[i], min_len) # If its just going to break it into the same thing, skip it. if len(break_intervals[i]) <= 1:
# Script fragment: finish loading GFF features, then (optionally) look for
# chimeric contigs.
#
# NOTE(review): the statements below have been collapsed onto ONE physical
# line. As written, everything after the first '#' is lexically a comment, so
# the original newlines/indentation must be restored before this can run.
# NOTE(review): `z` and `features` are bound before this fragment starts; the
# opening loop appends each record from z.parse_gff() to features[<seqname>].
#
# Reading the statements in order, the fragment:
#   1. If `break_chimeras` is set: zeroes the `total_inter_broken` and
#      `total_intra_broken` counters and rebinds `alns` to the result of
#      clean_alignments(alns, l=10000, in_exclude_file=exclude_file,
#      uniq_anchor_filter=True).
#   2. Logs 'Getting contigs' and reads `contigs_file` (path used as given)
#      into `contigs_dict`.
#   3. For each key of `alns`, calls get_ref_parts(...) and records the result
#      in `all_chimeras` only when it has more than one element.
#   4. For each key of `all_chimeras`, stores cluster_contig_alns(...) in
#      `break_intervals`.
#
# NOTE(review): this line repeats most of the logic of the preceding line but
# with different log text and without the '../' prefix on the contigs path --
# looks like two revisions of the same code; confirm which one is intended.
# NOTE(review): presumably steps 2-4 are nested under `if break_chimeras:` and
# the final loop body continues past this fragment -- confirm.
for i in z.parse_gff(): features[i.seqname].append(i) # Break chimeras if desired if break_chimeras: # Record how many contigs are broken total_inter_broken = 0 total_intra_broken = 0 alns = clean_alignments(alns, l=10000, in_exclude_file=exclude_file, uniq_anchor_filter=True) # Process contigs log('Getting contigs') contigs_dict = read_contigs(contigs_file) log('Finding interchromosomally chimeric contigs') all_chimeras = dict() for i in alns.keys(): ref_parts = get_ref_parts(alns[i], min_len, min_break_pct, min_range) if len(ref_parts) > 1: all_chimeras[i] = ref_parts log('Finding break points and breaking interchromosomally chimeric contigs' ) break_intervals = dict() for i in all_chimeras.keys(): break_intervals[i] = cluster_contig_alns(i, alns, all_chimeras[i], min_len)