def align_misasm_broken(out_prefix):
    """Align the misassembly-broken contigs against the reference.

    Changes into ``ctg_alignments`` (relative to the current working
    directory), runs the aligner there and writes
    ``contigs_brk_against_ref.paf`` plus a matching ``.log`` file. The
    aligner is not run when the PAF file already exists. The caller's
    working directory is restored on exit, including when the command
    fails.

    Besides its argument, this function reads ``minimap_path``, ``t`` and
    ``reference_file`` from the enclosing scope; they are not parameters.

    Args:
        out_prefix: Prefix of the query file; ``<out_prefix>.misasm.break.fa``
            is handed to the aligner.
    """
    current_path = os.getcwd()
    os.chdir('ctg_alignments')
    try:
        ctgs_file = out_prefix + ".misasm.break.fa"
        cmd = '{} -k19 -w19 -t{} ../../{} {} ' \
              '> contigs_brk_against_ref.paf 2> contigs_brk_against_ref.paf.log'.format(
                  minimap_path, t, reference_file, ctgs_file)

        # An existing PAF is treated as a finished run and reused.
        if not os.path.isfile('contigs_brk_against_ref.paf'):
            run(cmd)
    finally:
        # Always hand the original working directory back to the caller,
        # even if building or running the command raised.
        os.chdir(current_path)
def align_pms(m_path, num_threads, in_reference_file):
    """Align ``../ragoo.fasta`` against the reference and write SAM output.

    Creates ``pm_alignments`` under the current working directory if needed,
    changes into it and runs the aligner with ``-ax asm5 --cs``, producing
    ``pm_against_ref.sam`` and ``pm_contigs_against_ref.sam.log``. The
    aligner is not run when the SAM file already exists. The caller's
    working directory is restored on exit, including when the command fails.

    NOTE(review): this file contains a second ``align_pms`` definition
    (taking an extra ``args`` parameter) further down; if both live at module
    level, the later one replaces this one.

    Args:
        m_path: Aligner executable to invoke.
        num_threads: Value passed to the aligner's ``-t`` option.
        in_reference_file: Reference path; it is prefixed with ``../../`` in
            the command line.
    """
    current_path = os.getcwd()
    output_path = current_path + '/pm_alignments'
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    os.chdir('pm_alignments')
    try:
        cmd = '{} -ax asm5 --cs -t{} ../../{} {} ' \
              '> pm_against_ref.sam 2> pm_contigs_against_ref.sam.log'.format(
                  m_path, num_threads, in_reference_file, '../ragoo.fasta')

        # An existing SAM is treated as a finished run and reused.
        if not os.path.isfile('pm_against_ref.sam'):
            run(cmd)
    finally:
        # Restore the caller's working directory even if the command raised.
        os.chdir(current_path)
def align_breaks(break_type, m_path, in_reference_file, in_contigs_file, in_num_threads):
    """Align chimera-broken contigs against the reference.

    Changes into ``chimera_break`` (relative to the current working
    directory) and runs the aligner, writing
    ``<kind>_contigs_against_ref.paf`` and a matching ``.log`` file, where
    ``<kind>`` is ``inter`` when ``break_type == 'inter'`` and ``intra`` for
    any other value. The aligner is not run when the PAF file already exists.
    The caller's working directory is restored on exit, including when the
    command fails.

    Args:
        break_type: ``'inter'`` selects the ``inter_`` output names; every
            other value selects the ``intra_`` output names.
        m_path: Aligner executable to invoke.
        in_reference_file: Reference path; it is prefixed with ``../../`` in
            the command line.
        in_contigs_file: Query contigs path, used as given.
        in_num_threads: Value passed to the aligner's ``-t`` option.
    """
    # Both break types run the same command; only the output name differs.
    kind = 'inter' if break_type == 'inter' else 'intra'
    out_paf = kind + '_contigs_against_ref.paf'

    current_path = os.getcwd()
    os.chdir('chimera_break')
    try:
        cmd = '{} -k19 -w19 -t{} ../../{} {} > {out} 2> {out}.log'.format(
            m_path, in_num_threads, in_reference_file, in_contigs_file, out=out_paf)

        # An existing PAF is treated as a finished run and reused.
        if not os.path.isfile(out_paf):
            run(cmd)
    finally:
        # Restore the caller's working directory even if the command raised.
        os.chdir(current_path)
def align_pms(m_path, num_threads, in_reference_file, args):
    """Align ``ragoo.fasta`` against the reference and write SAM output.

    Creates ``pm_alignments`` under the current working directory if needed,
    changes into it and runs the aligner with ``-ax asm5 --cs``, producing
    ``pm_against_ref.sam`` and ``pm_contigs_against_ref.sam.log``. The
    aligner is not run when the SAM file already exists. The caller's
    working directory is restored on exit, including when the command fails.

    NOTE(review): an earlier three-parameter ``align_pms`` also exists in
    this file; if both are at module level, this later definition is the one
    bound to the name.

    Args:
        m_path: Aligner executable to invoke.
        num_threads: Value passed to the aligner's ``-t`` option.
        in_reference_file: Reference path. It is converted with
            ``os.path.relpath`` *after* entering ``pm_alignments``, so a
            relative value is interpreted against that directory.
            NOTE(review): presumably callers pass an absolute path - confirm.
        args: Object whose ``I`` attribute is passed to the aligner's ``-I``
            option.

    Raises:
        AssertionError: If ``ragoo.fasta`` is missing from the current
            working directory (not checked when Python runs with ``-O``).
    """
    current_path = os.getcwd()
    output_path = os.path.join(current_path, 'pm_alignments')
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Pin the query to an absolute path before leaving the directory that
    # contains it.
    assert os.path.exists("ragoo.fasta")
    query = os.path.abspath("ragoo.fasta")

    os.chdir('pm_alignments')
    try:
        # relpath() is evaluated here, inside pm_alignments, so both paths
        # in the command are relative to the directory the aligner runs in.
        cmd = '{} -ax asm5 --cs -t{} -I {} {} {} ' \
              '> pm_against_ref.sam 2> pm_contigs_against_ref.sam.log'.format(
                  m_path, num_threads, args.I,
                  os.path.relpath(in_reference_file), os.path.relpath(query))

        # An existing SAM is treated as a finished run and reused.
        if not os.path.isfile('pm_against_ref.sam'):
            run(cmd)
    finally:
        # Restore the caller's working directory even if the command raised.
        os.chdir(current_path)
def get_SVs(sv_min, sv_max, in_ref_file):
    """Run the SV-calling commands inside ``pm_alignments`` and merge results.

    Steps, each skipped when its output file already exists:

    1. ``sam2delta.py`` on ``pm_against_ref.sam``.
    2. ``Assemblytics_uniq_anchor.py`` on the resulting delta file.
    3. ``Assemblytics_between_alignments.pl`` -> ``variants_between_alignments.bed``.
    4. ``Assemblytics_within_alignment.py`` -> ``variants_within_alignments.bed``.

    The two BED files are then concatenated under a header line into
    ``assemblytics_out.Assemblytics_structural_variants.bed`` and
    ``filter_gap_SVs.py`` is run (this last step always runs). The caller's
    working directory is restored on exit, including when a step fails.

    Args:
        sv_min: Minimum size, formatted with ``%r`` into steps 3 and 4.
        sv_max: Maximum size, formatted with ``%r`` into step 3.
        in_ref_file: Argument passed to ``filter_gap_SVs.py``.
    """
    current_path = os.getcwd()
    os.chdir('pm_alignments')
    try:
        # Change this when setup.py is ready. Just call script directly
        cmd = 'sam2delta.py pm_against_ref.sam'
        if not os.path.isfile('pm_against_ref.sam.delta'):
            run(cmd)

        cmd_2 = 'Assemblytics_uniq_anchor.py --delta pm_against_ref.sam.delta --unique-length 10000 --out assemblytics_out --keep-small-uniques'
        if not os.path.isfile('assemblytics_out.Assemblytics.unique_length_filtered_l10000.delta'):
            run(cmd_2)

        cmd_3 = 'Assemblytics_between_alignments.pl assemblytics_out.coords.tab %r %r all-chromosomes exclude-longrange bed > assemblytics_out.variants_between_alignments.bed' % (
            sv_min, sv_max)
        if not os.path.isfile('assemblytics_out.variants_between_alignments.bed'):
            run(cmd_3)

        cmd_4 = 'Assemblytics_within_alignment.py --delta assemblytics_out.Assemblytics.unique_length_filtered_l10000.delta --min %r > assemblytics_out.variants_within_alignments.bed' % (
            sv_min)
        if not os.path.isfile('assemblytics_out.variants_within_alignments.bed'):
            run(cmd_4)

        header = "reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod\n"

        with open('assemblytics_out.variants_between_alignments.bed', 'r') as f1:
            b1 = f1.read()

        with open('assemblytics_out.variants_within_alignments.bed', 'r') as f2:
            b2 = f2.read()

        with open('assemblytics_out.Assemblytics_structural_variants.bed', 'w') as f:
            f.write(header)
            f.write(b1)
            # Without a trailing newline on the first file, its last record
            # would be glued to the first record of the second file.
            if b1 and not b1.endswith('\n'):
                f.write('\n')
            f.write(b2)

        # Filter out SVs caused by gaps
        cmd_5 = 'filter_gap_SVs.py %s' % (in_ref_file)
        run(cmd_5)
    finally:
        # Restore the caller's working directory even if a step raised.
        os.chdir(current_path)
def align_reads(m_path, num_threads, in_ctg_file, reads, tech='ont'):
    """Align reads against the contigs and write ``reads_against_ctg.paf``.

    Creates ``ctg_alignments`` under the current working directory if needed,
    changes into it and runs the aligner with the preset selected by
    ``tech``. The aligner is not run when the PAF file already exists. The
    caller's working directory is restored on exit, including when the
    command fails.

    The read type is validated before anything is created or the working
    directory is changed, so a rejected call has no side effects.

    Args:
        m_path: Aligner executable to invoke.
        num_threads: Value passed to the aligner's ``-t`` option.
        in_ctg_file: Contigs path; it is prefixed with ``../../`` in the
            command line.
        reads: Reads path; it is prefixed with ``../../`` in the command line.
        tech: ``'sr'`` (aligner preset ``sr``) or ``'corr'`` (aligner preset
            ``asm10``). Note that the default, ``'ont'``, is *not* an
            accepted value and raises ``ValueError``.

    Raises:
        ValueError: If ``tech`` is neither ``'sr'`` nor ``'corr'``.
    """
    # Map the accepted read types to the aligner preset passed via -x.
    presets = {'sr': 'sr', 'corr': 'asm10'}
    if tech not in presets:
        raise ValueError("Only 'sr' or 'corr' are accepted for read type.")

    current_path = os.getcwd()
    output_path = current_path + '/ctg_alignments'
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    os.chdir('ctg_alignments')
    try:
        cmd = '{} -x {} -t{} ../../{} ../../{} ' \
              '> reads_against_ctg.paf 2> reads_against_ctg.paf.log'.format(
                  m_path, presets[tech], num_threads, in_ctg_file, reads)

        # An existing PAF is treated as a finished run and reused.
        if not os.path.isfile('reads_against_ctg.paf'):
            run(cmd)
    finally:
        # Restore the caller's working directory even if the command raised.
        os.chdir(current_path)
with open(skip_file) as f: for line in f: skip_ctg.append(line.rstrip()) current_path = os.getcwd() output_path = current_path + '/ragoo_output' if not os.path.exists(output_path): os.makedirs(output_path) os.chdir(output_path) # Run minimap2 cmd = '{} -k19 -w19 -t{} ../{} ../{} ' \ '> contigs_against_ref.paf 2> contigs_against_ref.paf.log'.format(minimap_path, t, reference_file, contigs_file) if not os.path.isfile('contigs_against_ref.paf'): run(cmd) # Read in the minimap2 alignments just generated log('-- Reading alignments') alns = read_paf_alignments('contigs_against_ref.paf') alns = clean_alignments(alns, l=1000, in_exclude_file=exclude_file) # Process the gff file if gff_file: log('-- Getting gff features') features = defaultdict(list) z = GFFReader('../' + gff_file) for i in z.parse_gff(): features[i.seqname].append(i) # Break chimeras if desired