Ejemplo n.º 1
0
def align_misasm_broken(out_prefix):
    """Align the misassembly-broken contigs against the reference.

    Changes into ``ctg_alignments`` (which must already exist under the
    current working directory), builds the aligner command line and runs it
    unless ``contigs_brk_against_ref.paf`` is already present there.  Standard
    output is redirected to ``contigs_brk_against_ref.paf`` and standard error
    to ``contigs_brk_against_ref.paf.log``.

    The aligner executable, thread count and reference path come from the
    names ``minimap_path``, ``t`` and ``reference_file``; they are not
    parameters and must be defined at module scope.  The reference is handed
    to the aligner as ``../../<reference_file>``.

    The original working directory is restored even when building or running
    the command raises.

    :param out_prefix: prefix of the contigs file; the file passed to the
        aligner is ``<out_prefix>.misasm.break.fa``.
    """
    current_path = os.getcwd()
    os.chdir('ctg_alignments')
    try:
        ctgs_file = out_prefix + ".misasm.break.fa"
        cmd = (
            '{} -k19 -w19 -t{} ../../{}  {} '
            '> contigs_brk_against_ref.paf 2> contigs_brk_against_ref.paf.log'
        ).format(minimap_path, t, reference_file, ctgs_file)

        # An existing PAF file is treated as a finished alignment and reused.
        if not os.path.isfile('contigs_brk_against_ref.paf'):
            run(cmd)
    finally:
        # Never leave the process stranded in ctg_alignments on failure.
        os.chdir(current_path)
Ejemplo n.º 2
0
def align_pms(m_path, num_threads, in_reference_file):
    """Align ``ragoo.fasta`` against the reference and write SAM output.

    Creates ``pm_alignments`` under the current working directory when it does
    not exist, changes into it, and runs the aligner unless
    ``pm_against_ref.sam`` is already present.  Standard output is redirected
    to ``pm_against_ref.sam`` and standard error to
    ``pm_contigs_against_ref.sam.log``.

    The query is always ``../ragoo.fasta`` (relative to ``pm_alignments``) and
    the reference is passed as ``../../<in_reference_file>``.

    The original working directory is restored even when the command fails.

    :param m_path: aligner executable placed first on the command line.
    :param num_threads: value appended to the ``-t`` option.
    :param in_reference_file: reference path, used as ``../../<path>``.
    """
    current_path = os.getcwd()
    output_path = current_path + '/pm_alignments'
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    os.chdir(output_path)
    try:
        cmd = (
            '{} -ax asm5 --cs -t{} ../../{} {} '
            '> pm_against_ref.sam 2> pm_contigs_against_ref.sam.log'
        ).format(m_path, num_threads, in_reference_file, '../ragoo.fasta')

        # An existing SAM file is treated as a finished alignment and reused.
        if not os.path.isfile('pm_against_ref.sam'):
            run(cmd)
    finally:
        # Never leave the process stranded in pm_alignments on failure.
        os.chdir(current_path)
Ejemplo n.º 3
0
def align_breaks(break_type, m_path, in_reference_file, in_contigs_file, in_num_threads):
    """Align contigs against the reference for a chimera-breaking pass.

    Changes into ``chimera_break`` (which must already exist under the current
    working directory) and runs the aligner unless the output PAF is already
    present.  The output is ``inter_contigs_against_ref.paf`` when
    ``break_type == 'inter'`` and ``intra_contigs_against_ref.paf`` for any
    other value; standard error goes to the same name with ``.log`` appended.

    The original working directory is restored even when the command fails.

    :param break_type: ``'inter'`` selects the ``inter_`` output prefix; every
        other value selects ``intra_``.
    :param m_path: aligner executable placed first on the command line.
    :param in_reference_file: reference path, used as ``../../<path>``.
    :param in_contigs_file: contigs path, passed to the aligner unchanged.
    :param in_num_threads: value appended to the ``-t`` option.
    """
    # Both passes share one command line; only the output prefix differs.
    prefix = 'inter' if break_type == 'inter' else 'intra'
    out_paf = prefix + '_contigs_against_ref.paf'

    current_path = os.getcwd()
    os.chdir('chimera_break')
    try:
        cmd = '{} -k19 -w19 -t{} ../../{} {} > {} 2> {}.log'.format(
            m_path, in_num_threads, in_reference_file, in_contigs_file,
            out_paf, out_paf)

        # An existing PAF file is treated as a finished alignment and reused.
        if not os.path.isfile(out_paf):
            run(cmd)
    finally:
        # Never leave the process stranded in chimera_break on failure.
        os.chdir(current_path)
Ejemplo n.º 4
0
def align_pms(m_path, num_threads, in_reference_file, args):
    """Align ``ragoo.fasta`` against the reference and write SAM output.

    Requires ``ragoo.fasta`` in the current working directory.  Creates
    ``pm_alignments`` there when it does not exist, changes into it, and runs
    the aligner unless ``pm_against_ref.sam`` is already present.  Standard
    output is redirected to ``pm_against_ref.sam`` and standard error to
    ``pm_contigs_against_ref.sam.log``.

    The original working directory is restored even when the command fails.

    :param m_path: aligner executable placed first on the command line.
    :param num_threads: value appended to the ``-t`` option.
    :param in_reference_file: reference path; it is converted with
        ``os.path.relpath`` *after* changing into ``pm_alignments``.
    :param args: object whose ``I`` attribute is passed as the ``-I`` value.
    :raises AssertionError: if ``ragoo.fasta`` is missing from the current
        working directory.
    """
    current_path = os.getcwd()
    output_path = os.path.join(current_path, 'pm_alignments')
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Raised explicitly (not via ``assert``) so the check survives ``python -O``
    # while callers still see the same exception type.
    if not os.path.exists("ragoo.fasta"):
        raise AssertionError("ragoo.fasta not found in " + current_path)
    # Resolve the query before changing directory so it stays valid afterwards.
    query = os.path.abspath("ragoo.fasta")

    os.chdir(output_path)
    try:
        # NOTE(review): a relative in_reference_file is resolved against
        # pm_alignments here, not against the caller's directory; confirm
        # callers pass an absolute path.
        cmd = (
            '{} -ax asm5 --cs -t{} -I {}  {} {} '
            '> pm_against_ref.sam 2> pm_contigs_against_ref.sam.log'
        ).format(m_path, num_threads, args.I,
                 os.path.relpath(in_reference_file),
                 os.path.relpath(query))

        # An existing SAM file is treated as a finished alignment and reused.
        if not os.path.isfile('pm_against_ref.sam'):
            run(cmd)
    finally:
        # Never leave the process stranded in pm_alignments on failure.
        os.chdir(current_path)
Ejemplo n.º 5
0
def get_SVs(sv_min, sv_max, in_ref_file):
    """Run the structural-variant calling commands inside ``pm_alignments``.

    Changes into ``pm_alignments`` (which must already exist under the current
    working directory) and runs, in order:

    1. ``sam2delta.py`` on ``pm_against_ref.sam``;
    2. ``Assemblytics_uniq_anchor.py`` on the resulting delta file;
    3. ``Assemblytics_between_alignments.pl`` with ``sv_min`` and ``sv_max``;
    4. ``Assemblytics_within_alignment.py`` with ``sv_min``.

    Each of those steps is skipped when the file it is checked against is
    already present.  The two variant BED files are then concatenated under a
    header line into ``assemblytics_out.Assemblytics_structural_variants.bed``
    and ``filter_gap_SVs.py <in_ref_file>`` is run unconditionally.

    The original working directory is restored even when a step fails.

    :param sv_min: value substituted (via ``%r``) as the minimum size in
        steps 3 and 4.
    :param sv_max: value substituted (via ``%r``) as the maximum size in
        step 3.
    :param in_ref_file: argument passed to ``filter_gap_SVs.py``.
    """
    between_bed = 'assemblytics_out.variants_between_alignments.bed'
    within_bed = 'assemblytics_out.variants_within_alignments.bed'
    filtered_delta = 'assemblytics_out.Assemblytics.unique_length_filtered_l10000.delta'

    current_path = os.getcwd()
    os.chdir('pm_alignments')
    try:
        # Change this when setup.py is ready. Just call script directly
        cmd = 'sam2delta.py pm_against_ref.sam'
        if not os.path.isfile('pm_against_ref.sam.delta'):
            run(cmd)

        cmd_2 = ('Assemblytics_uniq_anchor.py --delta pm_against_ref.sam.delta '
                 '--unique-length 10000 --out assemblytics_out --keep-small-uniques')
        if not os.path.isfile(filtered_delta):
            run(cmd_2)

        cmd_3 = ('Assemblytics_between_alignments.pl assemblytics_out.coords.tab %r %r '
                 'all-chromosomes exclude-longrange bed '
                 '> assemblytics_out.variants_between_alignments.bed') % (sv_min, sv_max)
        if not os.path.isfile(between_bed):
            run(cmd_3)

        cmd_4 = ('Assemblytics_within_alignment.py '
                 '--delta assemblytics_out.Assemblytics.unique_length_filtered_l10000.delta '
                 '--min %r > assemblytics_out.variants_within_alignments.bed') % (sv_min)
        if not os.path.isfile(within_bed):
            run(cmd_4)

        header = "reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod\n"

        with open(between_bed, 'r') as f1:
            b1 = f1.read()

        with open(within_bed, 'r') as f2:
            b2 = f2.read()

        with open('assemblytics_out.Assemblytics_structural_variants.bed',
                  'w') as f:
            f.write(header)
            for chunk in (b1, b2):
                f.write(chunk)
                # Keep records on separate lines when an input file does not
                # end with a newline; otherwise two records would be fused.
                if chunk and not chunk.endswith('\n'):
                    f.write('\n')

        # Filter out SVs caused by gaps
        cmd_5 = 'filter_gap_SVs.py %s' % (in_ref_file)
        run(cmd_5)
    finally:
        # Never leave the process stranded in pm_alignments on failure.
        os.chdir(current_path)
Ejemplo n.º 6
0
def align_reads(m_path, num_threads, in_ctg_file, reads, tech='ont'):
    """Align reads against the contigs and write PAF output.

    Creates ``ctg_alignments`` under the current working directory when it
    does not exist, changes into it, and runs the aligner unless
    ``reads_against_ctg.paf`` is already present.  Standard output is
    redirected to ``reads_against_ctg.paf`` and standard error to
    ``reads_against_ctg.paf.log``.  Both input paths are passed to the aligner
    as ``../../<path>``.

    ``tech`` is validated before anything is created or the working directory
    is changed, and the original working directory is restored even when the
    command fails.

    :param m_path: aligner executable placed first on the command line.
    :param num_threads: value appended to the ``-t`` option.
    :param in_ctg_file: contigs path, used as ``../../<path>``.
    :param reads: reads path, used as ``../../<path>``.
    :param tech: ``'sr'`` selects the ``-x sr`` preset and ``'corr'`` selects
        ``-x asm10``.  NOTE: the default ``'ont'`` is not an accepted value,
        so callers must pass ``tech`` explicitly.
    :raises ValueError: if ``tech`` is neither ``'sr'`` nor ``'corr'``.
    """
    # Validate first so a bad ``tech`` has no filesystem or cwd side effects.
    if tech == 'sr':
        preset = 'sr'
    elif tech == 'corr':
        preset = 'asm10'
    else:
        raise ValueError("Only 'sr' or 'corr' are accepted for read type.")

    current_path = os.getcwd()
    output_path = current_path + '/ctg_alignments'
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    os.chdir(output_path)
    try:
        cmd = (
            '{} -x {} -t{} ../../{} ../../{} '
            '> reads_against_ctg.paf 2> reads_against_ctg.paf.log'
        ).format(m_path, preset, num_threads, in_ctg_file, reads)

        # An existing PAF file is treated as a finished alignment and reused.
        if not os.path.isfile('reads_against_ctg.paf'):
            run(cmd)
    finally:
        # Never leave the process stranded in ctg_alignments on failure.
        os.chdir(current_path)
Ejemplo n.º 7
0
        with open(skip_file) as f:
            for line in f:
                skip_ctg.append(line.rstrip())

    current_path = os.getcwd()
    output_path = current_path + '/ragoo_output'
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    os.chdir(output_path)

    # Run minimap2
    cmd = '{} -k19 -w19 -t{} ../{} ../{} ' \
          '> contigs_against_ref.paf 2> contigs_against_ref.paf.log'.format(minimap_path, t, reference_file, contigs_file)

    if not os.path.isfile('contigs_against_ref.paf'):
        run(cmd)

    # Read in the minimap2 alignments just generated
    log('-- Reading alignments')
    alns = read_paf_alignments('contigs_against_ref.paf')
    alns = clean_alignments(alns, l=1000, in_exclude_file=exclude_file)

    # Process the gff file
    if gff_file:
        log('-- Getting gff features')
        features = defaultdict(list)
        z = GFFReader('../' + gff_file)
        for i in z.parse_gff():
            features[i.seqname].append(i)

    # Break chimeras if desired