Example #1
0
def create_mismatches_plot(assembly, window_size, ref_len, root_dir, output_dir):
    """Write per-window mismatch counts of an assembly to a tab-separated file.

    The "used SNPs" file of the assembly is looked up under
    ``<root_dir>/../contigs_reports``. Every record whose reference and contig
    nucleotide columns are both different from '.' is counted in the window
    ``int(ref_pos) // window_size`` of its reference sequence.

    Each output line is ``chrom<TAB>start<TAB>end<TAB>count``. Runs of
    consecutive windows without mismatches are merged into a single line with
    count 0; every window with mismatches gets its own line.

    Args:
        assembly: object exposing an ``fpath`` attribute (path of the assembly).
        window_size: width of one counting window, in reference positions.
        ref_len: length used to size the window list of every reference
            sequence (``ref_len // window_size + 1`` windows).
        root_dir: directory whose sibling ``contigs_reports`` holds the
            aligner output.
        output_dir: directory that receives ``<label>.mismatches.txt``.

    Returns:
        Path of the written file, or None when the used-SNPs file is missing.
    """
    assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
    nucmer_dirpath = join(root_dir, '..', 'contigs_reports')
    nucmer_fpath = join(create_nucmer_output_dir(nucmer_dirpath), assembly_label)
    _, _, _, _, used_snps_fpath = get_nucmer_aux_out_fpaths(nucmer_fpath)
    if not exists(used_snps_fpath):
        return None

    mismatches_fpath = join(output_dir, assembly_label + '.mismatches.txt')
    windows_count = ref_len // window_size + 1
    mismatch_density_by_chrom = defaultdict(lambda: [0] * windows_count)

    snps_file = open_gzipsafe(used_snps_fpath)
    try:
        for line in snps_file:
            chrom, contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos = line.split('\t')
            # '.' in either nucleotide column marks an indel, not a mismatch.
            if ref_nucl != '.' and ctg_nucl != '.':
                mismatch_density_by_chrom[chrom][int(ref_pos) // window_size] += 1
    finally:
        # The handle used to be left for the garbage collector to close.
        snps_file.close()

    with open(mismatches_fpath, 'w') as out_f:
        for chrom, density_list in mismatch_density_by_chrom.items():
            # [start, end) is the pending run of empty windows; a falsy `end`
            # (0 or None) means no empty run is pending.
            start, end = 0, 0
            for i, density in enumerate(density_list):
                window_end = (i + 1) * window_size
                if density == 0:
                    end = window_end
                    continue
                if end:
                    out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
                out_f.write('\t'.join([chrom, str(i * window_size), str(window_end), str(density)]) + '\n')
                start = window_end
                end = None
            # Flush the trailing empty run only if there is one; previously a
            # chromosome whose last window had mismatches produced a bogus
            # line with the literal text "None" as its end coordinate.
            if end:
                out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
    return mismatches_fpath
Example #2
0
def merge_gffs(gffs, out_path):
    '''Merge several GFF files into a single file written to ``out_path``.

    A single ``##gff-version 3`` line is written first. From every input file
    the first two lines are dropped unconditionally (they are assumed to be
    the GFF header) and the remaining lines are copied verbatim.

    Args:
        gffs: iterable of paths of the GFF files to merge, in output order.
        out_path: path of the merged file (opened via ``open_gzipsafe``).

    Returns:
        ``out_path``.
    '''
    out_file = open_gzipsafe(out_path, 'w')
    try:
        out_file.write('##gff-version 3\n')
        for gff_path in gffs:
            with open(gff_path) as gff_file:
                # Skip the two header lines, stream everything else.
                out_file.writelines(itertools.islice(gff_file, 2, None))
    finally:
        # Close the output even if an input file is missing or unreadable.
        out_file.close()
    return out_path
Example #3
0
def merge_gffs(gffs, out_path):
    '''Concatenate GFF files into ``out_path`` under one version header.

    The output starts with ``##gff-version 3``. The first two lines of each
    input file are skipped without inspection (assumed to be that file's own
    header); all following lines are appended unchanged.

    Args:
        gffs: iterable of input GFF file paths.
        out_path: destination path, opened with ``open_gzipsafe``.

    Returns:
        The ``out_path`` argument.
    '''
    out_file = open_gzipsafe(out_path, 'w')
    try:
        out_file.write('##gff-version 3\n')
        for gff_path in gffs:
            with open(gff_path) as gff_file:
                body_lines = itertools.islice(gff_file, 2, None)
                out_file.writelines(body_lines)
    finally:
        # Guarantees the destination handle is released on any I/O error.
        out_file.close()
    return out_path
Example #4
0
def parse_gff(gff_path):
    """Yield one tuple per ``mRNA`` record of a GFF file.

    Lines starting with '#' are ignored. The remaining lines are parsed as
    tab-separated records with exactly nine columns; records whose type
    column is not ``mRNA`` are skipped.

    Args:
        gff_path: path of the GFF file (opened via ``open_gzipsafe``).

    Yields:
        ``(seqid, name, start, end, strand)`` where ``name`` is the value of
        the ``Name`` attribute (None when absent) and ``start``/``end`` are
        ints.

    Raises:
        ValueError: if a record does not have nine columns or has
            non-integer coordinates.
    """
    gff_file = open_gzipsafe(gff_path)
    try:
        data_lines = [l for l in gff_file if not l.startswith("#")]
    finally:
        # All lines are in memory now, so the handle can be released before
        # the first record is yielded; it no longer depends on the caller
        # exhausting the generator.
        gff_file.close()

    reader = csv.reader(data_lines, delimiter='\t')
    for index, _source, feature_type, start, end, _score, strand, _phase, extra in reader:
        if feature_type != 'mRNA':
            continue  # Only mRNA records are reported.

        attrs = {}
        for kv in extra.split(";"):
            if not kv.strip():
                continue  # Tolerate a trailing or doubled ';'.
            # Split on the first '=' only, so values may contain '='.
            key, _, value = kv.partition("=")
            attrs[key] = value
        yield index, attrs.get('Name'), int(start), int(end), strand
Example #5
0
def parse_gff(gff_path):
    """Iterate over the ``mRNA`` records of a GFF file.

    Comment lines (starting with '#') are dropped; every other line must be a
    tab-separated record with nine columns. Only records whose type column
    equals ``mRNA`` produce output.

    Args:
        gff_path: path of the GFF file (opened via ``open_gzipsafe``).

    Yields:
        ``(seqid, name, start, end, strand)``; ``name`` comes from the
        ``Name`` attribute of the ninth column and is None if that attribute
        is missing; ``start`` and ``end`` are converted to int.

    Raises:
        ValueError: on a record with a column count other than nine or with
            non-integer coordinates.
    """
    gff_file = open_gzipsafe(gff_path)
    try:
        records = [l for l in gff_file if not l.startswith("#")]
    finally:
        # The file content is fully buffered, so close right away rather
        # than only after the generator has been consumed to the end.
        gff_file.close()

    for row in csv.reader(records, delimiter='\t'):
        index, _source, feature_type, start, end, _score, strand, _phase, extra = row
        if feature_type != 'mRNA':
            continue  # Only mRNA records are reported.

        attrs = {}
        for kv in extra.split(";"):
            if not kv.strip():
                continue  # An empty piece comes from a trailing ';'.
            # partition keeps any further '=' inside the value.
            key, _, value = kv.partition("=")
            attrs[key] = value
        yield index, attrs.get('Name'), int(start), int(end), strand
Example #6
0
def get_genes_from_file(fpath, feature):
    """Parse a genes/operons annotation file, detecting its format.

    BED is recognised by file extension (``bed`` / ``bed.gz``). Otherwise the
    first line that is neither blank nor a '#' comment is matched against the
    TXT, GFF and NCBI patterns and the corresponding parser is called on the
    rewound file.

    Args:
        fpath: path of the annotation file; may be empty or None.
        feature: name of the feature kind, used in warnings and passed to
            the GFF parser.

    Returns:
        Whatever the selected parser returns, or an empty list when the path
        is missing, the format is not recognised, or NCBI parsing fails.
    """
    if not fpath or not os.path.exists(fpath):
        # it is already checked in quast.py, so we need no more notification
        return []

    genes_file = open_gzipsafe(fpath, 'r')
    try:
        genes = []

        # Find the first meaningful line. readline() returns '' at EOF, so
        # EOF must be tested on the raw line: testing the stripped line (as
        # before) never terminated on a file made only of blanks/comments.
        line = ''
        while True:
            raw_line = genes_file.readline()
            if not raw_line:
                line = ''
                break
            line = raw_line.rstrip()
            if line and not line.startswith('#'):
                break

        genes_file.seek(0)

        if fpath.endswith(('bed', 'bed.gz')):
            genes = parse_bed(genes_file)

        elif txt_pattern_gi.match(line) or txt_pattern.match(line):
            genes = parse_txt(genes_file)

        elif gff_pattern.match(line):
            genes = parse_gff(genes_file, feature)

        elif ncbi_start_pattern.match(line):
            try:
                genes = parse_ncbi(genes_file)
            except ParseException:
                exc_type, exc_value, _ = sys.exc_info()
                # exc_value is an exception object; concatenating it to a str
                # directly raised TypeError inside the handler.
                logger.warning('Parsing exception ' + str(exc_value))
                logger.warning(fpath + ' was skipped')
                genes = []
        else:
            logger.warning(
                'Incorrect format of ' + feature +
                '\'s file! GFF, NCBI and the plain TXT format accepted. See manual.'
            )
            logger.warning(fpath + ' was skipped')
    finally:
        # Release the handle even when a parser raises.
        genes_file.close()
    return genes
Example #7
0
def get_genes_from_file(fpath, feature):
    """Load genes/operons from an annotation file of auto-detected format.

    Files ending in ``bed`` or ``bed.gz`` go to the BED parser. For anything
    else, the first non-blank, non-comment line decides between the TXT, GFF
    and NCBI parsers; the file is rewound before parsing.

    Args:
        fpath: path of the annotation file; may be empty or None.
        feature: feature kind name, used in warnings and forwarded to the
            GFF parser.

    Returns:
        The parser's result, or an empty list if the path is missing, the
        format is unknown, or the NCBI parser raises ``ParseException``.
    """
    if not fpath or not os.path.exists(fpath):
        # it is already checked in quast.py, so we need no more notification
        return []

    genes_file = open_gzipsafe(fpath, 'r')
    try:
        genes = []

        # Scan for the first meaningful line, stopping at EOF. The previous
        # loop compared the stripped line to '' and therefore spun forever
        # once readline() started returning '' at end of file.
        line = ''
        while True:
            raw_line = genes_file.readline()
            if not raw_line:
                line = ''
                break
            line = raw_line.rstrip()
            if line and not line.startswith('#'):
                break

        genes_file.seek(0)

        if fpath.endswith(('bed', 'bed.gz')):
            genes = parse_bed(genes_file)

        elif txt_pattern_gi.match(line) or txt_pattern.match(line):
            genes = parse_txt(genes_file)

        elif gff_pattern.match(line):
            genes = parse_gff(genes_file, feature)

        elif ncbi_start_pattern.match(line):
            try:
                genes = parse_ncbi(genes_file)
            except ParseException:
                exc_type, exc_value, _ = sys.exc_info()
                # str() is required: str + exception object raises TypeError.
                logger.warning('Parsing exception ' + str(exc_value))
                logger.warning(fpath + ' was skipped')
                genes = []
        else:
            logger.warning('Incorrect format of ' + feature + '\'s file! GFF, NCBI and the plain TXT format accepted. See manual.')
            logger.warning(fpath + ' was skipped')
    finally:
        # Always close, including when a parser raises.
        genes_file.close()
    return genes
Example #8
0
def add_genes_to_gff(genes, gff_fpath, prokaryote):
    """Write predicted genes to a GFF file.

    The file starts with a ``##gff out for ...`` line naming the predictor
    (MetaGeneMark when ``prokaryote`` and ``qconfig.metagenemark`` are set,
    GeneMarkS for other prokaryotes, GeneMark-ES otherwise) followed by
    ``##gff-version 3``. Each gene becomes one ``gene`` record with a
    1-based ``ID``. Nucleotide and protein sequences, when present, follow
    as ``##``-prefixed lines of at most 60 characters; a blank line is
    written only after a protein sequence.

    Args:
        genes: iterable of objects with ``contig``, ``start``, ``end``,
            ``strand``, ``seq`` and ``protein`` attributes.
        gff_fpath: output path (opened via ``open_gzipsafe``).
        prokaryote: truthy to select the prokaryotic header variants.
    """
    line_width = 60
    gff = open_gzipsafe(gff_fpath, 'w')
    try:
        if prokaryote:
            if qconfig.metagenemark:
                gff.write('##gff out for MetaGeneMark\n')
            else:
                gff.write('##gff out for GeneMarkS PROKARYOTIC\n')
        else:
            gff.write('##gff out for GeneMark-ES EUKARYOTIC\n')
        gff.write('##gff-version 3\n')

        # `gene_idx` replaces the former `id`, which shadowed the builtin.
        for gene_idx, gene in enumerate(genes):
            gff.write('%s\tGeneMark\tgene\t%d\t%d\t.\t%s\t.\tID=%d\n' %
                      (gene.contig, gene.start, gene.end, gene.strand, gene_idx + 1))
            if gene.seq:
                gff.write('##Nucleotide sequence:\n')
                for i in range(0, len(gene.seq), line_width):
                    gff.write('##' + gene.seq[i:i + line_width] + '\n')
            if gene.protein:
                gff.write('##Protein sequence:\n')
                for i in range(0, len(gene.protein), line_width):
                    gff.write('##' + gene.protein[i:i + line_width] + '\n')
                gff.write('\n')
    finally:
        # Close the output even if a gene record is malformed.
        gff.close()
Example #9
0
def add_genes_to_gff(genes, gff_fpath, prokaryote):
    """Dump predicted genes into a GFF file.

    Header: a ``##gff out for ...`` line identifying the predictor
    (MetaGeneMark when ``prokaryote`` and ``qconfig.meta`` are set, GeneMarkS
    for other prokaryotes, GeneMark-ES otherwise), then ``##gff-version 3``.
    Body: one ``gene`` record per gene with a 1-based ``ID``, optionally
    followed by its nucleotide and protein sequences as ``##``-prefixed lines
    of at most 60 characters. A blank line is emitted only after a protein
    sequence.

    Args:
        genes: iterable of objects with ``contig``, ``start``, ``end``,
            ``strand``, ``seq`` and ``protein`` attributes.
        gff_fpath: output path (opened via ``open_gzipsafe``).
        prokaryote: truthy to select the prokaryotic header variants.
    """
    line_width = 60
    gff = open_gzipsafe(gff_fpath, 'w')
    try:
        if prokaryote:
            if qconfig.meta:
                gff.write('##gff out for MetaGeneMark\n')
            else:
                gff.write('##gff out for GeneMarkS PROKARYOTIC\n')
        else:
            gff.write('##gff out for GeneMark-ES EUKARYOTIC\n')
        gff.write('##gff-version 3\n')

        # Renamed from `id` to avoid shadowing the builtin.
        for gene_idx, gene in enumerate(genes):
            gff.write('%s\tGeneMark\tgene\t%d\t%d\t.\t%s\t.\tID=%d\n' %
                      (gene.contig, gene.start, gene.end, gene.strand, gene_idx + 1))
            if gene.seq:
                gff.write('##Nucleotide sequence:\n')
                for i in range(0, len(gene.seq), line_width):
                    gff.write('##' + gene.seq[i:i + line_width] + '\n')
            if gene.protein:
                gff.write('##Protein sequence:\n')
                for i in range(0, len(gene.protein), line_width):
                    gff.write('##' + gene.protein[i:i + line_width] + '\n')
                gff.write('\n')
    finally:
        # Ensure the handle is released if writing fails part-way.
        gff.close()
Example #10
0
def align_and_analyze(is_cyclic, index, contigs_fpath, output_dirpath, ref_fpath,
                      old_contigs_fpath, bed_fpath, parallel_by_chr=False, threads=1):
    """Align one assembly to the reference and analyze the alignments.

    Steps, in order: run the aligner via ``align_contigs``; parse the coords
    file into per-contig mappings; read reference sequence lengths; optionally
    load SNP calls (``qconfig.show_snps``); run ``analyze_contigs`` and
    ``analyze_coverage``; write the misassembled contigs FASTA (unless
    ``qconfig.space_efficient``); for combined references write the
    per-chromosome alignment TSV and the unique-contigs file; finally close
    the output handles and clean up / compress the aligner output.

    Args:
        is_cyclic: forwarded to ``analyze_contigs``.
        index: index of the assembly; used for log prefixes via
            ``qutils.index_to_str`` and forwarded to ``align_contigs``.
        contigs_fpath: path of the assembly FASTA.
        output_dirpath: directory for reports and logs.
        ref_fpath: path of the reference FASTA.
        old_contigs_fpath: forwarded to ``align_contigs``.
        bed_fpath: not used anywhere in this function body.
        parallel_by_chr: forwarded to ``align_contigs``.
        threads: forwarded to ``align_contigs``.

    Returns:
        A 5-tuple ``(status, result, aligned_lengths,
        misassemblies_in_contigs, aligned_lengths_by_contigs)``. When the
        aligner status is not ``NucmerStatus.OK`` the tuple is
        ``(status, {}, [], [], [])``. Otherwise ``status`` is
        ``NucmerStatus.NOT_ALIGNED`` if ``ref_aligns`` is empty and
        ``NucmerStatus.OK`` if not.
    """
    nucmer_output_dirpath = create_nucmer_output_dir(output_dirpath)
    assembly_label = qutils.label_from_fpath(contigs_fpath)
    corr_assembly_label = qutils.label_from_fpath_for_fname(contigs_fpath)
    nucmer_fpath = join(nucmer_output_dirpath, corr_assembly_label)

    logger.info('  ' + qutils.index_to_str(index) + assembly_label)

    # In space-efficient mode every log/report path is redirected to /dev/null.
    if not qconfig.space_efficient:
        log_out_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stdout')
        log_err_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stderr')
        icarus_out_fpath = join(output_dirpath, qconfig.icarus_report_fname_pattern % corr_assembly_label)
        misassembly_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.mis_contigs.info')
        unaligned_info_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.unaligned.info')
    else:
        log_out_fpath = '/dev/null'
        log_err_fpath = '/dev/null'
        icarus_out_fpath = '/dev/null'
        misassembly_fpath = '/dev/null'
        unaligned_info_fpath = '/dev/null'

    # These two handles are opened before alignment and later handed to CAOutput.
    icarus_out_f = open(icarus_out_fpath, 'w')
    icarus_header_cols = ['S1', 'E1', 'S2', 'E2', 'Reference', 'Contig', 'IDY', 'Ambiguous', 'Best_group']
    icarus_out_f.write('\t'.join(icarus_header_cols) + '\n')
    misassembly_f = open(misassembly_fpath, 'w')

    if not qconfig.space_efficient:
        logger.info('  ' + qutils.index_to_str(index) + 'Logging to files ' + log_out_fpath +
                ' and ' + os.path.basename(log_err_fpath) + '...')
    else:
        logger.info('  ' + qutils.index_to_str(index) + 'Logging is disabled.')

    coords_fpath, coords_filtered_fpath, unaligned_fpath, show_snps_fpath, used_snps_fpath = \
        get_nucmer_aux_out_fpaths(nucmer_fpath)

    nucmer_status = align_contigs(nucmer_fpath, ref_fpath, contigs_fpath, old_contigs_fpath, index,
                                  parallel_by_chr, threads, log_out_fpath, log_err_fpath)
    if nucmer_status != NucmerStatus.OK:
        # Report the failure and bail out with empty results.
        with open(log_err_fpath, 'a') as log_err_f:
            if nucmer_status == NucmerStatus.ERROR:
                logger.error('  ' + qutils.index_to_str(index) +
                         'Failed aligning contigs ' + qutils.label_from_fpath(contigs_fpath) +
                         ' to the reference (non-zero exit code). ' +
                         ('Run with the --debug flag to see additional information.' if not qconfig.debug else ''))
            elif nucmer_status == NucmerStatus.FAILED:
                # NOTE(review): coords_fpath is concatenated directly with
                # "doesn't exist." -- no separating space in the log message.
                log_err_f.write(qutils.index_to_str(index) + 'Alignment failed for ' + contigs_fpath + ':' + coords_fpath + 'doesn\'t exist.\n')
                logger.info('  ' + qutils.index_to_str(index) + 'Alignment failed for ' + '\'' + assembly_label + '\'.')
            elif nucmer_status == NucmerStatus.NOT_ALIGNED:
                log_err_f.write(qutils.index_to_str(index) + 'Nothing aligned for ' + contigs_fpath + '\n')
                logger.info('  ' + qutils.index_to_str(index) + 'Nothing aligned for ' + '\'' + assembly_label + '\'.')
        clean_tmp_files(nucmer_fpath)
        # NOTE(review): icarus_out_f and misassembly_f are still open here and
        # are not closed by this function on this path.
        return nucmer_status, {}, [], [], []

    log_out_f = open(log_out_fpath, 'a')
    # Loading the alignment files
    log_out_f.write('Parsing coords...\n')
    aligns = {}
    # NOTE(review): coords_file is never explicitly closed in this function.
    coords_file = open(coords_fpath)
    coords_filtered_file = open(coords_filtered_fpath, 'w')
    # Copy the first two lines (presumably the coords header) unchanged.
    coords_filtered_file.write(coords_file.readline())
    coords_filtered_file.write(coords_file.readline())
    for line in coords_file:
        # A blank line terminates the alignment records.
        if line.strip() == '':
            break
        assert line[0] != '='
        # Parse the coords line into a Mapping and group mappings by contig
        mapping = Mapping.from_line(line)
        aligns.setdefault(mapping.contig, []).append(mapping)

    # Loading the reference sequences
    log_out_f.write('Loading reference...\n') # TODO: move up
    ref_lens = {}
    # ref_features stays empty; it is only passed through to the analyzers.
    ref_features = {}
    for name, seq in fastaparser.read_fasta(ref_fpath):
        name = name.split()[0]  # no spaces in reference header
        ref_lens[name] = len(seq)
        log_out_f.write('\tLoaded [%s]\n' % name)

    #Loading the SNP calls
    if qconfig.show_snps:
        log_out_f.write('Loading SNPs...\n')

    used_snps_file = None
    # snps[ref_name][contig_name][ref_pos] -> list of SNP records
    snps = {}
    if qconfig.show_snps:
        prev_line = None
        for line in open_gzipsafe(show_snps_fpath):
            #print "$line";
            line = line.split()
            # Only lines whose first field is numeric are SNP records.
            if not line[0].isdigit():
                continue
            # Skip a record identical to the previously accepted one.
            if prev_line and line == prev_line:
                continue
            ref = line[10]
            ctg = line[11]
            pos = int(line[0]) # position on the reference (explicit str -> int)
            loc = int(line[3]) # position on the contig (explicit str -> int)

            # if (! exists $line[11]) { die "Malformed line in SNP file.  Please check that show-snps has completed succesfully.\n$line\n[$line[9]][$line[10]][$line[11]]\n"; }
            if pos in snps.setdefault(ref, {}).setdefault(ctg, {}):
                snps.setdefault(ref, {}).setdefault(ctg, {})[pos].append(SNP(ref_pos=pos, ctg_pos=loc, ref_nucl=line[1], ctg_nucl=line[2]))
            else:
                snps.setdefault(ref, {}).setdefault(ctg, {})[pos] = [SNP(ref_pos=pos, ctg_pos=loc, ref_nucl=line[1], ctg_nucl=line[2])]
            prev_line = line
        used_snps_file = open_gzipsafe(used_snps_fpath, 'w')

    # Loading the regions (if any)
    # Each reference sequence contributes a single region [1, length].
    regions = {}
    total_reg_len = 0
    total_regions = 0
    # # TODO: gff
    # log_out_f.write('Loading regions...\n')
    # log_out_f.write('\tNo regions given, using whole reference.\n')
    for name, seq_len in ref_lens.items():
        regions.setdefault(name, []).append([1, seq_len])
        total_regions += 1
        total_reg_len += seq_len
    log_out_f.write('\tTotal Regions: %d\n' % total_regions)
    log_out_f.write('\tTotal Region Length: %d\n' % total_reg_len)

    # Bundle of output handles shared by the analysis steps; passed to
    # close_handlers() at the end of this function.
    ca_output = CAOutput(stdout_f=log_out_f, misassembly_f=misassembly_f, coords_filtered_f=coords_filtered_file,
                         used_snps_f=used_snps_file, icarus_out_f=icarus_out_f)

    log_out_f.write('Analyzing contigs...\n')
    result, ref_aligns, total_indels_info, aligned_lengths, misassembled_contigs, misassemblies_in_contigs, aligned_lengths_by_contigs =\
        analyze_contigs(ca_output, contigs_fpath, unaligned_fpath, unaligned_info_fpath, aligns, ref_features, ref_lens, is_cyclic)

    # Disabled large-genome analysis, kept as commented-out code:
    # if qconfig.large_genome:
    #     log_out_f.write('Analyzing large blocks...\n')
    #     large_misassembly_fpath = add_suffix(misassembly_fpath, 'large_blocks') if not qconfig.space_efficient else '/dev/null'
    #     ca_large_output = CAOutput(stdout_f=log_out_f, misassembly_f=open(large_misassembly_fpath, 'w'),
    #                                coords_filtered_f=coords_filtered_file, used_snps_f=open('/dev/null', 'w'), icarus_out_f=open('/dev/null', 'w'))
    #     min_alignment, extensive_mis_threshold = qconfig.min_alignment, qconfig.extensive_misassembly_threshold
    #     qconfig.min_alignment, qconfig.extensive_misassembly_threshold = qconfig.LARGE_MIN_ALIGNMENT, qconfig.LARGE_EXTENSIVE_MIS_THRESHOLD
    #     result.update(analyze_contigs(ca_large_output, contigs_fpath, '/dev/null', '/dev/null',
    #                                   aligns, ref_features, ref_lens, is_cyclic, large_misassemblies_search=True)[0])
    #     qconfig.min_alignment, qconfig.extensive_misassembly_threshold = min_alignment, extensive_mis_threshold

    log_out_f.write('Analyzing coverage...\n')
    if qconfig.show_snps:
        log_out_f.write('Writing SNPs into ' + used_snps_fpath + '\n')
    result.update(analyze_coverage(ca_output, regions, ref_aligns, ref_features, snps, total_indels_info))
    result = print_results(contigs_fpath, log_out_f, used_snps_fpath, total_indels_info, result)

    if not qconfig.space_efficient:
        ## outputting misassembled contigs to separate file
        fasta = [(name, seq) for name, seq in fastaparser.read_fasta(contigs_fpath)
                 if name in misassembled_contigs.keys()]
        fastaparser.write_fasta(join(output_dirpath, qutils.name_from_fpath(contigs_fpath) + '.mis_contigs.fa'), fasta)

    if qconfig.is_combined_ref:
        alignment_tsv_fpath = join(output_dirpath, "alignments_" + corr_assembly_label + '.tsv')
        unique_contigs_fpath = join(output_dirpath, qconfig.unique_contigs_fname_pattern % corr_assembly_label)
        logger.debug('  ' + qutils.index_to_str(index) + 'Alignments: ' + qutils.relpath(alignment_tsv_fpath))
        # Each contig is considered for the unique-contigs file at most once,
        # for the first chromosome it is encountered on.
        used_contigs = set()
        with open(unique_contigs_fpath, 'w') as unique_contigs_f:
            with open(alignment_tsv_fpath, 'w') as alignment_tsv_f:
                # NOTE: the loop variable `aligns` rebinds the per-contig
                # `aligns` dict built while parsing the coords file.
                for chr_name, aligns in ref_aligns.items():
                    # One TSV row per chromosome: its name, then the contigs aligned to it.
                    alignment_tsv_f.write(chr_name)
                    contigs = set([align.contig for align in aligns])
                    for contig in contigs:
                        alignment_tsv_f.write('\t' + contig)

                    # NOTE(review): this check repeats the enclosing
                    # `if qconfig.is_combined_ref`.
                    if qconfig.is_combined_ref:
                        # ref_labels_by_chromosomes is not defined in this
                        # function -- presumably a module-level mapping; verify.
                        ref_name = ref_labels_by_chromosomes[chr_name]
                        # Sum of align.len2 per contig on this chromosome.
                        align_by_contigs = defaultdict(int)
                        for align in aligns:
                            align_by_contigs[align.contig] += align.len2
                        for contig, aligned_len in align_by_contigs.items():
                            if contig in used_contigs:
                                continue
                            used_contigs.add(contig)
                            # Length and coverage are taken from contig names
                            # containing "_length_<num>_cov_<num>".
                            # NOTE(review): the pattern is recompiled on every
                            # iteration and findall() is evaluated three times.
                            len_cov_pattern = re.compile(r'_length_([\d\.]+)_cov_([\d\.]+)')
                            if len_cov_pattern.findall(contig):
                                contig_len = len_cov_pattern.findall(contig)[0][0]
                                contig_cov = len_cov_pattern.findall(contig)[0][1]
                                # Record the contig only if more than 90% of its
                                # named length is aligned to this chromosome.
                                if aligned_len / float(contig_len) > 0.9:
                                    unique_contigs_f.write(ref_name + '\t' + str(aligned_len) + '\t' + contig_cov + '\n')
                    alignment_tsv_f.write('\n')

    close_handlers(ca_output)
    logger.info('  ' + qutils.index_to_str(index) + 'Analysis is finished.')
    logger.debug('')
    clean_tmp_files(nucmer_fpath)
    if not qconfig.no_gzip:
        compress_nucmer_output(logger, nucmer_fpath)
    # An empty ref_aligns is reported as NOT_ALIGNED even though the aligner
    # itself returned OK; the computed results are returned either way.
    if not ref_aligns:
        return NucmerStatus.NOT_ALIGNED, result, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs
    else:
        return NucmerStatus.OK, result, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs
Example #11
0
def align_and_analyze(is_cyclic, index, contigs_fpath, output_dirpath, ref_fpath,
                      old_contigs_fpath, bed_fpath, parallel_by_chr=False, threads=1):
    """Align one assembly to the reference and analyze the alignments.

    Steps, in order: run the aligner via ``align_contigs``; parse the coords
    file into per-contig mappings; load the reference sequences; optionally
    load SNP calls (``qconfig.show_snps``); run ``analyze_contigs`` and
    ``analyze_coverage``; write the misassembled contigs FASTA (unless
    ``qconfig.space_efficient``); for combined references write the
    per-chromosome alignment TSV and the unique-contigs file; finally close
    the output handles and clean up / compress the aligner output.

    Args:
        is_cyclic: forwarded to ``analyze_contigs``.
        index: index of the assembly; used for log prefixes via
            ``qutils.index_to_str`` and forwarded to ``align_contigs``.
        contigs_fpath: path of the assembly FASTA.
        output_dirpath: directory for reports and logs.
        ref_fpath: path of the reference FASTA.
        old_contigs_fpath: forwarded to ``align_contigs``.
        bed_fpath: not used anywhere in this function body.
        parallel_by_chr: forwarded to ``align_contigs``.
        threads: forwarded to ``align_contigs``.

    Returns:
        A 5-tuple ``(status, result, aligned_lengths,
        misassemblies_in_contigs, aligned_lengths_by_contigs)``. When the
        aligner status is not ``NucmerStatus.OK`` the tuple is
        ``(status, {}, [], [], [])``. Otherwise ``status`` is
        ``NucmerStatus.NOT_ALIGNED`` if ``ref_aligns`` is empty and
        ``NucmerStatus.OK`` if not.
    """
    nucmer_output_dirpath = create_nucmer_output_dir(output_dirpath)
    assembly_label = qutils.label_from_fpath(contigs_fpath)
    corr_assembly_label = qutils.label_from_fpath_for_fname(contigs_fpath)
    nucmer_fpath = join(nucmer_output_dirpath, corr_assembly_label)

    logger.info('  ' + qutils.index_to_str(index) + assembly_label)

    # In space-efficient mode every log/report path is redirected to /dev/null.
    if not qconfig.space_efficient:
        log_out_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stdout')
        log_err_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.stderr')
        icarus_out_fpath = join(output_dirpath, qconfig.icarus_report_fname_pattern % corr_assembly_label)
        misassembly_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.mis_contigs.info')
        unaligned_info_fpath = join(output_dirpath, qconfig.contig_report_fname_pattern % corr_assembly_label + '.unaligned.info')
    else:
        log_out_fpath = '/dev/null'
        log_err_fpath = '/dev/null'
        icarus_out_fpath = '/dev/null'
        misassembly_fpath = '/dev/null'
        unaligned_info_fpath = '/dev/null'

    # These two handles are opened before alignment and later handed to CAOutput.
    icarus_out_f = open(icarus_out_fpath, 'w')
    icarus_header_cols = ['S1', 'E1', 'S2', 'E2', 'Reference', 'Contig', 'IDY', 'Ambiguous', 'Best_group']
    icarus_out_f.write('\t'.join(icarus_header_cols) + '\n')
    misassembly_f = open(misassembly_fpath, 'w')

    if not qconfig.space_efficient:
        logger.info('  ' + qutils.index_to_str(index) + 'Logging to files ' + log_out_fpath +
                ' and ' + os.path.basename(log_err_fpath) + '...')
    else:
        logger.info('  ' + qutils.index_to_str(index) + 'Logging is disabled.')

    coords_fpath, coords_filtered_fpath, unaligned_fpath, show_snps_fpath, used_snps_fpath = \
        get_nucmer_aux_out_fpaths(nucmer_fpath)

    nucmer_status = align_contigs(nucmer_fpath, ref_fpath, contigs_fpath, old_contigs_fpath, index,
                                  parallel_by_chr, threads, log_out_fpath, log_err_fpath)
    if nucmer_status != NucmerStatus.OK:
        # Report the failure and bail out with empty results.
        with open(log_err_fpath, 'a') as log_err_f:
            if nucmer_status == NucmerStatus.ERROR:
                logger.error('  ' + qutils.index_to_str(index) +
                         'Failed aligning contigs ' + qutils.label_from_fpath(contigs_fpath) +
                         ' to the reference (non-zero exit code). ' +
                         ('Run with the --debug flag to see additional information.' if not qconfig.debug else ''))
            elif nucmer_status == NucmerStatus.FAILED:
                # NOTE(review): coords_fpath is concatenated directly with
                # "doesn't exist." -- no separating space in the log message.
                log_err_f.write(qutils.index_to_str(index) + 'Alignment failed for ' + contigs_fpath + ':' + coords_fpath + 'doesn\'t exist.\n')
                logger.info('  ' + qutils.index_to_str(index) + 'Alignment failed for ' + '\'' + assembly_label + '\'.')
            elif nucmer_status == NucmerStatus.NOT_ALIGNED:
                log_err_f.write(qutils.index_to_str(index) + 'Nothing aligned for ' + contigs_fpath + '\n')
                logger.info('  ' + qutils.index_to_str(index) + 'Nothing aligned for ' + '\'' + assembly_label + '\'.')
        clean_tmp_files(nucmer_fpath)
        # NOTE(review): icarus_out_f and misassembly_f are still open here and
        # are not closed by this function on this path.
        return nucmer_status, {}, [], [], []

    log_out_f = open(log_out_fpath, 'a')
    # Loading the alignment files
    log_out_f.write('Parsing coords...\n')
    aligns = {}
    # NOTE(review): coords_file is never explicitly closed in this function.
    coords_file = open(coords_fpath)
    coords_filtered_file = open(coords_filtered_fpath, 'w')
    # Copy the first two lines (presumably the coords header) unchanged.
    coords_filtered_file.write(coords_file.readline())
    coords_filtered_file.write(coords_file.readline())
    for line in coords_file:
        # A blank line terminates the alignment records.
        if line.strip() == '':
            break
        assert line[0] != '='
        # Parse the coords line into a Mapping and group mappings by contig
        mapping = Mapping.from_line(line)
        aligns.setdefault(mapping.contig, []).append(mapping)

    # Loading the reference sequences
    log_out_f.write('Loading reference...\n') # TODO: move up
    # references keeps the full sequences; below only their lengths are used.
    references = {}
    # ref_features stays empty; it is only passed through to the analyzers.
    ref_features = {}
    for name, seq in fastaparser.read_fasta(ref_fpath):
        name = name.split()[0]  # no spaces in reference header
        references[name] = seq
        log_out_f.write('\tLoaded [%s]\n' % name)

    #Loading the SNP calls
    if qconfig.show_snps:
        log_out_f.write('Loading SNPs...\n')

    used_snps_file = None
    # snps[ref_name][contig_name][ref_pos] -> list of SNP records
    snps = {}
    if qconfig.show_snps:
        prev_line = None
        for line in open_gzipsafe(show_snps_fpath):
            #print "$line";
            line = line.split()
            # Only lines whose first field is numeric are SNP records.
            if not line[0].isdigit():
                continue
            # Skip a record identical to the previously accepted one.
            if prev_line and line == prev_line:
                continue
            ref = line[10]
            ctg = line[11]
            pos = int(line[0]) # position on the reference (explicit str -> int)
            loc = int(line[3]) # position on the contig (explicit str -> int)

            # if (! exists $line[11]) { die "Malformed line in SNP file.  Please check that show-snps has completed succesfully.\n$line\n[$line[9]][$line[10]][$line[11]]\n"; }
            if pos in snps.setdefault(ref, {}).setdefault(ctg, {}):
                snps.setdefault(ref, {}).setdefault(ctg, {})[pos].append(SNP(ref_pos=pos, ctg_pos=loc, ref_nucl=line[1], ctg_nucl=line[2]))
            else:
                snps.setdefault(ref, {}).setdefault(ctg, {})[pos] = [SNP(ref_pos=pos, ctg_pos=loc, ref_nucl=line[1], ctg_nucl=line[2])]
            prev_line = line
        used_snps_file = open_gzipsafe(used_snps_fpath, 'w')

    # Loading the regions (if any)
    # Each reference sequence contributes a single region [1, length];
    # ref_lens is filled in the same pass.
    regions = {}
    ref_lens = {}
    total_reg_len = 0
    total_regions = 0
    # # TODO: gff
    # log_out_f.write('Loading regions...\n')
    # log_out_f.write('\tNo regions given, using whole reference.\n')
    for name, seq in references.items():
        regions.setdefault(name, []).append([1, len(seq)])
        ref_lens[name] = len(seq)
        total_regions += 1
        total_reg_len += ref_lens[name]
    log_out_f.write('\tTotal Regions: %d\n' % total_regions)
    log_out_f.write('\tTotal Region Length: %d\n' % total_reg_len)

    # Bundle of output handles shared by the analysis steps; passed to
    # close_handlers() at the end of this function.
    ca_output = CAOutput(stdout_f=log_out_f, misassembly_f=misassembly_f, coords_filtered_f=coords_filtered_file,
                         used_snps_f=used_snps_file, icarus_out_f=icarus_out_f)

    log_out_f.write('Analyzing contigs...\n')
    result, ref_aligns, total_indels_info, aligned_lengths, misassembled_contigs, misassemblies_in_contigs, aligned_lengths_by_contigs =\
        analyze_contigs(ca_output, contigs_fpath, unaligned_fpath, unaligned_info_fpath, aligns, ref_features, ref_lens, is_cyclic)

    log_out_f.write('Analyzing coverage...\n')
    if qconfig.show_snps:
        log_out_f.write('Writing SNPs into ' + used_snps_fpath + '\n')
    result.update(analyze_coverage(ca_output, regions, ref_aligns, ref_features, snps, total_indels_info))
    result = print_results(contigs_fpath, log_out_f, used_snps_fpath, total_indels_info, result)

    if not qconfig.space_efficient:
        ## outputting misassembled contigs to separate file
        fasta = [(name, seq) for name, seq in fastaparser.read_fasta(contigs_fpath)
                 if name in misassembled_contigs.keys()]
        fastaparser.write_fasta(join(output_dirpath, qutils.name_from_fpath(contigs_fpath) + '.mis_contigs.fa'), fasta)

    if qconfig.is_combined_ref:
        alignment_tsv_fpath = join(output_dirpath, "alignments_" + corr_assembly_label + '.tsv')
        unique_contigs_fpath = join(output_dirpath, qconfig.unique_contigs_fname_pattern % corr_assembly_label)
        logger.debug('  ' + qutils.index_to_str(index) + 'Alignments: ' + qutils.relpath(alignment_tsv_fpath))
        # Each contig is considered for the unique-contigs file at most once,
        # for the first chromosome it is encountered on.
        used_contigs = set()
        with open(unique_contigs_fpath, 'w') as unique_contigs_f:
            with open(alignment_tsv_fpath, 'w') as alignment_tsv_f:
                # NOTE: the loop variable `aligns` rebinds the per-contig
                # `aligns` dict built while parsing the coords file.
                for chr_name, aligns in ref_aligns.items():
                    # One TSV row per chromosome: its name, then the contigs aligned to it.
                    alignment_tsv_f.write(chr_name)
                    contigs = set([align.contig for align in aligns])
                    for contig in contigs:
                        alignment_tsv_f.write('\t' + contig)

                    # NOTE(review): this check repeats the enclosing
                    # `if qconfig.is_combined_ref`.
                    if qconfig.is_combined_ref:
                        # ref_labels_by_chromosomes is not defined in this
                        # function -- presumably a module-level mapping; verify.
                        ref_name = ref_labels_by_chromosomes[chr_name]
                        # Sum of align.len2 per contig on this chromosome.
                        align_by_contigs = defaultdict(int)
                        for align in aligns:
                            align_by_contigs[align.contig] += align.len2
                        for contig, aligned_len in align_by_contigs.items():
                            if contig in used_contigs:
                                continue
                            used_contigs.add(contig)
                            # Length and coverage are taken from contig names
                            # containing "_length_<num>_cov_<num>".
                            # NOTE(review): the pattern is recompiled on every
                            # iteration and findall() is evaluated three times.
                            len_cov_pattern = re.compile(r'_length_([\d\.]+)_cov_([\d\.]+)')
                            if len_cov_pattern.findall(contig):
                                contig_len = len_cov_pattern.findall(contig)[0][0]
                                contig_cov = len_cov_pattern.findall(contig)[0][1]
                                # Record the contig only if more than 90% of its
                                # named length is aligned to this chromosome.
                                if aligned_len / float(contig_len) > 0.9:
                                    unique_contigs_f.write(ref_name + '\t' + str(aligned_len) + '\t' + contig_cov + '\n')
                    alignment_tsv_f.write('\n')

    close_handlers(ca_output)
    logger.info('  ' + qutils.index_to_str(index) + 'Analysis is finished.')
    logger.debug('')
    clean_tmp_files(nucmer_fpath)
    if not qconfig.no_gzip:
        compress_nucmer_output(logger, nucmer_fpath)
    # An empty ref_aligns is reported as NOT_ALIGNED even though the aligner
    # itself returned OK; the computed results are returned either way.
    if not ref_aligns:
        return NucmerStatus.NOT_ALIGNED, result, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs
    else:
        return NucmerStatus.OK, result, aligned_lengths, misassemblies_in_contigs, aligned_lengths_by_contigs