Python pidの例、lingpy.sequence.sound_classes.pid Pythonの例

コード例 #1

0

ファイルを表示

 def test_pid(self):
     """Check the values ``pid`` returns for every mode from 1 to 4."""
     # Mode passed positionally: the 'mattis'/'maTTIs' pair is expected to
     # score 0.5 regardless of the mode.
     for mode in (1, 2, 3, 4):
         assert pid('mattis', 'maTTIs', mode) == 0.5

     # Mode passed by keyword: these gap-only pairings are expected to
     # score 0. Mode 3 is exercised with single-segment input.
     gapped_cases = (
         ('m-', '-m', 1),
         ('m-', '-m', 2),
         ('m', '-', 3),
         ('m-', '-m', 4),
     )
     for left, right, mode in gapped_cases:
         assert pid(left, right, mode=mode) == 0

コード例 #2

0

ファイルを表示

ファイル: test_sound_classes.py プロジェクト: LinguList/lingpy

 def test_pid(self):
     """Check the values ``pid`` returns for every mode from 1 to 4."""
     # Mode passed positionally: the 'mattis'/'maTTIs' pair is expected to
     # score 0.5 regardless of the mode.
     for mode in (1, 2, 3, 4):
         assert pid('mattis', 'maTTIs', mode) == 0.5

     # Mode passed by keyword: these gap-only pairings are expected to
     # score 0. Mode 3 is exercised with single-segment input.
     gapped_cases = (
         ('m-', '-m', 1),
         ('m-', '-m', 2),
         ('m', '-', 3),
         ('m-', '-m', 4),
     )
     for left, right, mode in gapped_cases:
         assert pid(left, right, mode=mode) == 0

コード例 #3

0

ファイルを表示

def test_pid():
    """Check that ``pid`` with default arguments scores the pair as 0.5."""
    score = pid('mattis', 'maTTIs')
    assert score == 0.5

コード例 #4

0

ファイルを表示

ファイル: test_sound_classes.py プロジェクト: sflavier/lingpy

def test_pid():
    """Check that ``pid`` with default arguments scores the pair as 0.5."""
    score = pid('mattis', 'maTTIs')
    assert score == 0.5

コード例 #5

0

ファイルを表示

ファイル: html.py プロジェクト: vermillionbee/lingpy

def psa2html(infile, **kw):
    """
    Convert a PSA-file into colored html-format.

    Lines of ``infile`` that start with the comment marker are dropped, and so
    is the first remaining line. The rest is read in steps of four lines: an
    identifier line, two tab-separated lines (taxon name first, alignment
    cells after it), and one further line that is skipped.

    Parameters
    ----------
    infile : str
        Path of the PSA-file.

    template : str (default=False)
        Path of the html template. If not given,
        ``template_path('psa.html')`` is used. The template is filled with
        the fields ``alignments`` and ``css``.

    css : str (default=False)
        Path of the stylesheet. If not given, ``template_path('psa.css')`` is
        used.

    comment : str (default="#")
        Lines starting with this string are ignored.

    filename : str
        Name of the output file. By default, ``infile`` with its last four
        characters replaced by ``.html``.

    compact : bool (default=True)
        If true, runs of whitespace in the page are collapsed and blanks
        next to ``>`` are removed.

    Notes
    -----
    A block whose two alignment rows differ in length is reported through
    ``log.warning`` and left out of the output; reading resumes one line
    further down.
    """
    util.setdefaults(kw,
                     template=False,
                     css=False,
                     comment='#',
                     filename=infile[:-4] + '.html',
                     compact=True)

    template = util.read_text_file(kw['template'] or template_path('psa.html'))
    css = util.read_text_file(kw['css'] or template_path('psa.css'))

    data = [
        line for line in util.read_text_file(infile, lines=True)
        if not line.startswith(kw['comment'])
    ]

    seq_ids = []
    taxa = []
    alignments = []

    # the first non-comment line is not part of any alignment block
    del data[0]

    i = 0
    while i <= len(data) - 3:
        datA = data[i + 1].split('\t')
        datB = data[i + 2].split('\t')
        almA = [str(a) for a in datA[1:]]
        almB = [str(b) for b in datB[1:]]

        # Validate before storing anything, so that a miscoded block never
        # ends up in the output. An explicit comparison is used instead of
        # ``assert`` because assertions are stripped when running with -O.
        if len(almA) != len(almB):
            log.warning(
                "Line {0} of the data is probably miscoded.".format(i + 1))
            i += 1
            continue

        seq_ids.append(data[i])
        taxa.append((datA[0].strip('.'), datB[0].strip('.')))
        alignments.append((almA, almB))
        i += 4

    # class names that are spelled differently in the stylesheet
    renamed = {
        'dolgo__': 'dolgo_X',
        'dolgo_1': 'dolgo_TONE',
        'dolgo_0': 'dolgo_ERROR',
    }

    def get_classes(alm):
        """Render every cell of one alignment row as a classed ``div``."""
        residue = '<div class="residue {1}">{0}</div>'
        cells = []
        for char in alm:
            if char == '-':
                d = 'dolgo_GAP'
            else:
                d = 'dolgo_' + token2class(char, rcParams['dolgo'])
                d = renamed.get(d, d)
            cells.append(residue.format(char, d))
        return ''.join(cells)

    head = (
        '<tr class="head">'
        '<td colspan=2 class="head"><b>Alignment {0}:</b> <i>{1}</i>, PID: {2}</td></tr>'
    )
    row = (
        '<tr class="psa">'
        '<td class="taxon">{0}</td>'
        '<td class="psa">{1}</td>'
        '</tr>'
    )

    parts = ['<table>\n']
    for i, (a, b) in enumerate(alignments):
        # percentage identity, rounded to the nearest integer
        ids = int(100 * pid(a, b) + 0.5)

        parts.append(head.format(i + 1, seq_ids[i], ids))
        parts.append(row.format(taxa[i][0], get_classes(a)))
        parts.append(row.format(taxa[i][1], get_classes(b)))
        parts.append('<tr><td colspan=2></td></tr>')
    parts.append('</table>')

    html = template.format(alignments=''.join(parts), css=css)

    if kw['compact']:
        html = html.replace('\n', ' ')
        html = re.sub(r'\s+', r' ', html)
        html = html.replace('> ', '>')
        html = html.replace(' >', '>')

    util.write_text_file(kw['filename'], html)

コード例 #6

0

ファイルを表示

ファイル: html.py プロジェクト: vermillionbee/lingpy

def msa2tex(infile, template='', filename='', **keywords):
    """
    Convert an MSA to a tabular representation which can easily be used in
    LaTeX documents.

    Parameters
    ----------
    infile : str
        Passed to ``read_msa`` in order to load the alignment.

    template : str (default="")
        Path of the LaTeX template. If empty, ``template_path('msa.tex')`` is
        used. The placeholders ``<+WIDTH+>``, ``<+HEIGHT+>``, ``<+NEWX+>``,
        ``<+NEWY+>`` and ``<+CONTENT+>`` are replaced in the template.

    filename : str (default="")
        Name of the output file without extension; ``.tex`` is appended. If
        empty, ``rcParams['filename']`` is used, which is the same fallback
        that ``msa2html`` applies.

    keywords
        Accepted for backward compatibility. ``pid_mode`` and ``pid_score``
        do not influence the generated table.
    """
    # load msa
    msa = read_msa(infile)

    # load templates
    tex = util.read_text_file(template or template_path('msa.tex'))

    # determine the length of the longest taxon
    taxl = max([len(t) for t in msa['taxa']])

    height = len(msa['alignment'])
    width = len(msa['alignment'][0])

    # collect the columns that take part in a swap
    swaps = []
    if 'swaps' in msa:
        for s in msa['swaps']:
            swaps.extend(s)

    lines = [
        r'\tabular{l' + width * 'c' + '}\n',
        r'\bf\ttfamily Taxon & \multicolumn{' + str(width)
        + r'}{l}{\bf\ttfamily Alignment}\\' + '\n',
    ]

    for i, taxon in enumerate(msa['taxa']):
        cells = [r'\ttfamily ' + taxon.replace('_', r'\_')]
        for j, char in enumerate(msa['alignment'][i]):
            is_gap = char == '-'
            cls = 'X' if is_gap else token2class(char, rcParams['dolgo'])

            # an underscore cell is printed as an escaped hash sign, and an
            # underscore class is mapped to the color name suffix "2"
            if char == '_':
                char = r'\#'
            if cls == '_':
                cls = '2'

            if j not in swaps:
                cells.append(r'&\cellcolor{col' + cls + r'}' + char)
            elif not is_gap:
                cells.append(
                    r'&\cellcolor{col' + cls + r'}\color{white}\bf ' + char)
            else:
                cells.append(r'&\cellcolor{col' + cls + r'}\bf ' + char)
        cells.append(r'\\' + '\n')
        lines.append(''.join(cells))

    # closing row of white filler text, one cell per alignment column
    lines.append(
        r'&' + '&'.join([r'\color{white}XXX'] * width) + r'\\' + '\n')
    lines.append(r'\endtabular' + '\n')
    body = ''.join(lines)

    # create the parameters etc.
    w = 1.5 * width + taxl * 0.25
    h = 0.5 * height + 1.0

    # two decimals, like the NEWX/NEWY values below
    tex = tex.replace('<+WIDTH+>', '{0:.2f}'.format(w))
    tex = tex.replace('<+HEIGHT+>', '{0:.2f}'.format(h))

    # create the rput stuff
    tex = tex.replace('<+NEWX+>', '{0:.2f}'.format(w / 2.0))
    tex = tex.replace('<+NEWY+>', '{0:.2f}'.format((h - 0.5) / 2.0))

    # insert the rest
    tex = tex.replace('<+CONTENT+>', body)

    # write to file
    if not filename:
        filename = rcParams['filename']

    util.write_text_file(filename + '.tex', tex)

コード例 #7

0

ファイルを表示

ファイル: html.py プロジェクト: vermillionbee/lingpy

def msa2html(msa, shorttitle='', filename='', template='', **keywords):
    """
    Convert files in ``msa``-format into colored ``html``-format.

    Parameters
    ----------
    msa : str
        Name of the file that contains the alignment; it is loaded with
        ``read_msa``. Passing anything but a string raises a ``ValueError``.

    shorttitle : str
        Define the shorttitle of the ``html``-page. If no title is provided,
        the default title ``SCA`` will be used.

    filename : str (default="")
        Define the name of the output file. If no name is defined,
        ``rcParams['filename']`` is used. The extension ``.html`` is added
        when missing.

    template : str (default="")
        The path to the template file. If no name is defined, the basic
        template ``msa2html.html`` will be used. The value ``"js"`` selects
        the template ``msa2html.js.html``.

    pid_mode : int (default=1)
        The ``mode`` that is passed to ``pid`` when the score is computed.

    pid_score : int
        A precomputed score. If given, no score is computed.

    css, js : str (default=False)
        Paths of the stylesheet and the script file. The templates
        ``msa.css`` and ``msa.js`` are used if not given.

    compact : bool (default=False)
        If true, runs of whitespace in the page are collapsed.

    class_sort : bool (default=True)
        If true, sequences are numbered in the order of their ``asjp`` class
        strings, otherwise in the order of the sequences themselves.

    write_to_file : bool (default=True)
        If true, the page is written to ``filename``, otherwise it is
        returned.

    Returns
    -------
    html : str
        The page, only if ``write_to_file`` is false. Otherwise, nothing is
        returned.

    Raises
    ------
    ValueError
        If ``msa`` is not a string.

    Examples
    --------
    Load the library.

    >>> from lingpy import *

    Load an ``msq``-file from the test-sets.

    >>> msa = MSA('harry.msq')

    Align the data progressively and carry out a check for swapped sites.

    >>> msa.prog_align()
    >>> msa.swap_check()
    >>> print(msa)
    w    o    l    -    d    e    m    o    r    t
    w    a    l    -    d    e    m    a    r    -
    v    -    l    a    d    i    m    i    r    -

    Save the data to the file ``harry.msa``.

    >>> msa.output('msa',filename='harry')

    Save the ``msa``-object as ``html``.

    >>> msa.output('html',filename='harry')

    Notes
    -----
    All keywords, including the defaults listed above, are passed on to
    ``read_msa``.

    See also
    --------
    lingpy.convert.html.alm2html
    """
    util.setdefaults(
        keywords,
        pid_mode=1,
        stress=rcParams['stress'],
        css=False,
        js=False,
        compact=False,
        class_sort=True,
        write_to_file=True,
    )

    # load templates
    template = template or template_path('msa2html.html')
    if template == 'js':
        template = template_path('msa2html.js.html')
    html = util.read_text_file(template)
    css = util.read_text_file(keywords['css'] or template_path('msa.css'))
    js = util.read_text_file(keywords['js'] or template_path('msa.js'))

    # the alignment is always loaded from a file
    if not isinstance(msa, string_types):
        raise ValueError('[!] No filename specified.')
    msa = read_msa(msa, **keywords)

    # load dataset, etc.
    dataset = msa['dataset']

    # calculate pid score, if it is not passed as argument
    if 'pid_score' in keywords:
        pid_score = keywords['pid_score']
    else:
        rows = msa['alignment']
        scores = [
            pid(rows[i], rows[j], mode=keywords['pid_mode'])
            for i in range(len(rows))
            for j in range(i + 1, len(rows))
        ]
        if scores:
            pid_score = int(100 * sum(scores) / len(scores) + 0.5)
        else:
            # An alignment with a single row has no pairs to average over.
            # Report full identity instead of dividing by zero.
            pid_score = 100

    infile = msa['infile']
    seq_id = msa['seq_id']

    # define the titles etc.
    if not shorttitle:
        shorttitle = 'SCA'

    # determine the length of the longest taxon
    taxl = max([len(t) for t in msa['taxa']])

    # format css file
    css = css.replace('TAXON_LENGTH', str(taxl * 10))

    tr = '<tr class="msa" unique="{1}" taxon={2} sequence={3}>{0}</tr>\n'
    td_taxon = '<td class="taxon">{0}</td>'
    td_residue = '<td class="residue {1}">{0}</td>'
    td_swap = '<td class="residue swap {1}">{0}</td>'
    td_unaligned = '<td class="residue noalign {1}">{0}</td>'

    # collect the columns that take part in a swap
    swaps = []
    if 'swaps' in msa:
        for s in msa['swaps']:
            swaps.extend(s)

    # columns marked with "*" are rendered as aligned; if the msa lists
    # local columns, only those are marked
    local = ['*'] * len(msa['alignment'][0])
    if 'local' in msa:
        local = ['.'] * len(msa['alignment'][0])
        for i in msa['local']:
            local[i] = '*'

    # number the sequences, either by their class strings or by themselves
    if keywords['class_sort']:
        classes = [
            tokens2class(ipa2tokens(seq), rcParams['asjp'])
            for seq in msa['seqs']
        ]
        ranked = sorted(zip(classes, msa['seqs']), key=lambda x: x[0])
        seqs = dict(
            (seq, rank) for rank, (_, seq) in enumerate(ranked, 1))
    else:
        seqs = dict(zip(sorted(msa['seqs']), range(1, len(msa['seqs']) + 1)))
    taxa = dict(zip(sorted(msa['taxa']), range(1, len(msa['taxa']) + 1)))

    # class names that are spelled differently in the stylesheet
    renamed = {
        'dolgo__': 'dolgo_X',
        'dolgo_1': 'dolgo_TONE',
        'dolgo_0': 'dolgo_ERROR',
    }

    # alignments that were already rendered, to flag repeated ones
    seen = set()

    table = []
    for i, taxon in enumerate(msa['taxa']):
        cells = [td_taxon.format(taxon)]

        alignment = ''.join(msa['alignment'][i])
        sequence = msa['seqs'][i]
        if alignment in seen:
            unique = 'false'
        else:
            unique = 'true'
            seen.add(alignment)

        for j, char in enumerate(msa['alignment'][i]):
            if char == '-':
                d = 'dolgo_GAP'
            else:
                d = 'dolgo_' + token2class(char, rcParams['dolgo'])
                d = renamed.get(d, d)

            if j in swaps:
                cells.append(td_swap.format(char, d))
            elif local[j] != '*':
                cells.append(td_unaligned.format(char, d))
            else:
                cells.append(td_residue.format(char, d))
        table.append(
            tr.format(''.join(cells), unique, taxa[taxon], seqs[sequence]))

    html = html.format(
        table=''.join(table),
        dataset=dataset,
        pid=pid_score,
        file=infile,
        sequence=seq_id,
        shorttitle=shorttitle,
        width=len(msa['alignment'][0]),
        table_width='{0}'.format(len(msa['alignment'][0]) * 50 + 8 * taxl),
        taxa=len(msa['alignment']),
        uniseqs=len(set(msa['seqs'])),
        css=css,
        js=js)

    if not filename:
        filename = rcParams['filename']

    if not filename.endswith('.html'):
        filename = filename + '.html'

    if keywords['compact']:
        html = html.replace('\n', ' ')
        html = re.sub(r'\s+', r' ', html)
        html = html.replace('> ', '>')
        html = html.replace(' >', '>')

    if keywords['write_to_file']:
        util.write_text_file(filename, html)
    else:
        return html

コード例 #8

0

ファイルを表示

ファイル: html.py プロジェクト: LinguList/lingpy

def psa2html(infile, **kw):
    """
    Convert a PSA-file into colored html-format.

    Lines of ``infile`` that start with the comment marker are dropped, and so
    is the first remaining line. The rest is read in steps of four lines: an
    identifier line, two tab-separated lines (taxon name first, alignment
    cells after it), and one further line that is skipped.

    Parameters
    ----------
    infile : str
        Path of the PSA-file.

    template : str (default=False)
        Path of the html template. If not given,
        ``template_path('psa.html')`` is used. The template is filled with
        the fields ``alignments`` and ``css``.

    css : str (default=False)
        Path of the stylesheet. If not given, ``template_path('psa.css')`` is
        used.

    comment : str (default="#")
        Lines starting with this string are ignored.

    filename : str
        Name of the output file. By default, ``infile`` with its last four
        characters replaced by ``.html``.

    compact : bool (default=True)
        If true, runs of whitespace in the page are collapsed and blanks
        next to ``>`` are removed.

    Notes
    -----
    A block whose two alignment rows differ in length is reported through
    ``log.warning`` and left out of the output; reading resumes one line
    further down.
    """
    util.setdefaults(
        kw,
        template=False,
        css=False,
        comment='#',
        filename=infile[:-4] + '.html',
        compact=True)

    template = util.read_text_file(kw['template'] or template_path('psa.html'))
    css = util.read_text_file(kw['css'] or template_path('psa.css'))

    data = [
        line for line in util.read_text_file(infile, lines=True)
        if not line.startswith(kw['comment'])
    ]

    seq_ids = []
    taxa = []
    alignments = []

    # the first non-comment line is not part of any alignment block
    del data[0]

    i = 0
    while i <= len(data) - 3:
        datA = data[i + 1].split('\t')
        datB = data[i + 2].split('\t')
        almA = [str(a) for a in datA[1:]]
        almB = [str(b) for b in datB[1:]]

        # Validate before storing anything, so that a miscoded block never
        # ends up in the output. An explicit comparison is used instead of
        # ``assert`` because assertions are stripped when running with -O.
        if len(almA) != len(almB):
            log.warning(
                "Line {0} of the data is probably miscoded.".format(i + 1))
            i += 1
            continue

        seq_ids.append(data[i])
        taxa.append((datA[0].strip('.'), datB[0].strip('.')))
        alignments.append((almA, almB))
        i += 4

    # class names that are spelled differently in the stylesheet
    renamed = {
        'dolgo__': 'dolgo_X',
        'dolgo_1': 'dolgo_TONE',
        'dolgo_0': 'dolgo_ERROR',
    }

    def get_classes(alm):
        """Render every cell of one alignment row as a classed ``div``."""
        residue = '<div class="residue {1}">{0}</div>'
        cells = []
        for char in alm:
            if char == '-':
                d = 'dolgo_GAP'
            else:
                d = 'dolgo_' + token2class(char, rcParams['dolgo'])
                d = renamed.get(d, d)
            cells.append(residue.format(char, d))
        return ''.join(cells)

    head = (
        '<tr class="head">'
        '<td colspan=2 class="head"><b>Alignment {0}:</b> <i>{1}</i>, PID: {2}</td></tr>'
    )
    row = (
        '<tr class="psa">'
        '<td class="taxon">{0}</td>'
        '<td class="psa">{1}</td>'
        '</tr>'
    )

    parts = ['<table>\n']
    for i, (a, b) in enumerate(alignments):
        # percentage identity, rounded to the nearest integer
        ids = int(100 * pid(a, b) + 0.5)

        parts.append(head.format(i + 1, seq_ids[i], ids))
        parts.append(row.format(taxa[i][0], get_classes(a)))
        parts.append(row.format(taxa[i][1], get_classes(b)))
        parts.append('<tr><td colspan=2></td></tr>')
    parts.append('</table>')

    html = template.format(alignments=''.join(parts), css=css)

    if kw['compact']:
        html = html.replace('\n', ' ')
        html = re.sub(r'\s+', r' ', html)
        html = html.replace('> ', '>')
        html = html.replace(' >', '>')

    util.write_text_file(kw['filename'], html)

コード例 #9

0

ファイルを表示

ファイル: html.py プロジェクト: LinguList/lingpy

def msa2tex(
    infile,
    template='',
    filename='',
    **keywords
):
    """
    Convert an MSA to a tabular representation which can easily be used in
    LaTeX documents.

    Parameters
    ----------
    infile : str
        Passed to ``read_msa`` in order to load the alignment.

    template : str (default="")
        Path of the LaTeX template. If empty, ``template_path('msa.tex')`` is
        used. The placeholders ``<+WIDTH+>``, ``<+HEIGHT+>``, ``<+NEWX+>``,
        ``<+NEWY+>`` and ``<+CONTENT+>`` are replaced in the template.

    filename : str (default="")
        Name of the output file without extension; ``.tex`` is appended. If
        empty, ``rcParams['filename']`` is used, which is the same fallback
        that ``msa2html`` applies.

    keywords
        Accepted for backward compatibility. ``pid_mode`` and ``pid_score``
        do not influence the generated table.
    """
    # load msa
    msa = read_msa(infile)

    # load templates
    tex = util.read_text_file(template or template_path('msa.tex'))

    # determine the length of the longest taxon
    taxl = max([len(t) for t in msa['taxa']])

    height = len(msa['alignment'])
    width = len(msa['alignment'][0])

    # collect the columns that take part in a swap
    swaps = []
    if 'swaps' in msa:
        for s in msa['swaps']:
            swaps.extend(s)

    lines = [
        r'\tabular{l' + width * 'c' + '}\n',
        r'\bf\ttfamily Taxon & \multicolumn{' + str(width)
        + r'}{l}{\bf\ttfamily Alignment}\\' + '\n',
    ]

    for i, taxon in enumerate(msa['taxa']):
        cells = [r'\ttfamily ' + taxon.replace('_', r'\_')]
        for j, char in enumerate(msa['alignment'][i]):
            is_gap = char == '-'
            cls = 'X' if is_gap else token2class(char, rcParams['dolgo'])

            # an underscore cell is printed as an escaped hash sign, and an
            # underscore class is mapped to the color name suffix "2"
            if char == '_':
                char = r'\#'
            if cls == '_':
                cls = '2'

            if j not in swaps:
                cells.append(r'&\cellcolor{col' + cls + r'}' + char)
            elif not is_gap:
                cells.append(
                    r'&\cellcolor{col' + cls + r'}\color{white}\bf ' + char)
            else:
                cells.append(r'&\cellcolor{col' + cls + r'}\bf ' + char)
        cells.append(r'\\' + '\n')
        lines.append(''.join(cells))

    # closing row of white filler text, one cell per alignment column
    lines.append(
        r'&' + '&'.join([r'\color{white}XXX'] * width) + r'\\' + '\n')
    lines.append(r'\endtabular' + '\n')
    body = ''.join(lines)

    # create the parameters etc.
    w = 1.5 * width + taxl * 0.25
    h = 0.5 * height + 1.0

    # two decimals, like the NEWX/NEWY values below
    tex = tex.replace('<+WIDTH+>', '{0:.2f}'.format(w))
    tex = tex.replace('<+HEIGHT+>', '{0:.2f}'.format(h))

    # create the rput stuff
    tex = tex.replace('<+NEWX+>', '{0:.2f}'.format(w / 2.0))
    tex = tex.replace('<+NEWY+>', '{0:.2f}'.format((h - 0.5) / 2.0))

    # insert the rest
    tex = tex.replace('<+CONTENT+>', body)

    # write to file
    if not filename:
        filename = rcParams['filename']

    util.write_text_file(filename + '.tex', tex)

コード例 #10

0

ファイルを表示

ファイル: html.py プロジェクト: LinguList/lingpy

def msa2html(
    msa,
    shorttitle='',
    filename='',
    template='',
    **keywords
):
    """
    Convert files in ``msa``-format into colored ``html``-format.

    Parameters
    ----------
    msa : str
        Name of the file that contains the alignment; it is loaded with
        ``read_msa``. Passing anything but a string raises a ``ValueError``.

    shorttitle : str
        Define the shorttitle of the ``html``-page. If no title is provided,
        the default title ``SCA`` will be used.

    filename : str (default="")
        Define the name of the output file. If no name is defined,
        ``rcParams['filename']`` is used. The extension ``.html`` is added
        when missing.

    template : str (default="")
        The path to the template file. If no name is defined, the basic
        template ``msa2html.html`` will be used. The value ``"js"`` selects
        the template ``msa2html.js.html``.

    pid_mode : int (default=1)
        The ``mode`` that is passed to ``pid`` when the score is computed.

    pid_score : int
        A precomputed score. If given, no score is computed.

    css, js : str (default=False)
        Paths of the stylesheet and the script file. The templates
        ``msa.css`` and ``msa.js`` are used if not given.

    compact : bool (default=False)
        If true, runs of whitespace in the page are collapsed.

    class_sort : bool (default=True)
        If true, sequences are numbered in the order of their ``asjp`` class
        strings, otherwise in the order of the sequences themselves.

    write_to_file : bool (default=True)
        If true, the page is written to ``filename``, otherwise it is
        returned.

    Returns
    -------
    html : str
        The page, only if ``write_to_file`` is false. Otherwise, nothing is
        returned.

    Raises
    ------
    ValueError
        If ``msa`` is not a string.

    Examples
    --------
    Load the library.

    >>> from lingpy import *

    Load an ``msq``-file from the test-sets.

    >>> msa = MSA('harry.msq')

    Align the data progressively and carry out a check for swapped sites.

    >>> msa.prog_align()
    >>> msa.swap_check()
    >>> print(msa)
    w    o    l    -    d    e    m    o    r    t
    w    a    l    -    d    e    m    a    r    -
    v    -    l    a    d    i    m    i    r    -

    Save the data to the file ``harry.msa``.

    >>> msa.output('msa',filename='harry')

    Save the ``msa``-object as ``html``.

    >>> msa.output('html',filename='harry')

    Notes
    -----
    All keywords, including the defaults listed above, are passed on to
    ``read_msa``.

    See also
    --------
    lingpy.convert.html.alm2html
    """
    util.setdefaults(
        keywords,
        pid_mode=1,
        stress=rcParams['stress'],
        css=False,
        js=False,
        compact=False,
        class_sort=True,
        write_to_file=True,
    )

    # load templates
    template = template or template_path('msa2html.html')
    if template == 'js':
        template = template_path('msa2html.js.html')
    html = util.read_text_file(template)
    css = util.read_text_file(keywords['css'] or template_path('msa.css'))
    js = util.read_text_file(keywords['js'] or template_path('msa.js'))

    # the alignment is always loaded from a file
    if not isinstance(msa, string_types):
        raise ValueError('[!] No filename specified.')
    msa = read_msa(msa, **keywords)

    # load dataset, etc.
    dataset = msa['dataset']

    # calculate pid score, if it is not passed as argument
    if 'pid_score' in keywords:
        pid_score = keywords['pid_score']
    else:
        rows = msa['alignment']
        scores = [
            pid(rows[i], rows[j], mode=keywords['pid_mode'])
            for i in range(len(rows))
            for j in range(i + 1, len(rows))
        ]
        if scores:
            pid_score = int(100 * sum(scores) / len(scores) + 0.5)
        else:
            # An alignment with a single row has no pairs to average over.
            # Report full identity instead of dividing by zero.
            pid_score = 100

    infile = msa['infile']
    seq_id = msa['seq_id']

    # define the titles etc.
    if not shorttitle:
        shorttitle = 'SCA'

    # determine the length of the longest taxon
    taxl = max([len(t) for t in msa['taxa']])

    # format css file
    css = css.replace('TAXON_LENGTH', str(taxl * 10))

    tr = '<tr class="msa" unique="{1}" taxon={2} sequence={3}>{0}</tr>\n'
    td_taxon = '<td class="taxon">{0}</td>'
    td_residue = '<td class="residue {1}">{0}</td>'
    td_swap = '<td class="residue swap {1}">{0}</td>'
    td_unaligned = '<td class="residue noalign {1}">{0}</td>'

    # collect the columns that take part in a swap
    swaps = []
    if 'swaps' in msa:
        for s in msa['swaps']:
            swaps.extend(s)

    # columns marked with "*" are rendered as aligned; if the msa lists
    # local columns, only those are marked
    local = ['*'] * len(msa['alignment'][0])
    if 'local' in msa:
        local = ['.'] * len(msa['alignment'][0])
        for i in msa['local']:
            local[i] = '*'

    # number the sequences, either by their class strings or by themselves
    if keywords['class_sort']:
        classes = [
            tokens2class(ipa2tokens(seq), rcParams['asjp'])
            for seq in msa['seqs']
        ]
        ranked = sorted(zip(classes, msa['seqs']), key=lambda x: x[0])
        seqs = dict(
            (seq, rank) for rank, (_, seq) in enumerate(ranked, 1))
    else:
        seqs = dict(zip(sorted(msa['seqs']), range(1, len(msa['seqs']) + 1)))
    taxa = dict(zip(sorted(msa['taxa']), range(1, len(msa['taxa']) + 1)))

    # class names that are spelled differently in the stylesheet
    renamed = {
        'dolgo__': 'dolgo_X',
        'dolgo_1': 'dolgo_TONE',
        'dolgo_0': 'dolgo_ERROR',
    }

    # alignments that were already rendered, to flag repeated ones
    seen = set()

    table = []
    for i, taxon in enumerate(msa['taxa']):
        cells = [td_taxon.format(taxon)]

        alignment = ''.join(msa['alignment'][i])
        sequence = msa['seqs'][i]
        if alignment in seen:
            unique = 'false'
        else:
            unique = 'true'
            seen.add(alignment)

        for j, char in enumerate(msa['alignment'][i]):
            if char == '-':
                d = 'dolgo_GAP'
            else:
                d = 'dolgo_' + token2class(char, rcParams['dolgo'])
                d = renamed.get(d, d)

            if j in swaps:
                cells.append(td_swap.format(char, d))
            elif local[j] != '*':
                cells.append(td_unaligned.format(char, d))
            else:
                cells.append(td_residue.format(char, d))
        table.append(
            tr.format(''.join(cells), unique, taxa[taxon], seqs[sequence]))

    html = html.format(
        table=''.join(table),
        dataset=dataset,
        pid=pid_score,
        file=infile,
        sequence=seq_id,
        shorttitle=shorttitle,
        width=len(msa['alignment'][0]),
        table_width='{0}'.format(len(msa['alignment'][0]) * 50 + 8 * taxl),
        taxa=len(msa['alignment']),
        uniseqs=len(set(msa['seqs'])),
        css=css,
        js=js
    )

    if not filename:
        filename = rcParams['filename']

    if not filename.endswith('.html'):
        filename = filename + '.html'

    if keywords['compact']:
        html = html.replace('\n', ' ')
        html = re.sub(r'\s+', r' ', html)
        html = html.replace('> ', '>')
        html = html.replace(' >', '>')

    if keywords['write_to_file']:
        util.write_text_file(filename, html)
    else:
        return html