Python matrix2dst 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: lingpy.convert.strings

메소드/함수: matrix2dst

hotexamples.com에서의 예제들: 7

Python matrix2dst - 예제 7개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python lingpy.convert.strings.matrix2dst의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_strings.py 프로젝트: anukat2015/lingpy

    def test_matrix2dst(self):
        """Check matrix2dst with default taxa, a stamp, padded names and tab output."""
        dist = lingpy.algorithm.squareform([0.5, 0.75, 0.8])

        # these labels follow the same format as the default taxon names
        names = ['t_1', 't_2', 't_3']
        stamp = '# Written with joy.'

        # passing the labels explicitly must not change the output
        assert matrix2dst(dist, taxa=names) == matrix2dst(dist)

        # the same must hold when a stamp is supplied
        assert (
            matrix2dst(dist, taxa=names, stamp=stamp)
            == matrix2dst(dist, stamp=stamp))

        # a larger taxlen has to show up as a long run of blanks
        padded_20 = matrix2dst(dist, taxa=names, taxlen=20)
        padded_30 = matrix2dst(dist, taxlen=30)
        assert ' ' * 18 in padded_20
        assert ' ' * 28 in padded_30

        # check for tab-stop output when taxlen is set to 0
        tab_count = matrix2dst(dist, taxlen=0).count('\t')
        self.assertEqual(tab_count, 9)

예제 #2

파일 보기

def test_matrix2dst():
    """Check matrix2dst with default taxa, a stamp, padded names and tab output."""
    distances = squareform([0.5, 0.75, 0.8])

    # these labels follow the same format as the default taxon names
    labels = ['t_1', 't_2', 't_3']
    note = '# Written with joy.'

    # explicit labels and default labels must yield the same text
    explicit = matrix2dst(distances, taxa=labels)
    implicit = matrix2dst(distances)
    assert explicit == implicit

    # ... also when a stamp is supplied
    explicit_stamped = matrix2dst(distances, taxa=labels, stamp=note)
    implicit_stamped = matrix2dst(distances, stamp=note)
    assert explicit_stamped == implicit_stamped

    # a larger taxlen has to show up as a long run of blanks
    wide = matrix2dst(distances, taxa=labels, taxlen=20)
    wider = matrix2dst(distances, taxlen=30)
    assert ' ' * 18 in wide
    assert ' ' * 28 in wider

    # check for tab-stop output when taxlen is set to 0
    tabbed = matrix2dst(distances, taxlen=0)
    assert tabbed.count('\t') == 9

예제 #3

파일 보기

파일: test_strings.py 프로젝트: tjade273/lingpy

    def test_matrix2dst(self):
        """Check matrix2dst with default taxa, a stamp, padded names and tab output."""
        square = lingpy.algorithm.squareform([0.5, 0.75, 0.8])

        # these labels follow the same format as the default taxon names
        taxon_names = ['t_1', 't_2', 't_3']
        joy = '# Written with joy.'

        # explicit labels vs. default labels
        named = matrix2dst(square, taxa=taxon_names)
        unnamed = matrix2dst(square)
        assert named == unnamed

        # explicit labels vs. default labels, with a stamp
        named_stamped = matrix2dst(square, taxa=taxon_names, stamp=joy)
        unnamed_stamped = matrix2dst(square, stamp=joy)
        assert named_stamped == unnamed_stamped

        # a larger taxlen has to show up as a long run of blanks
        len20 = matrix2dst(square, taxa=taxon_names, taxlen=20)
        len30 = matrix2dst(square, taxlen=30)
        assert ' ' * 18 in len20
        assert ' ' * 28 in len30

        # check for tab-stop output when taxlen is set to 0
        number_of_tabs = matrix2dst(square, taxlen=0).count('\t')
        self.assertEqual(number_of_tabs, 9)

예제 #4

파일 보기

파일: wordlist.py 프로젝트: kadster/lingpy

    def _output(self, fileformat, **keywords):
        """
        Write the wordlist to file in the format selected by ``fileformat``.

        Internal function that eases its modification by daughter classes.

        Each ``if`` block below handles one group of format names and returns
        as soon as it has produced its output. The 'separated' branch, and any
        ``fileformat`` that matches no branch, falls off the end and returns
        None.

        Parameters
        ----------
        fileformat : str
            Name of the output format, e.g. 'tsv', 'dst', 'tre', 'paps.nex'.
        **keywords
            Output options. The option names and their fallback values are
            listed in the ``util.setdefaults`` call below; 'taxlen',
            'cognates' and 'ignore_keys' are optional extras that are only
            looked up in the branches that use them.

        Raises
        ------
        ValueError
            In the subset branch of the csv/qlc/tsv output, if 'cols' is not a
            list, tuple or bool, or if 'rows' is not a dict or bool.
        """
        # check for stamp attribute
        # NOTE: this unconditionally replaces any 'stamp' passed by the caller
        keywords["stamp"] = getattr(self, '_stamp', '')

        # add the default parameters, they will be checked against the keywords
        # (presumably only keys that are still missing are filled in -- see
        # util.setdefaults to confirm)
        util.setdefaults(
            keywords,
            cols=False,
            distances=False,
            entries=("concept", "counterpart"),
            entry='concept',
            fileformat=fileformat,
            filename=rcParams['filename'],
            formatter='concept',
            modify_ref=False,
            meta=self._meta,
            missing=0,
            prettify='false',
            ignore='all',
            ref='cogid',
            rows=False,
            subset=False,  # setup a subset of the data,
            taxa='taxa',
            threshold=0.6,  # threshold for flat clustering
            tree_calc='neighbor')

        # triple output: the format name itself is used as the file extension
        if fileformat in ['triple', 'triples', 'triples.tsv']:
            return tsv2triple(self, keywords['filename'] + '.' + fileformat)

        # paps output, either as nexus or as csv; only the nexus writer is
        # handed the 'missing' value
        if fileformat in ['paps.nex', 'paps.csv']:
            paps = self.get_paps(
                ref=keywords['ref'], entry=keywords['entry'], missing=keywords['missing'])
            kw = dict(filename=keywords['filename'] + '.paps')
            if fileformat == 'paps.nex':
                kw['missing'] = keywords['missing']
                return pap2nex(self.cols, paps, **kw)
            return pap2csv(self.cols, paps, **kw)

        # simple printing of taxa
        if fileformat == 'taxa':
            # NOTE(review): the assert checks for a 'taxa' attribute, but the
            # content that is written is self.cols; asserts are also skipped
            # when Python runs with -O
            assert hasattr(self, 'taxa')
            return util.write_text_file(keywords['filename'] + '.taxa', self.cols)

        # csv-output
        if fileformat in ['csv', 'qlc', 'tsv']:

            # get the header line: the aliased names that are present in
            # self._header, ordered by the value stored for them there,
            # then upper-cased
            header = sorted(
                [s for s in set(self._alias.values()) if s in self._header],
                key=lambda x: self._header[x])
            header = [h.upper() for h in header]

            # keywords['meta'] defaults to self._meta (see above), so this
            # entry is visible to wl2qlc unless the caller passed its own meta
            self._meta.setdefault('taxa', self.cols)

            # get the data, in case a subset is chosen
            if not keywords['subset']:
                # write stuff to file
                return wl2qlc(header, self._data, **keywords)

            cols, rows = keywords['cols'], keywords['rows']

            # bool is accepted because both options default to False
            if not isinstance(cols, (list, tuple, bool)):
                raise ValueError("[i] Argument 'cols' should be list or tuple.")
            if not isinstance(rows, (dict, bool)):
                raise ValueError("[i] Argument 'rows' should be a dictionary.")

            # check for chosen header
            if cols:
                # get indices for header
                indices = [self._header[x] for x in cols]
                header = [c.upper() for c in cols]
            else:
                # no column selection: keep every column
                # NOTE(review): this reads self.header while the code above
                # reads self._header -- confirm both attributes exist
                indices = [r for r in range(len(self.header))]

            # Build one Python expression string per row condition. Each value
            # in `rows` is appended to either "key " or "line[<idx>] ", so it
            # has to be a string that completes a valid expression
            # (presumably something like "== 'x'" -- verify against callers).
            if rows:
                stmts = []
                for key, value in rows.items():
                    if key == 'ID':
                        stmts += ["key " + value]
                    else:
                        idx = self._header[key]
                        stmts += ["line[{0}] ".format(idx) + value]

            log.debug("calculated what should be excluded")

            # get the data
            out = {}
            for key, line in self._data.items():
                log.debug(key)

                if rows:
                    # SECURITY: eval() runs the strings taken from `rows` as
                    # Python code, with the loop variables `key` and `line` in
                    # scope. Never pass row conditions from untrusted input.
                    if eval(" and ".join(stmts)):
                        out[key] = [line[i] for i in indices]
                else:
                    out[key] = [line[i] for i in indices]

            log.debug("passing data to wl2qlc")
            return wl2qlc(header, out, **keywords)

        # output dst-format (phylip)
        if fileformat == 'dst':
            # check for distances as keyword
            # (the computed matrix is stored in self._meta and reused by
            # later calls)
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self, **keywords)

            # 'taxlen' has no entry in the defaults above, hence the .get()
            out = matrix2dst(self._meta['distances'], self.taxa,
                    stamp=keywords['stamp'], taxlen=keywords.get('taxlen', 0))
            return _write_file(keywords['filename'], out, fileformat)

        # output tre-format (newick)
        if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
            if 'tree' not in self._meta:
                # check for distances
                if 'distances' not in self._meta:
                    self._meta['distances'] = wl2dst(self)
                # we look up a function to calculate a tree in the cluster module:
                # NOTE(review): unlike the distances, the tree computed here
                # is not stored back into self._meta
                tree = getattr(cluster, keywords['tree_calc'])(
                    self._meta['distances'], self.cols, distances=keywords['distances'])
            else:
                tree = self._meta['tree']

            return _write_file(keywords['filename'], '{0}'.format(tree), fileformat)

        # flat clustering: one "taxon<TAB>group" line per taxon, sorted by taxon
        if fileformat in ['cluster', 'groups']:
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self)  # check for keywords

            if 'groups' not in self._meta:
                self._meta['groups'] = cluster.matrix2groups(
                    keywords['threshold'], self._meta['distances'], self.taxa)
            lines = []
            for taxon, group in sorted(self._meta['groups'].items(), key=lambda x: x[0]):
                lines.append('{0}\t{1}'.format(taxon, group))
            return _write_file(keywords['filename'], lines, fileformat)

        if fileformat in ['starling', 'star.csv']:
            # make lambda inline for data-check
            # (maps a cell equal to 0 to '-', leaves every other value as is)
            l = lambda x: ['-' if x == 0 else x][0]

            lines = []
            if 'cognates' not in keywords:
                # one column per taxon
                lines.append('ID\tConcept\t' + '\t'.join(self.taxa))
                for i, concept in enumerate(self.concepts):
                    for line in self.get_list(row=concept, entry=keywords['entry']):
                        lines.append(
                            str(i + 1) + '\t' + concept + '\t' + '\t'.join(
                                [l(t) for t in line]))
            else:
                # two columns per taxon: the entry and the value of the
                # column named by keywords['cognates']
                lines.append(
                    'ID\tConcept\t' + '\t'.join(
                        ['{0}\t COG'.format(t) for t in self.taxa]))
                for i, concept in enumerate(self.concepts):
                    cogs = self.get_list(row=concept, entry=keywords['cognates'])
                    for j, line in enumerate(
                            self.get_list(row=concept, entry=keywords['entry'])):
                        part = '\t'.join(
                            '{0}\t{1}'.format(l(a), b) for a, b in zip(line, cogs[j]))
                        lines.append(util.tabjoin(i + 1, concept, part))

            # the third argument carries the entry name: 'starling_<entry>.csv'
            return _write_file(
                keywords['filename'], lines, 'starling_' + keywords['entry'] + '.csv')

        if fileformat == 'multistate.nex':
            # append the suffix only if the filename does not already end in it
            if not keywords['filename'].endswith('.multistate.nex'):
                keywords['filename'] += '.multistate.nex'

            matrix = wl2multistate(self, keywords['ref'], keywords['missing'])
            return multistate2nex(self.taxa, matrix, keywords['filename'])

        # one output per entry of self.cols, written into a directory that is
        # named by keywords['filename'] and created if it does not exist
        if fileformat == 'separated':
            if not os.path.isdir(keywords['filename']):
                os.mkdir(keywords['filename'])

            for l in self.cols:
                # the mere presence of 'ignore_keys' (whatever its value)
                # drops the ID column from header and rows
                lines = [''] if 'ignore_keys' in keywords else ['ID\t']
                lines[0] += '\t'.join(x.upper() for x in keywords['entries'])
                for key in self.get_list(col=l, flat=True):
                    line = [] if 'ignore_keys' in keywords else [key]
                    for entry in keywords['entries']:
                        tmp = self[key, entry]
                        # list-valued cells are flattened to a space-separated string
                        if isinstance(tmp, list):
                            tmp = ' '.join([str(x) for x in tmp])
                        line += [tmp]
                    lines.append('\t'.join('{0}'.format(x) for x in line))
                _write_file('{0}/{1}'.format(keywords['filename'], l), lines, 'tsv')

예제 #5

파일 보기

파일: wordlist.py 프로젝트: LinguList/lingpy

    def _output(self, fileformat, **keywords):
        """
        Write the wordlist to file in the format selected by ``fileformat``.

        Internal function that eases its modification by daughter classes.

        Each ``if`` block below handles one group of format names and returns
        as soon as it has produced its output. The 'separated' branch, and any
        ``fileformat`` that matches no branch, falls off the end and returns
        None.

        Parameters
        ----------
        fileformat : str
            Name of the output format, e.g. 'tsv', 'dst', 'tre', 'paps.nex'.
        **keywords
            Output options. The option names and their fallback values are
            listed in the ``util.setdefaults`` call below; 'taxlen',
            'cognates' and 'ignore_keys' are optional extras that are only
            looked up in the branches that use them.

        Raises
        ------
        ValueError
            In the subset branch of the csv/qlc/tsv output, if 'cols' is not a
            list, tuple or bool, or if 'rows' is not a dict or bool.
        """
        # check for stamp attribute
        # NOTE: this unconditionally replaces any 'stamp' passed by the caller
        keywords["stamp"] = getattr(self, '_stamp', '')

        # add the default parameters, they will be checked against the keywords
        # (presumably only keys that are still missing are filled in -- see
        # util.setdefaults to confirm)
        util.setdefaults(
            keywords,
            cols=False,
            distances=False,
            entries=("concept", "counterpart"),
            entry='concept',
            fileformat=fileformat,
            filename=rcParams['filename'],
            formatter='concept',
            modify_ref=False,
            meta=self._meta,
            missing=0,
            prettify='false',
            ignore='all',
            ref='cogid',
            rows=False,
            subset=False,  # setup a subset of the data,
            taxa='taxa',
            threshold=0.6,  # threshold for flat clustering
            tree_calc='neighbor')

        # triple output: the format name itself is used as the file extension
        if fileformat in ['triple', 'triples', 'triples.tsv']:
            return tsv2triple(self, keywords['filename'] + '.' + fileformat)

        # paps output, either as nexus or as csv; only the nexus writer is
        # handed the 'missing' value
        if fileformat in ['paps.nex', 'paps.csv']:
            paps = self.get_paps(
                ref=keywords['ref'], entry=keywords['entry'], missing=keywords['missing'])
            kw = dict(filename=keywords['filename'] + '.paps')
            if fileformat == 'paps.nex':
                kw['missing'] = keywords['missing']
                return pap2nex(self.cols, paps, **kw)
            return pap2csv(self.cols, paps, **kw)

        # simple printing of taxa
        if fileformat == 'taxa':
            # NOTE(review): the assert checks for a 'taxa' attribute, but the
            # content that is written is self.cols; asserts are also skipped
            # when Python runs with -O
            assert hasattr(self, 'taxa')
            return util.write_text_file(keywords['filename'] + '.taxa', self.cols)

        # csv-output
        if fileformat in ['csv', 'qlc', 'tsv']:

            # get the header line: the aliased names that are present in
            # self._header, ordered by the value stored for them there,
            # then upper-cased
            header = sorted(
                [s for s in set(self._alias.values()) if s in self._header],
                key=lambda x: self._header[x])
            header = [h.upper() for h in header]

            # keywords['meta'] defaults to self._meta (see above), so this
            # entry is visible to wl2qlc unless the caller passed its own meta
            self._meta.setdefault('taxa', self.cols)

            # get the data, in case a subset is chosen
            if not keywords['subset']:
                # write stuff to file
                return wl2qlc(header, self._data, **keywords)

            cols, rows = keywords['cols'], keywords['rows']

            # bool is accepted because both options default to False
            if not isinstance(cols, (list, tuple, bool)):
                raise ValueError("[i] Argument 'cols' should be list or tuple.")
            if not isinstance(rows, (dict, bool)):
                raise ValueError("[i] Argument 'rows' should be a dictionary.")

            # check for chosen header
            if cols:
                # get indices for header
                indices = [self._header[x] for x in cols]
                header = [c.upper() for c in cols]
            else:
                # no column selection: keep every column
                # NOTE(review): this reads self.header while the code above
                # reads self._header -- confirm both attributes exist
                indices = [r for r in range(len(self.header))]

            # Build one Python expression string per row condition. Each value
            # in `rows` is appended to either "key " or "line[<idx>] ", so it
            # has to be a string that completes a valid expression
            # (presumably something like "== 'x'" -- verify against callers).
            if rows:
                stmts = []
                for key, value in rows.items():
                    if key == 'ID':
                        stmts += ["key " + value]
                    else:
                        idx = self._header[key]
                        stmts += ["line[{0}] ".format(idx) + value]

            log.debug("calculated what should be excluded")

            # get the data
            out = {}
            for key, line in self._data.items():
                log.debug(key)

                if rows:
                    # SECURITY: eval() runs the strings taken from `rows` as
                    # Python code, with the loop variables `key` and `line` in
                    # scope. Never pass row conditions from untrusted input.
                    if eval(" and ".join(stmts)):
                        out[key] = [line[i] for i in indices]
                else:
                    out[key] = [line[i] for i in indices]

            log.debug("passing data to wl2qlc")
            return wl2qlc(header, out, **keywords)

        # output dst-format (phylip)
        if fileformat == 'dst':
            # check for distances as keyword
            # (the computed matrix is stored in self._meta and reused by
            # later calls)
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self, **keywords)

            # 'taxlen' has no entry in the defaults above, hence the .get()
            out = matrix2dst(self._meta['distances'], self.taxa,
                    stamp=keywords['stamp'], taxlen=keywords.get('taxlen', 0))
            return _write_file(keywords['filename'], out, fileformat)

        # output tre-format (newick)
        if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
            if 'tree' not in self._meta:
                # check for distances
                if 'distances' not in self._meta:
                    self._meta['distances'] = wl2dst(self)
                # we look up a function to calculate a tree in the cluster module:
                # NOTE(review): unlike the distances, the tree computed here
                # is not stored back into self._meta
                tree = getattr(cluster, keywords['tree_calc'])(
                    self._meta['distances'], self.cols, distances=keywords['distances'])
            else:
                tree = self._meta['tree']

            return _write_file(keywords['filename'], '{0}'.format(tree), fileformat)

        # flat clustering: one "taxon<TAB>group" line per taxon, sorted by taxon
        if fileformat in ['cluster', 'groups']:
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self)  # check for keywords

            if 'groups' not in self._meta:
                self._meta['groups'] = cluster.matrix2groups(
                    keywords['threshold'], self._meta['distances'], self.taxa)
            lines = []
            for taxon, group in sorted(self._meta['groups'].items(), key=lambda x: x[0]):
                lines.append('{0}\t{1}'.format(taxon, group))
            return _write_file(keywords['filename'], lines, fileformat)

        if fileformat in ['starling', 'star.csv']:
            # make lambda inline for data-check
            # (maps a cell equal to 0 to '-', leaves every other value as is)
            l = lambda x: ['-' if x == 0 else x][0]

            lines = []
            if 'cognates' not in keywords:
                # one column per taxon
                lines.append('ID\tConcept\t' + '\t'.join(self.taxa))
                for i, concept in enumerate(self.concepts):
                    for line in self.get_list(row=concept, entry=keywords['entry']):
                        lines.append(
                            str(i + 1) + '\t' + concept + '\t' + '\t'.join(
                                [l(t) for t in line]))
            else:
                # two columns per taxon: the entry and the value of the
                # column named by keywords['cognates']
                lines.append(
                    'ID\tConcept\t' + '\t'.join(
                        ['{0}\t COG'.format(t) for t in self.taxa]))
                for i, concept in enumerate(self.concepts):
                    cogs = self.get_list(row=concept, entry=keywords['cognates'])
                    for j, line in enumerate(
                            self.get_list(row=concept, entry=keywords['entry'])):
                        part = '\t'.join(
                            '{0}\t{1}'.format(l(a), b) for a, b in zip(line, cogs[j]))
                        lines.append(util.tabjoin(i + 1, concept, part))

            # the third argument carries the entry name: 'starling_<entry>.csv'
            return _write_file(
                keywords['filename'], lines, 'starling_' + keywords['entry'] + '.csv')

        if fileformat == 'multistate.nex':
            # append the suffix only if the filename does not already end in it
            if not keywords['filename'].endswith('.multistate.nex'):
                keywords['filename'] += '.multistate.nex'

            matrix = wl2multistate(self, keywords['ref'], keywords['missing'])
            return multistate2nex(self.taxa, matrix, keywords['filename'])

        # one output per entry of self.cols, written into a directory that is
        # named by keywords['filename'] and created if it does not exist
        if fileformat == 'separated':
            if not os.path.isdir(keywords['filename']):
                os.mkdir(keywords['filename'])

            for l in self.cols:
                # the mere presence of 'ignore_keys' (whatever its value)
                # drops the ID column from header and rows
                lines = [''] if 'ignore_keys' in keywords else ['ID\t']
                lines[0] += '\t'.join(x.upper() for x in keywords['entries'])
                for key in self.get_list(col=l, flat=True):
                    line = [] if 'ignore_keys' in keywords else [key]
                    for entry in keywords['entries']:
                        tmp = self[key, entry]
                        # list-valued cells are flattened to a space-separated string
                        if isinstance(tmp, list):
                            tmp = ' '.join([str(x) for x in tmp])
                        line += [tmp]
                    lines.append('\t'.join('{0}'.format(x) for x in line))
                _write_file('{0}/{1}'.format(keywords['filename'], l), lines, 'tsv')

예제 #6

파일 보기

def wl2qlc(header, data, filename='', formatter='concept', **keywords):
    """
    Write the basic data of a wordlist to file.

    Parameters
    ----------
    header : list
        Column names; written tab-separated after a leading ``ID`` column.
    data : dict
        Maps each row key to the list of cell values of that row.
    filename : str
        Output path without extension. When empty, ``rcParams['filename']``
        is used. The extension is taken from ``keywords['fileformat']``.
    formatter : str
        Name of the column (upper-cased here) by which rows are sorted and
        visually grouped, or two column names joined by a comma.
    **keywords
        Options read here: ``ignore`` (list of section names to leave out, or
        the string 'all'), ``fileformat``, ``prettify``, ``meta`` (dict with
        the metadata to write) and ``stamp`` (text appended to the output).

    Returns
    -------
    None
        The text is handed to ``util.write_text_file``.
    """
    util.setdefaults(
        keywords,
        ignore=['taxa', 'doculects', 'msa'],
        fileformat='qlc',
        prettify=True)
    if keywords['ignore'] == 'all':
        keywords['ignore'] = [
            'taxa', 'scorer', 'meta', 'distances', 'doculects', 'msa', 'json'
        ]
    ignore = keywords['ignore']
    prettify = keywords['prettify']

    formatter = formatter.upper()
    if not filename:
        filename = rcParams['filename']

    # The output is collected as text fragments and joined once at the end;
    # this avoids re-copying a growing string for every cell that is written.
    parts = ['# Wordlist\n'] if prettify else []

    # sort the metadata into the sections that are written below
    meta = keywords.get("meta", {})
    kvpairs = {}
    jsonpairs = {}
    msapairs = {}
    trees = ''
    distances = ''
    taxa = ''
    scorer = ''

    for k, v in meta.items():
        # simple key-value-pairs
        if isinstance(v, (str, int)) or k == "tree":
            kvpairs[k] = v
        elif k == 'msa' and k not in ignore:
            # go a level deeper, checking for keys
            for ref in v:
                msapairs.setdefault(ref, {}).update(v[ref].items())
        elif k == 'distances':
            # Format the matrix only if it is going to be written. The branch
            # itself still has to match, so that an ignored matrix does not
            # fall through to the json section.
            if 'distances' not in ignore:
                distances = matrix2dst(v, meta['taxa'])
        elif k in ['taxa', 'doculect', 'taxon', 'doculects']:
            # we need to find a better solution here, since it is not nice to
            # have taxa written to json again and again
            pass
        elif k == 'trees' and k not in ignore:
            trees = ''
            for key, value in v.items():
                trees += '<tre id="{0}">\n{1}\n</tre>\n'.format(key, value)
        elif k == 'scorer' and k not in ignore:
            for key, value in v.items():
                scorer += '<{2} id="{0}">\n{1}</{2}>\n\n'.format(
                    key, scorer2str(value), k)
        else:
            # everything else goes to the json line, provided that it can be
            # serialized; values that cannot are silently left out
            try:
                json.dumps(v)
                jsonpairs[k] = v
            except TypeError:
                pass

    if kvpairs and 'meta' not in ignore:
        if prettify:
            parts.append('\n# META\n')
        for k, v in sorted(kvpairs.items(), key=lambda x: x[0]):
            parts.append('@{0}:{1}\n'.format(k, v))
    # NOTE(review): `taxa` is never assigned a non-empty value in this
    # function, so this section is currently never written.
    if taxa and keywords['taxa']:
        parts.append('\n# TAXA\n<taxa>\n' + taxa + '\n</taxa>\n')
    if jsonpairs and 'json' not in ignore:
        parts.append("@json: " + json.dumps(jsonpairs) + '\n')
    if msapairs and 'msa' not in ignore:
        for ref in msapairs:
            parts.append("\n# MSA reference: {0}\n".format(ref))
            for k, v in msapairs[ref].items():
                if 'consensus' in v:
                    parts.append('#\n<msa ')
                    parts.append(
                        'id="{0}" ref="{1}" consensus="{2}">\n'.format(
                            k, ref, ' '.join(v['consensus'])))
                else:
                    parts.append(
                        '#\n<msa id="{0}" ref="{1}">\n'.format(k, ref))
                parts.append(msa2str(v, wordlist=True))
                parts.append("</msa>\n")

    # `distances` is only non-empty when the section is not ignored (see above)
    if distances:
        parts.append('\n# DISTANCES\n<dst>\n')
        parts.append(distances + '</dst>\n')

    if trees:
        parts.append('\n# TREES\n' + trees)

    if scorer and 'scorer' not in ignore:
        parts.append('\n# SCORER\n' + scorer)

    if prettify:
        parts.append('\n# DATA\n')
    parts.append('ID\t' + '\t'.join(header) + '\n')

    # check for gloss in header to create nice output format
    if formatter in header:
        idx = header.index(formatter)
        formatter = None
        sorted_data = sorted(data.keys(), key=lambda x: data[x][idx])
    elif len(formatter.split(',')) == 2:
        idxA, idxB = formatter.split(',')
        idxA = header.index(idxA)
        idxB = header.index(idxB)
        idx = idxA
        formatter = None
        sorted_data = sorted(data.keys(),
                             key=lambda x: (data[x][idxA], data[x][idxB]))
    else:
        # NOTE(review): False compares equal to 0, so the range test in the
        # loop below still succeeds for non-empty rows and the rows end up
        # being grouped by their first column. Kept as is; confirm that this
        # is intended.
        idx = False
        formatter = ''
        sorted_data = sorted(data.keys())

    for key in sorted_data:
        # get the line
        line = data[key]

        # start a new visual group whenever the value of the grouping column
        # changes (from here on `formatter` holds the value seen last)
        if idx in range(len(line)):
            if line[idx] != formatter:
                if prettify:
                    parts.append('#\n')
                formatter = line[idx]

        # add the key
        parts.append(str(key))

        # add the rest of the values; exact type checks are used so that
        # subclasses (e.g. bool, a subclass of int) take the generic branch
        for value in line:
            if type(value) is list:
                try:
                    cell = ' '.join(value)
                except TypeError:
                    # str.join rejects non-string items: stringify them first
                    cell = ' '.join([str(item) for item in value])
            elif type(value) is int:
                cell = str(value)
            elif type(value) is float:
                cell = '{0:.4f}'.format(value)
            elif value is None:
                cell = ''
            else:
                cell = '{:}'.format(value)
            parts.append('\t' + cell)
        parts.append('\n')

    util.write_text_file(
        filename + '.' + keywords['fileformat'],
        ''.join(parts) + keywords.get('stamp', ''),
        normalize="NFC")

예제 #7

파일 보기

파일: ops.py 프로젝트: LinguList/lingpy

def wl2qlc(
        header,
        data,
        filename='',
        formatter='concept',
        **keywords):
    """
    Write the basic data of a wordlist to file.

    Parameters
    ----------
    header : list
        Column names; written tab-separated after a leading ``ID`` column.
    data : dict
        Maps each row key to the list of cell values of that row.
    filename : str
        Output path without extension. When empty, ``rcParams['filename']``
        is used. The extension is taken from ``keywords['fileformat']``.
    formatter : str
        Name of the column (upper-cased here) by which rows are sorted and
        visually grouped, or two column names joined by a comma.
    **keywords
        Options read here: ``ignore`` (list of section names to leave out, or
        the string 'all'), ``fileformat``, ``prettify``, ``meta`` (dict with
        the metadata to write) and ``stamp`` (text appended to the output).

    Returns
    -------
    None
        The text is handed to ``util.write_text_file``.
    """
    util.setdefaults(
        keywords,
        ignore=['taxa', 'doculects', 'msa'],
        fileformat='qlc',
        prettify=True)
    if keywords['ignore'] == 'all':
        keywords['ignore'] = [
            'taxa', 'scorer', 'meta', 'distances', 'doculects', 'msa', 'json']
    ignore = keywords['ignore']
    prettify = keywords['prettify']

    formatter = formatter.upper()
    if not filename:
        filename = rcParams['filename']

    # The output is collected as text fragments and joined once at the end;
    # this avoids re-copying a growing string for every cell that is written.
    parts = ['# Wordlist\n'] if prettify else []

    # sort the metadata into the sections that are written below
    meta = keywords.get("meta", {})
    kvpairs = {}
    jsonpairs = {}
    msapairs = {}
    trees = ''
    distances = ''
    taxa = ''
    scorer = ''

    for k, v in meta.items():
        # simple key-value-pairs
        if isinstance(v, (text_type, int)) or k == "tree":
            kvpairs[k] = v
        elif k == 'msa' and k not in ignore:
            # go a level deeper, checking for keys
            for ref in v:
                msapairs.setdefault(ref, {}).update(v[ref].items())
        elif k == 'distances':
            # Format the matrix only if it is going to be written. The branch
            # itself still has to match, so that an ignored matrix does not
            # fall through to the json section.
            if 'distances' not in ignore:
                distances = matrix2dst(v, meta['taxa'])
        elif k in ['taxa', 'doculect', 'taxon', 'doculects']:
            # we need to find a better solution here, since it is not nice to
            # have taxa written to json again and again
            pass
        elif k == 'trees' and k not in ignore:
            trees = ''
            for key, value in v.items():
                trees += '<tre id="{0}">\n{1}\n</tre>\n'.format(key, value)
        elif k == 'scorer' and k not in ignore:
            for key, value in v.items():
                scorer += '<{2} id="{0}">\n{1}</{2}>\n\n'.format(
                    key, scorer2str(value), k)
        else:
            # everything else goes to the json line, provided that it can be
            # serialized; values that cannot are silently left out
            try:
                json.dumps(v)
                jsonpairs[k] = v
            except TypeError:
                pass

    if kvpairs and 'meta' not in ignore:
        if prettify:
            parts.append('\n# META\n')
        for k, v in sorted(kvpairs.items(), key=lambda x: x[0]):
            parts.append('@{0}:{1}\n'.format(k, v))
    # NOTE(review): `taxa` is never assigned a non-empty value in this
    # function, so this section is currently never written.
    if taxa and keywords['taxa']:
        parts.append('\n# TAXA\n<taxa>\n' + taxa + '\n</taxa>\n')
    if jsonpairs and 'json' not in ignore:
        parts.append("@json: " + json.dumps(jsonpairs) + '\n')
    if msapairs and 'msa' not in ignore:
        for ref in msapairs:
            parts.append("\n# MSA reference: {0}\n".format(ref))
            for k, v in msapairs[ref].items():
                if 'consensus' in v:
                    parts.append('#\n<msa ')
                    parts.append(
                        'id="{0}" ref="{1}" consensus="{2}">\n'.format(
                            k, ref, ' '.join(v['consensus'])))
                else:
                    parts.append(
                        '#\n<msa id="{0}" ref="{1}">\n'.format(k, ref))
                parts.append(msa2str(v, wordlist=True))
                parts.append("</msa>\n")

    # `distances` is only non-empty when the section is not ignored (see above)
    if distances:
        parts.append('\n# DISTANCES\n<dst>\n')
        parts.append(distances + '</dst>\n')

    if trees:
        parts.append('\n# TREES\n' + trees)

    if scorer and 'scorer' not in ignore:
        parts.append('\n# SCORER\n' + scorer)

    if prettify:
        parts.append('\n# DATA\n')
    parts.append('ID\t' + '\t'.join(header) + '\n')

    # check for gloss in header to create nice output format
    if formatter in header:
        idx = header.index(formatter)
        formatter = None
        sorted_data = sorted(data.keys(), key=lambda x: data[x][idx])
    elif len(formatter.split(',')) == 2:
        idxA, idxB = formatter.split(',')
        idxA = header.index(idxA)
        idxB = header.index(idxB)
        idx = idxA
        formatter = None
        sorted_data = sorted(data.keys(), key=lambda x: (
            data[x][idxA], data[x][idxB]))
    else:
        # NOTE(review): False compares equal to 0, so the range test in the
        # loop below still succeeds for non-empty rows and the rows end up
        # being grouped by their first column. Kept as is; confirm that this
        # is intended.
        idx = False
        formatter = ''
        sorted_data = sorted(data.keys())

    for key in sorted_data:
        # get the line
        line = data[key]

        # start a new visual group whenever the value of the grouping column
        # changes (from here on `formatter` holds the value seen last)
        if idx in range(len(line)):
            if line[idx] != formatter:
                if prettify:
                    parts.append('#\n')
                formatter = line[idx]

        # add the key
        parts.append(text_type(key))

        # add the rest of the values; exact type checks are used so that
        # subclasses (e.g. bool, a subclass of int) take the generic branch
        for value in line:
            if type(value) is list:
                try:
                    cell = ' '.join(value)
                except TypeError:
                    # str.join rejects non-string items: stringify them first
                    cell = ' '.join([text_type(item) for item in value])
            elif type(value) is int:
                cell = text_type(value)
            elif type(value) is float:
                cell = '{0:.4f}'.format(value)
            elif value is None:
                cell = ''
            else:
                cell = '{:}'.format(value)
            parts.append('\t' + cell)
        parts.append('\n')

    util.write_text_file(
        filename + '.' + keywords['fileformat'],
        ''.join(parts) + keywords.get('stamp', ''),
        normalize="NFC")