def test_wl2qlc(self):
    """Check that ``wl2qlc`` leaves a ``test.qlc`` file ending with the stamp."""
    from lingpy.basic.ops import wl2qlc

    stamp = 'test-stamp'
    target = self.tmp_path('test')
    wl2qlc(
        self.wordlist.header,
        self.wordlist._data,
        filename=str(target),
        stamp=stamp)

    # The export is called with the bare name 'test'; the check below reads
    # 'test.qlc' from the same temporary location.
    written = self.tmp_path('test.qlc')
    with written.open(encoding='utf8') as handle:
        content = handle.read()
    self.assertTrue(content.endswith(stamp))
def test_wl2qlc(self):
    """Exercise ``wl2qlc`` on a plain wordlist and on an aligned wordlist."""
    stamp = 'test-stamp'
    target = self.tmp_path('test')
    wl2qlc(
        self.wordlist.header,
        self.wordlist._data,
        filename=target.as_posix(),
        stamp=stamp)

    # The export is called with the bare name 'test'; the check below reads
    # 'test.qlc' from the same temporary location.
    qlc_file = self.tmp_path('test.qlc')
    with qlc_file.open(encoding='utf8') as handle:
        content = handle.read()
    self.assertTrue(content.endswith(stamp))

    # Load a wordlist with alignments and export it again (the original note
    # says the output should then include "msapairs").
    aligned = Alignments(test_data('good_file.tsv'), ref='cogid')
    aligned.align(ref="cogid")
    wl2qlc(
        aligned.header,
        aligned._data,
        meta=aligned._meta,
        filename=qlc_file.as_posix(),
        stamp='stampo',
        ignore=[])

    # After computing consensus data, export with an upper-cased header that
    # is ordered by the values stored in ``aligned.header``, once per formatter.
    aligned.get_consensus(ref="cogid")
    for formatter in ("doculect,concept", "doculect"):
        upper_header = [
            name.upper()
            for name in sorted(
                aligned.header, key=lambda column: aligned.header[column])
        ]
        wl2qlc(
            upper_header,
            aligned._data,
            meta=aligned._meta,
            filename=qlc_file.as_posix(),
            stamp='stampo',
            ignore=[],
            formatter=formatter)
def test_wl2qlc(self):
    """Run ``wl2qlc`` for a simple wordlist and for aligned data."""
    stamp = 'test-stamp'
    plain_target = self.tmp_path('test')
    wl2qlc(
        self.wordlist.header,
        self.wordlist._data,
        filename=plain_target.as_posix(),
        stamp=stamp)

    # The file that is read back carries the '.qlc' suffix.
    result_path = self.tmp_path('test.qlc')
    with result_path.open(encoding='utf8') as stream:
        text = stream.read()
    self.assertTrue(text.endswith(stamp))

    # Load a wordlist with alignments and output it again (per the original
    # note, with "msapairs").
    alignments = Alignments(test_data('good_file.tsv'), ref='cogid')
    alignments.align(ref="cogid")
    wl2qlc(
        alignments.header,
        alignments._data,
        meta=alignments._meta,
        filename=result_path.as_posix(),
        stamp='stampo',
        ignore=[])

    alignments.get_consensus(ref="cogid")

    def upper_header():
        # Build a fresh upper-cased header list, ordered by the values that
        # ``alignments.header`` stores for each name.
        ordered = sorted(
            alignments.header, key=lambda name: alignments.header[name])
        return [name.upper() for name in ordered]

    wl2qlc(
        upper_header(),
        alignments._data,
        meta=alignments._meta,
        filename=result_path.as_posix(),
        stamp='stampo',
        ignore=[],
        formatter="doculect,concept")
    wl2qlc(
        upper_header(),
        alignments._data,
        meta=alignments._meta,
        filename=result_path.as_posix(),
        stamp='stampo',
        ignore=[],
        formatter="doculect")
def test_wl2qlc(tmppath, test_data, wordlist):
    """Exercise ``wl2qlc`` on the wordlist fixture and on aligned data."""
    stamp = 'test-stamp'
    wl2qlc(
        wordlist.header,
        wordlist._data,
        filename=str(tmppath / 'test'),
        stamp=stamp)

    # The export is called with the bare name 'test'; the check below reads
    # 'test.qlc' from the same temporary directory.
    qlc_file = tmppath / 'test.qlc'
    content = qlc_file.read_text(encoding='utf8')
    assert content.endswith(stamp)

    # Load a wordlist with alignments and export it again (the original note
    # says the output should then include "msapairs").
    aligned = Alignments(str(test_data / 'good_file.tsv'), ref='cogid')
    aligned.align(ref="cogid")
    wl2qlc(
        aligned.header,
        aligned._data,
        meta=aligned._meta,
        filename=str(qlc_file),
        stamp='stampo',
        ignore=[])

    # After computing consensus data, export with an upper-cased header that
    # is ordered by the values stored in ``aligned.header``, once per formatter.
    aligned.get_consensus(ref="cogid")
    for formatter in ("doculect,concept", "doculect"):
        upper_header = [
            name.upper()
            for name in sorted(
                aligned.header, key=lambda column: aligned.header[column])
        ]
        wl2qlc(
            upper_header,
            aligned._data,
            meta=aligned._meta,
            filename=qlc_file.as_posix(),
            stamp='stampo',
            ignore=[],
            formatter=formatter)
def _output(self, fileformat, **keywords):
    """
    Internal function that eases its modification by daughter classes.

    Dispatches on ``fileformat`` and hands the data to the matching writer.

    Parameters
    ----------
    fileformat : str
        One of the formats handled below:

        - ``'triple'``, ``'triples'``, ``'triples.tsv'`` -> ``tsv2triple``
        - ``'paps.nex'``, ``'paps.csv'`` -> ``pap2nex`` / ``pap2csv``
        - ``'taxa'`` -> ``util.write_text_file`` with ``self.cols``
        - ``'csv'``, ``'qlc'``, ``'tsv'`` -> ``wl2qlc``
        - ``'dst'`` -> ``matrix2dst`` + ``_write_file``
        - ``'tre'``, ``'nwk'`` -> tree from ``self._meta`` or computed via
          the ``cluster`` function named by ``tree_calc``
        - ``'cluster'``, ``'groups'`` -> ``cluster.matrix2groups``
        - ``'starling'``, ``'star.csv'`` -> tab-separated table per concept
        - ``'multistate.nex'`` -> ``wl2multistate`` + ``multistate2nex``
        - ``'separated'`` -> one ``tsv`` file per entry of ``self.cols``
    **keywords
        Options; missing ones are filled in with the defaults listed in the
        ``util.setdefaults`` call below. ``stamp`` is always overwritten with
        ``self._stamp`` (or ``''`` if that attribute is absent). The keys
        ``cognates``, ``taxlen`` and ``ignore_keys`` have no default and are
        only looked up by the branches that use them.

    Returns
    -------
    Whatever the selected writer returns. ``None`` for ``'separated'`` and
    for any ``fileformat`` that matches none of the branches.

    Raises
    ------
    ValueError
        For the csv/qlc/tsv subset export, if ``cols`` is not a list, tuple
        or bool, or if ``rows`` is not a dict or bool.
    """
    # check for stamp attribute
    keywords["stamp"] = getattr(self, '_stamp', '')

    # add the default parameters, they will be checked against the keywords
    util.setdefaults(
        keywords,
        cols=False,
        distances=False,
        entries=("concept", "counterpart"),
        entry='concept',
        fileformat=fileformat,
        filename=rcParams['filename'],
        formatter='concept',
        modify_ref=False,
        meta=self._meta,
        missing=0,
        prettify='false',
        ignore='all',
        ref='cogid',
        rows=False,
        subset=False,  # setup a subset of the data,
        taxa='taxa',
        threshold=0.6,  # threshold for flat clustering
        tree_calc='neighbor')

    if fileformat in ['triple', 'triples', 'triples.tsv']:
        return tsv2triple(self, keywords['filename'] + '.' + fileformat)

    if fileformat in ['paps.nex', 'paps.csv']:
        paps = self.get_paps(
            ref=keywords['ref'],
            entry=keywords['entry'],
            missing=keywords['missing'])
        kw = dict(filename=keywords['filename'] + '.paps')
        if fileformat == 'paps.nex':
            # only the nexus writer receives the ``missing`` value
            kw['missing'] = keywords['missing']
            return pap2nex(self.cols, paps, **kw)
        return pap2csv(self.cols, paps, **kw)

    # simple printing of taxa
    if fileformat == 'taxa':
        assert hasattr(self, 'taxa')
        return util.write_text_file(keywords['filename'] + '.taxa', self.cols)

    # csv-output
    if fileformat in ['csv', 'qlc', 'tsv']:
        # get the header line
        header = sorted(
            [s for s in set(self._alias.values()) if s in self._header],
            key=lambda x: self._header[x])
        header = [h.upper() for h in header]

        self._meta.setdefault('taxa', self.cols)

        # get the data, in case a subset is chosen
        if not keywords['subset']:
            # write stuff to file
            return wl2qlc(header, self._data, **keywords)

        cols, rows = keywords['cols'], keywords['rows']

        if not isinstance(cols, (list, tuple, bool)):
            raise ValueError("[i] Argument 'cols' should be list or tuple.")
        if not isinstance(rows, (dict, bool)):
            raise ValueError("[i] Argument 'rows' should be a dictionary.")

        # check for chosen header
        if cols:
            # get indices for header
            indices = [self._header[x] for x in cols]
            header = [c.upper() for c in cols]
        else:
            indices = list(range(len(self.header)))

        if rows:
            # Each entry of ``rows`` maps a column name (or 'ID') to the tail
            # of a Python expression; the pieces are joined with ``and``.
            stmts = []
            for key, value in rows.items():
                if key == 'ID':
                    stmts.append("key " + value)
                else:
                    idx = self._header[key]
                    stmts.append("line[{0}] ".format(idx) + value)
            # the condition does not change per row, so build it once
            condition = " and ".join(stmts)

        log.debug("calculated what should be excluded")

        # get the data
        out = {}
        for key, line in self._data.items():
            log.debug(key)
            # NOTE(security): the row filter is run through eval() in this
            # scope, so the generated expression relies on the local names
            # ``key`` and ``line``. Never pass ``rows`` values that come
            # from an untrusted source.
            if not rows or eval(condition):
                out[key] = [line[i] for i in indices]

        log.debug("passing data to wl2qlc")
        return wl2qlc(header, out, **keywords)

    # output dst-format (phylip)
    if fileformat == 'dst':
        # check for distances as keyword
        if 'distances' not in self._meta:
            self._meta['distances'] = wl2dst(self, **keywords)

        out = matrix2dst(
            self._meta['distances'],
            self.taxa,
            stamp=keywords['stamp'],
            taxlen=keywords.get('taxlen', 0))
        return _write_file(keywords['filename'], out, fileformat)

    # output tre-format (newick)
    if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
        if 'tree' not in self._meta:
            # check for distances
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self)
            # we look up a function to calculate a tree in the cluster module:
            tree = getattr(cluster, keywords['tree_calc'])(
                self._meta['distances'],
                self.cols,
                distances=keywords['distances'])
        else:
            tree = self._meta['tree']

        return _write_file(
            keywords['filename'], '{0}'.format(tree), fileformat)

    if fileformat in ['cluster', 'groups']:
        if 'distances' not in self._meta:
            self._meta['distances'] = wl2dst(self)
        # check for keywords
        if 'groups' not in self._meta:
            self._meta['groups'] = cluster.matrix2groups(
                keywords['threshold'], self._meta['distances'], self.taxa)
        lines = [
            '{0}\t{1}'.format(taxon, group)
            for taxon, group in sorted(
                self._meta['groups'].items(), key=lambda x: x[0])
        ]
        return _write_file(keywords['filename'], lines, fileformat)

    if fileformat in ['starling', 'star.csv']:
        def dash_if_zero(value):
            # data-check: a cell equal to 0 is written as '-'
            return '-' if value == 0 else value

        lines = []
        if 'cognates' not in keywords:
            lines.append('ID\tConcept\t' + '\t'.join(self.taxa))
            for number, concept in enumerate(self.concepts, start=1):
                for line in self.get_list(
                        row=concept, entry=keywords['entry']):
                    lines.append(
                        str(number) + '\t' + concept + '\t' + '\t'.join(
                            [dash_if_zero(t) for t in line]))
        else:
            # with cognates, every taxon column is followed by a COG column
            lines.append(
                'ID\tConcept\t' + '\t'.join(
                    ['{0}\t COG'.format(t) for t in self.taxa]))
            for number, concept in enumerate(self.concepts, start=1):
                cogs = self.get_list(row=concept, entry=keywords['cognates'])
                for j, line in enumerate(
                        self.get_list(row=concept, entry=keywords['entry'])):
                    part = '\t'.join(
                        '{0}\t{1}'.format(dash_if_zero(a), b)
                        for a, b in zip(line, cogs[j]))
                    lines.append(util.tabjoin(number, concept, part))

        return _write_file(
            keywords['filename'],
            lines,
            'starling_' + keywords['entry'] + '.csv')

    if fileformat == 'multistate.nex':
        if not keywords['filename'].endswith('.multistate.nex'):
            keywords['filename'] += '.multistate.nex'

        matrix = wl2multistate(self, keywords['ref'], keywords['missing'])
        return multistate2nex(self.taxa, matrix, keywords['filename'])

    if fileformat == 'separated':
        # ``filename`` is used as a directory that receives one file per
        # entry of ``self.cols``
        if not os.path.isdir(keywords['filename']):
            os.mkdir(keywords['filename'])

        for taxon in self.cols:
            # the ID column is left out when 'ignore_keys' was passed
            lines = [''] if 'ignore_keys' in keywords else ['ID\t']
            lines[0] += '\t'.join(x.upper() for x in keywords['entries'])
            for key in self.get_list(col=taxon, flat=True):
                line = [] if 'ignore_keys' in keywords else [key]
                for entry in keywords['entries']:
                    tmp = self[key, entry]
                    if isinstance(tmp, list):
                        # list values are flattened to a space-separated string
                        tmp = ' '.join([str(x) for x in tmp])
                    line.append(tmp)
                lines.append('\t'.join('{0}'.format(x) for x in line))
            _write_file(
                '{0}/{1}'.format(keywords['filename'], taxon), lines, 'tsv')