def interleaved_phy(msa, add_flag=False, max_name_length=_MAX_NAME_LENGTH):
    """Render `msa` as text in an interleaved "phy" layout.

    The output starts with a header line ("<record count> <sequence length>",
    plus " I" when `add_flag` is true) followed by an empty line. Records
    are emitted in sorted order; the first row of every record starts with
    its name (cut to `max_name_length` characters) and padding, and rows at
    the same position across records are written together, with an empty
    line between row groups.

    `MSA.validate(msa)` is called first; anything it raises propagates.

    NOTE(review): `grouper` is assumed to behave like the itertools
    "grouper" recipe (fixed-size chunks padded with `fillvalue`) -- confirm.
    """
    MSA.validate(msa)

    header = "%i %i" % (len(msa), msa.seqlen())
    if add_flag:
        header += " I"
    output = [header, ""]

    # Width of the name column: round the (capped) longest name plus two
    # characters up to a whole number of block strides, then take off one
    # spacing, since a spacing is inserted in front of the first block.
    stride = _BLOCK_SIZE + _BLOCK_SPACING
    longest_name = max(len(name) for name in msa.names())
    name_width = min(max_name_length, longest_name) + 2
    name_width -= (name_width % -stride) + _BLOCK_SPACING

    separator = " " * _BLOCK_SPACING
    per_record_rows = []
    for entry in sorted(msa):
        label = entry.name[:max_name_length]
        current = [label, " " * (name_width - len(label))]

        finished = []
        for chunk in grouper(_BLOCK_SIZE, entry.sequence, fillvalue=""):
            text = "".join(chunk)
            if sum(len(part) for part in current) < _LINE_SIZE:
                current.extend((separator, text))
            else:
                # The row is full; flush it and start the next with this block
                finished.append("".join(current))
                current = [text]

        finished.append("".join(current))
        per_record_rows.append(finished)

    # Emit the n'th row of every record together, then an empty line
    for rows in zip(*per_record_rows):
        output.extend(rows)
        output.append("")
    # Drop the trailing empty line (or the one after the header if no rows)
    output.pop()

    return "\n".join(output)
def _read_sequences(filenames):
    """Load one MSA per filename, validate them together, and iterate them.

    Each filename is read with `MSA.from_file`, and all of the resulting
    MSAs are passed to a single `MSA.validate` call; anything either of
    those raises propagates to the caller.

    Returns an iterator over `(filename, msa)` pairs.
    """
    results = {}
    for filename in filenames:
        results[filename] = MSA.from_file(filename)
    MSA.validate(*results.values())

    # dict.iteritems() only exists on Python 2; wrapping items() in iter()
    # still hands callers an iterator on both Python 2 and Python 3.
    return iter(results.items())
def _run(self, _config, temp):
    """Write every input MSA, in sequential "phy" form, to one output file.

    Input files are read in sorted filename order and validated together
    with `MSA.validate`. Each MSA is rendered by `sequential_phy` (passing
    `self._add_flag`), and the rendered texts are joined with "\\n\\n" and
    written to the path returned by `reroot_path(temp, self._out_phy)`.
    """
    # Read and check that MSAs share groups
    paths = sorted(self.input_files)
    msas = [MSA.from_file(path) for path in paths]
    MSA.validate(*msas)

    # Render everything before the output file is opened
    rendered = [sequential_phy(msa, add_flag=self._add_flag) for msa in msas]

    destination = reroot_path(temp, self._out_phy)
    with open(destination, "w") as handle:
        handle.write("\n\n".join(rendered))
def _run(self, _config, temp):
    """Render the input MSAs sequentially and write them to the output file.

    The files in `self.input_files` are loaded in sorted order, checked
    jointly by `MSA.validate`, and converted one by one with
    `sequential_phy(msa, add_flag=self._add_flag)`. The results, separated
    by "\\n\\n", are written to `reroot_path(temp, self._out_phy)`.
    """
    # Read and check that MSAs share groups
    alignments = []
    for filename in sorted(self.input_files):
        alignments.append(MSA.from_file(filename))
    MSA.validate(*alignments)

    # All blocks are built up front, before the output file is created
    blocks = [
        sequential_phy(alignment, add_flag=self._add_flag)
        for alignment in alignments
    ]

    with open(reroot_path(temp, self._out_phy), "w") as out_handle:
        out_handle.write("\n\n".join(blocks))
def sequential_phy(msa, add_flag=False, max_name_length=_MAX_NAME_LENGTH):
    """Render `msa` as text in a sequential "phy" layout.

    The output starts with a header line ("<record count> <sequence length>",
    plus " S" when `add_flag` is true) followed by an empty line. Records
    are emitted in sorted order: the record name, cut to `max_name_length`
    characters, on its own line, then the sequence in rows of up to
    `_NUM_BLOCKS` blocks of `_BLOCK_SIZE` characters, with blocks separated
    by `_BLOCK_SPACING` spaces.

    `MSA.validate(msa)` is called first; anything it raises propagates.

    NOTE(review): `grouper` is assumed to behave like the itertools
    "grouper" recipe (fixed-size chunks padded with `fillvalue`) -- confirm.
    """
    MSA.validate(msa)

    header = "%i %i" % (len(msa), msa.seqlen())
    if add_flag:
        header += " S"
    output = [header, ""]

    separator = " " * _BLOCK_SPACING
    for entry in sorted(msa):
        output.append(entry.name[:max_name_length])

        chunks = grouper(_BLOCK_SIZE, entry.sequence, fillvalue="")
        for row in grouper(_NUM_BLOCKS, chunks):
            # Falsy entries (the filler in a short final row) are skipped
            pieces = ["".join(chunk) for chunk in row if chunk]
            output.append(separator.join(pieces))

    return "\n".join(output)
def sequential_phy(msa, add_flag=False, max_name_length=_MAX_NAME_LENGTH):
    """Build the sequential "phy" text representation of `msa`.

    Layout: a "<record count> <sequence length>" header (with " S" appended
    when `add_flag` is true), an empty line, and then for each record in
    sorted order its name (at most `max_name_length` characters) followed
    by its sequence, split into blocks of `_BLOCK_SIZE` characters with up
    to `_NUM_BLOCKS` blocks per row, joined by `_BLOCK_SPACING` spaces.

    `MSA.validate(msa)` runs before anything else and may raise.

    NOTE(review): `grouper` presumably follows the itertools "grouper"
    recipe, padding the last group with `fillvalue` -- verify.
    """
    MSA.validate(msa)

    header = "%i %i" % (len(msa), msa.seqlen())
    if add_flag:
        header += " S"

    lines_out = [header, ""]
    gap = _BLOCK_SPACING * " "

    for rec in sorted(msa):
        lines_out.append(rec.name[:max_name_length])

        block_iter = grouper(_BLOCK_SIZE, rec.sequence, fillvalue="")
        row_iter = grouper(_NUM_BLOCKS, block_iter)
        # Filler entries at the end of the last row are falsy and dropped
        lines_out.extend(
            gap.join("".join(block) for block in row if block)
            for row in row_iter
        )

    return "\n".join(lines_out)
def interleaved_phy(msa, add_flag=False, max_name_length=_MAX_NAME_LENGTH):
    """Build the interleaved "phy" text representation of `msa`.

    Layout: a "<record count> <sequence length>" header (with " I" appended
    when `add_flag` is true), an empty line, and then groups of rows
    separated by empty lines. Each group holds one row per record, in
    sorted record order; only the first group carries the record names
    (cut to `max_name_length` characters and padded to a common width).

    `MSA.validate(msa)` runs before anything else and may raise.

    NOTE(review): `grouper` presumably follows the itertools "grouper"
    recipe, padding the last group with `fillvalue` -- verify.
    """
    MSA.validate(msa)

    header = "%i %i" % (len(msa), msa.seqlen())
    if add_flag:
        header += " I"
    text_lines = [header, ""]

    # Name column: longest (capped) name plus 2, rounded up to a multiple
    # of the block stride, minus the spacing that precedes the first block.
    widest = max(len(name) for name in msa.names())
    name_column = min(max_name_length, widest) + 2
    name_column -= name_column % -(_BLOCK_SIZE + _BLOCK_SPACING) + _BLOCK_SPACING

    gap = " " * _BLOCK_SPACING
    record_rows = []
    for rec in sorted(msa):
        shown_name = rec.name[:max_name_length]
        filler = (name_column - len(shown_name)) * " "

        rows = []
        parts = [shown_name, filler]
        # Running total of the characters currently held in `parts`
        width = len(shown_name) + len(filler)
        for block in grouper(_BLOCK_SIZE, rec.sequence, fillvalue=""):
            block_text = "".join(block)
            if width >= _LINE_SIZE:
                # Row is full: flush it and begin a new one with this block
                rows.append("".join(parts))
                parts = [block_text]
                width = len(block_text)
            else:
                parts.append(gap)
                parts.append(block_text)
                width += len(gap) + len(block_text)

        rows.append("".join(parts))
        record_rows.append(rows)

    # Interleave: the n'th row of each record, followed by an empty line
    for group in zip(*record_rows):
        text_lines.extend(group)
        text_lines.append("")
    # Remove the final empty line
    text_lines.pop()

    return "\n".join(text_lines)
def test_msa_validate__missing_names_second():
    """MSA.validate must raise MSAError when the second MSA is incomplete.

    The first argument is a copy of `_JOIN_MSA_1`; the second is built from
    all but the last record of `_JOIN_MSA_2`.
    """
    complete = copy.copy(_JOIN_MSA_1)
    records = list(_JOIN_MSA_2)
    truncated = MSA(records[:-1])

    with pytest.raises(MSAError):
        MSA.validate(complete, truncated)