def main(fin, fin_gcode, fin_taxa, fout):
    """
    Write a codon alignment as a tab-separated table of integer states.

    The output has one row per alignment column and one column per taxon.
    When a taxon file is given, the output columns follow the requested
    taxon order, and a requested taxon that is missing from the alignment
    is written as a column of -1 (unknown codon state).

    @param fin: interleaved phylip codon alignment file open for reading
    @param fin_gcode: open file for reading the genetic code
    @param fin_taxa: optional open file for defining taxon subset and order
    @param fout: open file for writing the integer ndarray as text
    @raise ValueError: if the rows of the genetic code file or of the
        taxon file are not numbered 0, 1, 2, ... in order
    """

    # Read the description of the genetic code.
    # Each tab-separated row is unpacked as (index, amino acid, codon).
    gcode_rows = list(csv.reader(fin_gcode, delimiter='\t'))
    indices, _aminos, codons = zip(*gcode_rows)
    # Compare against a list: in Python 3 a list never equals a range
    # object, so comparing with range() directly rejects every input.
    if [int(x) for x in indices] != list(range(len(indices))):
        raise ValueError(
                'genetic code rows must be numbered 0, 1, 2, ... in order')

    # Read the interleaved phylip alignment.
    # The first item produced by the reader is taken as the taxon names
    # and every following item as an alignment column.
    taxon_names = None
    cols = []
    for col in phylip.read_interleaved_codon_alignment(fin):
        if taxon_names is None:
            taxon_names = col
        else:
            cols.append(col)

    # Define the ndarray of integers in the default phylip taxon order.
    M_full = design.get_pattern_array(codons, cols)

    if fin_taxa is None:
        M = M_full
    else:
        # Read the ordered taxon subset.
        # Each tab-separated row is unpacked as (index, taxon name).
        taxa_rows = list(csv.reader(fin_taxa, delimiter='\t'))
        indices, requested_taxa = zip(*taxa_rows)
        if [int(x) for x in indices] != list(range(len(indices))):
            raise ValueError(
                    'taxon rows must be numbered 0, 1, 2, ... in order')

        # Init the pattern ndarray with unknown codon states.
        M = np.full((len(cols), len(requested_taxa)), -1, dtype=int)

        # Construct the inverse map of the default taxon ordering.
        name_to_phylip_index = dict(
                (name, i) for i, name in enumerate(taxon_names))

        # Redefine the columns according to the user ordering and subsetting.
        # Requested taxa that are absent from the alignment keep the -1 fill.
        for i, name in enumerate(requested_taxa):
            phylip_index = name_to_phylip_index.get(name)
            if phylip_index is not None:
                M[:, i] = M_full[:, phylip_index]

    # Write the ndarray of integers.
    np.savetxt(fout, M, fmt='%d', delimiter='\t')
def main(args):
    """
    Tabulate codon state pairs between the human and chimp sequences.

    The output is an (ncodons, ncodons) tab-separated table in which
    entry (i, j) is the number of alignment columns whose pattern is (i, j).
    Presumably i is the human codon state and j is the chimp codon state,
    because the columns are built in (human, chimp) order -- this depends
    on design.get_pattern_array and should be confirmed there.

    @param args: parsed command line arguments; this function reads
        args.code (path of the genetic code file), args.i (the input,
        opened with fileutil.open_or_stdin) and args.counts_out
        (the output, opened with fileutil.open_or_stdout)
    @raise ValueError: if the genetic code rows are not numbered
        0, 1, 2, ... in order, or if the second and third paragraphs of
        the input are not headed 'Human_Horai' and 'Chimp_Horai'
    """

    # Read the description of the genetic code.
    # Each tab-separated row is unpacked as (index, amino acid, codon).
    with open(args.code) as fin_gcode:
        gcode_rows = list(csv.reader(fin_gcode, delimiter='\t'))
    indices, _aminos, codons = zip(*gcode_rows)
    # Compare against a list: in Python 3 a list never equals a range
    # object, so comparing with range() directly rejects every input.
    if [int(x) for x in indices] != list(range(len(indices))):
        raise ValueError(
                'genetic code rows must be numbered 0, 1, 2, ... in order')

    # Read the input; the first paragraph is not used.
    with fileutil.open_or_stdin(args.i) as fin:
        paragraphs = list(gen_paragraphs(fin))
    human_header = paragraphs[1][0]
    human_lines = paragraphs[1][1:]
    chimp_header = paragraphs[2][0]
    chimp_lines = paragraphs[2][1:]
    if human_header != 'Human_Horai':
        raise ValueError(
                'expected header Human_Horai but found %r' % (human_header,))
    if chimp_header != 'Chimp_Horai':
        raise ValueError(
                'expected header Chimp_Horai but found %r' % (chimp_header,))

    # Split each concatenated sequence into consecutive triples.
    human_dna = ''.join(human_lines)
    human_codons = [human_dna[i:i+3] for i in range(0, len(human_dna), 3)]
    chimp_dna = ''.join(chimp_lines)
    chimp_codons = [chimp_dna[i:i+3] for i in range(0, len(chimp_dna), 3)]

    # Materialize the columns: in Python 3 zip() is a one-shot iterator
    # with no length, whereas a list works on both Python 2 and 3.
    codon_alignment_columns = list(zip(human_codons, chimp_codons))
    patterns = design.get_pattern_array(codons, codon_alignment_columns)

    # NOTE(review): if get_pattern_array can emit negative states for
    # unrecognized codons, the indexing below would wrap around to the
    # last row or column -- confirm against design.get_pattern_array.
    ncodons = len(codons)
    counts = np.zeros((ncodons, ncodons), dtype=int)
    for i, j in patterns:
        counts[i, j] += 1

    # Write the (ncodons, ncodons) array of counts of human to chimp changes.
    # The counts are integers, so use '%d'; '%g' would switch to lossy
    # scientific notation for counts of a million or more.
    with fileutil.open_or_stdout(args.counts_out, 'w') as fout:
        np.savetxt(fout, counts, fmt='%d', delimiter='\t')