def main(fin, fin_gcode, fin_taxa, fout):
    """
    Convert an interleaved phylip codon alignment to an integer ndarray.

    The genetic code file is read as tab-separated rows of
    (index, amino acid, codon), and the optional taxon file as tab-separated
    rows of (index, taxon name).
    The written array has one row per alignment column and one column
    per taxon, with entries computed by design.get_pattern_array.
    When a taxon file is given, the taxa are subset and reordered
    as requested; a requested taxon that is missing from the alignment
    gets a column of -1 (unknown codon state).

    @param fin: interleaved phylip codon alignment file open for reading
    @param fin_gcode: open file for reading the genetic code
    @param fin_taxa: optional open file for defining taxon subset and order
    @param fout: open file for writing the integer ndarray as text
    @raise ValueError: if the row indices of the genetic code file or of the
    taxon file are not exactly 0, 1, 2, ... in order
    """

    # read the description of the genetic code
    gcode_rows = list(csv.reader(fin_gcode, delimiter='\t'))
    gcode_indices, _aminos, codons = zip(*gcode_rows)
    # Compare against list(range(...)) rather than range(...) itself.
    # In Python 3 a list never equals a range object,
    # so the bare comparison rejected every input, including valid ones.
    if [int(x) for x in gcode_indices] != list(range(len(gcode_indices))):
        raise ValueError(
                'genetic code rows must be indexed 0, 1, 2, ... in order')

    # read the interleaved phylip alignment;
    # the first item yielded by the reader is taken as the taxon names
    # and every following item is taken as an alignment column
    taxon_names = None
    cols = []
    for col in phylip.read_interleaved_codon_alignment(fin):
        if taxon_names is None:
            taxon_names = col
        else:
            cols.append(col)

    # define the ndarray of integers
    M_full = design.get_pattern_array(codons, cols)

    if fin_taxa is None:

        M = M_full

    else:

        # read the ordered taxon subset
        taxon_rows = list(csv.reader(fin_taxa, delimiter='\t'))
        taxon_indices, requested_taxa = zip(*taxon_rows)
        if [int(x) for x in taxon_indices] != list(range(len(taxon_indices))):
            raise ValueError(
                    'taxon rows must be indexed 0, 1, 2, ... in order')

        # init the pattern ndarray with unknown codon states
        M = np.full((len(cols), len(requested_taxa)), -1, dtype=int)

        # construct the inverse map of the default taxon ordering
        name_to_phylip_index = {x: i for i, x in enumerate(taxon_names)}

        # Redefine the columns according to the user ordering and subsetting.
        # Requested taxa that are absent from the alignment keep their -1 fill.
        for i, name in enumerate(requested_taxa):
            phylip_index = name_to_phylip_index.get(name)
            if phylip_index is not None:
                M[:, i] = M_full[:, phylip_index]

    # write the ndarray of integers
    np.savetxt(fout, M, fmt='%d', delimiter='\t')
def main(args):
    """
    Count codon state pairs between the human and chimp sequences.

    The genetic code file is read as tab-separated rows of
    (index, amino acid, codon).
    The input is split into paragraphs by gen_paragraphs; the paragraph
    at index 1 must be headed 'Human_Horai' and the paragraph at index 2
    must be headed 'Chimp_Horai', and the remaining lines of each of these
    paragraphs are concatenated and cut into consecutive triplets.
    The written (ncodons, ncodons) array has the human state as the row index
    and the chimp state as the column index.

    @param args: object providing the attributes code (path to the genetic
    code file), i (input passed to fileutil.open_or_stdin), and
    counts_out (output passed to fileutil.open_or_stdout)
    @raise ValueError: if the genetic code rows are not indexed
    0, 1, 2, ... in order, or if a paragraph header is not the expected one
    """

    # read the description of the genetic code
    with open(args.code) as fin_gcode:
        gcode_rows = list(csv.reader(fin_gcode, delimiter='\t'))
    gcode_indices, _aminos, codons = zip(*gcode_rows)
    # Compare against list(range(...)) rather than range(...) itself.
    # In Python 3 a list never equals a range object,
    # so the bare comparison rejected every input, including valid ones.
    if [int(x) for x in gcode_indices] != list(range(len(gcode_indices))):
        raise ValueError(
                'genetic code rows must be indexed 0, 1, 2, ... in order')

    # read the input
    with fileutil.open_or_stdin(args.i) as fin:
        paragraphs = list(gen_paragraphs(fin))

    # the paragraph at index 0 is not used
    human_header = paragraphs[1][0]
    human_lines = paragraphs[1][1:]
    chimp_header = paragraphs[2][0]
    chimp_lines = paragraphs[2][1:]

    if human_header != 'Human_Horai':
        raise ValueError(
                'expected header Human_Horai but found %r' % (human_header,))
    if chimp_header != 'Chimp_Horai':
        raise ValueError(
                'expected header Chimp_Horai but found %r' % (chimp_header,))

    human_dna = ''.join(human_lines)
    human_codons = [human_dna[i:i+3] for i in range(0, len(human_dna), 3)]

    chimp_dna = ''.join(chimp_lines)
    chimp_codons = [chimp_dna[i:i+3] for i in range(0, len(chimp_dna), 3)]

    # Materialize the columns as a list, because in Python 3 zip returns
    # a one-shot iterator with no length.
    codon_alignment_columns = list(zip(human_codons, chimp_codons))

    patterns = design.get_pattern_array(codons, codon_alignment_columns)

    # NOTE(review): if get_pattern_array can emit a negative state
    # for an unrecognized codon, the negative index would wrap around
    # and be counted in the last row or column -- confirm.
    ncodons = len(codons)
    counts = np.zeros((ncodons, ncodons), dtype=int)
    for i, j in patterns:
        counts[i, j] += 1

    # write the (ncodons, ncodons) array of counts of human to chimp changes;
    # the counts are integers, and '%d' keeps every digit whereas '%g'
    # switches to rounded scientific notation for counts of 1000000 or more
    with fileutil.open_or_stdout(args.counts_out, 'w') as fout:
        np.savetxt(fout, counts, fmt='%d', delimiter='\t')