Python stripped_lines Examples, iterutils.stripped_lines Python Examples

Example #1

0

Show file

File: 20081201a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Validate the submitted newick trees and report an analysis of each one.
    @param fs: form data providing the trees, epsilon and options attributes
    @return: the response text; the lines of each result are followed
        by an empty line
    """
    # Parse each line yielded by iterutils.stripped_lines as a tree,
    # rejecting a tree as soon as it violates one of the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                    'expected at least 4 tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # The negligibility threshold for eigenvector loadings
    # must lie in the half open interval [0, 1).
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    options = fs.options
    # Analyze all of the trees before writing any of the response.
    analyses = [AnalysisResult(tree, epsilon) for tree in trees]
    out = StringIO()
    for analysis in analyses:
        for response_line in analysis.get_response_lines(options):
            print >> out, response_line
        # an empty line separates consecutive results
        print >> out
    return out.getvalue()

Example #2

0

Show file

File: 20100603e.py Project: BIGtigr/xgcode

def process(hud_lines, matpheno_lines):
    """
    Combine a .hud file and a MAT_pheno.txt file into .ind file contents.
    @param hud_lines: lines of a .hud file
    @param matpheno_lines: lines of a MAT_pheno.txt file
    @return: contents of an .ind file
    """
    # only the ordered names are used from the decoded .hud file
    ordered_names, _ = hud.decode(hud_lines)
    # bucket the individuals by MAT_pheno code: '1' is case, '2' is control
    members = {'1': set(), '2': set()}
    skipped_codes = ('12', 'null')
    for entry in iterutils.stripped_lines(matpheno_lines):
        individual, code = entry.split(None, 1)
        if code in members:
            members[code].add(individual)
        elif code not in skipped_codes:
            # anything else is neither a known code nor a skippable one
            raise Exception('invalid MAT_pheno classification: ' + code)
    cases = members['1']
    controls = members['2']
    # write one tab separated row per name, in the order of the .hud file;
    # the gender column is always 'U'
    out = StringIO()
    for individual in ordered_names:
        if individual in cases:
            status = 'Case'
        elif individual in controls:
            status = 'Control'
        else:
            status = 'Ignore'
        print >> out, '\t'.join([individual, 'U', status])
    return out.getvalue().rstrip()

Example #3

0

Show file

File: 20081201a.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Validate the submitted newick trees and report an analysis of each one.
    @param fs: form data providing the trees, epsilon and options attributes
    @return: the response text; the lines of each result are followed
        by an empty line
    """
    # Parse each line yielded by iterutils.stripped_lines as a tree,
    # rejecting a tree as soon as it violates one of the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                    'expected at least 4 tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # The negligibility threshold for eigenvector loadings
    # must lie in the half open interval [0, 1).
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    options = fs.options
    # Analyze all of the trees before writing any of the response.
    analyses = [AnalysisResult(tree, epsilon) for tree in trees]
    out = StringIO()
    for analysis in analyses:
        for response_line in analysis.get_response_lines(options):
            print >> out, response_line
        # an empty line separates consecutive results
        print >> out
    return out.getvalue()

Example #4

0

Show file

File: 20090318a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Check sign partitions of principal coordinates against each tree.
    @param fs: form data providing the trees attribute
    @return: response text describing each invalid partition,
        followed by the error count and the counterexample count
    """
    def sign_partition(names, loadings):
        # split the names by the sign of the loading paired with each name
        nonneg = frozenset(
                name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
                name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # Parse each line yielded by iterutils.stripped_lines as a tree
    # and reject trees that do not meet the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()
    error_count = 0
    counterexample_count = 0
    for tree in trees:
        # the partitions that are valid for this tree
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        # the partition from the distance matrix itself should be valid
        D = np.array(tree.get_distance_matrix(names))
        part = sign_partition(names, get_principal_coordinate(D))
        if part not in valid_parts:
            error_count += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the incorrect formula uses the elementwise product D * D instead
        part = sign_partition(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            counterexample_count += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    print >> out, 'errors found:', error_count
    print >> out, 'counterexamples found:', counterexample_count
    return out.getvalue()

Example #5

0

Show file

File: Hyphy.py Project: argriffing/xgcode

def get_hyphy_namespace(lines):
    """
    Feed HyPhy output to a new namespace object, one line at a time.
    @param lines: lines of HyPhy output
    @return: a HyphyNamespace object
    """
    namespace = HyphyNamespace()
    # the namespace sees only what iterutils.stripped_lines yields
    for output_line in iterutils.stripped_lines(lines):
        namespace.process_line(output_line)
    return namespace

Example #6

0

Show file

def get_hyphy_namespace(lines):
    """
    Feed HyPhy output to a new namespace object, one line at a time.
    @param lines: lines of HyPhy output
    @return: a HyphyNamespace object
    """
    namespace = HyphyNamespace()
    # the namespace sees only what iterutils.stripped_lines yields
    for output_line in iterutils.stripped_lines(lines):
        namespace.process_line(output_line)
    return namespace

Example #7

0

Show file

File: 20080129b.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Draw a tree whose branches show simulated nucleotide histories.
    @param fs: form data providing the tree, column and imageformat attributes
    @return: the value returned by DrawTreeImage.get_tree_image
    """
    # the tree must be valid and free of negative branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        complaint = (
                'drawing a tree with negative branch lengths '
                'is not implemented')
        raise HandlingError(complaint)
    tree.add_branch_lengths()
    # read the column, mapping each name to an upper case nucleotide
    accepted_letters = list('acgtACGT')
    name_to_nucleotide = {}
    for entry in iterutils.stripped_lines(fs.column.splitlines()):
        name, letter = SnippetUtil.get_state_value_pair(entry)
        if letter not in accepted_letters:
            raise HandlingError('"%s" is not a valid nucleotide' % letter)
        if name in name_to_nucleotide:
            raise HandlingError('the name "%s" was duplicated' % name)
        name_to_nucleotide[name] = letter.upper()
    # attach each nucleotide to its node; only childless nodes are allowed
    for name, letter in name_to_nucleotide.items():
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if node.children:
            raise HandlingError(
                    'constraints on internal nodes are not implemented')
        node.state = letter
    # build the Jukes-Cantor rate matrix object over the states A, C, G, T
    states = list('ACGT')
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    jc_rows = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_object = RateMatrix.RateMatrix(jc_rows, states)
    # simulate the ancestral nucleotides and then a path along each branch
    jc_object.simulate_ancestral_states(tree)
    for node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, node)
    # lay out the tree and draw it at 640 by 480
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

Example #8

0

Show file

File: 20080129b.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Draw a tree whose branches show simulated nucleotide histories.
    @param fs: form data providing the tree, column and imageformat attributes
    @return: the value returned by DrawTreeImage.get_tree_image
    """
    # the tree must be valid and free of negative branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        complaint = (
                'drawing a tree with negative branch lengths '
                'is not implemented')
        raise HandlingError(complaint)
    tree.add_branch_lengths()
    # read the column, mapping each name to an upper case nucleotide
    accepted_letters = list('acgtACGT')
    name_to_nucleotide = {}
    for entry in iterutils.stripped_lines(fs.column.splitlines()):
        name, letter = SnippetUtil.get_state_value_pair(entry)
        if letter not in accepted_letters:
            raise HandlingError('"%s" is not a valid nucleotide' % letter)
        if name in name_to_nucleotide:
            raise HandlingError('the name "%s" was duplicated' % name)
        name_to_nucleotide[name] = letter.upper()
    # attach each nucleotide to its node; only childless nodes are allowed
    for name, letter in name_to_nucleotide.items():
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if node.children:
            raise HandlingError(
                    'constraints on internal nodes are not implemented')
        node.state = letter
    # build the Jukes-Cantor rate matrix object over the states A, C, G, T
    states = list('ACGT')
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    jc_rows = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_object = RateMatrix.RateMatrix(jc_rows, states)
    # simulate the ancestral nucleotides and then a path along each branch
    jc_object.simulate_ancestral_states(tree)
    for node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, node)
    # lay out the tree and draw it at 640 by 480
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

Example #9

0

Show file

File: 20090318a.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Check sign partitions of principal coordinates against each tree.
    @param fs: form data providing the trees attribute
    @return: response text describing each invalid partition,
        followed by the error count and the counterexample count
    """
    def sign_partition(names, loadings):
        # split the names by the sign of the loading paired with each name
        nonneg = frozenset(
                name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
                name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # Parse each line yielded by iterutils.stripped_lines as a tree
    # and reject trees that do not meet the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()
    error_count = 0
    counterexample_count = 0
    for tree in trees:
        # the partitions that are valid for this tree
        valid_parts = TreeComparison.get_partitions(tree)
        names = [tip.get_name() for tip in tree.gen_tips()]
        # the partition from the distance matrix itself should be valid
        D = np.array(tree.get_distance_matrix(names))
        part = sign_partition(names, get_principal_coordinate(D))
        if part not in valid_parts:
            error_count += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the incorrect formula uses the elementwise product D * D instead
        part = sign_partition(names, get_principal_coordinate(D * D))
        if part not in valid_parts:
            counterexample_count += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    print >> out, 'errors found:', error_count
    print >> out, 'counterexamples found:', counterexample_count
    return out.getvalue()

Example #10

0

Show file

File: 20080123a.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Draw a tree whose branches are colored as requested.
    @param fs: form data providing the tree, coloration
        and imageformat attributes
    @return: the value returned by DrawTreeImage.get_tree_image
    """
    # the tree must be valid and free of negative branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        complaint = (
                'drawing a tree with negative branch lengths '
                'is not implemented')
        raise HandlingError(complaint)
    tree.add_branch_lengths()
    # read the coloration, mapping each branch name to an upper case rgb string
    hex_digits = set('0123456789ABCDEFabcdef')
    name_to_rgb = {}
    for entry in iterutils.stripped_lines(fs.coloration.splitlines()):
        name, rgb = SnippetUtil.get_state_value_pair(entry)
        rgb = rgb.upper()
        if len(rgb) != 6:
            raise HandlingError(
                    'expected each rgb string to be six characters long')
        invalid = set(rgb) - hex_digits
        if invalid:
            raise HandlingError(
                    'found invalid rgb characters: %s' % str(tuple(invalid)))
        # a repeated name silently keeps the color given last
        name_to_rgb[name] = rgb
    # color the named branches
    for name, rgb in name_to_rgb.items():
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        node.branch_color = rgb
    # NOTE(review): a RuntimeError from the layout is ignored
    # and the tree is drawn anyway -- confirm that this is intended.
    try:
        FastDaylightLayout.StraightBranchLayout().do_layout(tree)
    except RuntimeError:
        pass
    # draw the image at 640 by 480
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

Example #11

0

Show file

def process(raw_lines):
    """
    Convert the status column of an .ind file into a .pheno string.
    @param raw_lines: lines of an .ind file
    @return: the single string of a .pheno file
    """
    # each status contributes exactly one character to the result
    status_to_symbol = {'Control': '0', 'Case': '1', 'Ignore': '9'}
    symbols = []
    for entry in iterutils.stripped_lines(raw_lines):
        # each line must have exactly three whitespace separated fields
        _name, _gender, status = entry.split()
        if status not in status_to_symbol:
            raise Exception('Invalid status: ' + status)
        symbols.append(status_to_symbol[status])
    return ''.join(symbols)

Example #12

0

Show file

File: 20081218a.py Project: BIGtigr/xgcode

def get_alignment(data_string, tree_string):
    """
    Create the amino acid alignment and check its taxa against the tree.
    @param data_string: comma separated data, one table row per line
    @param tree_string: a newick tree string
    @return: the alignment created by get_amino_acid_alignment
    @raise HandlingError: if an alignment taxon is not a tip of the tree
    """
    # convert the comma separated data into a table
    table = []
    for line in iterutils.stripped_lines(StringIO(data_string)):
        # each line is parsed on its own, so it yields exactly one csv row
        row = list(csv.reader(StringIO(line), delimiter=',', quotechar='"'))[0]
        table.append(row)
    # create the amino acid fasta alignment
    alignment = get_amino_acid_alignment(table)
    # create the tree
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # Make sure that the newick tree has all of the taxa
    # required by the alignment.
    tree_taxa_set = set(node.get_name() for node in tree.gen_tips())
    alignment_taxa_set = set(alignment.headers)
    weird_alignment_taxa = alignment_taxa_set - tree_taxa_set
    if weird_alignment_taxa:
        # Report the offending taxa; this message previously referenced
        # an undefined name and so raised NameError instead.
        raise HandlingError('the following taxa were not found '
                            'in the tree: %s' % str(weird_alignment_taxa))
    # return the alignment
    return alignment

Example #13

0

Show file

File: 20081218a.py Project: argriffing/xgcode

def get_alignment(data_string, tree_string):
    """
    Create the amino acid alignment and check its taxa against the tree.
    @param data_string: comma separated data, one table row per line
    @param tree_string: a newick tree string
    @return: the alignment created by get_amino_acid_alignment
    @raise HandlingError: if an alignment taxon is not a tip of the tree
    """
    # convert the comma separated data into a table
    table = []
    for line in iterutils.stripped_lines(StringIO(data_string)):
        # each line is parsed on its own, so it yields exactly one csv row
        row = list(csv.reader(
            StringIO(line), delimiter=',', quotechar='"'))[0]
        table.append(row)
    # create the amino acid fasta alignment
    alignment = get_amino_acid_alignment(table)
    # create the tree
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # Make sure that the newick tree has all of the taxa
    # required by the alignment.
    tree_taxa_set = set(node.get_name() for node in tree.gen_tips())
    alignment_taxa_set = set(alignment.headers)
    weird_alignment_taxa = alignment_taxa_set - tree_taxa_set
    if weird_alignment_taxa:
        # Report the offending taxa; this message previously referenced
        # an undefined name and so raised NameError instead.
        raise HandlingError(
                'the following taxa were not found '
                'in the tree: %s' % str(weird_alignment_taxa))
    # return the alignment
    return alignment

Example #14

0

Show file

def get_response_content(fs):
    """
    Encode each sequence as the concatenation of per-symbol vectors.
    @param fs: form data providing the sequences and epsilon attributes
    @return: the string form of the matrix of numbers, ending with a newline
    """
    # one sequence per line yielded by iterutils.stripped_lines
    sequences = [
            entry.strip()
            for entry in iterutils.stripped_lines(fs.sequences.splitlines())]
    # the alphabet is the sorted collection of distinct symbols
    alphabet = sorted(set(''.join(sequences)))
    # filter each element of each symbol vector through eps_filter
    vectors = []
    for raw_vector in get_vectors(len(alphabet)):
        vectors.append([eps_filter(x, fs.epsilon) for x in raw_vector])
    symbol_to_vector = dict(zip(alphabet, vectors))
    # each sequence becomes one row that chains the vectors of its symbols
    rows = [
            [x for symbol in sequence for x in symbol_to_vector[symbol]]
            for sequence in sequences]
    return MatrixUtil.m_to_string(rows) + '\n'

Example #15

0

Show file

File: 20081114a.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Compare splits from augmented and not-augmented distance matrices.
    @param fs: form data providing the trees attribute
    @return: response text with a paragraph for each tree that has
        a failed or differing split, followed by summary counts
    """
    def positive_split(nodes, values):
        # partition the node names by whether the paired value is positive
        selection = frozenset(
                node.get_name() for node, x in zip(nodes, values) if x > 0)
        complement = frozenset(
                node.get_name() for node, x in zip(nodes, values) if x <= 0)
        return frozenset((selection, complement))

    # Parse each line yielded by iterutils.stripped_lines as a tree
    # and reject trees that do not meet the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # this paragraph reaches the response only if something is noteworthy
        paragraph = StringIO()
        noteworthy = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        tip_nodes = []
        internal_nodes = []
        for node in tree.preorder():
            if node.is_tip():
                tip_nodes.append(node)
            else:
                internal_nodes.append(node)
        valid_partitions = TreeComparison.get_partitions(tree)
        # The augmented matrix covers the tips followed by the internal
        # nodes, so the leading entries of its vector belong to the tips.
        augmented_ids = [id(node) for node in tip_nodes + internal_nodes]
        D_full = tree.get_partial_distance_matrix(augmented_ids)
        y_full = get_vector(D_full).tolist()
        split_a = positive_split(tip_nodes, y_full[:len(tip_nodes)])
        if split_a not in valid_partitions:
            print >> paragraph, 'augmented distance matrix split fail:', split_a
            noteworthy = True
        # the not-augmented matrix covers only the tips
        tip_ids = [id(node) for node in tip_nodes]
        D = tree.get_partial_distance_matrix(tip_ids)
        split_b = positive_split(tip_nodes, get_vector(D).tolist())
        if split_b not in valid_partitions:
            print >> paragraph, 'not-augmented distance matrix split fail:', split_b
            noteworthy = True
        # tally whether the two methods agree
        if split_a == split_b:
            same_count += 1
        else:
            diff_count += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, 'augmented distance matrix split:', split_a
            print >> paragraph, 'not-augmented distance matrix split:', split_b
            noteworthy = True
        # the print adds a newline after the paragraph
        if noteworthy:
            print >> out, paragraph.getvalue()
    # write the summary
    print >> out, 'for this many trees the same split was found:', same_count
    print >> out, 'for this many trees different splits were found:', diff_count
    return out.getvalue()

Example #16

0

Show file

File: 20081114a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Compare splits from augmented and not-augmented distance matrices.
    @param fs: form data providing the trees attribute
    @return: response text with a paragraph for each tree that has
        a failed or differing split, followed by summary counts
    """
    def positive_split(nodes, values):
        # partition the node names by whether the paired value is positive
        selection = frozenset(
            node.get_name() for node, x in zip(nodes, values) if x > 0)
        complement = frozenset(
            node.get_name() for node, x in zip(nodes, values) if x <= 0)
        return frozenset((selection, complement))

    # Parse each line yielded by iterutils.stripped_lines as a tree
    # and reject trees that do not meet the requirements.
    trees = []
    for newick_line in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(newick_line, FelTree.NewickTree)
        labels = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(labels)
        if ntips < 4:
            raise HandlingError(
                'expected at least four tips but found ' + str(ntips))
        for label in labels:
            if label is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(labels)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # this paragraph reaches the response only if something is noteworthy
        paragraph = StringIO()
        noteworthy = False
        print >> paragraph, NewickIO.get_newick_string(tree)
        # separate the tips from the internal nodes, keeping preorder
        tip_nodes = []
        internal_nodes = []
        for node in tree.preorder():
            if node.is_tip():
                tip_nodes.append(node)
            else:
                internal_nodes.append(node)
        valid_partitions = TreeComparison.get_partitions(tree)
        # The augmented matrix covers the tips followed by the internal
        # nodes, so the leading entries of its vector belong to the tips.
        augmented_ids = [id(node) for node in tip_nodes + internal_nodes]
        D_full = tree.get_partial_distance_matrix(augmented_ids)
        y_full = get_vector(D_full).tolist()
        split_a = positive_split(tip_nodes, y_full[:len(tip_nodes)])
        if split_a not in valid_partitions:
            print >> paragraph, 'augmented distance matrix split fail:', split_a
            noteworthy = True
        # the not-augmented matrix covers only the tips
        tip_ids = [id(node) for node in tip_nodes]
        D = tree.get_partial_distance_matrix(tip_ids)
        split_b = positive_split(tip_nodes, get_vector(D).tolist())
        if split_b not in valid_partitions:
            print >> paragraph, 'not-augmented distance matrix split fail:', split_b
            noteworthy = True
        # tally whether the two methods agree
        if split_a == split_b:
            same_count += 1
        else:
            diff_count += 1
            print >> paragraph, 'this tree was split differently '
            print >> paragraph, 'by the different methods:'
            print >> paragraph, 'augmented distance matrix split:', split_a
            print >> paragraph, 'not-augmented distance matrix split:', split_b
            noteworthy = True
        # the print adds a newline after the paragraph
        if noteworthy:
            print >> out, paragraph.getvalue()
    # write the summary
    print >> out, 'for this many trees the same split was found:', same_count
    print >> out, 'for this many trees different splits were found:', diff_count
    return out.getvalue()

Example #17

0

Show file

File: 20080703b.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Count how the distance matrix bipartition of each tree relates to the tree.
    @param fs: form data providing the trees attribute, the criterion flags
        (exact, sign, threshold, nj, random) and the strength attribute
    @return: response text with the informative, degenerate
        and invalid split counts on three lines
    @raise HandlingError: for an unsuitable tree, when no criterion flag
        is set, or when the computation would take too long
    """
    # get the newick trees.
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        # Parse each tree and make sure
        # that it conforms to various requirements.
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        if len(tip_names) < 4:
            msg_a = 'expected at least four tips but found '
            msg_b = str(len(tip_names))
            raise HandlingError(msg_a + msg_b)
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # read the criterion flags, creating the splitter object;
    # the first flag that is set wins
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # Without this branch the splitter would be unbound below,
        # which would surface as an UnboundLocalError.
        raise HandlingError('no bipartition criterion was selected')
    # assert that the computation is fast
    complexity = 0
    for tree in trees:
        n = len(list(tree.gen_tips()))
        complexity += n * splitter.get_complexity(n)
    if complexity > 1000000:
        raise HandlingError('this computation would take too long')
    # evaluate the bipartition of each tree based on its distance matrix
    informative_split_count = 0
    degenerate_split_count = 0
    invalid_split_count = 0
    for tree in trees:
        tips = list(tree.gen_tips())
        n = len(tips)
        D = tree.get_distance_matrix()
        if fs.strength:
            # Perturb a copy of the matrix symmetrically: each off-diagonal
            # distance is multiplied by exp(x) where x is drawn from a
            # normal distribution with mean 0 and deviation fs.strength.
            P = [row[:] for row in D]
            for i in range(n):
                for j in range(i):
                    x = random.normalvariate(0, fs.strength)
                    new_distance = D[i][j] * math.exp(x)
                    P[i][j] = new_distance
                    P[j][i] = new_distance
        else:
            P = D
        # the splitter selects indices, which are mapped back to tips
        index_selection = splitter.get_selection(P)
        tip_selection = [tips[i] for i in index_selection]
        n_selection = len(tip_selection)
        n_complement = n - n_selection
        if min(n_selection, n_complement) < 2:
            # one side of the split has fewer than two tips
            degenerate_split_count += 1
        elif tree.get_split_branch(tip_selection):
            informative_split_count += 1
        else:
            invalid_split_count += 1
    # define the response
    out = StringIO()
    print >> out, informative_split_count, 'informative splits'
    print >> out, degenerate_split_count, 'degenerate splits'
    print >> out, invalid_split_count, 'invalid splits'
    # return the response
    return out.getvalue()

Example #18

0

Show file

def gen_typed_rows(fin):
    """
    Lazily convert the stripped lines of the input into typed rows.
    @param fin: source of lines passed on to iterutils.stripped_lines
    @return: a generator yielding line_to_row(line) for each stripped line
    """
    stripped = iterutils.stripped_lines(fin)
    for raw_line in stripped:
        typed_row = line_to_row(raw_line)
        yield typed_row

Example #19

0

Show file

File: Nexus.py Project: argriffing/xgcode

 def load(self, lines):
     """
     Read nexus data, setting the tree and the alignment of this object.
     Whole-line bracketed comments are passed to add_comment.
     @param lines: lines of nexus data
     """
     # Each recognized BEGIN line opens the list that collects its block.
     taxa_lines = []
     tree_lines = []
     character_lines = []
     block_openers = {
         ('BEGIN', 'TAXA;'): taxa_lines,
         ('BEGIN', 'TREES;'): tree_lines,
         ('BEGIN', 'CHARACTERS;'): character_lines,
     }
     open_block = None
     for line in iterutils.stripped_lines(lines):
         # Ignore an entire line that is a comment.
         # Nested comments and multi-line comments
         # are not correctly processed here.
         if line.startswith('[') and line.endswith(']'):
             self.add_comment(line[1:-1])
             continue
         words = tuple(line.upper().split())
         if words in block_openers:
             open_block = block_openers[words]
         elif words == ('END;',):
             open_block = None
         elif open_block is not None:
             open_block.append(line)
     # the tree block and the character block are both required
     if not tree_lines:
         raise NexusError('TREES was not found')
     if not character_lines:
         raise NexusError('CHARACTERS was not found')
     # the newick string is everything after the single equals sign
     nexus_tree_string = ''.join(tree_lines)
     if nexus_tree_string.count(';') != 1:
         raise NexusError('expected exactly one semicolon in the nexus TREES block')
     if nexus_tree_string.count('=') != 1:
         raise NexusError('expected exactly one equals sign in the nexus TREES block')
     _prefix, _equals, newick_string = nexus_tree_string.partition('=')
     self.tree = Newick.parse(newick_string, Newick.NewickTree)
     # collect the rows that follow the MATRIX line of the character block
     skipped_prefixes = ('DIMENSIONS', 'FORMAT')
     matrix_rows = []
     in_matrix = False
     for line in character_lines:
         shouted = line.upper()
         if shouted.startswith(skipped_prefixes):
             continue
         if shouted.startswith('MATRIX'):
             in_matrix = True
             continue
         if in_matrix:
             matrix_rows.append(line.replace(';', ' '))
     if not matrix_rows:
         raise NexusError('no alignment was found')
     tokens = ' '.join(matrix_rows).split()
     if len(tokens) % 2:
         raise NexusError('expected the alignment to be a list of (taxon, sequence) pairs')
     # rewrite the (taxon, sequence) pairs as fasta text
     alignment_out = StringIO()
     for header, sequence in iterutils.chopped(tokens, 2):
         sequence = sequence.upper()
         unexpected_letters = set(sequence) - set('ACGT')
         if unexpected_letters:
             raise NexusError('unexpected sequence character(s): %s' % list(unexpected_letters))
         alignment_out.write('>%s\n' % header)
         alignment_out.write(sequence + '\n')
     self.alignment = Fasta.Alignment(StringIO(alignment_out.getvalue()))

Example #20

0

Show file

 def load(self, lines):
     """
     Parse nexus data into the tree and alignment attributes.
     Lines that are entirely a bracketed comment go to add_comment.
     @param lines: lines of nexus data
     """
     # sort the lines into the taxa, trees, and characters blocks
     taxa_lines, tree_lines, character_lines = [], [], []
     destination = None
     for line in iterutils.stripped_lines(lines):
         # Ignore an entire line that is a comment.
         # Nested comments and multi-line comments
         # are not correctly processed here.
         is_comment = line.startswith('[') and line.endswith(']')
         if is_comment:
             self.add_comment(line[1:-1])
             continue
         upper_tokens = line.upper().split()
         if upper_tokens == ['BEGIN', 'TAXA;']:
             destination = taxa_lines
         elif upper_tokens == ['BEGIN', 'TREES;']:
             destination = tree_lines
         elif upper_tokens == ['BEGIN', 'CHARACTERS;']:
             destination = character_lines
         elif upper_tokens == ['END;']:
             destination = None
         elif destination is not None:
             destination.append(line)
     # both the tree lines and the character lines must be present
     if not tree_lines:
         raise NexusError('TREES was not found')
     if not character_lines:
         raise NexusError('CHARACTERS was not found')
     # extract the newick tree string that follows the equals sign
     nexus_tree_string = ''.join(tree_lines)
     semicolon_count = nexus_tree_string.count(';')
     if semicolon_count != 1:
         raise NexusError(
             'expected exactly one semicolon in the nexus TREES block')
     equals_count = nexus_tree_string.count('=')
     if equals_count != 1:
         raise NexusError(
             'expected exactly one equals sign in the nexus TREES block')
     newick_string = nexus_tree_string[nexus_tree_string.index('=') + 1:]
     self.tree = Newick.parse(newick_string, Newick.NewickTree)
     # gather the alignment rows, which begin after the MATRIX line
     header_keywords = ('DIMENSIONS', 'FORMAT')
     rows = []
     seen_matrix = False
     for line in character_lines:
         upper_line = line.upper()
         if upper_line.startswith(header_keywords):
             continue
         if upper_line.startswith('MATRIX'):
             seen_matrix = True
         elif seen_matrix:
             rows.append(line.replace(';', ' '))
     if not rows:
         raise NexusError('no alignment was found')
     tokens = ' '.join(rows).split()
     if len(tokens) % 2 != 0:
         raise NexusError(
             'expected the alignment to be a list of (taxon, sequence) pairs'
         )
     # convert the (taxon, sequence) pairs to fasta text
     alignment_out = StringIO()
     nucleotides = set('ACGT')
     for header, sequence in iterutils.chopped(tokens, 2):
         sequence = sequence.upper()
         unexpected_letters = set(sequence) - nucleotides
         if unexpected_letters:
             raise NexusError('unexpected sequence character(s): %s' %
                              list(unexpected_letters))
         print >> alignment_out, '>%s' % header
         print >> alignment_out, sequence
     fasta_text = alignment_out.getvalue()
     self.alignment = Fasta.Alignment(StringIO(fasta_text))

Example #21

0

Show file

def gen_untyped_rows(fin):
    """
    Lazily split each stripped line of the input on whitespace.
    @param fin: source of lines passed on to iterutils.stripped_lines
    @return: a generator yielding a list of string tokens per stripped line
    """
    stripped = iterutils.stripped_lines(fin)
    for raw_line in stripped:
        tokens = raw_line.split()
        yield tokens

Example #22

0

Show file

File: 20080227a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Report eigendecompositions and identicality values for a rate matrix.
    The rate matrix is built from the energies in the form data,
    and a few fixed test rate matrices are reported afterwards.
    @param fs: form data whose energies attribute has one number per line
    @return: the text of the report with trailing whitespace removed
    @raise ValueError: if an energy is not a number or there are over 100
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row in range(n):
        for col in range(n):
            rate = math.exp(-(energies[col] - energies[row]))
            R[row, col] = rate
    # Each diagonal entry is exp(0) == 1 at this point,
    # so adding 1 back makes the diagonal equal to
    # the negative sum of the off-diagonal rates in its row.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R * large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector whose eigenvalue is nearest zero.
    # The index is selected with a key function because
    # comparing (magnitude, array) pairs directly raises an error
    # whenever two eigenvalue magnitudes are tied.
    stationary_index = min(range(n), key=lambda i: abs(Wl[i]))
    pi_eigenvector = Vl[:, stationary_index]
    # get the stationary distribution itself
    total = np.sum(pi_eigenvector)
    pi_arr = np.array([v / total for v in pi_eigenvector])
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'rate matrix row sums:'
    print >> out, np.sum(R, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'L2 normalized eigenvector associated with stationary distn:'
    print >> out, pi_eigenvector
    print >> out
    print >> out, 'L1 renormalized vector (the stationary distribution):'
    print >> out, pi_arr
    print >> out
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    print >> out, 'transition matrix for t=%f:' % large_t
    print >> out, T
    print >> out
    print >> out, 'transition matrix row sums:'
    print >> out, np.sum(T, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'incorrect reconstitution of the transition matrix:'
    print >> out, ndot(Vr, np.diag(Wr), Vl.T)
    print >> out
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    print >> out, 'brute identicality computation for t=%f:' % t
    print >> out, get_numerical_identicality(R, t)
    print >> out
    print >> out, 'sophisticated identicality computation for t=%f:' % t
    print >> out, get_symbolic_identicality(coeffs, rates, c, t)
    print >> out
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
        np.array([[-2.0, 2.0], [2.0, -2.0]]),
        np.array([[-1.0, 1.0], [3.0, -3.0]]),
        np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
        #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
        #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
        np.array([[-en2, en2, 0], [e2, -2 * e2, e2], [0, en2, -en2]])
    ]
    t = 3.0
    # note that R is rebound to each test matrix in this loop
    for R in rate_matrices:
        coeffs, rates, c = get_identicality_params(R)
        print >> out, 'test rate matrix:'
        print >> out, R
        print >> out
        print >> out, 'eigenvalues:'
        print >> out, scipy.linalg.eigvals(R)
        print >> out
        print >> out, 'stationary distribution:'
        print >> out, R_to_distn(R)
        print >> out
        print >> out, 'brute identicality computation for t=%f:' % t
        print >> out, get_numerical_identicality(R, t)
        print >> out
        print >> out, 'sophisticated identicality computation for t=%f:' % t
        print >> out, get_symbolic_identicality(coeffs, rates, c, t)
        print >> out
        print >> out, 'identicality derivative for t=%f:' % t
        print >> out, get_identicality_derivative(coeffs, rates, t)
        print >> out
        print >> out
    # return the message
    return out.getvalue().rstrip()

Example #23

0

Show file

File: 20081117a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """
    Remove the requested vertices from a weighted graph.
    The reduced graph is computed by the transformation
    that corresponds to the first method flag that is set.
    @param fs: form data providing the graph text (one
    'vertex vertex weight' row per line), the vertices text (one vertex
    name per line), and the funky, funky_corrected, ohm,
    and conductance method flags
    @return: response text with one 'name name weight' row per reduced edge
    @raise HandlingError: if the graph, the vertices, or the method is invalid
    """
    # read the edge triples (vertex name, vertex name, edge weight)
    edge_triples = []
    for line in iterutils.stripped_lines(fs.graph.splitlines()):
        string_triple = line.split()
        if len(string_triple) != 3:
            raise HandlingError(
                    'each graph row should have three elements '
                    'but found this line: ' + line)
        triple = string_triple[:2]
        try:
            weight = float(string_triple[2])
        except ValueError:
            raise HandlingError(
                    'edge weights should be floating point numbers')
        if weight <= 0:
            raise HandlingError('edge weights should be positive')
        triple.append(weight)
        edge_triples.append(triple)
    # get the set of directed edges to check for redundant or invalid input
    unordered_directed_edges = set()
    for a, b, weight in edge_triples:
        if a == b:
            raise HandlingError(
                    'vertices should not have edges connecting to themselves')
        if (a, b) in unordered_directed_edges:
            raise HandlingError('each edge should be given only once')
        if (b, a) in unordered_directed_edges:
            raise HandlingError(
                    'each edge should be given in only one direction')
        unordered_directed_edges.add((a, b))
    # get the lexicographically ordered list of vertex names
    unordered_vertex_names = set()
    for edge in unordered_directed_edges:
        unordered_vertex_names.update(edge)
    ordered_vertex_names = sorted(unordered_vertex_names)
    name_to_index = dict(
            (name, i) for i, name in enumerate(ordered_vertex_names))
    # read the set of vertices that the user wants to remove
    vertex_names_to_remove = set()
    for name in iterutils.stripped_lines(fs.vertices.splitlines()):
        if name in vertex_names_to_remove:
            raise HandlingError(
                    'vertices should be named for removal at most once')
        vertex_names_to_remove.add(name)
    # Assert that the set of vertex names for removal
    # is a subset of the vertex names in the graph.
    weird_names = vertex_names_to_remove - unordered_vertex_names
    if weird_names:
        raise HandlingError(
                'some vertices named for removal '
                'were not found in the graph: ' + str(weird_names))
    # get the ordered list of vertex names that will remain
    reduced_ordered_vertex_names = sorted(
            unordered_vertex_names - vertex_names_to_remove)
    # Pick the transformation for the first method flag that is set.
    # Without the final branch a request with no flag set
    # would fail later on an unbound local variable.
    if fs.funky:
        transformation = get_funky_transformation
    elif fs.funky_corrected:
        transformation = get_corrected_funky_transformation
    elif fs.ohm:
        transformation = get_ohm_transformation
    elif fs.conductance:
        transformation = get_conductance_transformation
    else:
        raise HandlingError('no graph transformation method was selected')
    reduced_edge_triples = transformation(
            edge_triples, name_to_index, reduced_ordered_vertex_names)
    # write the reduced edge triples
    out = StringIO()
    for name_a, name_b, weight in reduced_edge_triples:
        print >> out, name_a, name_b, weight
    # write the response
    return out.getvalue()

Example #24

0

Show file

def get_response_content(fs):
    """
    Count the kinds of bipartitions that a splitter finds for some trees.
    Each tree is split using its distance matrix, which is
    randomly perturbed first when the strength is nonzero.
    @param fs: form data providing the trees text (one newick tree
    per line), the exact, sign, threshold, nj, and random criterion flags,
    and the perturbation strength
    @return: response text with the informative, degenerate,
    and invalid split counts
    @raise HandlingError: if a tree is unacceptable, if no criterion
    is selected, or if the computation would take too long
    """
    # get the newick trees.
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        # Parse each tree and make sure
        # that it conforms to various requirements.
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        if len(tip_names) < 4:
            msg_a = 'expected at least four tips but found '
            msg_b = str(len(tip_names))
            raise HandlingError(msg_a + msg_b)
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # Read the criterion flags, creating the splitter object.
    # Without the final branch a request with no flag set
    # would fail later on an unbound local variable.
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        raise HandlingError('no bipartition criterion was selected')
    # assert that the computation is fast
    complexity = 0
    for tree in trees:
        n = len(list(tree.gen_tips()))
        complexity += n * splitter.get_complexity(n)
    if complexity > 1000000:
        raise HandlingError('this computation would take too long')
    # evaluate the bipartition of each tree based on its distance matrix
    informative_split_count = 0
    degenerate_split_count = 0
    invalid_split_count = 0
    for tree in trees:
        tips = list(tree.gen_tips())
        n = len(tips)
        D = tree.get_distance_matrix()
        if fs.strength:
            # Perturb a copy of the distance matrix symmetrically by
            # multiplying each distance by the exponential of a normal sample.
            P = [row[:] for row in D]
            for i in range(n):
                for j in range(i):
                    x = random.normalvariate(0, fs.strength)
                    new_distance = D[i][j] * math.exp(x)
                    P[i][j] = new_distance
                    P[j][i] = new_distance
        else:
            P = D
        index_selection = splitter.get_selection(P)
        tip_selection = [tips[i] for i in index_selection]
        n_selection = len(tip_selection)
        n_complement = n - n_selection
        # a split with fewer than two tips on a side is degenerate
        if min(n_selection, n_complement) < 2:
            degenerate_split_count += 1
        elif tree.get_split_branch(tip_selection):
            informative_split_count += 1
        else:
            invalid_split_count += 1
    # define the response
    out = StringIO()
    print >> out, informative_split_count, 'informative splits'
    print >> out, degenerate_split_count, 'degenerate splits'
    print >> out, invalid_split_count, 'invalid splits'
    # return the response
    return out.getvalue()

Example #25

0

Show file

File: 20080227a.py Project: argriffing/xgcode

def get_response_content(fs):
    """
    Report eigendecompositions and identicality values for a rate matrix.
    The rate matrix is built from the energies in the form data,
    and a few fixed test rate matrices are reported afterwards.
    @param fs: form data whose energies attribute has one number per line
    @return: the text of the report with trailing whitespace removed
    @raise ValueError: if an energy is not a number or there are over 100
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row in range(n):
        for col in range(n):
            rate = math.exp(-(energies[col] - energies[row]))
            R[row, col] = rate
    # Each diagonal entry is exp(0) == 1 at this point,
    # so adding 1 back makes the diagonal equal to
    # the negative sum of the off-diagonal rates in its row.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R*large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector whose eigenvalue is nearest zero.
    # The index is selected with a key function because
    # comparing (magnitude, array) pairs directly raises an error
    # whenever two eigenvalue magnitudes are tied.
    stationary_index = min(range(n), key=lambda i: abs(Wl[i]))
    pi_eigenvector = Vl[:, stationary_index]
    # get the stationary distribution itself
    total = np.sum(pi_eigenvector)
    pi_arr = np.array([v/total for v in pi_eigenvector])
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()
    print >> out, 'rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'rate matrix row sums:'
    print >> out, np.sum(R, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'L2 normalized eigenvector associated with stationary distn:'
    print >> out, pi_eigenvector
    print >> out
    print >> out, 'L1 renormalized vector (the stationary distribution):'
    print >> out, pi_arr
    print >> out
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    print >> out, 'transition matrix for t=%f:' % large_t
    print >> out, T
    print >> out
    print >> out, 'transition matrix row sums:'
    print >> out, np.sum(T, axis=1)
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wr
    print >> out
    print >> out, 'corresponding orthonormal right eigenvectors (columns):'
    print >> out, Vr
    print >> out
    print >> out, 'eigenvalues:'
    print >> out, Wl
    print >> out
    print >> out, 'corresponding orthonormal left eigenvectors (columns):'
    print >> out, Vl
    print >> out
    print >> out, 'incorrect reconstitution of the transition matrix:'
    print >> out, ndot(Vr, np.diag(Wr), Vl.T)
    print >> out
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    print >> out, 'brute identicality computation for t=%f:' % t
    print >> out, get_numerical_identicality(R, t)
    print >> out
    print >> out, 'sophisticated identicality computation for t=%f:' % t
    print >> out, get_symbolic_identicality(coeffs, rates, c, t)
    print >> out
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
            np.array([[-2.0, 2.0], [2.0, -2.0]]),
            np.array([[-1.0, 1.0], [3.0, -3.0]]),
            np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
            #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
            #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
            np.array([[-en2, en2, 0], [e2, -2*e2, e2], [0, en2, -en2]])]
    t = 3.0
    # note that R is rebound to each test matrix in this loop
    for R in rate_matrices:
        coeffs, rates, c = get_identicality_params(R)
        print >> out, 'test rate matrix:'
        print >> out, R
        print >> out
        print >> out, 'eigenvalues:'
        print >> out, scipy.linalg.eigvals(R)
        print >> out
        print >> out, 'stationary distribution:'
        print >> out, R_to_distn(R)
        print >> out
        print >> out, 'brute identicality computation for t=%f:' % t
        print >> out, get_numerical_identicality(R, t)
        print >> out
        print >> out, 'sophisticated identicality computation for t=%f:' % t
        print >> out, get_symbolic_identicality(coeffs, rates, c, t)
        print >> out
        print >> out, 'identicality derivative for t=%f:' % t
        print >> out, get_identicality_derivative(coeffs, rates, t)
        print >> out
        print >> out
    # return the message
    return out.getvalue().rstrip()