Ejemplo n.º 1
0
def plot_marker_tree(tree, marker, resize_nodes=False, save=True):
    """
    Render a copy of ``tree`` whose nodes are coloured by average marker expression.

    The supplementary expression table is loaded from disk, rows whose cluster
    is annotated as 'dirt' are dropped, and the table is passed through
    ``convert_coordinates``. A new tree with the same topology and node names
    as ``tree`` is then built; every non-root node stores the value returned
    by ``get_node_markers`` for the matching original node. Those values are
    min-max normalised over all non-root nodes and mapped to a colour (and,
    optionally, a node size).

    :param tree: root node of the tree to mirror; only ``name`` and
        ``children`` of each node are read here.
    :param marker: name of the expression column to visualise; also used as
        the plot title and in the output file name.
    :param resize_nodes: if True, node size is ``1 + 10 * normalised value``
        (root: 1); otherwise every node has size 10.
    :param save: if True, render to ``<save_dir>/marker_tree_<marker>.png``
        and return None; otherwise return the result of an inline render.
    :return: None when ``save`` is True, else the value of
        ``render('%%inline', ...)``.
    """
    import os  # local import: only needed to create the output directory

    supplementary_data = pd.read_csv('../Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
    supplementary_data.rename(columns={'X.X': 'X', 'Y.Y': 'Y', 'Z.Z': 'Z'}, inplace=True)
    supplementary_data['CD45_int'] = supplementary_data['CD45'].astype(int)

    # remove dirt from supplementary data
    supplementary_annotations = pd.read_excel('../Suppl.Table2.cluster annotations and cell counts.xlsx')
    dirt = supplementary_annotations.loc[
        supplementary_annotations['Imaging phenotype (cell type)'] == 'dirt',
        'X-shift cluster ID']
    # .copy() so that the column assignment below writes to an independent
    # frame instead of a slice of the original table.
    supplementary_data = supplementary_data[
        ~supplementary_data['Imaging phenotype cluster ID'].isin(dirt)].copy()
    supplementary_data['sample'] = supplementary_data['sample_Xtile_Ytile'].apply(lambda x: x.split('_')[0])
    suppl_converted = convert_coordinates(supplementary_data)[['X', 'Y', 'Z', 'sample', marker]]

    new_tree = TreeNode(name=tree.name)
    new_tree.img_style['size'] = 1 if resize_nodes else 10
    new_tree.img_style['fgcolor'] = hls2hex(0, 0, 0)
    new_tree.img_style['shape'] = 'sphere'

    # Walk both trees layer by layer; old_layer[i] and new_layer[i] are always
    # the original node and its mirror.
    marker_avgs = []
    old_layer, new_layer = [tree], [new_tree]
    while old_layer:
        next_old_layer, next_new_layer = [], []
        for node, new_node in zip(old_layer, new_layer):
            for child in node.children:
                new_child = TreeNode(name=child.name)
                marker_avg = get_node_markers(child, marker, suppl_converted)
                new_child.add_features(marker_avg=marker_avg)
                marker_avgs.append(marker_avg)
                new_node.add_child(new_child)
                next_old_layer.append(child)
                next_new_layer.append(new_child)
        old_layer, new_layer = next_old_layer, next_new_layer

    # A tree consisting of only the root has nothing to normalise
    # (np.min/np.max would raise on an empty list).
    if marker_avgs:
        marker_min, marker_max = np.min(marker_avgs), np.max(marker_avgs)
        marker_range = marker_max - marker_min
        for node in new_tree.iter_descendants():
            # When every node has the same average the range is zero; map all
            # nodes to 0 rather than dividing by zero.
            if marker_range != 0:
                norm_marker = (node.marker_avg - marker_min) / marker_range
            else:
                norm_marker = 0.0
            node.add_features(marker_avg=norm_marker)
            node.add_features(color=hls2hex(0, norm_marker, norm_marker * 0.5))
            node.img_style['size'] = 1 + 10 * norm_marker if resize_nodes else 10
            node.img_style['fgcolor'] = node.color
            node.img_style['shape'] = 'sphere'

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.title.add_face(TextFace(marker, fsize=20), column=0)
    save_dir = 'Marker_Trees' if resize_nodes else 'Marker_Trees_Same_Size'

    if save:
        # Make sure the target directory exists before rendering into it.
        os.makedirs(save_dir, exist_ok=True)
        new_tree.render(save_dir + '/marker_tree_{}.png'.format(marker), tree_style=ts)
    else:
        return new_tree.render('%%inline', tree_style=ts)
Ejemplo n.º 2
0
 def initialize_pathogen_tree(self):
     """
     Start one pathogen lineage for every tip of the host tree.

     Each lineage is a new TreeNode named ``<host tip name>_P`` with
     ``dist=0``; the host tip and its height are attached as the ``host``
     and ``height`` features. Lineages whose host tip has height 0 go into
     ``self.extant_p``; all others go into ``self.not_yet_sampled_p``.
     Both lists are reset at the start of the call.

     TODO: relax the one-lineage-per-host-tip assumption - needs some way
     to input
     """
     # start from empty containers
     self.extant_p = []  # pathogen lineages that have not coalesced
     self.not_yet_sampled_p = []  # pathogen lineages higher in the tree

     for tip in self.hosttree.get_leaves():
         lineage = TreeNode(name=tip.name + '_P', dist=0)
         lineage.add_features(height=tip.height, host=tip)
         # tips at height 0 are available immediately; the rest wait
         target = self.extant_p if tip.height == 0 else self.not_yet_sampled_p
         target.append(lineage)
Ejemplo n.º 3
0
Archivo: utils.py Proyecto: zivia/grove
def discover_children(object=None):
    """
    Discovers all children defined in the thrift_spec of an instance of a thrift auto-generated class.

    For every entry in ``object.obj.thrift_spec.values()`` a new TreeNode is
    created (named after ``spec[1]``), annotated with the features
    ``t_parent`` (``object.obj``), ``t_name`` (``spec[1]``) and ``t_type``
    (``spec[2]``), and attached to ``object`` as a child.

    :param object: The treenode object to search to discover the children.
        The parameter name shadows the builtin but is kept for backward
        compatibility with keyword callers. When None (the default) there is
        nothing to inspect and an empty list is returned.
    :return: The discovered children, wrapped in treenodes.
    """
    # The default argument is None; dereferencing it would raise
    # AttributeError, so treat it as "no children".
    if object is None:
        return []

    nodes = []

    for spec in object.obj.thrift_spec.values():
        field_name = spec[1]
        node = TreeNode(name=field_name)
        node.add_features(t_parent=object.obj, t_name=field_name, t_type=spec[2])
        object.add_child(node)
        nodes.append(node)

    return nodes
Ejemplo n.º 4
0
    def coalesce_paths(self, child_paths, t0):
        """
        Merge two extant pathogen lineages into a new ancestral lineage.

        The new TreeNode is named by joining the child names with '_', gets
        ``dist=0``, and carries the shared host and ``t0`` as its ``host`` and
        ``height`` features. Each child is re-parented to the new node, its
        ``dist`` is set to ``t0 - child.height``, and it is moved from
        ``self.extant_p`` to ``self.not_extant_p``. The new node is appended
        to ``self.extant_p``.

        :param child_paths:  Exactly two TreeNodes of the pathogen tree; both
            must be in ``self.extant_p``, share the same host, and have a
            height strictly below ``t0``.
        :param t0:  Time of pathogen coalescence as height
        :return:  The TreeNode object for the new pathogen lineage.
        """
        assert len(child_paths) == 2, 'Can only coalesce 2 pathogen lineages at a time'
        first, second = child_paths

        assert first in self.extant_p and second in self.extant_p, \
            'Both pathogen lineages must be extant'
        assert first.host == second.host, \
            'Can only coalesce pathogen lineages in the same host'
        assert first.height < t0 and second.height < t0, \
            'Pathogen lineage heights %f %f cannot exceed coalescent event %f' % (first.height, second.height, t0)

        # build the ancestral lineage
        merged_name = '_'.join([lineage.name for lineage in child_paths])
        ancestor = TreeNode(name=merged_name, dist=0)
        ancestor.add_features(host=first.host, height=t0)

        # ete3.Tree.children must be a list, so copy child_paths into one
        ancestor.children = list(child_paths)
        self.extant_p.append(ancestor)

        # the merged lineages stop being extant
        for lineage in child_paths:
            lineage.up = ancestor
            # height was stored when the lineage was created; turn it into a branch length
            lineage.dist = t0 - lineage.height
            self.extant_p.remove(lineage)
            self.not_extant_p.append(lineage)

        return ancestor
def _coords_by_cluster(cluster_data):
    '''Map each cluster id to the X/Y/Z/sample rows of the cells assigned to it.'''
    coords = cluster_data[['cluster_id', 'sample', 'X', 'Y', 'Z']]
    return {
        group: coords.loc[inds, ['X', 'Y', 'Z', 'sample']]
        for group, inds in coords.groupby('cluster_id').groups.items()
    }


def build_tree(fcs_paths, num_neighbors, prop_filter=0.1):
    '''
    Build a tree linking the clusters of successive clusterings.

    The root (name 0, cluster_id 0) holds the coordinates of every cell in
    ``fcs_paths[0]``; the clusters of that file form the first layer. For
    each following file, cells are matched to the previous file by their
    integer-cast (X, Y, Z) coordinates, and for every child cluster the
    fraction of its cells found in each parent cluster is accumulated and
    handed to ``add_tree_layer`` together with ``prop_filter``.

    fcs_paths: FCS file paths indexed by layer number; indices
        0 .. len(num_neighbors) - 1 are read
    num_neighbors: number of neighbors used in X-shift, one entry per layer
        (only its length is used here)
    prop_filter: proportion of cells for edge between clusters to be created
    returns: the root TreeNode
    '''
    # first initialize tree with 1 node at top and its children
    tree = TreeNode(name=0)
    leaves = {0: tree}
    _, cluster_data_child = fcsparser.parse(fcs_paths[0])
    cluster_data_child = process_fcs(cluster_data_child)
    tree.add_features(coords=cluster_data_child[['X', 'Y', 'Z']])
    tree.add_features(cluster_id=0)

    child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
    clusters = list(child_cluster_counts.keys())
    child_coords = _coords_by_cluster(cluster_data_child)
    child_fractions = child_cluster_counts / child_cluster_counts.sum()
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    proportions = {
        child_node_id: {0: val}
        for child_node_id, val in child_fractions.items()
    }
    # set proportion filter to 0 for first layer, as everything is a child of the vertex
    tree, leaves = add_tree_layer(tree,
                                  leaves,
                                  clusters,
                                  proportions,
                                  child_coords,
                                  prop_filter=0)

    # build the rest of the tree
    for ind in range(len(num_neighbors) - 1):
        # The previous layer's processed data is this layer's parent, so each
        # file is parsed and processed only once.
        cluster_data_parent = cluster_data_child
        _, cluster_data_child = fcsparser.parse(fcs_paths[ind + 1])
        cluster_data_child = process_fcs(cluster_data_child)

        child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
        clusters = list(child_cluster_counts.keys())

        # Match cells between the two clusterings on integer coordinates.
        match_columns = ['X', 'Y', 'Z', 'cluster_id']
        match_data_parent = cluster_data_parent[match_columns].astype(int)
        match_data_child = cluster_data_child[match_columns].astype(int)
        merged = pd.merge(match_data_parent,
                          match_data_child,
                          on=['X', 'Y', 'Z'])
        parent_clusters = merged['cluster_id_x'].tolist()
        child_clusters = merged['cluster_id_y'].tolist()

        child_coords = _coords_by_cluster(cluster_data_child)

        # proportions[child][parent]: share of the child cluster's cells that
        # were matched to a cell of the parent cluster.
        proportions = defaultdict(Counter)
        for parent_cluster, child_cluster in zip(parent_clusters,
                                                 child_clusters):
            proportions[child_cluster][
                parent_cluster] += 1 / child_cluster_counts[child_cluster]
        tree, leaves = add_tree_layer(tree, leaves, clusters, proportions,
                                      child_coords, prop_filter)

    return tree