def plot_marker_tree(tree, marker, resize_nodes=False, save=True):
    """Render a copy of ``tree`` whose nodes are coloured by marker expression.

    The supplementary expression table is loaded, rows belonging to clusters
    annotated as 'dirt' are dropped, and the remainder is passed through
    ``convert_coordinates``. The topology of ``tree`` is then copied level by
    level; every copied non-root node stores the value returned by
    ``get_node_markers`` for its counterpart. Those values are min-max
    normalised across all non-root nodes and drive the node colour (and the
    node size when ``resize_nodes`` is true).

    :param tree: Root of the tree to copy; only ``name`` and ``children`` of
        each node are read here.
    :param marker: Column name of the marker in the supplementary table. Also
        used as the plot title and in the output file name.
    :param resize_nodes: If true, node size is ``1 + 10 * normalised value``
        and output goes to 'Marker_Trees'; otherwise every node has size 10
        and output goes to 'Marker_Trees_Same_Size'.
    :param save: If true, write a PNG into the output directory (created if
        missing) and return None. Otherwise return the result of an inline
        render.
    :return: None when ``save`` is true, else the value returned by
        ``render('%%inline', ...)``.
    """
    import os

    supplementary_data = pd.read_csv('../Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
    supplementary_data.rename(columns={'X.X': 'X', 'Y.Y': 'Y', 'Z.Z': 'Z'}, inplace=True)
    supplementary_data['CD45_int'] = supplementary_data['CD45'].astype(int)

    # remove dirt from supplementary data
    supplementary_annotations = pd.read_excel('../Suppl.Table2.cluster annotations and cell counts.xlsx')
    dirt = supplementary_annotations.loc[
        supplementary_annotations['Imaging phenotype (cell type)'] == 'dirt',
        'X-shift cluster ID',
    ]
    # .copy() gives an independent frame, so adding the 'sample' column below
    # does not write into a slice of the original table.
    supplementary_data = supplementary_data[
        ~supplementary_data['Imaging phenotype cluster ID'].isin(dirt)
    ].copy()
    supplementary_data['sample'] = supplementary_data['sample_Xtile_Ytile'].apply(
        lambda x: x.split('_')[0]
    )
    suppl_converted = convert_coordinates(supplementary_data)[['X', 'Y', 'Z', 'sample', marker]]

    new_tree = TreeNode(name=tree.name)
    new_tree.img_style['size'] = 1 if resize_nodes else 10
    new_tree.img_style['fgcolor'] = hls2hex(0, 0, 0)
    new_tree.img_style['shape'] = 'sphere'

    # Breadth-first copy: old_layer[i] in the source tree corresponds to
    # new_layer[i] in the copy.
    marker_avgs = []
    old_layer = [tree]
    new_layer = [new_tree]
    while old_layer:
        next_old_layer, next_new_layer = [], []
        for node, new_node in zip(old_layer, new_layer):
            for child in node.children:
                next_old_layer.append(child)
                new_child = TreeNode(name=child.name)
                marker_avg = get_node_markers(child, marker, suppl_converted)
                new_child.add_features(marker_avg=marker_avg)
                marker_avgs.append(marker_avg)
                new_node.add_child(new_child)
                next_new_layer.append(new_child)
        old_layer = next_old_layer
        new_layer = next_new_layer

    # A tree with only a root has nothing to normalise; np.min([]) would raise.
    if marker_avgs:
        marker_min, marker_max = np.min(marker_avgs), np.max(marker_avgs)
        marker_range = marker_max - marker_min
        for node in new_tree.iter_descendants():
            # When every node has the same value the range is zero; map all
            # nodes to 0.0 instead of dividing by zero.
            if marker_range:
                norm_marker = (node.marker_avg - marker_min) / marker_range
            else:
                norm_marker = 0.0
            # marker_avg is overwritten with its normalised value.
            node.add_features(marker_avg=norm_marker)
            node.add_features(color=hls2hex(0, norm_marker, norm_marker * 0.5))
            node.img_style['size'] = 1 + 10 * node.marker_avg if resize_nodes else 10
            node.img_style['fgcolor'] = node.color
            node.img_style['shape'] = 'sphere'

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.title.add_face(TextFace(marker, fsize=20), column=0)

    save_dir = 'Marker_Trees' if resize_nodes else 'Marker_Trees_Same_Size'
    if save:
        # Make sure the target directory exists before rendering into it.
        os.makedirs(save_dir, exist_ok=True)
        new_tree.render(save_dir + '/marker_tree_{}.png'.format(marker), tree_style=ts)
        return None
    return new_tree.render('%%inline', tree_style=ts)
def initialize_pathogen_tree(self):
    """
    Seed the pathogen tree with one lineage for every host tip.

    Each lineage is a TreeNode named after its host tip with a '_P' suffix
    and a branch length (dist) of 0. The host tip and that tip's height are
    stored on the lineage as the features `host` and `height`.
    Lineages whose host tip has height 0 go into `self.extant_p`; all others
    go into `self.not_yet_sampled_p`.

    TODO: relax the one-lineage-per-tip assumption - needs some way to input
    """
    # start from empty containers on every call
    self.extant_p = []  # pathogen lineages that have not coalesced
    self.not_yet_sampled_p = []  # pathogen lineages higher in the tree

    for host_tip in self.hosttree.get_leaves():
        lineage = TreeNode(name=host_tip.name + '_P', dist=0)
        lineage.add_features(height=host_tip.height, host=host_tip)

        # tips at height 0 are available immediately, the rest wait
        target = self.extant_p if host_tip.height == 0 else self.not_yet_sampled_p
        target.append(lineage)
def discover_children(object=None):
    """
    Build one child treenode per entry in the thrift_spec of the wrapped
    instance and attach each of them to the given treenode.

    For every value in ``object.obj.thrift_spec`` a TreeNode is created whose
    name is element 1 of the spec entry. The node carries the features
    ``t_parent`` (the wrapped instance), ``t_name`` (element 1) and ``t_type``
    (element 2).

    :param object: The treenode whose ``obj`` attribute holds the thrift
        instance to inspect. Despite the ``None`` default a node must be
        supplied, because it is dereferenced immediately.
    :return: The newly created treenodes, in the iteration order of
        ``thrift_spec.values()``.
    """
    wrapped = object.obj
    discovered = []
    for field_spec in wrapped.thrift_spec.values():
        field_name = field_spec[1]
        child = TreeNode(name=field_name)
        child.add_features(
            t_parent=wrapped,
            t_name=field_name,
            t_type=field_spec[2],
        )
        object.add_child(child)
        discovered.append(child)
    return discovered
def coalesce_paths(self, child_paths, t0):
    """
    Create a new TreeNode that joins two extant pathogen lineages in the
    same host, and update the lineage bookkeeping lists.

    The new lineage is named by joining the child names with '_', gets a
    branch length of 0, and stores the shared host and the coalescence height
    ``t0``. Each child gets the new lineage as its parent and a branch length
    of ``t0 - child.height``. Children are moved from ``self.extant_p`` to
    ``self.not_extant_p``; the new lineage is appended to ``self.extant_p``.

    :param child_paths:  Exactly two distinct TreeNodes in the pathogen tree,
                         both currently in ``self.extant_p`` and both in the
                         same host.
    :param t0:  Time of pathogen coalescence as height; must be strictly
                greater than the height of both children.
    :return:  TreeNode object for the new pathogen lineage. The extant and
              not-extant lists are updated in place, not returned.
    :raises AssertionError:  If any of the preconditions above is violated.
    """
    assert len(child_paths) == 2, 'Can only coalesce 2 pathogen lineages at a time'
    p1, p2 = child_paths
    # Without this check the same lineage passed twice would pass every other
    # precondition and then fail on the second list removal below, after the
    # lists had already been modified.
    assert p1 is not p2, 'Cannot coalesce a pathogen lineage with itself'
    assert p1 in self.extant_p and p2 in self.extant_p, 'Both pathogen lineages must be extant'
    assert p1.host == p2.host, 'Can only coalesce pathogen lineages in the same host'
    host = p1.host

    assert p1.height < t0 and p2.height < t0, \
        'Pathogen lineage heights %f %f cannot exceed coalescent event %f' % (p1.height, p2.height, t0)

    # create new pathogen lineage
    new_path = TreeNode(name='_'.join([x.name for x in child_paths]), dist=0)
    new_path.add_features(host=host, height=t0)

    # cast child_paths as a List because ete3.Tree.children requires it
    new_path.children = list(child_paths)
    self.extant_p.append(new_path)

    # coalesced pathogen lineages are no longer extant
    for node in child_paths:
        node.up = new_path
        node.dist = t0 - node.height  # when node was created, we stored the height
        self.extant_p.remove(node)
        self.not_extant_p.append(node)

    return new_path
def _coords_by_cluster(cluster_data):
    '''Split the X/Y/Z/sample columns of ``cluster_data`` into one frame per cluster_id.'''
    coords = cluster_data[['cluster_id', 'sample', 'X', 'Y', 'Z']]
    return {
        group: coords.loc[inds, ['X', 'Y', 'Z', 'sample']]
        for group, inds in coords.groupby('cluster_id').groups.items()
    }


def build_tree(fcs_paths, num_neighbors, prop_filter=0.1):
    '''
    Build a cluster tree layer by layer from a series of FCS clusterings.

    The root (name 0) holds the coordinates of every cell in the first file,
    and every cluster of that file is added beneath it. For each following
    file, cells are matched to the previous file on integer-cast X/Y/Z; for
    every (child cluster, parent cluster) pair the share of the child's cells
    that matched into the parent is accumulated and handed to
    ``add_tree_layer`` together with ``prop_filter``.

    fcs_paths: dictionary of (cluster numbers, path); it is indexed here with
        0 .. len(num_neighbors) - 1
    num_neighbors: number of neighbors used in X-shift; only its length is
        used here, one layer is built per entry
    prop_filter: proportion of cells for edge between clusters to be created
    returns: the root node of the tree
    '''
    # first initialize tree with 1 node at top and its children
    tree = TreeNode(name=0)
    leaves = {0: tree}
    _, cluster_data_child = fcsparser.parse(fcs_paths[0])
    cluster_data_child = process_fcs(cluster_data_child)
    tree.add_features(coords=cluster_data_child[['X', 'Y', 'Z']])
    tree.add_features(cluster_id=0)

    child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
    child_coords = _coords_by_cluster(cluster_data_child)
    clusters = list(child_cluster_counts.keys())
    child_cluster_counts = child_cluster_counts / child_cluster_counts.sum()
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    proportions = {
        child_node_id: {0: val}
        for child_node_id, val in child_cluster_counts.items()
    }

    # set proportion filter to 0 for first layer, as everything is a child of the vertex
    tree, leaves = add_tree_layer(tree, leaves, clusters, proportions,
                                  child_coords, prop_filter=0)

    # build the rest of the tree
    # The child frame of one layer is the parent frame of the next, so it is
    # carried forward instead of parsing and processing the same file again.
    cluster_data_parent = cluster_data_child
    for ind in range(len(num_neighbors) - 1):
        _, cluster_data_child = fcsparser.parse(fcs_paths[ind + 1])
        cluster_data_child = process_fcs(cluster_data_child)

        child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
        clusters = list(child_cluster_counts.keys())

        # Match cells between the two clusterings on their integer coordinates.
        match_data_parent = cluster_data_parent[['X', 'Y', 'Z', 'cluster_id']].astype(int)
        match_data_child = cluster_data_child[['X', 'Y', 'Z', 'cluster_id']].astype(int)
        merged = pd.merge(match_data_parent, match_data_child, on=['X', 'Y', 'Z'])
        parent_clusters = merged['cluster_id_x'].tolist()
        child_clusters = merged['cluster_id_y'].tolist()

        child_coords = _coords_by_cluster(cluster_data_child)

        # proportions[child][parent] accumulates 1 / (cells in child) for
        # every matched cell.
        proportions = defaultdict(Counter)
        for parent_cluster, child_cluster in zip(parent_clusters, child_clusters):
            proportions[child_cluster][parent_cluster] += 1 / child_cluster_counts[child_cluster]

        tree, leaves = add_tree_layer(tree, leaves, clusters, proportions,
                                      child_coords, prop_filter)
        cluster_data_parent = cluster_data_child

    return tree