def _calc_live_and_dead_finals(
    data: RawData,
    path_to_variable: Path,
    time_range: Tuple[float, float],
    agents: Iterable[str] = tuple(),
) -> Tuple[Dict[str, float], Dict[str, float]]:
    '''Get each agent's final value of a variable, split by survival.

    Only timepoints within ``time_range`` are considered. An agent
    counts as dead if its dead flag (at ``PATH_TO_DEAD``) is truthy at
    any considered timepoint. An agent's final value is the value of
    the variable at the latest considered timepoint at which the agent
    is present.

    Args:
        data: Raw simulation data.
        path_to_variable: Path, within an agent's data, to the variable
            whose final value is collected.
        time_range: Tuple of range endpoints (inclusive), each
            expressed as a fraction of the largest timepoint in
            ``data``.
        agents: IDs of the agents to include. If empty, all agents are
            included. Any iterable is accepted, including one-shot
            iterators.

    Returns:
        Tuple ``(live_finals, dead_finals)``. Each is a map from agent
        ID to that agent's final value. ``live_finals`` holds the
        agents never flagged dead in the range, and ``dead_finals``
        holds the rest.

    Raises:
        ValueError: If ``data`` is empty.
    '''
    # Materialize the filter once. Testing membership directly against
    # a one-shot iterator would consume it on the first check, and
    # against a list it would cost O(n) per agent per timepoint.
    selected = frozenset(agents) if agents else frozenset()

    end_time = max(data.keys())
    range_start = time_range[0] * end_time
    range_end = time_range[1] * end_time

    values: Dict[str, Dict[float, float]] = {}
    dead = set()
    for time, time_data in data.items():
        if time < range_start or time > range_end:
            continue
        agents_data = get_in(time_data, PATH_TO_AGENTS)
        for agent, agent_data in agents_data.items():
            if selected and agent not in selected:
                continue
            if get_in(agent_data, PATH_TO_DEAD, False):
                dead.add(agent)
            values.setdefault(agent, {})[time] = get_in(
                agent_data, path_to_variable)

    live_finals = {}
    dead_finals = {}
    # Every entry of ``values`` holds at least one timepoint because an
    # entry is only created together with its first value.
    for agent, agent_values in values.items():
        final_value = agent_values[max(agent_values)]
        if agent in dead:
            dead_finals[agent] = final_value
        else:
            live_finals[agent] = final_value
    return live_finals, dead_finals
def raw_data_to_end_expression_table(
    raw_data: RawData,
    paths_dict: Dict[str, Path],
) -> pd.DataFrame:
    '''Tabulate per-agent expression levels at the last timepoint.

    Args:
        raw_data: Raw simulation data.
        paths_dict: Map from names to paths to protein counts. The
            names are used as column headers in the returned table.

    Returns:
        Table with one row for each agent present at the largest
        timepoint in ``raw_data``. There is one column per name in
        ``paths_dict`` holding the count at that path divided by the
        agent's volume (0 when the volume is falsy), plus a column
        named ``VOLUME_KEY`` holding the agent's volume.
    '''
    final_time = max(raw_data.keys())
    final_snapshot = raw_data[final_time]

    columns: Dict = {}
    for name in paths_dict:
        columns[name] = []
    columns[VOLUME_KEY] = []

    for agent_data in get_in(final_snapshot, AGENTS_PATH).values():
        volume = get_in(agent_data, VOLUME_PATH, 0)
        columns[VOLUME_KEY].append(volume)
        for name, path in paths_dict.items():
            count = get_in(agent_data, path, 0)
            if volume:
                columns[name].append(count / volume)
            else:
                # Avoid dividing by a zero (or missing) volume.
                columns[name].append(0)
    return pd.DataFrame(columns)
def get_total_mass_timeseries(data: RawData) -> List[float]:
    '''Compute the total mass of a simulation at every timepoint.

    Args:
        data: Data from the simulation.

    Returns:
        A list with one entry per timepoint, ordered by increasing
        time. Each entry is the result of ``get_total_mass`` applied to
        the agents data at that timepoint.
    '''
    return [
        get_total_mass(get_in(data[timepoint], AGENTS_PATH))
        for timepoint in sorted(data.keys())
    ]
def _get_final_live_agents(
    data: RawData,
    time_range: Tuple[float, float] = (0, 1),
) -> List[str]:
    '''List the agents that are alive at the end of a time range.

    Args:
        data: Raw simulation data.
        time_range: Tuple of range endpoints, passed unchanged to
            ``filter_raw_data_by_time``.

    Returns:
        IDs of the agents present at the latest timepoint within the
        range whose dead flag (at ``PATH_TO_DEAD``) is not truthy.
    '''
    in_range = filter_raw_data_by_time(data, time_range)
    last_time = max(in_range.keys())
    # Pylint doesn't recognize that the RawData NewType is a dict
    last_snapshot = in_range[last_time]  # pylint: disable=unsubscriptable-object
    final_agents = get_in(last_snapshot, PATH_TO_AGENTS)
    return [
        agent
        for agent, agent_data in final_agents.items()
        if not get_in(agent_data, PATH_TO_DEAD)
    ]
def filter_raw_data_by_time(
    raw_data: RawData,
    time_range: Tuple[float, float],
) -> RawData:
    '''Keep only the timepoints of raw data that lie within a range.

    Args:
        raw_data: Raw simulation data.
        time_range: Tuple of range endpoints. Each endpoint is a float
            between 0 and 1 (inclusive) that denotes a fraction of the
            total simulation time, taken to be the largest timepoint in
            ``raw_data``.

    Returns:
        A subset of the key-value pairs in ``raw_data``. Includes only
        those timepoints between the ``time_range`` endpoints
        (inclusive).
    '''
    end = max(raw_data.keys())
    earliest = time_range[0] * end
    latest = time_range[1] * end

    kept = {}
    for time, time_data in raw_data.items():
        if earliest <= time <= latest:
            kept[time] = time_data
    return RawData(kept)
def plot_phylogeny(
    data: RawData,
    out: str = 'phylogeny.pdf',
    live_color: str = 'green',
    dead_color: str = 'black',
    ignore_color: str = 'lightgray',
    time_range: Tuple[float, float] = (0, 1)
) -> Tuple[TreeNode, pd.DataFrame]:
    '''Plot phylogenetic tree from an experiment.

    Args:
        data: The simulation data.
        out: Path to the output file. File type will be inferred from
            the file name.
        live_color: Color for nodes representing cells that survive
            until division.
        dead_color: Color for nodes representing cells that die.
        ignore_color: Color for nodes outside the time range
            considered.
        time_range: Tuple specifying the range of times to consider.
            Range values specified as fractions of the final
            timepoint.

    Returns:
        Tuple of the rendered tree and a table of survival outcomes.
        The table has one row for each agent present within the time
        range, sorted by agent ID, with columns ``agents`` (the agent
        ID) and ``survival`` (0 if the agent was flagged dead within
        the time range, 1 otherwise).
    '''
    agent_ids: Set[str] = set()
    dead_ids: Set[str] = set()
    in_time_range_ids: Set[str] = set()

    end_time = max(data.keys())
    range_start = time_range[0] * end_time
    range_end = time_range[1] * end_time
    for time, time_data in data.items():
        agents_data = get_in(time_data, AGENTS_PATH)
        assert agents_data is not None, (
            'no agents data found at time {}'.format(time))
        # The tree is built from every agent, including those outside
        # the time range, which are drawn in ``ignore_color``.
        agent_ids.update(agents_data.keys())
        if range_start <= time <= range_end:
            in_time_range_ids.update(agents_data.keys())
            for agent_id, agent_data in agents_data.items():
                if get_in(agent_data, PATH_TO_DEAD, False):
                    dead_ids.add(agent_id)

    trees = make_ete_trees(agent_ids)
    assert len(trees) == 1, (
        'expected exactly one tree, got {}'.format(len(trees)))
    tree = trees[0]

    # Set style for overall figure
    tstyle = TreeStyle()
    tstyle.show_scale = False
    tstyle.show_leaf_name = False
    tstyle.scale = None
    tstyle.optimal_scale_level = 'full'  # Avoid artificial branches
    tstyle.mode = 'c'  # Circular layout
    legend = {
        'Die': dead_color,
        'Survive': live_color,
        'Divided Before Antibiotics Appeared': ignore_color,
    }
    for label, color in legend.items():
        tstyle.legend.add_face(CircleFace(5, color), column=0)
        tstyle.legend.add_face(
            TextFace(' ' + label, ftype=FONT), column=1)

    # Set styles for each node
    for node in tree.traverse():
        nstyle = NodeStyle()
        nstyle['size'] = 5
        nstyle['vt_line_width'] = 1
        nstyle['hz_line_width'] = 1
        if node.name not in in_time_range_ids:
            nstyle['fgcolor'] = ignore_color
        elif node.name in dead_ids:
            nstyle['fgcolor'] = dead_color
        else:
            nstyle['fgcolor'] = live_color
        node.set_style(nstyle)

    tree.render(out, tree_style=tstyle, w=400)

    # Sort the IDs so that the row order of the table is reproducible.
    # Iterating the set directly yields an order that can change from
    # one interpreter run to the next because of string hash
    # randomization.
    agents_col = sorted(in_time_range_ids)
    survive_col = [0 if agent in dead_ids else 1 for agent in agents_col]
    df = pd.DataFrame({'agents': agents_col, 'survival': survive_col})
    return tree, df