def plot_adjacent_unifrac(
    sid_data,
    dm_header,
    dm_data,
    map_data,
    time_field="WeeksSinceStart",
    line_color="blue",
    disturbance_colors=("blue", "green", "red", "orange"),
):
    """Plot distances between adjacent samples and mark disturbance events.

    Parameters
    ----------
    sid_data : iterable of indexable records
        For each record, element 0 is a time value convertible to float,
        element 1 is a sample id and element 2 is an iterable of the
        disturbance types recorded for that sample (empty when there are
        none).
    dm_header, dm_data
        Distance matrix header and data, passed through unchanged to
        ``get_adjacent_distances``.
    map_data : mapping
        Maps sample id -> mapping of metadata field -> value; the
        ``time_field`` entry must be convertible to float.
    time_field : str
        Metadata field supplying the x coordinate of each plotted distance.
    line_color : str
        Colour of the adjacent-distance line.
    disturbance_colors : sequence of str
        Colours for the vertical disturbance markers.  Colours are assigned
        to disturbance types in the order the types first appear in
        ``sid_data`` and are reused cyclically when there are more types
        than colours.

    Raises
    ------
    ValueError
        If disturbances are present but ``disturbance_colors`` is empty.
    """
    disturbed_weeks = []
    # Keep first-seen order (rather than going through a set) so that the
    # colour and x-offset given to each disturbance type are reproducible.
    disturbance_types = []
    for entry in sid_data:
        for disturbance in entry[2]:
            disturbed_weeks.append((float(entry[0]), disturbance))
            if disturbance not in disturbance_types:
                disturbance_types.append(disturbance)

    palette = list(disturbance_colors)
    if disturbance_types and not palette:
        raise ValueError(
            "disturbance_colors must not be empty when disturbances are present."
        )
    # Each type gets a small x-offset so that different disturbance types
    # recorded at the same time point do not draw on top of each other.
    marker_style = {}
    for position, disturbance in enumerate(disturbance_types):
        marker_style[disturbance] = (
            position * 0.1,
            palette[position % len(palette)],
        )

    distances, sample_pairs = get_adjacent_distances(
        dm_header, dm_data, [entry[1] for entry in sid_data]
    )
    # Each distance is plotted at the time of the second sample of its pair.
    # Build a real list: on Python 3 ``map`` would yield a lazy iterator.
    x = [float(map_data[pair[1]][time_field]) for pair in sample_pairs]
    plot(x, distances, "-.", c=line_color)

    ylim(0, 1)
    ymin, ymax = ylim()
    for week, disturbance in disturbed_weeks:
        offset, marker_color = marker_style[disturbance]
        shifted_week = week + offset
        plot([shifted_week, shifted_week], [ymin, ymax], "--", c=marker_color)
def test_get_adjacent_distances(self):
    """get_adjacent_distances handles error, valid, mixed and strict inputs"""
    distmat_lines = [
        "\ts1\ts2\ts3",
        "s1\t0\t2\t4",
        "s2\t2\t0\t3.2",
        "s3\t4\t3.2\t0",
    ]
    dm_header, dm = parse_distmat(distmat_lines)

    # Fewer than two sample ids that exist in the matrix -> ValueError.
    for too_few_ids in ([], ["s1"], ["s0", "s1"], ["s1", "s4"]):
        self.assertRaises(
            ValueError, get_adjacent_distances, dm_header, dm, too_few_ids
        )

    # (requested sample ids, expected (distances, sample id pairs))
    expectations = [
        # one pair of valid distances
        (["s1", "s2"], ([2], [("s1", "s2")])),
        (["s1", "s1"], ([0], [("s1", "s1")])),
        (["s1", "s3"], ([4], [("s1", "s3")])),
        (["s2", "s3"], ([3.2], [("s2", "s3")])),
        # multiple valid distances
        (["s1", "s2", "s3"], ([2, 3.2], [("s1", "s2"), ("s2", "s3")])),
        (
            ["s1", "s3", "s2", "s1"],
            ([4, 3.2, 2], [("s1", "s3"), ("s3", "s2"), ("s2", "s1")]),
        ),
        # ids missing from the matrix are skipped when not strict
        (
            ["s1", "s3", "s4", "s5", "s6", "s2", "s1"],
            ([4, 3.2, 2], [("s1", "s3"), ("s3", "s2"), ("s2", "s1")]),
        ),
    ]
    for requested_ids, expected in expectations:
        self.assertEqual(
            get_adjacent_distances(dm_header, dm, requested_ids), expected
        )

    # With strict=True the same missing ids raise instead of being skipped.
    self.assertRaises(
        ValueError,
        get_adjacent_distances,
        dm_header,
        dm,
        ["s1", "s3", "s4", "s5", "s6", "s2", "s1"],
        strict=True,
    )
def rank_adjacent_unifrac(
    sid_data,
    dm_header,
    dm_data,
    map_data,
    time_field="WeeksSinceStart",
    line_color="blue",
    disturbance_field="SampleAntibioticDisturbance",
):
    """Rank adjacent-sample distances from smallest to largest.

    Parameters
    ----------
    sid_data : iterable of indexable records
        For each record, element 0 is a time value convertible to float,
        element 1 is a sample id and element 2 is truthy when the sample
        has at least one disturbance.
    dm_header, dm_data
        Distance matrix header and data, passed through unchanged to
        ``get_adjacent_distances``.
    map_data : mapping
        Maps sample id -> mapping of metadata field -> value.
    time_field, line_color
        Not used by this function; kept so the signature stays parallel
        with ``plot_adjacent_unifrac``.
    disturbance_field : str
        Metadata field whose value is reported alongside each distance.

    Returns
    -------
    list of (rank, disturbance_value, distance) tuples
        Sorted by ascending distance, with ranks starting at 0.  The
        disturbance value is read from the second sample of each pair.

    Raises
    ------
    ValueError
        If no record in ``sid_data`` has a disturbance.
    """
    disturbed_weeks = [float(entry[0]) for entry in sid_data if entry[2]]
    if not disturbed_weeks:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Must be some disturbance events.")

    distances, sample_pairs = get_adjacent_distances(
        dm_header, dm_data, [entry[1] for entry in sid_data]
    )
    # Sort on (distance, disturbance value) so equal distances are ordered
    # by their disturbance value, as plain tuple comparison does.
    ranked = sorted(
        (distance, map_data[pair[1]][disturbance_field])
        for distance, pair in zip(distances, sample_pairs)
    )
    return [
        (rank, disturbance, distance)
        for rank, (distance, disturbance) in enumerate(ranked)
    ]
def test_get_adjacent_distances(self):
    """get_adjacent_distances returns the expected distances and id pairs"""
    dm_header, dm = parse_distmat(
        ["\ts1\ts2\ts3", "s1\t0\t2\t4", "s2\t2\t0\t3.2", "s3\t4\t3.2\t0"]
    )

    def adjacent(sample_ids, **kwargs):
        # Bind the parsed matrix so each case below only names the ids.
        return get_adjacent_distances(dm_header, dm, sample_ids, **kwargs)

    # error cases: fewer than 2 sample ids that are present in the matrix
    self.assertRaises(ValueError, adjacent, [])
    self.assertRaises(ValueError, adjacent, ["s1"])
    self.assertRaises(ValueError, adjacent, ["s0", "s1"])
    self.assertRaises(ValueError, adjacent, ["s1", "s4"])

    # a single pair of valid ids
    single_pair_cases = (
        (("s1", "s2"), 2),
        (("s1", "s1"), 0),
        (("s1", "s3"), 4),
        (("s2", "s3"), 3.2),
    )
    for id_pair, distance in single_pair_cases:
        self.assertEqual(adjacent(list(id_pair)), ([distance], [id_pair]))

    # several valid ids in a row
    self.assertEqual(
        adjacent(["s1", "s2", "s3"]),
        ([2, 3.2], [("s1", "s2"), ("s2", "s3")]),
    )
    round_trip = ([4, 3.2, 2], [("s1", "s3"), ("s3", "s2"), ("s2", "s1")])
    self.assertEqual(adjacent(["s1", "s3", "s2", "s1"]), round_trip)

    # ids missing from the matrix are skipped by default ...
    with_unknown_ids = ["s1", "s3", "s4", "s5", "s6", "s2", "s1"]
    self.assertEqual(adjacent(with_unknown_ids), round_trip)

    # ... but raise an error when strict=True
    self.assertRaises(ValueError, adjacent, with_unknown_ids, strict=True)