def _get_sorted_index(self):
    """Build a per-reference index of samples, ordered along each reference.

    Every ``(key, filename)`` pair in ``self.samples`` is decoded with
    ``Sample.decode_sample_name``; pairs for which decoding returns ``None``
    are skipped.  The decoded dict is augmented with ``'key'`` and
    ``'filename'`` entries and grouped under its ``'ref_name'``.

    :returns: {ref_name: [sample dicts sorted by start]}, as an
        ``OrderedDict`` whose keys are in sorted order.
    """
    def _as_ints(position):
        # 'major.minor' -> (major, minor), compared numerically
        return tuple(int(part) for part in position.split('.'))

    def _chunk_order(entry):
        # Ascending start, then descending end: when two samples share a
        # start position, the longer one is placed first.
        negated_end = tuple(-value for value in _as_ints(entry['end']))
        return _as_ints(entry['start']) + negated_end

    by_ref = defaultdict(list)
    for key, f in self.samples:
        decoded = Sample.decode_sample_name(key)
        if decoded is None:
            continue
        decoded['key'] = key
        decoded['filename'] = f
        by_ref[decoded['ref_name']].append(decoded)

    # Emit references in sorted order, each with its chunks sorted in place.
    ordered = OrderedDict()
    for ref_name in sorted(by_ref.keys()):
        chunks = by_ref[ref_name]
        chunks.sort(key=_chunk_order)
        ordered[ref_name] = chunks
    return ordered
def test_decode_sample_name(self):
    """Check the fields ``Sample.decode_sample_name`` recovers from sample names.

    Each sample in ``self.samples`` is paired, in order, with the dict its
    name is expected to decode to.
    """
    fields = ('ref_name', 'start', 'end')
    rows = (
        ('contig1', '0.0', '4.3'),
        ('contig1', '4.1', '7.0'),
    )
    expected = [dict(zip(fields, row)) for row in rows]

    # zip stops at the shorter input, so only as many samples as there are
    # expectations (and vice versa) are compared.
    for expt, sample in zip(expected, self.samples):
        decoded = Sample.decode_sample_name(sample.name)
        self.assertEqual(expt, decoded)
def _get_sorted_index(self):
    """Get index of samples indexed by reference and ordered by start pos.

    Every ``(key, filename)`` pair in ``self.samples`` is decoded with
    ``Sample.decode_sample_name``; pairs for which decoding returns ``None``
    are skipped.  The decoded dict gains ``'key'`` and ``'filename'``
    entries and is grouped under its ``'ref_name'``.

    Positions are treated as dot-separated integer components (e.g.
    ``'4.10'`` is major 4, minor 10) and compared numerically component by
    component.  Converting them with ``float`` would place ``'4.10'`` before
    ``'4.2'`` and make it tie with ``'4.1'``.

    :returns: {ref_name: [sample dicts sorted by start]}, as an
        ``OrderedDict`` whose keys are in sorted order.
    :raises ValueError: if a ``'start'`` or ``'end'`` component is not an
        integer string.
    """
    def _major_minor(position):
        # 'major.minor' -> (major, minor) as ints
        return tuple(int(part) for part in position.split('.'))

    def _chunk_order(entry):
        # Ascending start; for equal starts, descending end so that the
        # longest sample comes first.
        start = _major_minor(entry['start'])
        negated_end = tuple(-value for value in _major_minor(entry['end']))
        return (start, negated_end)

    ref_names = defaultdict(list)
    for key, f in self.samples:
        d = Sample.decode_sample_name(key)
        if d is None:
            continue
        d['key'] = key
        d['filename'] = f
        ref_names[d['ref_name']].append(d)

    # sort dicts so that refs are in order and within a ref, chunks are in order
    ref_names_ordered = OrderedDict()
    for ref_name in sorted(ref_names):
        chunks = ref_names[ref_name]
        chunks.sort(key=_chunk_order)
        ref_names_ordered[ref_name] = chunks
    return ref_names_ordered