def diploid_sample_from_labels(ls=None, ref=None, hp1=None, hp2=None):
    """Build a mock `medaka.common.Sample` from a reference and two haplotypes.

    Intended for mocking variant-calling scenarios in tests.

    :param ls: label scheme object; its `_decoding` is used to size the
        probability matrix and its `_encoding` maps a label tuple to a
        column index.
    :param ref: reference string.
    :param hp1: first haplotype string, same length as `ref`.
    :param hp2: second haplotype string, same length as `ref`.

    :returns: tuple `(sample, ref)`; `sample.label_probs` has exactly one
        entry set to 1 per row, at the column of the sorted
        `(hp1[i], hp2[i])` label.
    """
    assert len(ref) == len(hp1) == len(hp2)

    positions = mock_positions_array(ref)

    # The mocked network output is one-hot (0. or 1.) for simplicity; real
    # network output would be float probabilities.
    label_probs = np.zeros((len(positions), len(ls._decoding)))
    for row, allele_pair in enumerate(zip(hp1, hp2)):
        # labels are keyed by the sorted pair, so haplotype order is irrelevant
        genotype = tuple(sorted(allele_pair))
        label_probs[row, ls._encoding[genotype]] = 1

    sample = common.Sample(
        ref_name='contig1',
        features=None,
        labels=None,
        ref_seq=None,
        positions=positions,
        label_probs=label_probs,
    )
    return sample, ref
def test_decode_consensus(self):
    """Check that network-style label probabilities decode to a sequence."""
    # 13 classes = 3 elements per base * 4 bases, plus one for '*'
    num_classes = 13

    # (row, column of the highest-probability class, probability); the
    # trailing comment on each entry is the label that column decodes to.
    winners = (
        (0, 10, 0.9),   # (T, 1)
        (1, 5, 0.8),    # (C, 2)
        (2, 0, 0.81),   # (*, 1)
        (3, 3, 0.95),   # (A, 3)
        (4, 8, 0.9),    # (G, 2)
        (5, 5, 0.9),    # (C, 2)
    )

    label_probs = np.zeros([len(winners), num_classes])
    for row, col, prob in winners:
        label_probs[row, col] = prob

    # only the label probabilities (last positional field) are populated
    mock = common.Sample(None, None, None, None, None, label_probs)

    expected = 'TCCAAAGGCC'
    got = self.ls.decode_consensus(mock)
    self.assertEqual(expected, got)
def haploid_sample_from_labels(ls=None, ref=None, pri=None, sec=None,
                               pri_prob=0.6, sec_prob=0.3):
    """Build a mock `medaka.common.Sample` from a reference and predictions.

    Intended for mocking variant-calling scenarios in tests.

    :param ls: label scheme object; its `_decoding` is used to size the
        probability matrix and its `_encoding` maps a one-element label
        tuple to a column index.
    :param ref: reference string.
    :param pri: primary predicted sequence, same length as `ref`.
    :param sec: optional secondary predicted sequence, same length as `ref`
        and differing from `pri` at every position.
    :param pri_prob: probability given to the primary label.
    :param sec_prob: probability given to the secondary label; when `sec`
        is None it is added to `pri_prob` instead.

    :returns: tuple `(sample, ref)`. In each row of `sample.label_probs`
        the probability left over after the primary (and secondary) label
        is placed on one further, previously zero, column.
    """
    have_secondary = sec is not None

    assert len(ref) == len(pri)
    if have_secondary:
        assert len(ref) == len(sec)

    positions = mock_positions_array(ref)
    label_probs = np.zeros((len(positions), len(ls._decoding)))

    if not have_secondary:
        # with no secondary call, the primary absorbs the secondary's share
        pri_prob = pri_prob + sec_prob

    for row, pri_base in enumerate(pri):
        label_probs[row, ls._encoding[(pri_base, )]] = pri_prob
        if have_secondary:
            assert sec[row] != pri_base
            label_probs[row, ls._encoding[(sec[row], )]] = sec_prob

        # Give the leftover probability to one more label: the reference
        # base when it is not already the primary or secondary, otherwise
        # the first column that is still zero.
        empty_cols = np.where(label_probs[row] == 0)[0]
        ref_col = ls._encoding[(ref[row], )]
        spare_col = ref_col if ref_col in empty_cols else empty_cols[0]
        label_probs[row, spare_col] = 1 - np.sum(label_probs[row])

    sample = common.Sample(
        ref_name='contig1',
        features=None,
        labels=None,
        ref_seq=None,
        positions=positions,
        label_probs=label_probs,
    )
    return sample, ref
def diploid_zygosity_sample_from_labels(ls=None, ref=None, pri=None,
                                        sec=None, pri_prob=None,
                                        sec_prob=None, het=None):
    """Build a mock `medaka.common.Sample` with primary/secondary/het labels.

    Intended for mocking variant-calling scenarios in tests.

    :param ls: label scheme object; its `_decoding` is used to size the
        probability matrix and its `_unitary_encoding` maps a one-element
        label tuple to a column index.
    :param ref: reference string.
    :param pri: primary sequence, same length as `ref`.
    :param sec: secondary sequence, same length as `ref`.
    :param pri_prob: value written at the primary label's column.
    :param sec_prob: value written at the secondary label's column (written
        after the primary, so it wins when both labels share a column).
    :param het: per-position indexable of values convertible with `int`.

    :returns: tuple `(sample, ref)`; the last column of each row of
        `sample.label_probs` holds `int(het[i])`.
    """
    assert len(ref) == len(pri) == len(sec)

    positions = mock_positions_array(ref)
    label_probs = np.zeros((len(positions), len(ls._decoding)))

    for row, (pri_base, sec_base) in enumerate(zip(pri, sec)):
        label_probs[row, ls._unitary_encoding[(pri_base, )]] = pri_prob
        label_probs[row, ls._unitary_encoding[(sec_base, )]] = sec_prob
        # the het indicator always occupies the final column
        label_probs[row, -1] = int(het[row])

    sample = common.Sample(
        ref_name='contig1',
        features=None,
        labels=None,
        ref_seq=None,
        positions=positions,
        label_probs=label_probs,
    )
    return sample, ref


def test_snp_metainfo(self):
    # the label scheme under test is expected to expose 7 SNP meta-info entries
    n_entries = len(self.ls.snp_metainfo)
    self.assertEqual(n_entries, 7)


def test_variant_metainfo(self):
    # the label scheme under test is expected to expose 9 variant meta-info
    # entries
    n_entries = len(self.ls.variant_metainfo)
    self.assertEqual(n_entries, 9)