def test_alternating_order_insertion(self):
    '''Check tree depth for several insertion orders of the same values.

    For n distinct values the smallest possible depth is floor(log2(n)) + 1.
    The depth checks are delegated to self._test_tree_depth; its extra
    positional argument in the random-order case is presumably an upper
    bound on the depth -- confirm against the helper.
    '''
    # --- Small fixed example: 9 values, so the minimum depth is 4 ---
    small = np.array([5, 1, 2, 7, 8, 3, 4, 6, 9])
    small_size = np.size(small)

    # Inserting in the order given
    self._test_tree_depth(small, 5)

    # Inserting in sorted order: the worst case, depth equals the data size
    small_sorted = sort(small)
    self._test_tree_depth(small_sorted, 9)

    # Alternating order does not reach the minimum either
    alternating = alternating_order(small_size)
    self._test_tree_depth(small_sorted[alternating], 5)

    # A random order gives no guarantee, so two depth values are passed
    shuffled = np.random.permutation(small_size)
    self._test_tree_depth(small[shuffled], 4, 9)

    # Optimal order (needs the data sorted first, O(n log n))
    best = optimal_insertion_order(small_size)
    self._test_tree_depth(small_sorted[best], 4)
    self._test_tree_depth([8, 4, 2, 1, 3, 6, 5, 7, 9], 4)

    # --- General case: the optimal order must also work for an arbitrary
    # list, provided all of its values are distinct ---
    large_size = 100
    large = np.random.uniform(size=large_size)
    large_sorted = sort(large)
    min_depth = int(np.floor(np.log2(large_size))) + 1
    best = optimal_insertion_order(large_size)
    self._test_tree_depth(large_sorted[best], min_depth)

    # The utility function should build a tree of the same depth
    tree = sequence_to_tree(large_sorted)
    assert_equal(tree.depth(), min_depth, 'Wrong tree depth')
def __init__(self, data, snp, sample_id):
    '''
    Build a genotype set from its raw arrays.

    - data: genotype data array. The number of SNPs is read from
      data.shape[0] and the number of samples from data.shape[1].
      NOTE(review): an earlier description gave the axes as
      (individual x SNP x allele), which does not match this indexing;
      presumably the layout is (SNP x sample x allele) -- confirm.
    - snp: SNP metadata record array; its 'base_pair' field is read here.
      Presumably it also holds chromosome, name and genetic distance.
    - sample_id: IDs of the genotyped individuals
    '''
    # IDs of the genotyped people
    self.sample_id = sample_id

    # Genotype data and the dimensions derived from it
    self.data = data
    shape = self.data.shape
    self._num_snps = shape[0]
    self._num_samples = shape[1]
    self._snp_range = None

    # SNP metadata
    self.snp = snp

    # Base-pair locations, plus the inverse of the index -> location mapping
    # (built eagerly, right here)
    locations = self.snp['base_pair']
    self._base_pair = locations
    index_to_location = {index: location for index, location in enumerate(locations)}
    self._bp_to_snp = dict_invert(index_to_location)

    # Search trees for fast base-pair queries. Values are fed to the BST in
    # the order given by optimal_insertion_order.
    insertion_order = optimal_insertion_order(self._num_snps)
    self._snp_tree = BinarySearchTree(values=locations[insertion_order])
    self._snp_index_tree = util.list_index_tree(locations)

    # Genetic map: the two allele letters corresponding to 1 and 2 for each
    # SNP, in the order of the self.snp array. Starts out empty.
    self.map = []

    # General metadata (for easy handling of CGI data). Starts out empty.
    self.metadata = []

    # Per-sample parent-of-origin phase flags, all initialised to zero
    self.poo_phase = np.zeros(self._num_samples, dtype=np.byte)