def test_bit_count1(self):
    """Exercise count_and/count_or/count_xor on fixed inputs and bad calls."""
    a = bitarray('001111')
    b = bitarray('010011')
    a_before = a.copy()
    b_before = b.copy()
    counters = (count_and, count_or, count_xor)

    # Expected results for the two 6-bit operands above, in counter order.
    for func, expected in zip(counters, (2, 5, 3)):
        self.assertEqual(func(a, b), expected)

    # Argument tuples that must be rejected with TypeError: first the calls
    # that do not pass exactly two arguments, then the wrongly typed ones.
    rejected_calls = (
        (),
        (a,),
        (a, b, 3),
        (a, ''),
        ('1', b),
        (a, 4),
    )
    for func in counters:
        for call_args in rejected_calls:
            self.assertRaises(TypeError, func, *call_args)

    # Compare both operands against the copies taken before the calls.
    self.assertEQUAL(a, a_before)
    self.assertEQUAL(b, b_before)

    # After the append, b holds one more bit than a.
    b.append(1)
    for func in counters:
        self.assertRaises(ValueError, func, a, b)
        # Operands constructed with 'big' and 'little' respectively.
        self.assertRaises(ValueError, func,
                          bitarray('110', 'big'),
                          bitarray('101', 'little'))
def test_bit_count_random(self):
    """Check the count helpers against counting the combined bitarray."""
    # Every length from 0 to 49, plus one random length in [1000, 2000].
    lengths = list(range(50))
    lengths.append(randint(1000, 2000))

    for nbits in lengths:
        a = urandom(nbits)
        b = urandom(nbits)
        self.assertEqual(count_and(a, b), (a & b).count())
        self.assertEqual(count_or(a, b), (a | b).count())
        self.assertEqual(count_xor(a, b), (a ^ b).count())
def test_count_byte(self):
    """Check the count helpers for every byte value against fixed operands."""
    all_ones = bitarray(8)
    all_ones.setall(1)
    all_zeros = bitarray(8)
    all_zeros.setall(0)

    for value in range(256):
        a = bitarray()
        a.frombytes(bytes(bytearray([value])))
        cnt = a.count()

        # (function, second operand, expected count) for the 8-bit array a.
        expectations = (
            (count_and, all_zeros, 0),
            (count_and, all_ones, cnt),
            (count_and, a, cnt),
            (count_or, all_zeros, cnt),
            (count_or, all_ones, 8),
            (count_or, a, cnt),
            (count_xor, all_zeros, cnt),
            (count_xor, all_ones, 8 - cnt),
            (count_xor, a, 0),
        )
        for func, other, expected in expectations:
            self.assertEqual(func(a, other), expected)
def test_bit_count2(self):
    """Check the count helpers on arrays filled from os.urandom bytes."""

    def random_bits(length):
        # Load enough random bytes for `length` bits, then drop the excess.
        bits = bitarray()
        bits.frombytes(os.urandom(bits2bytes(length)))
        del bits[length:]
        return bits

    # Every length from 0 to 49, plus one random length in [1000, 2000].
    lengths = list(range(50))
    lengths.append(randint(1000, 2000))

    for nbits in lengths:
        a = random_bits(nbits)
        b = random_bits(nbits)
        self.assertEqual(count_and(a, b), (a & b).count())
        self.assertEqual(count_or(a, b), (a | b).count())
        self.assertEqual(count_xor(a, b), (a ^ b).count())
def test_bit_count_frozen(self):
    """Check that the count helpers accept frozenbitarray operands."""
    a = frozenbitarray('001111')
    b = frozenbitarray('010011')

    # Expected results in the order and, or, xor.
    expected_by_func = (
        (count_and, 2),
        (count_or, 5),
        (count_xor, 3),
    )
    for func, expected in expected_by_func:
        self.assertEqual(func(a, b), expected)
def _find_promising_nodes(degrees, colors, clique, excluded):
    """Queue the nodes that could still lead to a clique bigger than *clique*.

    A node is kept when its degree exceeds ``clique.size``, it is not listed
    in *excluded*, and its color is not among the colors of *clique*.

    Args:
        degrees (numpy.ndarray): per-node degree.
        colors (numpy.ndarray): per-node color.
        clique (numpy.ndarray): node indices of the best clique found so far.
        excluded (numpy.ndarray): node indices that must not be proposed.

    Return:
        promising_nodes (collections.deque): candidate node indices, grouped
            by color in the (sorted) order returned by ``np.setdiff1d``.
    """
    biggers = degrees > clique.size
    biggers[excluded] = False
    promising_colors = np.setdiff1d(colors[biggers], colors[clique])
    promising_nodes = deque()
    for color in promising_colors:
        promising_nodes.extend(((colors == color) & biggers).nonzero()[0])
    return promising_nodes


def main():
    """Run the BitQT workflow: build the bit matrix, cluster it, write outputs.

    Reads its options from ``parse_arguments()``, creates ``args.outdir`` and
    writes the pickle, VMD script and statistics files into it.

    Raises:
        Exception: if ``args.outdir`` already exists.
    """
    # >>>> Debugging <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    # import argparse
    # args = argparse.Namespace()
    # args.trajectory = '../trajs/aligned_tau.dcd'
    # args.topology = '../trajs/aligned_tau.pdb'
    # args.nclust = np.inf
    # args.min_clust_size = 2
    # args.first = 1000
    # args.last = 6000
    # args.stride = 3
    # args.selection = 'all'
    # args.cutoff = 4
    # args.outdir = 'bitQT_outputs'

    # =========================================================================
    # 1. Creating binary matrix (adjacency list)
    # =========================================================================
    # ++++ Get adjacency matrix of trajectory as list of bitarrays ++++++++++++
    args = parse_arguments()

    try:
        os.makedirs(args.outdir)
    except FileExistsError as err:
        # Refuse to write into an existing directory.
        raise Exception(
            '{} directory already exists. '
            'Please specify another location or rename it.'
            .format(args.outdir)) from err

    trajectory = load_raw_traj(args.trajectory, valid_trajs, args.topology)
    trajectory = shrink_traj_selection(trajectory, args.selection)
    # Frame count before the first/last/stride reduction; the output writers
    # below receive it together with the reduced clustering.
    N1 = trajectory.n_frames
    trajectory = shrink_traj_range(args.first, args.last, args.stride,
                                   trajectory)
    trajectory.center_coordinates()
    matrix = calc_rmsd_matrix(trajectory, args)

    # ++++ Tracking clust/uNCLUSTERed bits to avoid re-computations +++++++++++
    N = len(matrix[0])
    m = len(matrix)
    unclust_bit = ba(N)
    unclust_bit.setall(1)
    clustered_bit = unclust_bit.copy()
    clustered_bit.setall(0)
    zeros = np.zeros(N, dtype=np.int32)

    # ++++ Save clusters in an array (1 .. N) +++++++++++++++++++++++++++++++++
    clusters_array = np.zeros(N, dtype=np.int32)
    NCLUSTER = 0
    clustered = set()

    # ++++ Coloring ordered vertices (1 .. N) +++++++++++++++++++++++++++++++++
    degrees = calc_matrix_degrees(unclust_bit, matrix)
    ordered_by_degs = degrees.argsort()[::-1]
    colors = colour_matrix(ordered_by_degs, matrix)
    # colors[np.frombuffer(clustered_bit.unpack(), dtype=np.bool)] = 0

    # =========================================================================
    # 2. Main algorithm: BitQT !
    # =========================================================================
    while True:
        NCLUSTER += 1

        # ++++ Find a big clique early ++++++++++++++++++++++++++++++++++++++++
        big_node = degrees.argmax()
        _, big_clique = do_bit_cascade(big_node, degrees, colors, matrix, 0)
        big_clique_size = big_clique.size

        # ++++ Find promising nodes +++++++++++++++++++++++++++++++++++++++++++
        promising_nodes = _find_promising_nodes(degrees, colors, big_clique,
                                                big_clique)

        # ++++ Explore all promising nodes ++++++++++++++++++++++++++++++++++++
        cum_found = big_clique
        while promising_nodes:
            node = promising_nodes.popleft()
            try:
                _, clique = do_bit_cascade(node, degrees, colors, matrix,
                                           big_clique_size)
                clique_size = len(clique)
            except TypeError:
                # Presumably do_bit_cascade returns a value that cannot be
                # unpacked (e.g. None) when no bigger clique exists -- verify
                # against its definition.
                clique_size = 0

            # ++++ Cumulative update only if biggers candidates are found +++++
            if clique_size > big_clique_size:
                big_clique = clique
                big_clique_size = big_clique.size
                # ++++ Repeat previous condition ++++++++++++++++++++++++++++++
                # Members of every clique found so far are never re-proposed.
                cum_found = np.concatenate((cum_found, big_clique))
                promising_nodes = _find_promising_nodes(degrees, colors,
                                                        big_clique, cum_found)

        if (big_clique_size < args.min_clust_size) or (NCLUSTER == args.nclust):
            break

        # ++++ Save new cluster & update NCLUSTER +++++++++++++++++++++++++++++
        clusters_array[big_clique] = NCLUSTER

        # ++++ Update (un)clustered_bit +++++++++++++++++++++++++++++++++++++++
        clustered.update(big_clique)
        clustered_bit = set_to_bitarray(clustered, N)
        unclust_bit = ~clustered_bit

        # ++++ Hard erasing of clustered frames from matrix +++++++++++++++++++
        degrees = zeros.copy()
        for x in unclust_bit[:m].itersearch(ba('1')):
            # NOTE(review): the degree is counted before the clustered bits
            # are cleared from the row -- confirm this ordering is intended.
            degrees[x] = matrix[x].count()
            if bu.count_and(matrix[x], clustered_bit):
                matrix[x] &= (matrix[x] ^ clustered_bit)

    # =========================================================================
    # 3. Output
    # =========================================================================
    # saving pickle for api debugging tests
    outname = os.path.basename(args.topology).split('.')[0]
    pickle_to_file(clusters_array,
                   os.path.join(args.outdir, '{}.pick'.format(outname)))
    # saving VMD visualization script
    to_VMD(args.outdir, args.topology, args.first, args.last, N1, args.stride,
           clusters_array[:m])
    # saving clustering info files (return values are not used here)
    get_frames_stats(N1, args.first, args.last, args.stride,
                     clusters_array[:m], args.outdir)
    get_cluster_stats(clusters_array[:m], args.outdir)
    print('\n\nNormal Termination of BitQT :)')
def bitclusterize(matrix, degrees, args):
    '''
    DESCRIPTION
    Clusters the bit matrix using bitwise operations.

    Args:
        matrix (list): list of bitarrays.
        degrees (collections.OrderedDict): dict of bitarrays lengths.
        args: object providing the ``max_clust`` and ``minsize`` attributes.
    Return:
        clusters (numpy.ndarray): array of clusters ID (-1 where no cluster
                                  was assigned).
        leaders (list): list of clusters' centers ID; -1 is appended when the
                        loop stops on a cluster with at most one member.
    '''
    degrees = np.asarray([degrees[x] for x in degrees])
    # Memory allocation for clusters container --------------------------------
    clusters = np.full(len(matrix), -1, dtype='int32')
    # Declare all bits as available -------------------------------------------
    available_bits = ba(len(degrees))
    # setall() takes the bit value itself (0 or 1), not a string.
    available_bits.setall(1)
    # Start iterative switching -----------------------------------------------
    leaders = []
    clust_id = 0
    ncluster = -1
    while True:
        ncluster += 1
        # Break 0: break if max number of cluster was reached -----------------
        if ncluster > args.max_clust:
            break
        # Find the biggest cluster --------------------------------------------
        leader = degrees.argmax()
        # Break 1: all candidates cluster have degree 1 (can't clusterize) ----
        if degrees.sum() == np.nonzero(degrees)[0].size:
            break
        biggest_cluster = matrix[leader] & available_bits
        # The builtin ``bool`` replaces the ``np.bool`` alias, which current
        # NumPy releases no longer provide.
        biggest_cluster_list = np.frombuffer(biggest_cluster.unpack(),
                                             dtype=bool)
        # Break 2: all candidates cluster have degree < minsize ---------------
        if biggest_cluster_list.sum() < args.minsize:
            break
        # Break 3: No more candidates available (empty matrix) ----------------
        # NOTE: a zero sum also satisfies the Break 1 test above, so this
        # check is kept only as a safeguard.
        if degrees.sum() == 0:
            break
        degrees[biggest_cluster_list] = 0
        # Remove the members of the new cluster from the available bits.
        available_bits = (available_bits ^ biggest_cluster) & available_bits
        if biggest_cluster.count() <= 1:
            leaders.append(-1)
            break
        leaders.append(leader)
        # Assign next cluster ID ----------------------------------------------
        clusters[biggest_cluster_list] = clust_id
        clust_id += 1
        # Update degrees of unclustered frames --------------------------------
        for degree in available_bits.itersearch(ba('1')):
            degrees[degree] = bau.count_and(available_bits, matrix[degree])
    return clusters, leaders