def test_rectangular(self):
    """
    A 9x10 cost matrix must give the same minimum cost as the same matrix
    padded with one all-zero row, and the padded problem must yield a
    10-entry solution. The transposed (10x9) matrix must raise ValueError.
    """
    rows = [
        [19, 95, 9, 43, 62, 90, 10, 77, 71, 27],
        [26, 30, 88, 78, 87, 2, 14, 71, 78, 11],
        [48, 70, 26, 82, 32, 16, 36, 26, 42, 79],
        [47, 46, 93, 66, 38, 20, 73, 39, 55, 51],
        [1, 81, 31, 49, 20, 24, 95, 80, 82, 11],
        [81, 48, 35, 54, 35, 55, 27, 87, 96, 7],
        [42, 17, 60, 73, 37, 36, 79, 3, 60, 82],
        [14, 57, 23, 69, 93, 78, 56, 49, 83, 36],
        [11, 37, 24, 70, 62, 35, 64, 18, 99, 20],
    ]
    zero_row = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    # rectangular problem: 9 rows, 10 columns
    w0 = np.array(rows)
    la0 = LinearAssignment(w0)

    # identical costs plus an explicit zero-cost padding row (10x10)
    w1 = np.array(rows + [zero_row])
    la1 = LinearAssignment(w1)

    self.assertEqual(len(la1.solution), 10)
    self.assertEqual(la0.min_cost, la1.min_cost)

    # more rows than columns is rejected
    self.assertRaises(ValueError, LinearAssignment, w0.T)
def test(self):
    """
    Check the minimum assignment cost of three fixed 10x10 cost matrices.

    Also checks that ``la0.min_cost`` still reports its original value
    after a second, independent ``LinearAssignment`` has been built.
    """
    w0 = np.array(
        [
            [19, 95, 9, 43, 62, 90, 10, 77, 71, 27],
            [26, 30, 88, 78, 87, 2, 14, 71, 78, 11],
            [48, 70, 26, 82, 32, 16, 36, 26, 42, 79],
            [47, 46, 93, 66, 38, 20, 73, 39, 55, 51],
            [1, 81, 31, 49, 20, 24, 95, 80, 82, 11],
            [81, 48, 35, 54, 35, 55, 27, 87, 96, 7],
            [42, 17, 60, 73, 37, 36, 79, 3, 60, 82],
            [14, 57, 23, 69, 93, 78, 56, 49, 83, 36],
            [11, 37, 24, 70, 62, 35, 64, 18, 99, 20],
            [73, 11, 98, 50, 19, 96, 61, 73, 98, 14],
        ]
    )
    w1 = np.array(
        [
            [95, 60, 89, 38, 36, 38, 58, 94, 66, 23],
            [37, 0, 40, 58, 97, 85, 18, 54, 86, 21],
            [9, 74, 11, 45, 65, 64, 27, 88, 24, 26],
            [58, 90, 6, 36, 17, 21, 2, 12, 80, 90],
            [33, 0, 74, 75, 11, 84, 34, 7, 39, 0],
            [17, 61, 94, 68, 27, 41, 33, 86, 59, 2],
            [61, 94, 36, 53, 66, 33, 15, 87, 97, 11],
            [22, 20, 57, 69, 15, 9, 15, 8, 82, 68],
            [40, 0, 13, 61, 67, 40, 29, 25, 72, 44],
            [13, 97, 97, 54, 5, 30, 44, 75, 16, 0],
        ]
    )
    w2 = np.array(
        [
            [34, 44, 72, 13, 10, 58, 16, 1, 10, 61],
            [54, 70, 99, 4, 64, 0, 15, 94, 39, 46],
            [49, 21, 80, 68, 96, 58, 24, 87, 79, 67],
            [86, 46, 58, 83, 83, 56, 83, 65, 4, 96],
            [48, 95, 64, 34, 75, 82, 64, 47, 35, 19],
            [11, 49, 6, 57, 80, 26, 47, 63, 75, 75],
            [74, 7, 15, 83, 64, 26, 78, 17, 67, 46],
            [19, 13, 2, 26, 52, 16, 65, 24, 2, 98],
            [36, 7, 93, 93, 11, 39, 94, 26, 46, 69],
            [32, 95, 37, 50, 97, 96, 12, 70, 40, 93],
        ]
    )

    la0 = LinearAssignment(w0)
    self.assertEqual(la0.min_cost, 194, "Incorrect cost")

    la1 = LinearAssignment(w1)
    # The previous form compared la0.min_cost with itself, which can never
    # fail. Re-check against the known value so that a property which
    # changes on re-read (or after another solver is built) is caught.
    self.assertEqual(la0.min_cost, 194, "Property incorrect")
    self.assertEqual(la1.min_cost, 125, "Incorrect cost")

    la2 = LinearAssignment(w2)
    self.assertEqual(la2.min_cost, 110, "Incorrect cost")
def _cart_dists(self, s1, s2, avg_lattice, mask, normalization):
    """
    Match the sites of s2 onto s1 in cartesian space and find the extra
    translation (the mean matched displacement) that is then removed
    from every matched vector.

    Args:
        s1, s2: numpy arrays of fractional coordinates, len(s1) >= len(s2)
        avg_lattice: Lattice on which distances are calculated
        mask: boolean numpy array of shape (len(s2), len(s1)). A True
            entry marks a pair that must not be matched
        normalization (float): factor the returned distances are
            multiplied by

    Returns:
        Distances of the matched pairs after removing the mean
            displacement, multiplied by `normalization`
        Fractional translation vector (the mean displacement expressed
            in fractional coordinates of avg_lattice)
        The assignment returned by LinearAssignment: for each row (s2
            site) the index of the column (s1 site) it is matched to

    Raises:
        ValueError: if s2 is longer than s1 or mask has the wrong shape
    """
    n_sub, n_super = len(s2), len(s1)
    if n_sub > n_super:
        raise ValueError("s1 must be larger than s2")
    if mask.shape != (n_sub, n_super):
        raise ValueError("mask has incorrect shape")

    # huge component value so forbidden pairs are never worth selecting
    forbidden = 1e10 * self.stol / normalization

    # displacement vectors, indexed [s2 site, s1 site]
    displacements = pbc_shortest_vectors(avg_lattice, s2, s1)
    displacements[mask] = forbidden
    squared = np.sum(displacements ** 2, axis=-1)

    assignment = LinearAssignment(squared).solution
    rows = np.arange(len(assignment))
    matched = displacements[rows, assignment]

    # remove the common shift shared by all matched pairs
    shift = np.average(matched, axis=0)
    frac_shift = avg_lattice.get_fractional_coords(shift)
    residual_sq = np.sum((matched - shift) ** 2, axis=-1)

    return residual_sq ** 0.5 * normalization, frac_shift, assignment
def test_boolean_inputs(self):
    """
    A boolean cost matrix (False on the diagonal, True elsewhere) must be
    stored by LinearAssignment as float64.
    """
    size = 135
    # True everywhere except the diagonal
    w = ~np.eye(size, dtype=bool)
    la = LinearAssignment(w)
    # if the input doesn't get converted to a float, the masking
    # doesn't work properly
    self.assertEqual(la.orig_c.dtype, np.float64)
def _strict_match(self, struct1, struct2, fu, s1_supercell=True,
                  use_rms=False, break_on_match=False):
    """
    Matches struct2 onto struct1 (which should contain all sites in
    struct2).

    Args:
        struct1, struct2 (Structure): structures to be matched
        fu (int): size of supercell to create
        s1_supercell (bool): whether to create the supercell of
            struct1 (vs struct2)
        use_rms (bool): whether to minimize the rms of the matching
        break_on_match (bool): whether to stop search at first
            valid match

    Returns:
        None if no valid match is found, otherwise the tuple
        (value, distances, supercell matrix, total translation, mapping)
        of the best match, where value is below self.stol.

    Raises:
        ValueError: if fu < 1, or if the mask has more rows than columns.
    """
    if fu < 1:
        raise ValueError("fu cannot be less than 1")

    mask, s1_t_inds, s2_t_ind = self._get_mask(struct1, struct2, fu,
                                               s1_supercell)
    n_rows, n_cols = mask.shape[0], mask.shape[1]

    if n_rows > n_cols:
        raise ValueError('after supercell creation, struct1 must '
                         'have more sites than struct2')

    # check that a valid mapping exists
    if not self._subset and n_cols != n_rows:
        return None
    if LinearAssignment(mask).min_cost > 0:
        return None

    best = None
    # loop over all lattices
    supercells = self._get_supercells(struct1, struct2, fu, s1_supercell)
    for s1fc, s2fc, avg_l, sc_m in supercells:
        # compute fractional tolerance
        normalization = (len(s1fc) / avg_l.volume) ** (1/3)
        inv_abc = np.array(avg_l.reciprocal_lattice.abc)
        frac_tol = inv_abc * self.stol / (np.pi * normalization)

        # loop over all translations
        for s1i in s1_t_inds:
            t = s1fc[s1i] - s2fc[s2_t_ind]
            t_s2fc = s2fc + t
            if not self._cmp_fstruct(s1fc, t_s2fc, frac_tol, mask):
                continue

            dist, t_adj, mapping = self._cart_dists(
                s1fc, t_s2fc, avg_l, mask, normalization)
            if use_rms:
                val = np.linalg.norm(dist) / len(dist) ** 0.5
            else:
                val = max(dist)

            if best is not None and not val < best[0]:
                continue

            # wrap the combined translation back into the unit cell
            total_t = t + t_adj
            total_t -= np.round(total_t)
            best = val, dist, sc_m, total_t, mapping
            if (break_on_match or val < 1e-5) and val < self.stol:
                return best

    if best and best[0] < self.stol:
        return best
    return None
def get_ionic_pol_change(struct2, struct1, psp_table, extra_trans=None):
    """
    Sum the matched site displacements between struct2 and struct1,
    weighted by each species' Z_val from psp_table, and scale the result
    by ECHARGE * 10**20 / struct2.lattice.volume.

    The structures should already be translated (per the original note).

    Args:
        struct2, struct1: structures whose sites are matched one-to-one
        psp_table: object providing pseudo_with_symbol(symbol).Z_val
        extra_trans: optional translation, multiplied component-wise by
            the lattice lengths of struct2 and subtracted from every
            displacement. Defaults to no translation.

    Returns:
        numpy array with three components.
    """
    LOGGER.info("Finding ionic change in polarization")

    if extra_trans is not None:
        # convert extra_trans to cartesian
        extra_trans = np.array(extra_trans) * np.array(struct2.lattice.abc)
    else:
        extra_trans = np.array([0., 0., 0.])

    site_mask = get_mask(struct2, struct1)
    vecs, d_2 = pbc_shortest_vectors(
        struct2.lattice,
        struct2.frac_coords,
        struct1.frac_coords,
        site_mask,
        return_d2=True,
        lll_frac_tol=[0.4, 0.4, 0.4],
    )

    # minimum-cost pairing on squared distances
    assignment = LinearAssignment(d_2).solution
    species = [struct1[idx].species_string for idx in assignment]
    short_vecs = vecs[np.arange(len(assignment)), assignment]

    LOGGER.debug("Displacements:")
    LOGGER.debug(short_vecs)

    pol_change = np.array([0., 0., 0.])
    for vec, symbol in zip(short_vecs, species):
        pol_change += ((vec - extra_trans)
                       * psp_table.pseudo_with_symbol(symbol).Z_val)
        LOGGER.debug("{}\t{}\t{}".format(
            symbol, psp_table.pseudo_with_symbol(symbol).Z_val, vec))

    prefactor = ECHARGE * 10**20
    return prefactor * pol_change / struct2.lattice.volume
def _cmp_fractional_struct(self, s1, s2, frac_tol, mask):
    """
    Returns True if the shorter coordinate list can be matched onto the
    longer one with every matched pair within frac_tol along each axis.

    Args:
        s1, s2: numpy arrays of fractional coordinates
        frac_tol: per-axis tolerance (array of length 3)
        mask: boolean array; True marks pairs that may not be matched.
            It is transposed when s1 is not the longer list, so it ends
            up indexed [shorter list, longer list].
    """
    # always measure from the shorter list (rows) to the longer (columns)
    s1_is_longer = len(s1) > len(s2)
    if s1_is_longer:
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1
        mask = mask.T

    # any single out-of-tolerance component already reaches this value
    penalty = 3 * len(longer)

    # per-axis separation, wrapped to the nearest periodic image
    delta = longer[None, :] - shorter[:, None]
    delta = np.abs(delta - np.round(delta))
    delta[delta > frac_tol[None, None, :]] = penalty

    cost = delta.sum(axis=-1)
    cost[mask] = penalty

    # cheap rejection: some row has no admissible partner at all
    if cost.min(axis=1).max() >= penalty:
        return False

    if self._subset:
        # pad with zero-cost rows up to a square matrix
        size = len(longer)
        padded = np.zeros((size, size))
        padded[:cost.shape[0], :cost.shape[1]] = cost
        cost = padded

    if LinearAssignment(cost).min_cost >= penalty:
        return False
    return True
def _cmp_fstruct(self, s1, s2, frac_tol, mask):
    """
    Returns true if a matching exists between s1 and s2
    under frac_tol. s2 should be a subset of s1

    Args:
        s1, s2: numpy arrays of fractional coordinates,
            len(s1) >= len(s2)
        frac_tol: per-axis tolerance (array of length 3)
        mask: boolean array of shape (len(s2), len(s1)); True marks
            pairs that may not be matched

    Raises:
        ValueError: if s2 is longer than s1 or mask has the wrong shape
    """
    n_super, n_sub = len(s1), len(s2)
    if n_sub > n_super:
        raise ValueError("s1 must be larger than s2")
    if mask.shape != (n_sub, n_super):
        raise ValueError("mask has incorrect shape")

    # any single out-of-tolerance component already reaches this value
    penalty = 3 * n_super

    # per-axis separation from each s2 site (rows) to each s1 site
    # (columns), wrapped to the nearest periodic image
    delta = s1[None, :] - s2[:, None]
    delta = np.abs(delta - np.round(delta))
    delta[delta > frac_tol[None, None, :]] = penalty

    cost = delta.sum(axis=-1)
    cost[mask] = penalty

    # maximin is a lower bound on linear assignment
    # (and faster to compute)
    if cost.min(axis=1).max() >= penalty:
        return False

    return LinearAssignment(cost).min_cost < penalty
def test_small_range(self):
    """
    Cost matrix whose entries span a narrow range (4 to 9); the expected
    minimum cost is 48.
    """
    # can be tricky for the augment step
    costs = [
        [4, 5, 5, 6, 8, 4, 7, 4, 7, 8],
        [5, 6, 6, 6, 7, 6, 6, 5, 6, 7],
        [4, 4, 5, 7, 7, 4, 8, 4, 7, 7],
        [6, 7, 6, 6, 7, 6, 6, 6, 6, 6],
        [4, 4, 4, 6, 6, 4, 7, 4, 7, 7],
        [4, 5, 5, 6, 8, 4, 7, 4, 7, 8],
        [5, 7, 5, 5, 5, 6, 4, 5, 4, 6],
        [8, 9, 8, 4, 5, 9, 4, 8, 4, 4],
        [5, 6, 6, 6, 7, 6, 6, 5, 6, 7],
        [5, 6, 6, 6, 7, 6, 6, 5, 6, 7],
    ]
    solver = LinearAssignment(np.array(costs))
    self.assertAlmostEqual(solver.min_cost, 48)
def _cmp_fractional_struct(self, s1, s2, frac_tol):
    """
    Compares fractional coordinates group by group.

    s1 and s2 are iterated in lockstep; each pair of coordinate arrays
    must admit an assignment in which every matched pair lies within
    frac_tol along each axis. Returns False as soon as one group fails,
    True otherwise (including when there are no groups).
    """
    for coords_a, coords_b in zip(s1, s2):
        # per-axis separation, wrapped to the nearest periodic image
        delta = coords_a[:, None] - coords_b[None, :]
        delta = np.abs(delta - np.round(delta))

        # any single out-of-tolerance component already reaches this value
        penalty = 3 * len(delta)
        delta[delta > frac_tol[None, None, :]] = penalty
        cost = delta.sum(axis=-1)

        # cheap rejection: some column has no admissible partner
        if cost.min(axis=0).max() >= penalty:
            return False

        if LinearAssignment(cost).min_cost >= penalty:
            return False
    return True
def _cmp_cartesian_struct(self, s1, s2, l1, l2):
    """
    Once a fit is found, a rms minimizing fit is done to
    ensure the fit is correct. To do this,

    1) The structures are placed into an average lattice
    2) All sites are shifted by the mean displacement vector
       between matched sites.
    3) calculate distances
    4) return rms distance normalized by (V/Natom) ^ 1/3
       and the maximum distance found

    s1 and s2 are iterated in lockstep as groups of coordinates; sites
    are only matched within the same group.
    """
    nsites = sum(len(group) for group in s1)

    # lattice with the mean lengths and mean angles of l1 and l2
    mean_params = (np.array(l1.lengths_and_angles)
                   + np.array(l2.lengths_and_angles)) / 2
    avg_lattice = Lattice.from_lengths_and_angles(mean_params[0],
                                                  mean_params[1])

    # pairs from different groups keep this large cost so they are
    # never preferred
    dist = np.full((nsites, nsites), 100 * nsites, dtype=float)
    vec_matrix = np.zeros([nsites, nsites, 3])

    offset = 0
    for s1_coords, s2_coords in zip(s1, s2):
        size = len(s1_coords)
        block = slice(offset, offset + size)
        vecs = pbc_shortest_vectors(avg_lattice, s1_coords, s2_coords)
        dist[block, block] = np.sum(vecs ** 2, axis=-1) ** 0.5
        vec_matrix[block, block] = vecs
        offset += size

    assignment = LinearAssignment(dist).solution
    shortest_vecs = vec_matrix[np.arange(nsites), assignment, :]

    # squared residuals after removing the mean displacement
    centred = shortest_vecs - np.average(shortest_vecs, axis=0)
    shortest_vec_square = np.sum(centred ** 2, -1)

    norm_length = (avg_lattice.volume / nsites) ** (1 / 3)
    rms = np.average(shortest_vec_square) ** 0.5 / norm_length
    max_dist = np.max(shortest_vec_square) ** 0.5 / norm_length
    return rms, max_dist
def _cart_dists(self, s1, s2, l1, l2, mask):
    """
    Finds the cartesian distances normalized by (V/Natom) ^ 1/3
    between two structures on the average lattice of l1 and l2.

    Minimizes the RMS distance of the matching with an additional
    translation (but doesn't change the mapping).

    Args:
        s1, s2: arrays of fractional coordinates. The shorter one is
            treated as the subset, the longer one as the superset.
        l1, l2: lattices whose lengths and angles are averaged
        mask: boolean array; True marks pairs that may not be matched.
            It is transposed when s1 is not the longer list, so it ends
            up with shape (len(subset), len(superset)).

    Returns:
        distances, fractional_translation vector (the translation is
        negated when s1 was used as the subset)
    """
    # ensure that we always calculate distances from the subset
    # to the superset
    if len(s1) > len(s2):
        s_superset, s_subset, mult = s1, s2, 1
    else:
        s_superset, s_subset, mult = s2, s1, -1
        mask = mask.T
    n_sub, n_super = len(s_subset), len(s_superset)

    # create the average lattice
    avg_params = (np.array(l1.lengths_and_angles) +
                  np.array(l2.lengths_and_angles)) / 2
    avg_lattice = Lattice.from_lengths_and_angles(*avg_params)
    norm_length = (avg_lattice.volume / n_super) ** (1 / 3)
    mask_val = 1e20 * norm_length * self.stol

    # vectors from subset to superset
    # 1st index subset, 2nd index superset
    vecs = pbc_shortest_vectors(avg_lattice, s_subset, s_superset)
    d_2 = np.sum(vecs ** 2, axis=-1)

    # Square, zero-padded copies for the assignment solver. The mask only
    # covers the first n_sub rows, so it is applied to that sub-block:
    # indexing the full square array with a smaller boolean mask raises
    # IndexError on current NumPy whenever n_sub < n_super.
    vec_matrix = np.zeros([n_super, n_super, 3])
    vec_matrix[:n_sub, :n_super] = vecs
    vec_matrix[:n_sub][mask] = mask_val

    all_d_2 = np.zeros([n_super, n_super])
    all_d_2[:n_sub, :n_super] = d_2
    all_d_2[:n_sub][mask] = mask_val

    lin = LinearAssignment(all_d_2)
    inds = np.arange(n_sub)

    # shortest vectors from the subset to the superset
    shortest_vecs = vec_matrix[inds, lin.solution[:n_sub], :]

    # remove the mean displacement; the mapping itself is unchanged
    translation = np.average(shortest_vecs, axis=0)
    f_translation = avg_lattice.get_fractional_coords(translation)
    shortest_distances = np.sum((shortest_vecs - translation) ** 2,
                                -1) ** 0.5

    return shortest_distances / norm_length, f_translation * mult
def another_test_case(self):
    """
    A 10x10 float cost matrix whose minimum assignment cost is expected
    to be (almost) zero.
    """
    # NOTE(review): the name lacks the `test` prefix, so default unittest
    # discovery presumably does not collect this method - confirm intent.
    costs = np.array([
        [0.03900238875468465, 0.003202415721817453,
         0.20107156847937024, 0.0, 0.5002116398420846,
         0.11951326861160616, 0.0, 0.5469032363997579,
         0.3243791041219123, 0.1119882291981289],
        [0.6048342640688928, 0.3847629088356139, 0.0,
         0.44358269535118944, 0.45925670625165016,
         0.31416882324798145, 0.8065128182180494, 0.0,
         0.26153475286065075, 0.6862799559241944],
        [0.5597215814025246, 0.15133664165478322, 0.0,
         0.6218101659263295, 0.15438455134183793,
         0.17281467064043232, 0.8458127968475472,
         0.020860721537078075, 0.1926886361228456, 0.0],
        [0.0, 0.0, 0.6351848838666995, 0.21261247074659906,
         0.4811603832432241, 0.6663733668270337,
         0.63970145187428, 0.1415815172623256,
         0.5294574133825874, 0.5576702829768786],
        [0.25052904388309016, 0.2309392544588127,
         0.0656162006684271, 0.0248922362001176, 0.0,
         0.2101808638720748, 0.6529031699724193,
         0.1503003886507902, 0.375576165698992,
         0.7368328849560374],
        [0.0, 0.042215873587668984, 0.10326920761908365,
         0.3562551151517992, 0.9170343984958856,
         0.818783531026254, 0.7896770426052844, 0.0,
         0.6573135097946438, 0.17806189728574429],
        [0.44992199118890386, 0.0, 0.38548898339412585,
         0.6269193883601244, 1.0022861602564634, 0.0,
         0.1869765500803764, 0.03474156273982543,
         0.3715310534696664, 0.6197122486230232],
        [0.37939853696836545, 0.2421427374018027,
         0.5586150342727723, 0.0, 0.7171485794073893,
         0.8021029235865014, 0.11213464903613135,
         0.6497896761660467, 0.3274108706187846, 0.0],
        [0.6674685746225324, 0.5347953626128863,
         0.11461835366075113, 0.0, 0.8170639855163434,
         0.7291931505979982, 0.3149153087053108,
         0.1008681103294512, 0.0, 0.18751172321112997],
        [0.6985944652913342, 0.6139921045056471, 0.0,
         0.4393266955771965, 0.0, 0.47265399761400695,
         0.3674241844351025, 0.04731761392352629,
         0.21484886069716147, 0.16488710920126137],
    ])
    solver = LinearAssignment(costs)
    self.assertAlmostEqual(solver.min_cost, 0)
def _find_match(self, struct1, struct2, break_on_match=False,
                use_rms=False, niggli=True):
    """
    Finds the best match between two structures.
    Typically, 'best' is determined by minimax cartesian distance
    on the average lattice

    Args:
        struct1: 1st structure
        struct2: 2nd structure
        break_on_match: If true, breaks once the max distance is below
            the stol (RMS distance if use_rms is true)
        use_rms: If True, finds the match that minimizes
            RMS instead of minimax
        niggli: whether to compute the niggli cells of the input
            structures

    Returns:
        the value, distances, s2 lattice, and s2 translation vector for
        the best match, or None when no match below stol exists
    """
    # work on copies built from the sites of the inputs
    struct1 = Structure.from_sites(struct1.sites)
    struct2 = Structure.from_sites(struct2.sites)

    if (self._comparator.get_structure_hash(struct1) !=
            self._comparator.get_structure_hash(struct2)
            and not self._subset):
        return None

    # primitive cell transformation
    if self._primitive_cell and struct1.num_sites != struct2.num_sites:
        struct1 = struct1.get_primitive_structure()
        struct2 = struct2.get_primitive_structure()

    if self._supercell:
        fu = self._get_supercell_size(struct1, struct2)
        # force struct1 to be the larger one
        if fu < 1:
            struct2, struct1 = struct1, struct2
            fu = 1 / fu
        fu = int(round(fu))
    else:
        fu = 1

    # can't do the check until we group with the comparator
    if (not self._subset) and struct1.num_sites != struct2.num_sites * fu:
        return None

    # Get niggli reduced cells. Though technically not necessary, this
    # minimizes cell lengths and speeds up the matching of skewed
    # cells considerably.
    if niggli:
        struct1 = struct1.get_reduced_structure(reduction_algo="niggli")
        struct2 = struct2.get_reduced_structure(reduction_algo="niggli")

    nl1 = struct1.lattice
    nl2 = struct2.lattice

    # rescale lattice to same volume
    if self._scale:
        ratio = (fu * nl2.volume / nl1.volume) ** (1 / 6)
        nl1 = Lattice(nl1.matrix * ratio)
        struct1.modify_lattice(nl1)
        nl2 = Lattice(nl2.matrix / ratio)
        struct2.modify_lattice(nl2)

    # fractional tolerance of atomic positions (2x for initial fitting)
    normalization = ((2 * struct2.num_sites * fu) /
                     (struct1.volume + struct2.volume * fu)) ** (1 / 3)
    frac_tol = np.array(struct1.lattice.reciprocal_lattice.abc) * \
        self.stol / ((1 - self.ltol) * np.pi) / normalization

    # make array mask: mask[i, j] is True when row i (a struct2 site,
    # repeated fu times) may NOT be matched to site j of struct1.
    # The builtin bool is used because the np.bool alias was removed in
    # NumPy 1.24.
    mask = np.zeros((len(struct2) * fu, len(struct1)), dtype=bool)
    i = 0
    for site2 in struct2:
        for _ in range(fu):
            for j, site1 in enumerate(struct1):
                mask[i, j] = not self._comparator.are_equal(
                    site2.species_and_occu, site1.species_and_occu)
            i += 1

    # check that there is some valid mapping between sites
    nmax = max(mask.shape)
    sq_mask = np.zeros((nmax, nmax))
    # mask may have fewer rows than columns, so write through the
    # matching top-left sub-block; a boolean index whose shape differs
    # from the indexed array raises IndexError on current NumPy.
    sq_mask[:mask.shape[0], :mask.shape[1]][mask] = 10000
    if LinearAssignment(sq_mask).min_cost > 0:
        return None

    # find the best sites for the translation vector: the struct2 row
    # with the most forbidden partners and the struct1 sites it may
    # still be matched to
    num_s1_invalid_matches = np.sum(mask, axis=1)
    s2_translation_index = np.argmax(num_s1_invalid_matches)
    s1_translation_indices = np.argwhere(
        mask[s2_translation_index] == 0).flatten()

    s1fc = np.array(struct1.frac_coords)
    s2cc = np.array(struct2.cart_coords)

    best_match = None
    for nl in self._get_lattices(struct1, struct2, fu):
        # if supercell needs to be created, update s2_cart
        if self._supercell and fu > 1:
            scale_matrix = np.round(np.dot(nl.matrix, nl2.inv_matrix))
            supercell = struct2.copy()
            supercell.make_supercell(scale_matrix.astype('int'))
            s2fc = np.array(supercell.frac_coords)
        else:
            s2fc = nl.get_fractional_coords(s2cc)

        # loop over possible translations
        for s1i in s1_translation_indices:
            translation = s1fc[s1i] - s2fc[s2_translation_index]
            t_s2fc = s2fc + translation
            if self._cmp_fractional_struct(s1fc, t_s2fc, frac_tol, mask):
                distances, t = self._cart_dists(s1fc, t_s2fc, nl, nl1,
                                                mask)
                if use_rms:
                    val = np.linalg.norm(distances) / len(distances) ** 0.5
                else:
                    val = max(distances)
                if best_match is None or val < best_match[0]:
                    # wrap the combined translation into the unit cell
                    total_translation = translation + t
                    total_translation -= np.round(total_translation)
                    best_match = val, distances, nl, total_translation
                if break_on_match and val < self.stol:
                    return best_match

    if best_match and best_match[0] < self.stol:
        return best_match
    return None