Beispiel #1
0
    def dirs_without_outliers(self, dirs, positions, op_type, use_best_dir):
        """Refit every candidate direction after dropping outlier positions.

        For each direction in ``dirs`` the distance of every position from the
        symmetry element through the origin is measured:

        * ``op_type == 'CS'``: ``|dir . pos|`` (the distance from the plane
          whose normal is ``dir``, assuming ``dir`` has unit length -- it is
          not normalised here);
        * any other ``op_type``: the distance from the line through the
          origin along ``dir``.

        Positions whose distance is more than twice the median distance are
        treated as outliers and removed; ``self.dir_fit`` is then called on
        the remaining positions.

        :param dirs: iterable of direction vectors (first 3 components used).
        :param positions: sequence of points (first 3 components used).
        :param op_type: operation type string; ``'CS'`` selects the plane
            distance, anything else the line distance.
        :param use_best_dir: forwarded unchanged to ``self.dir_fit``.
        :return: numpy array holding the directions returned by
            ``self.dir_fit`` for every input direction, in input order.
        """
        print(
            "======================detecting outliers============================"
        )
        # Coerce to a float (n, 3) array so plain lists/tuples work as well as
        # numpy arrays; reshape keeps the shape valid for an empty input.
        points = np.array(
            [np.asarray(pos, dtype=float)[:3] for pos in positions],
            dtype=float).reshape(-1, 3)

        more_dirs = []
        for direction in dirs:
            axis = np.asarray(direction, dtype=float)[:3]

            # 1. Find the distance of each point from the line/plane.
            if op_type == 'CS':
                dists = np.abs(np.dot(points, axis))
            else:
                # Parameter of each point's orthogonal projection on the line,
                # then the length of the residual to that projection.
                scale = np.dot(points, axis) / np.dot(axis, axis)
                dists = np.linalg.norm(points - np.outer(scale, axis), axis=1)

            # 2. Find the median of the distances.
            median = np.median(dists)

            # 3. Drop every point farther than twice the median. Written as
            # ``not dist > 2 * median`` (no division) so a zero median does
            # not divide by zero, while NaN distances/medians still keep the
            # point, as the ratio test did.
            kept = [
                pos for pos, dist in zip(positions, dists)
                if not dist > 2 * median
            ]

            # 4. Recompute the directions from the surviving points.
            more_dirs.extend(self.dir_fit(kept, use_best_dir))
        return np.array(more_dirs)
    def _calculate(self, operation):
        """Run the direction search for ``operation`` and return the best result.

        Three paths are taken, depending on the operation and on
        ``self.selective``:

        * ``CI``, or ``SN`` with order 2: a single fixed direction
          ``[1.0, 0.0, 0.0]`` is evaluated with one iteration.
        * selective mode: every initial direction is evaluated with one
          iteration, the ``self.num_selected`` best entries of
          ``self.statistics.directions_arr`` (by their sort order) are picked,
          and only their start directions are rerun with
          ``self._max_iterations``.
        * otherwise: all initial directions are run with
          ``self._max_iterations``.

        ``self._calculate_for_directions`` returns a pair
        ``(best_result, least_invalid)``. If ``least_invalid`` has fewer
        invalid atoms and a CSM no worse than ``best_result`` it is returned
        instead; if it only has fewer invalid atoms, a notice is printed and
        ``best_result`` is still returned.

        :param operation: object exposing ``type`` and ``order``.
        :return: the selected result object.
        """
        if operation.type == 'CI' or (operation.type == 'SN'
                                      and operation.order == 2):
            single_dir = [1.0, 0.0, 0.0]
            op_msg = 'S2' if operation.type == 'SN' else 'CI'
            self._log("Operation %s - using just one direction: %s" %
                      (op_msg, single_dir))
            best_results = self._calculate_for_directions(
                operation, [single_dir], 1)
        elif self.selective:
            # First pass: one iteration per direction, just to rank them.
            self._calculate_for_directions(operation,
                                           self._initial_directions, 1)
            ranked = sorted(self.statistics.directions_arr)
            best_dirs = [item.start_dir for item in ranked[:self.num_selected]]
            # Logged once for the rerun, not once per selected direction.
            self._log("Running again on the", self.num_selected,
                      "best directions")
            best_results = self._calculate_for_directions(
                operation, best_dirs, self._max_iterations)
        else:
            best_results = self._calculate_for_directions(
                operation, self._initial_directions, self._max_iterations)

        best_result, least_invalid = best_results
        if least_invalid.num_invalid < best_result.num_invalid:
            if least_invalid.csm <= best_result.csm:
                # Fewer invalid atoms and no worse CSM: strictly preferable.
                best_result = least_invalid
            else:
                print(
                    "(A result with better preservation of integrity of cycle lengths was found"
                )
                print("Direction: ", least_invalid.dir, " yields a CSM of",
                      format_CSM(least_invalid.csm), "\n",
                      (1 - (least_invalid.num_invalid / len(self._molecule))) *
                      100, "% of the molecule's atoms are in legal cycles)")
        return best_result
    def csm_operation(self, op_type, op_order, molecule, perm=None, timeout=300):
        """
        Calculates minimal csm, directional cosines by applying permutations that keep the similar atoms within the group.

        Every state yielded by the permuter is scored with ``calc_ref_plane``
        and the state with the lowest csm is kept. ``self.statistics`` is set
        to ``ExactStatistics(permuter)`` once the iteration finishes.

        :param op_type: operation type string; cannot be CH. ``'CS'`` is
            signalled to ``calc_ref_plane`` as a flag.
        :param op_order: operation order, forwarded to the permuter and to
            ``calc_ref_plane``.
        :param molecule: the molecule whose atoms are permuted.
        :param perm: optional single permutation; when given (and non-empty)
            only this permutation is evaluated, otherwise ``CythonPermuter``
            enumerates the permutations.
        :param timeout: forwarded to ``CythonPermuter`` when ``perm`` is not
            given.
        :return: ``CSMState`` carrying the lowest csm found, with its
            direction and permutation.
        :raises CSMValueError: if no permutation produced a csm below
            ``MAXDOUBLE``.
        """
        best_csm = CSMState(molecule=molecule, op_type=op_type, op_order=op_order, csm=MAXDOUBLE)

        # Explicit None/length test instead of truthiness: ``if perm:`` raises
        # for multi-element numpy arrays and treats ``array([0])`` as "no perm".
        if perm is not None and len(perm) > 0:
            permuter = SinglePermPermuter(np.array(perm, dtype="long"), molecule, op_order, op_type)
        else:
            permuter = CythonPermuter(molecule, op_order, op_type, timeout=timeout)

        is_reflection = op_type == 'CS'
        for calc_state in permuter.permute():
            # Progress report every million permutations.
            if permuter.count % 1000000 == 0:
                print("calculated for", int(permuter.count / 1000000), "million permutations thus far...\t Time:",
                      run_time(self.start_time))
            csm, direction = calc_ref_plane(op_order, is_reflection, calc_state)

            if csm < best_csm.csm:
                best_csm = best_csm._replace(csm=csm, dir=direction, perm=list(calc_state.perm))

        self.statistics = ExactStatistics(permuter)

        if best_csm.csm == MAXDOUBLE:
            # failed to find csm value for any permutation
            raise CSMValueError("Failed to calculate a csm value for %s %d" % (op_type, op_order), best_csm)
        return best_csm
Beispiel #4
0
def check_perm_validity(mol, perm, **kwargs):
    '''
    Sanity-checks a user-supplied permutation and prints a warning for each
    problem found: illegal cycle lengths, swaps between non-equivalent atoms,
    and loss of the molecule's bond structure. Nothing is returned or raised
    for an invalid permutation; the checks only warn.
    :param mol: the molecule being permuted
    :param perm: the permutation
    :param kwargs: must contain 'operation', which is handed to check_perm_cycles
    '''
    operation = kwargs['operation']
    invalid_cycles, _num_invalid, _cycle_counts, _bad_indices = check_perm_cycles(
        perm, operation)
    if invalid_cycles > 0:
        print("Warning: Permutation does not maintain cycle structure")

    atoms_equivalent = check_perm_equivalence(mol, perm)
    if not atoms_equivalent:
        print(
            "Warning: Permutation contains switches between non-equivalent atoms"
        )

    try:
        preserved = check_perm_structure_preservation(mol, perm)
        if preserved < 1:
            print("Warning: Permutation does not preserve molecule structure")
    except ValueError:
        # molecule has no structure, so there is nothing to compare against
        pass
Beispiel #5
0
    def print_summary(self):
        """Print a human-readable report about the permutation that was found.

        The report covers, in order: how much of the molecule's structure the
        permutation preserves (or a note that this cannot be measured when
        ``check_perm_structure_preservation`` raises ``ValueError``), how many
        invalid cycles it contains, a per-length cycle breakdown, the best
        chirality for the CHIRALITY operation, the final measure value and the
        chain permutation string.
        """
        try:
            preserved = check_perm_structure_preservation(self.molecule, self.perm)
            preserved_text = str(round(preserved * 100, 2))
            print("The permutation found maintains " + preserved_text +
                  "% of the original molecule's structure")
        except ValueError:
            print("The input molecule does not have bond information and therefore conservation of structure cannot be measured")

        invalid_cycles, atoms_in_bad_cycles, counts_by_length, _bad_indices = check_perm_cycles(
            self.perm, self.operation)
        cycle_word = "cycle" if invalid_cycles == 1 else "cycles"
        atom_total = len(self.molecule)
        legal_percent = 100 * (atom_total - atoms_in_bad_cycles) / atom_total
        print(
            "The permutation found contains %d invalid %s. %.2lf%% of the molecule's atoms are in legal cycles" % (
                invalid_cycles, cycle_word, legal_percent))

        for length in sorted(counts_by_length):
            # A cycle is legal when it is a fixed point, matches the operation
            # order, or has length 2 under an SN operation.
            if length == 1 or length == self.operation.order:
                validity = ""
            elif length == 2 and self.operation.type == 'SN':
                validity = ""
            else:
                validity = "invalid"

            how_many = counts_by_length[length]
            if how_many == 1:
                verb, noun = "is", "cycle"
            else:
                verb, noun = "are", "cycles"
            print("There %s %d %s %s of length %d" % (
                verb, how_many, validity, noun, length))

        if self.operation.name == "CHIRALITY":
            print("Minimum chirality was found in", self.overall_statistics["best chirality"])

        measure = abs(self.csm)
        print("%s: %.4lf" % (self.operation.name, measure))

        print("Chain perm: " + self.chain_perm_string)