Example #1
0
    def _run_test(self, statistic, sample_1, sample_1_weighted, sample_2,
                  sample_2_weighted):
        """ Run a test for a certain statistic against the two sample datasets

        :param statistic: name of statistic
        :param sample_1: float, expected value for statistic of sample 1 without weights
        :param sample_1_weighted: float, expected value for statistic of sample 1 with weights
        :param sample_2: float, expected value for statistic of sample 2 without weights
        :param sample_2_weighted: float, expected value for statistic of sample 2 with weights
        """

        self.assertAlmostEqual(
            sample_1, PropertyStats.calc_stat(self.sample_1, statistic))
        self.assertAlmostEqual(
            sample_1_weighted,
            PropertyStats.calc_stat(self.sample_1, statistic,
                                    self.sample_1_weights))
        self.assertAlmostEqual(
            sample_2, PropertyStats.calc_stat(self.sample_2, statistic))
        self.assertAlmostEqual(
            sample_2_weighted,
            PropertyStats.calc_stat(self.sample_2, statistic,
                                    self.sample_2_weights))
Example #2
0
    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: Specified property statistics of features
        """

        all_attributes = []

        for attr in self.features:
            elem_data = self.data_source.get_property(comp, attr)

            for stat in self.stats:
                all_attributes.append(PropertyStats().calc_stat(
                    elem_data, stat))

        return all_attributes
Example #3
0
    def featurize(self, s):
        """
        Calculate all sites' local structure order parameters (LSOPs).

        Args:
            s: Pymatgen Structure object.

            Returns:
                opvals: (2D array of floats) LSOP values of all sites'
                (1st dimension) order parameters (2nd dimension). 46 order
                parameters are computed per site: q_cn (coordination
                number), q_lin, 35 x q_bent (starting with a target angle
                of 5 degrees and, increasing by 5 degrees, until 175 degrees),
                q_tet, q_oct, q_bcc, q_2, q_4, q_6, q_reg_tri, q_sq, q_sq_pyr.
        """
        opvals = [[] for t in self._labels]
        for i, site in enumerate(s.sites):
            if (self.min_oxi is None or site.specie.oxi_state >= self.min_oxi) \
                    and (self.max_oxi is None or site.specie.oxi_state >= self.max_oxi):
                opvalstmp = self.op_site_fp.featurize(s, i)
                for j, opval in enumerate(opvalstmp):
                    if opval is None:
                        opvals[j].append(0.0)
                    else:
                        opvals[j].append(opval)

        if self.stats:
            opstats = []
            for op in opvals:
                if '_mode' in ''.join(self.stats):
                    modes = self.n_numerical_modes(op, self.nmodes, 0.01)
                for stat in self.stats:
                    if '_mode' in stat:
                        opstats.append(modes[int(stat[0])-1])
                    else:
                        opstats.append(PropertyStats().calc_stat(op, stat))

            return opstats
        else:
            return opvals
Example #4
0
    def test_mode(self):
        self._run_test("mode", 1, 1, 0, 0.5)

        # Additional tests
        self.assertAlmostEqual(0, PropertyStats.mode([0, 1, 2], [1, 1, 1]))
Example #5
0
    def featurize(self, struct, idx):
        """
        Get OP fingerprint of site with given index in input
        structure.
        Args:
            struct (Structure): Pymatgen Structure object.
            idx (int): index of target site in structure struct.
        Returns:
            opvals (numpy array): order parameters of target site.
        """
        idop = 1.0 / self.dop
        opvals = {}
        s = struct.sites[idx]
        neigh_dist = []
        r = 6
        while len(neigh_dist) < 12:
            r += 1.0
            neigh_dist = struct.get_neighbors(s, r)
        # Smoothen distance, but use relative distances.
        dmin = min([d for n, d in neigh_dist])
        neigh_dist = [[n, d / dmin] for n, d in neigh_dist]
        neigh_dist_alldrs = {}
        d_sorted_alldrs = {}
        for i in range(-self.ndr, self.ndr + 1):
            opvals[i] = []
            this_dr = self.dr + float(i) * self.ddr
            this_idr = 1.0 / this_dr
            neigh_dist_alldrs[i] = []
            for j in range(len(neigh_dist)):
                neigh_dist_alldrs[i].append([neigh_dist[j][0],
                                             (float(int(neigh_dist[j][1] * this_idr \
                                                        + 0.5)) + 0.5) * this_dr])
            d_sorted_alldrs[i] = []
            for n, d in neigh_dist_alldrs[i]:
                if d not in d_sorted_alldrs[i]:
                    d_sorted_alldrs[i].append(d)
            d_sorted_alldrs[i] = sorted(d_sorted_alldrs[i])

        # Do q_sgl_bd separately.
        if self.optypes[1][0] == "sgl_bd":
            for i in range(-self.ndr, self.ndr + 1):
                site_list = [s]
                for n, dn in neigh_dist_alldrs[i]:
                    site_list.append(n)
                opval = self.ops[1][0].get_order_parameters(
                    site_list,
                    0,
                    indices_neighs=[j for j in range(1, len(site_list))])
                opvals[i].append(opval[0])

        for i in range(-self.ndr, self.ndr + 1):
            prev_cn = 0
            prev_site_list = None
            prev_d_fac = None
            dmin = min(d_sorted_alldrs[i])
            for d in d_sorted_alldrs[i]:
                this_cn = 0
                site_list = [s]
                this_av_inv_drel = 0.0
                for j, [n, dn] in enumerate(neigh_dist_alldrs[i]):
                    if dn <= d:
                        this_cn += 1
                        site_list.append(n)
                        this_av_inv_drel += (1.0 / (neigh_dist[j][1]))
                this_av_inv_drel = this_av_inv_drel / float(this_cn)
                d_fac = this_av_inv_drel**self.dist_exp
                for cn in range(max(2, prev_cn + 1), min(this_cn + 1, 13)):
                    # Set all OPs of non-CN-complying neighbor environments
                    # to zero if applicable.
                    if self.zero_ops and cn != this_cn:
                        for it in range(len(self.optypes[cn])):
                            opvals[i].append(0)
                        continue

                    # Set all (remaining) OPs.
                    for it in range(len(self.optypes[cn])):
                        opval = self.ops[cn][it].get_order_parameters(
                            site_list,
                            0,
                            indices_neighs=[
                                j for j in range(1, len(site_list))
                            ])
                        if opval[0] is None:
                            opval[0] = 0
                        else:
                            opval[0] = d_fac * opval[0]
                        if self.optypes[cn][it] == 'bcc':
                            opval[0] = opval[0] / 0.976
                        opvals[i].append(opval[0])
                prev_site_list = site_list
                prev_cn = this_cn
                prev_d_fac = d_fac
                if prev_cn >= 12:
                    break

        opvals_out = []
        ps = PropertyStats()
        for j in range(len(opvals[0])):
            # Compute histogram, determine peak, and location
            # of peak value.
            op_tmp = [opvals[i][j] for i in range(-self.ndr, self.ndr + 1)]
            minval = float(int(min(op_tmp) * idop - 1.5)) * self.dop
            # print(minval)
            if minval < 0.0:
                minval = 0.0
            if minval > 1.0:
                minval = 1.0
            # print(minval)
            maxval = float(int(max(op_tmp) * idop + 1.5)) * self.dop
            # print(maxval)
            if maxval < 0.0:
                maxval = 0.0
            if maxval > 1.0:
                maxval = 1.0
            # print(maxval)
            if minval == maxval:
                minval = minval - self.dop
                maxval = maxval + self.dop
            # print(minval)
            # print(maxval)
            nbins = int((maxval - minval) * idop)
            # print('{} {} {}'.format(minval, maxval, nbins))
            hist, bin_edges = np.histogram(op_tmp,
                                           bins=nbins,
                                           range=(minval, maxval),
                                           normed=False,
                                           weights=None,
                                           density=False)
            max_hist = max(hist)
            op_peaks = []
            for i, h in enumerate(hist):
                if h == max_hist:
                    op_peaks.append(
                        [i, 0.5 * (bin_edges[i] + bin_edges[i + 1])])
            # Address problem that 2 OP values can be close to a bin edge.
            hist2 = []
            op_peaks2 = []
            i = 0
            while i < len(op_peaks):
                if i < len(op_peaks) - 1:
                    if op_peaks[i + 1][0] - op_peaks[i][0] == 1:
                        op_peaks2.append(0.5 *
                                         (op_peaks[i][1] + op_peaks[i + 1][1]))
                        hist2.append(hist[op_peaks[i][0]] +
                                     hist[op_peaks[i + 1][0]])
                        i += 1
                    else:
                        op_peaks2.append(op_peaks[i][1])
                        hist2.append(hist[op_peaks[i][0]])
                else:
                    op_peaks2.append(op_peaks[i][1])
                    hist2.append(hist[op_peaks[i][0]])
                i += 1
            opvals_out.append(op_peaks2[list(hist2).index(max(hist2))])
        return np.array(opvals_out)
Example #6
0
        """
        prod = 1.0
        for el, amt in comp.get_el_amt_dict().items():
            prod = prod * (Element(el).X**amt)

        return [-prod**(1 / sum(comp.get_el_amt_dict().values()))]

    def feature_labels(self):
        return ["Band Center"]

    def implementors(self):
        return ["Anubhav Jain"]


if __name__ == '__main__':
    print(PropertyStats.holder_mean([1, 2, 3, 4]))

    training_set = pd.DataFrame({
        "composition": [
            Composition("Fe2O3"),
            Composition("Ga1Na6P3"),
            Composition("O4Si1Zn2")
        ]
    })
    print("WARD NPJ ATTRIBUTES")
    print("Stoichiometric attributes")
    p_list = [0, 2, 3, 5, 7, 9]
    print(Stoichiometry().featurize_dataframe(training_set,
                                              col_id="composition"))
    print("Elemental property attributes")
    print(ElementProperty().featurize_dataframe(training_set,