Exemple #1
0
    def randomForest_predict(self, fit_model, data):
        """
        Apply a fitted randomForest model to `data` and return R's prediction.

        03-17-06
        2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off

        `data` is converted with array() and sliced by column: columns 0-6
        become p_value, recurrence, connectivity, cluster_size, gradient,
        avg_degree and unknown_ratio, and the last column is wrapped in
        r.factor() as is_correct.

        The R data frame is built while the default mode is NO_CONVERSION;
        the mode is then switched to BASIC_CONVERSION before r.predict() is
        called and is left at BASIC_CONVERSION on return.

        Returns whatever r.predict(fit_model, <data frame>) returns.
        """
        if self.debug:
            sys.stderr.write("Predicting by randomForest...\n")

        table = array(data)

        # Build the data frame without conversion so it stays an R-side object.
        set_default_mode(NO_CONVERSION)
        to_data_frame = r.as_data_frame
        columns = {
            "p_value": table[:, 0],
            "recurrence": table[:, 1],
            "connectivity": table[:, 2],
            "cluster_size": table[:, 3],
            "gradient": table[:, 4],
            "avg_degree": table[:, 5],
            "unknown_ratio": table[:, 6],
            "is_correct": r.factor(table[:, -1]),
        }
        r_frame = to_data_frame(columns)

        # The prediction itself is fetched under BASIC_CONVERSION.
        set_default_mode(BASIC_CONVERSION)
        prediction = r.predict(fit_model, r_frame)
        del r_frame

        if self.debug:
            sys.stderr.write("Done randomForest prediction.\n")
        return prediction
Exemple #2
0
    def randomForest_fit(self, known_data, parameter_list, bit_string="1111111"):
        """
        Fit an R randomForest model of is_correct on the selected features.

        03-17-06
        2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off

        Arguments:
            known_data -- 2D data accepted by array(). Columns 0-6 are read as
                p_value, recurrence, connectivity, cluster_size, gradient,
                avg_degree and unknown_ratio; the last column is passed to R
                as the factor is_correct.
            parameter_list -- only parameter_list[0] is used. When it is > 0
                it is passed to randomForest as `mtry`; otherwise
                randomForest is called without it.
            bit_string -- one character per feature, in the column order
                above. A feature enters the formula only if its character is
                "1". A "1" beyond the seventh position raises IndexError.

        Returns the object returned by r.randomForest().

        Side effects: calls r._libPaths() with <lib_path>/R, loads the
        randomForest R library, and leaves rpy's default mode set to
        NO_CONVERSION (it is not restored here).
        """
        if self.debug:
            sys.stderr.write("Fitting randomForest...\n")
        mty = parameter_list[0]

        from rpy import r

        r._libPaths(
            os.path.join(lib_path, "R")
        )  # better than r.library("randomForest", lib_loc=os.path.join(lib_path, "R")) (see plone doc)
        r.library("randomForest")

        coeff_name_list = [
            "p_value",
            "recurrence",
            "connectivity",
            "cluster_size",
            "gradient",
            "avg_degree",
            "unknown_ratio",
        ]  # 2006-10-30
        # Keep only the features whose bit is switched on.
        formula_list = [
            coeff_name_list[i] for i, bit in enumerate(bit_string) if bit == "1"
        ]
        formula = r("is_correct~%s" % "+".join(formula_list))

        known_data = array(known_data)
        # From here on R results are kept unconverted, including the
        # fitted model that is returned.
        set_default_mode(NO_CONVERSION)
        data_frame = r.as_data_frame(
            {
                "p_value": known_data[:, 0],
                "recurrence": known_data[:, 1],
                "connectivity": known_data[:, 2],
                "cluster_size": known_data[:, 3],
                "gradient": known_data[:, 4],
                "avg_degree": known_data[:, 5],
                "unknown_ratio": known_data[:, 6],
                "is_correct": r.factor(known_data[:, -1]),
            }
        )  # 03-17-06, watch r.factor	#2006-10-30

        if mty > 0:
            # randomForest's argument is spelled "mtry". The former keyword
            # "mty" is not a prefix of it, so R did not match it and the
            # value never reached the fit.
            fit = r.randomForest(formula, data=data_frame, mtry=mty)
        else:
            fit = r.randomForest(formula, data=data_frame)

        del data_frame
        if self.debug:
            sys.stderr.write("Done fitting randomForest.\n")
        return fit