Code example #1
0
File: classifier_sklearn.py  Project: Zhanelya/sprks
    def generate_samples(partial_policy, start_index=0):
        """
        Generate all possible ways to complete a partial policy.

        Recursive helper intended for internal use only; for public use
        call generate_training_set instead.

        :param partial_policy: policy dict currently under construction
        :param start_index: index into the option list used to track
            progress and avoid generating duplicate combinations
        :return: list of fully-specified policy dicts
        """
        policies = []  # completed policies accumulated across recursive calls
        # list() is required: dict.keys() returns a non-indexable view on Python 3
        indexed_options = list(policy_model.get_ranges().keys())

        # Find the first option not yet fixed in partial_policy, branch on
        # every allowed value for it, and recurse to fill in the rest.
        for i in range(start_index, len(indexed_options)):
            policy = indexed_options[i]
            if policy not in partial_policy:
                for value in policy_model.get_ranges()[policy]:
                    new_partial = partial_policy.copy()
                    new_partial[policy] = value
                    completed = classifier_sklearn.generate_samples(new_partial, start_index=i)
                    policies.extend(completed)
                # Stop after handling the first unset option: the recursion
                # above already covered all remaining options.
                return policies

        # Every option is already set: the policy is complete.
        return [partial_policy]
Code example #2
0
    def train_engine(self,
                     risk,
                     employee,
                     location,
                     device,
                     type="classifier"):
        """
        Train and return a model for the given risk/environment combination.

        Loads the matching CSV training data, splits it into feature and
        label columns, and fits the engine selected by ``type``.
        """
        data = self.load_csv_files(risk, employee, location, device, type=type)

        # The first len(get_ranges()) columns hold the data dimensions;
        # the column right after them holds the training result.
        feature_count = len(policy_model.get_ranges())
        features = data[:, :feature_count]
        labels = data[:, feature_count]

        # TODO: implement weighting
        # weighted_features = self.weight(features)

        # For an explanation of the following parameters please see
        # http://scikit-learn.org/stable/modules/svm.html#tips-on-practical-use
        svm_params = {'kernel': 'rbf', 'cache_size': 1000, 'C': 0.2, 'gamma': 0.5}
        eng = engine.get_engine(type=type)
        # TODO: implement weighting
        # return eng.get_model(svm_params).fit(weighted_features, labels)
        return eng.get_model(svm_params).fit(features, labels)
Code example #3
0
File: classifier_sklearn.py  Project: Zhanelya/sprks
    def __init__(self):
        """
        Initializes all implicit models.

        Currently there is one model per risk, but this should turn into
        one model per risk per environmental configuration.
        """
        self.incidents_models = {}  # maps risk name -> fitted sklearn SVC
        self.risks = []             # names of all risks found on disk

        # Number of feature columns; the label column follows immediately after.
        limit = len(policy_model.get_ranges())

        # Positive cases shared across every risk model, so each classifier
        # sees examples that contrast with the negative (incident) ones.
        general = numpy.genfromtxt('static/data/pw-train-generated-general.csv', delimiter=',')

        pattern = 'static/data/pw-train-generated-risk-*.csv'
        prefix, suffix = pattern.split('*')
        for filename in glob.glob(pattern):
            # Derive the risk name from the glob pattern instead of the
            # original magic slice filename[36:-4], which silently breaks
            # if the data directory or filename scheme ever changes.
            risk = filename[len(prefix):-len(suffix)]
            self.risks.append(risk)
            data = numpy.genfromtxt(filename, delimiter=',')
            # Add positive cases that need to contrast the negative ones.
            data = numpy.concatenate((data, general))
            train_data = data[:, 0:limit]  # first several columns represent the data dimension
            train_result = data[:, limit]  # result column comes after the data dimensions
            self.incidents_models[risk] = svm.SVC().fit(train_data, train_result)