import glob
import os

import numpy
from sklearn import svm

# project-internal modules, named as they appear in the calls below
import classifier_sklearn
import engine
import policy_model


def generate_samples(partial_policy, start_index=0):
    """
    Generates all possible ways to complete a partial policy.

    This is a recursive helper meant for internal use only. For public use,
    call generate_training_set instead.

    :param partial_policy: the policy currently under construction
    :param start_index: tracks progress through the options to avoid
                        generating duplicate combinations
    """
    samples = []  # completed policies
    # dict.keys() is not indexable in Python 3, so materialize it as a list
    indexed_options = list(policy_model.get_ranges().keys())
    for i in range(start_index, len(indexed_options)):  # find the first unassigned option
        policy = indexed_options[i]
        if policy not in partial_policy:
            for value in policy_model.get_ranges()[policy]:
                new_partial = partial_policy.copy()
                new_partial[policy] = value
                complete_new = classifier_sklearn.generate_samples(new_partial, start_index=i)
                samples.extend(complete_new)
            # stop after the first unassigned option: the recursion above
            # already enumerates every later option
            return samples
    # every option is assigned, so the policy is complete
    return [partial_policy]
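
# A standalone sketch of the same enumeration for illustration, assuming
# policy_model.get_ranges() returns a dict mapping option names to lists of
# candidate values (the helper name and the example ranges below are
# hypothetical, not part of the project API):
def _enumerate_policies_sketch(ranges, partial_policy):
    """Return every completion of partial_policy over the given ranges."""
    import itertools
    open_options = [name for name in ranges if name not in partial_policy]
    completions = []
    for values in itertools.product(*(ranges[name] for name in open_options)):
        candidate = dict(partial_policy)
        candidate.update(zip(open_options, values))
        completions.append(candidate)
    return completions

# e.g. _enumerate_policies_sketch({'length': [8, 12], 'digits': [True, False]}, {})
# yields all four combinations, i.e. the Cartesian product of the open options.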
def train_engine(self, risk, employee, location, device, type="classifier"):
    data = self.load_csv_files(risk, employee, location, device, type=type)
    limit = len(policy_model.get_ranges())
    train_data = data[:, 0:limit]   # the first `limit` columns hold the policy dimensions
    train_result = data[:, limit]   # the column after the policy dimensions holds the label
    # TODO: implement weighting
    # weighted_train_data = self.weight(train_data)

    # For an explanation of the following parameters, see
    # http://scikit-learn.org/stable/modules/svm.html#tips-on-practical-use
    params = {'kernel': 'rbf', 'cache_size': 1000, 'C': 0.2, 'gamma': 0.5}
    eng = engine.get_engine(type=type)
    # TODO: implement weighting
    # return eng.get_model(params).fit(weighted_train_data, train_result)
    return eng.get_model(params).fit(train_data, train_result)
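
# The C and gamma values above are fixed by hand; a minimal sketch of picking
# them by cross-validated grid search instead, using scikit-learn's SVC
# directly (the function name and parameter grid are illustrative only; the
# input arrays are shaped as in train_engine):
def _tune_svc_params_sketch(train_data, train_result):
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC
    grid = {'C': [0.1, 0.2, 1.0, 10.0], 'gamma': [0.1, 0.5, 1.0]}
    search = GridSearchCV(SVC(kernel='rbf', cache_size=1000), grid, cv=5)
    search.fit(train_data, train_result)
    return search.best_estimator_, search.best_params_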
def __init__(self):
    """
    Initializes all implicit models. Currently there is one model per risk,
    but this should eventually become one model per risk per environmental
    configuration.
    """
    self.incidents_models = {}
    self.risks = []
    limit = len(policy_model.get_ranges())
    general = numpy.genfromtxt('static/data/pw-train-generated-general.csv', delimiter=',')
    for filename in glob.glob('static/data/pw-train-generated-risk-*.csv'):
        # extract the risk name from the filename, dropping prefix and extension
        risk = os.path.basename(filename)[len('pw-train-generated-risk-'):-len('.csv')]
        self.risks.append(risk)
        data = numpy.genfromtxt(filename, delimiter=',')
        # add the general (positive) cases to contrast this risk's negative ones
        data = numpy.concatenate((data, general))
        train_data = data[:, 0:limit]   # the first `limit` columns hold the policy dimensions
        train_result = data[:, limit]   # the column after the policy dimensions holds the label
        self.incidents_models[risk] = svm.SVC().fit(train_data, train_result)
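
# A sketch of querying the per-risk models built above: encode a candidate
# policy into the same feature order the training CSVs use, then ask each
# risk's SVC for a verdict (the method name is hypothetical, and the caller
# is responsible for matching the CSV feature encoding):
def _predict_incidents_sketch(self, encoded_policy_row):
    """encoded_policy_row: 1-D numpy array of len(policy_model.get_ranges()) features."""
    sample = encoded_policy_row.reshape(1, -1)  # scikit-learn's predict() expects a 2-D array
    return {risk: model.predict(sample)[0]
            for risk, model in self.incidents_models.items()}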