def svm_prediction(model, kernel, model_id, predictors):
    '''@svm_prediction

    This method generates an svm prediction using the provided prediction
    feature input(s), and the corresponding model stored within the NoSQL
    datastore.

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_' + kernel + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_' + kernel + '_model',
        model_id + '_' + title
    )

    # get encoded labels
    encoded_labels = Cache_Model().uncache(
        model + '_' + kernel + '_labels',
        model_id
    )

    # perform prediction, and return the result
    numeric_label = clf.predict([predictors])
    textual_label = list(encoded_labels.inverse_transform([numeric_label]))
    return {'result': textual_label[0][0], 'error': None}
def sv_prediction(model, model_id, predictors):
    '''@sv_prediction

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the corresponding model
    stored within the NoSQL datastore.

    @clf, decoded model, containing several methods (e.g. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # local variables
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_model',
        model_id + '_' + title
    )

    # svm model: get encoded labels
    if model == list_model_type[0]:
        encoded_labels = Cache_Model().uncache(
            model + '_labels',
            model_id
        )

    # perform prediction
    numeric_label = clf.predict([predictors])

    # result: svm model
    if model == list_model_type[0]:
        textual_label = list(encoded_labels.inverse_transform([numeric_label]))
        return {'result': textual_label[0][0], 'error': None}

    # result: svr model
    elif model == list_model_type[1]:
        return {'result': str(numeric_label[0]), 'error': None}
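# The following is a minimal, self-contained sketch of the round trip that
# sv_prediction() performs after uncaching: predict a numeric label, then
# decode it back to its original text form. The dataset below is invented
# for illustration; in the application, 'clf' and the label encoder are
# restored from the NoSQL datastore via Cache_Model().uncache().
from sklearn import preprocessing, svm

# fit an encoder and classifier, as the model-generation step would
grouped_features = [[5.1, 3.5], [4.9, 3.0], [6.2, 3.4], [5.9, 3.0]]
observation_labels = ['setosa', 'setosa', 'virginica', 'virginica']

label_encoder = preprocessing.LabelEncoder()
encoded_labels = label_encoder.fit_transform(observation_labels)

clf = svm.SVC(kernel='rbf')
clf.fit(grouped_features, encoded_labels)

# predict one observation, then decode the numeric label back to text
predictors = [6.0, 3.2]
numeric_label = clf.predict([predictors])
textual_label = label_encoder.inverse_transform(numeric_label)
print({'result': textual_label[0], 'error': None})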
def retrieve_sv_features():
    '''@retrieve_sv_features

    This router function retrieves the generalized feature properties that
    can be expected for any given observation within the supplied dataset.

    @label_list, this value will be a json object, since it was originally
        cached into redis using 'json.dumps'.

    '''

    if request.method == 'POST':
        label_list = Cache_Hset().uncache(
            'svm_rbf_feature_labels',
            request.get_json()['session_id']
        )

        # return all feature labels
        if label_list['result']:
            return json.dumps(label_list['result'])
        else:
            return json.dumps({'error': label_list['error']})
def retrieve_sv_features():
    '''

    This router function retrieves the generalized feature properties that
    can be expected for any given observation within the supplied dataset.

    @label_list, this value will be a json object, since it was originally
        cached into redis using 'json.dumps'.

    '''

    if request.method == 'POST':
        # get model type
        model_id = request.get_json()['model_id']
        model_type = M_Type().get_model_type(model_id)['result']

        # return all feature labels
        label_list = Cache_Hset().uncache(
            model_type + '_feature_labels',
            model_id
        )

        if label_list['result']:
            return json.dumps(label_list['result'])
        else:
            return json.dumps({'error': label_list['error']})
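# A hypothetical client call against the route above. The host, port, and
# URL path ('/retrieve-sv-features') are assumptions, since the route is
# registered elsewhere in the application; only the POST payload shape
# ('model_id') comes from the handler itself.
import json
import requests

response = requests.post(
    'http://localhost:5000/retrieve-sv-features',
    json={'model_id': 1}
)

# the route returns a json.dumps string: either the cached feature labels,
# or {'error': ...} when the hashed-set lookup fails
feature_labels = json.loads(response.text)
print(feature_labels)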
def sv_prediction(model, model_id, predictors):
    '''

    This method generates an sv (i.e. svm, or svr) prediction using the
    provided prediction feature input(s), and the corresponding model
    stored within the NoSQL datastore.

    Additionally, the following is returned for SVM predictions:

        - array of probabilities that a given point (predictors) belongs
          to each of the defined classes.
        - array of distances from a given point (predictors) to the set
          of associated hyperplanes.

    However, the following is returned for SVR predictions:

        - coefficient of determination (r^2).

    @clf, decoded model, containing several methods (e.g. predict)

    @predictors, a list of arguments (floats) required to make an SVM
        prediction, against the respective svm model.

    '''

    # local variables
    probability = None
    decision_function = None
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get necessary model
    title = Cache_Hset().uncache(
        model + '_title',
        model_id
    )['result']
    clf = Cache_Model().uncache(
        model + '_model',
        model_id + '_' + title
    )

    # perform prediction, and return the result
    prediction = clf.predict([predictors])

    # case 1: return svm prediction, and confidence level
    if model == list_model_type[0]:
        encoded_labels = Cache_Model().uncache(
            model + '_labels',
            model_id
        )
        textual_label = encoded_labels.inverse_transform([prediction])
        probability = clf.predict_proba([predictors])
        decision_function = clf.decision_function([predictors])
        classes = [encoded_labels.inverse_transform(x) for x in clf.classes_]

        return {
            'result': textual_label[0][0],
            'model': model,
            'confidence': {
                'classes': list(classes),
                'probability': list(probability[0]),
                'decision_function': list(decision_function[0])
            },
            'error': None
        }

    # case 2: return svr prediction, and confidence level
    elif model == list_model_type[1]:
        r2 = Cache_Hset().uncache(
            model + '_r2',
            model_id
        )['result']

        return {
            'result': str(prediction[0]),
            'model': model,
            'confidence': {
                'score': r2
            },
            'error': None
        }
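# Standalone sketch of the 'confidence' payload assembled in case 1 above:
# SVC must be constructed with probability=True (as the model-generation
# code does) for predict_proba() to be available, and three classes are
# used so decision_function() returns a vector rather than a single value.
# All data is fabricated for illustration.
from sklearn import preprocessing, svm

features = [
    [0.0, 0.1], [0.2, 0.0], [0.1, 0.3], [0.3, 0.2], [0.0, 0.2],
    [4.0, 4.1], [4.2, 4.0], [4.1, 4.3], [4.3, 4.2], [4.0, 4.2],
    [9.0, 9.1], [9.2, 9.0], [9.1, 9.3], [9.3, 9.2], [9.0, 9.2],
]
labels = ['low'] * 5 + ['mid'] * 5 + ['high'] * 5

encoder = preprocessing.LabelEncoder()
clf = svm.SVC(kernel='rbf', probability=True)
clf.fit(features, encoder.fit_transform(labels))

predictors = [4.5, 4.5]
prediction = clf.predict([predictors])

print({
    'result': encoder.inverse_transform(prediction)[0],
    'confidence': {
        'classes': list(encoder.inverse_transform(clf.classes_)),
        'probability': list(clf.predict_proba([predictors])[0]),
        'decision_function': list(clf.decision_function([predictors])[0])
    },
    'error': None
})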
def generate_model(self):
    """@generate_model

    This method generates an svm model, using a chosen dataset from the
    SQL database. The resulting model is stored into a NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    """

    # local variables
    dataset = self.feature_request.get_dataset(self.session_id)
    feature_count = self.feature_request.get_count(self.session_id)
    label_encoder = preprocessing.LabelEncoder()

    # get dataset
    if dataset['error']:
        print(dataset['error'])
        self.list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        print(feature_count['error'])
        self.list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index + 1) % feature_count == 0:
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # last feature closes out the current observation
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC()
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(self.session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(self.session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', self.session_id)
        Cache_Hset().cache('svm_rbf_title', self.session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(self.session_id),
            json.dumps(feature_labels)
        )
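# Minimal sketch of the grouping step above: flattened dataset rows of
# (observation label, feature value, feature name) are folded into one
# feature vector per observation. The rows here are fabricated; in the
# application they come from the SQL database via get_dataset().
feature_count = 2
rows = [
    ('setosa', 5.1, 'sepal-length'),
    ('setosa', 3.5, 'sepal-width'),
    ('virginica', 6.2, 'sepal-length'),
    ('virginica', 3.4, 'sepal-width'),
]

grouped_features = []
observation_labels = []
feature_labels = []
current_features = []

for index, (label, value, name) in enumerate(rows):
    current_features.append(value)

    # record each distinct feature name once
    if len(feature_labels) < feature_count:
        feature_labels.append(name)

    # every feature_count-th row closes out one observation
    if (index + 1) % feature_count == 0:
        grouped_features.append(current_features)
        observation_labels.append(label)
        current_features = []

print(grouped_features)    # [[5.1, 3.5], [6.2, 3.4]]
print(observation_labels)  # ['setosa', 'virginica']
print(feature_labels)      # ['sepal-length', 'sepal-width']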
def sv_model(model, kernel_type, session_id, feature_request, list_error):
    '''@sv_model

    This method generates an sv (i.e. svm, or svr) model using feature
    data retrieved from the database. The generated model is then stored
    within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Cache_Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Cache_Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Cache_Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Cache_Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Cache_Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return any error(s)
    return {'error': list_error}
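# Companion sketch for the svr branch above: fit an SVR, then compute the
# coefficient of determination (r^2) with clf.score(), which is the value
# cached under '<model>_r2' and later reported as the prediction's
# confidence score. Data is fabricated for illustration.
from sklearn import svm

grouped_features = [[1.0], [2.0], [3.0], [4.0], [5.0]]
observation_labels = [1.1, 1.9, 3.2, 3.9, 5.1]

clf = svm.SVR(kernel='linear')
clf.fit(grouped_features, observation_labels)

# r^2 close to 1.0 indicates a near-perfect fit on the training data
r2 = clf.score(grouped_features, observation_labels)
print({'result': str(clf.predict([[2.5]])[0]), 'confidence': {'score': r2}})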
def svm_model(kernel_type, session_id, feature_request, list_error):
    '''@svm_model

    This method generates an svm model using feature data retrieved from
    the database. The generated model is then stored within the NoSQL
    datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    dataset = feature_request.get_dataset(session_id)
    feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        logger.log(feature_count['error'])
        list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index + 1) % feature_count == 0:
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # last feature closes out the current observation
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC(kernel=kernel_type)
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', session_id)
        Cache_Hset().cache('svm_rbf_title', session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return any error(s)
    return {'error': list_error}
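# Sketch of the kernel_type parameter introduced above: the same fit flow
# accepts any kernel supported by scikit-learn's SVC. The kernel names are
# real SVC options; the training data is fabricated.
from sklearn import svm

grouped_features = [[0.0, 0.0], [1.0, 1.0], [4.0, 4.0], [5.0, 5.0]]
encoded_labels = [0, 0, 1, 1]

for kernel_type in ('linear', 'poly', 'rbf', 'sigmoid'):
    clf = svm.SVC(kernel=kernel_type)
    clf.fit(grouped_features, encoded_labels)
    print(kernel_type, clf.predict([[4.5, 4.5]])[0])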