def generate_model(self):
    """@generate_model

    Generates an svm model, using a chosen dataset from the SQL database,
    and stores the resulting model into the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    """

    # local variables
    dataset = self.feature_request.get_dataset(self.session_id)
    feature_count = self.feature_request.get_count(self.session_id)
    label_encoder = preprocessing.LabelEncoder()

    # get dataset
    if dataset['error']:
        print(dataset['error'])
        self.list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        print(feature_count['error'])
        self.list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # bug fix: abort when either query failed — otherwise the
    # 'len(dataset) % feature_count' below raises TypeError on None
    if dataset is None or feature_count is None:
        return

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index + 1) % feature_count == 0:
                # observation labels
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # close out the current observation group
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC()
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(self.session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(self.session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', self.session_id)
        Cache_Hset().cache('svm_rbf_title', self.session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(self.session_id),
            json.dumps(feature_labels)
        )
def sv_model(model, kernel_type, session_id, feature_request, list_error):
    '''@sv_model

    Generates an sv (i.e. svm, or svr) model using feature data retrieved
    from the database. The generated model is then stored within the NoSQL
    datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # bug fix: return early when either query failed — otherwise the
    # 'len(dataset) % feature_count' below raises TypeError on None
    if dataset is None or feature_count is None:
        return {'error': list_error}

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])
                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels (numeric dependent variable)
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))
                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Cache_Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Cache_Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # NOTE(review): if 'model' matches neither configured type, 'clf'
        # is undefined below — presumably callers only pass configured
        # types; verify against callers.

        # get title
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Cache_Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Cache_Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Cache_Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def generate_model(self):
    """@generate_model

    Generates an svm model, using a chosen dataset from the SQL database,
    and stores the resulting model into the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    """

    # local variables
    dataset = self.feature_request.get_dataset(self.session_id)
    feature_count = self.feature_request.get_count(self.session_id)
    label_encoder = preprocessing.LabelEncoder()

    # get dataset
    if dataset['error']:
        print(dataset['error'])
        self.list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        print(feature_count['error'])
        self.list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # bug fix: abort when either query failed — otherwise the
    # 'len(dataset) % feature_count' below raises TypeError on None
    if dataset is None or feature_count is None:
        return

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index + 1) % feature_count == 0:
                # observation labels
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # close out the current observation group
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC()
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(self.session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(self.session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', self.session_id)
        Cache_Hset().cache('svm_rbf_title', self.session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(self.session_id),
            json.dumps(feature_labels)
        )
def svm_model(kernel_type, session_id, feature_request, list_error):
    '''@svm_model

    Generates an svm model using feature data retrieved from the database,
    and stores the fitted model within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id)
    feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        logger.log(feature_count['error'])
        list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # bug fix: return early when either query failed — otherwise the
    # 'len(dataset) % feature_count' below raises TypeError on None
    if dataset is None or feature_count is None:
        return {'error': list_error}

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index + 1) % feature_count == 0:
                # observation labels
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # close out the current observation group
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC(kernel=kernel_type)
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', session_id)
        Cache_Hset().cache('svm_rbf_title', session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def sv_model(model, kernel_type, session_id, feature_request, list_error):
    '''
    Generates an sv (i.e. svm, or svr) model using feature data retrieved
    from the database. The generated model is then stored within the NoSQL
    datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.
    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # bug fix: return early when either query failed — otherwise the
    # 'len(dataset) % feature_count' below raises TypeError on None
    if dataset is None or feature_count is None:
        return {'error': list_error}

    # check dataset integrity, build model
    if len(dataset) % feature_count == 0:
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])
                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels (numeric dependent variable)
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))
                if (index + 1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Cache_Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Cache_Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # NOTE(review): if 'model' matches neither configured type, 'clf'
        # is undefined below — presumably callers only pass configured
        # types; verify against callers.

        # get title
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Cache_Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Cache_Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Cache_Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}