def __init__(self, model_deciding=None):
     get_time()
     self.data_path = data_path if model_deciding == 'all' else features_data_path
     self.data = get_data(path=self.data_path, is_for_model=False)
     self.columns = list(self.data.columns)
     self.features = decide_feature_name(feature_path)
     self.model_deciding = model_deciding
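
A hedged usage sketch for the constructor above, assuming the class is CreateFeatures from feature_engineering.py (as imported in main.py further below); passing 'all' recomputes every feature listed in features.json:

from feature_engineering import CreateFeatures

create_feature = CreateFeatures(model_deciding='all')  # 'all': compute every feature in features.json
create_feature.compute_features()
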
Example #2
 def model_train(self):
     print("Auto Encoder is initialized!!")
     get_time()
     self.train_test_split()
     self.get_x_values(is_for_prediction=False)
     self.auto_encoder()
     if len(self.gpu_devices) != 0:
         config = tf.ConfigProto(log_device_placement=True)  # build the config first so the session actually uses it
         with tf.Session(config=config) as sess:
             sess.run(tf.global_variables_initializer())
             self.model_ae.fit(self.X,
                               self.X,
                               epochs=int(self.params['epochs']),
                               batch_size=int(self.params['batch_size']),
                               validation_split=0.2,
                               shuffle=True)
     else:
         self.model_ae.fit(self.X,
                           self.X,
                           epochs=int(self.params['epochs']),
                           batch_size=int(self.params['batch_size']),
                           validation_split=0.2,
                           shuffle=True)
     self.model_from_to_json(auto_encoder_model_paths['ae'],
                             self.model_ae,
                             is_writing=True)
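
The fit(self.X, self.X) calls above train the network to reconstruct its own input. self.auto_encoder() is not shown; below is a minimal sketch of what it might build, assuming a symmetric fully connected architecture (layer sizes and compile settings are illustrative, not the author's):

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

def build_auto_encoder(input_dim, encoding_dim=8):
    # Encoder compresses the input; decoder reconstructs it.
    inp = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='relu')(inp)
    decoded = Dense(input_dim, activation='linear')(encoded)
    model = Model(inp, decoded)
    model.compile(optimizer='adam', loss='mse')  # reconstruction error as the loss
    return model
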
 def __init__(self,
              hyper_parameters=None,
              model_deciding=None,
              last_day_predictor=None,
              params=None):
     get_time()
     self.data = get_data(main_data_path + params['args']['data'],
                          True)  # data created during feature engineering
     self.features = list(
         decide_feature_name(main_data_path +
                             params['args']['feature_set']).keys())
     self.params = hyper_parameters  # get hyper parameters for model: hyper_parameters.json
     self.model_params = params
     self.train, self.test = None, None
     self.X = None
     self.optimum_cluster_centroids = None
     self.centroids = None
     self.po_data = None  # possible-outlier transactions data
     self.model_dbscan = None
     self.m_s, self.eps = [], []
     self.o_min_sample = None
     self.o_epsilon = None
     self.o_devision = None
     self.last_day_predictor = last_day_predictor  # splitting data indicator
     self.uids = None
 def prediction_iso_f(self):
     print("Isolation Forest Prediction Process Initialized!")
     get_time()
     self.train_test_split()
     self.model_iso = self.model_from_to_pickle(is_writing=False)
     self.get_x_values(is_for_prediction=True)
     self.model_iso.n_jobs = -1
     self.test[self.model_params['args']['pred_field']] = self.model_iso.predict(self.X)
     print("Isolation Forest Prediction Process Done!")
Example #5
def main(args):
    logger.get_time()
    if is_local_run:
        args = sample_args
    sys.stdout = logger.Logger()
    print("*"*3, " args :", args)
    if len(args) > 1:
        if args[1] == 'feature_engineering':
            """
            run from terminal: python main.py feature_engineering all
            all: create all features which are at features.json
            Ex: 'python main.py feature_engineering c_m_ratios' create only 'c_m_ratios' adds to features set.
            """
            create_feature = CreateFeatures(model_deciding=args[2])
            create_feature.compute_features()

        if args[1] == 'train_process':
            # TODO: description must be updated
            """
            run from terminal: python main.py train_process 0
            0/1: 0; test data splits from date
                 1: test data is last day of each customer
            Models: isolation forest and AutoEncoder for Multivariate and Univariate Models
            """
            train = trainModel(args=args)
            train.process()

        if args[1] == 'prediction':
            # TODO: description must be updated
            """
            run from terminal: python main.py prediction 0
            0/1: 0; test data splits from date
                 1: test data is last day of each customer
            It creates prediction values for each transaction is added to raw data set
            """
            prediction = trainModel(args=args, is_prediction=True)
            prediction.process()

        if args[1] == 'dashboard':
            # TODO: description must be updated
            """
            run from terminal: python main.py dashboard 0 # 10.20.10.196:3030
            run from terminal: python main.py dashboard 0 uni # 10.20.10.196:3031
            0/1: 0; test data splits from date
                 1: test data is last day of each customer
            uni: creates the dashboard only for univariate models; to run the multivariate dashboard, leave this argument out
            Dashboard for Multivariate and Univariate Models is created
            """
            # TODO: get prediction data from predicted .csv file
            model = ModelTrainIsolationForest(last_day_predictor=int(args[2]))
            model.train_test_split()
            create_dahboard(model.train, get_data(pred_data_path, True))
        logger.get_time()
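
sys.stdout = logger.Logger() above redirects every print into a logger. A hedged sketch of what such a Logger might look like, assuming it tees output to both the console and a log file (the file name is an assumption):

import sys

class Logger(object):
    def __init__(self, path='run.log'):
        self.terminal = sys.stdout   # keep a handle on the real stdout
        self.log = open(path, 'a')
    def write(self, message):
        self.terminal.write(message)  # still show output on the console
        self.log.write(message)       # and mirror it to the log file
    def flush(self):
        self.terminal.flush()
        self.log.flush()
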
Example #6
 def learning_process_prediction_ext_iso_f(self):
     print("Extended isolation forest train process is initialized!!")
     get_time()
     self.get_x_values()
     self.model_e_iso_f = iso.iForest(
         self.X,
         ntrees=self.params['num_of_trees'],
         sample_size=self.params['sample_size'],
         ExtensionLevel=len(self.features) - 1)
     self.data[self.model_params['args']['pred_field']] = self.model_e_iso_f.compute_paths(X_in=self.X)
     self.train_test_split()
     print("Extended Isolation Forest Model Train Process Done!")
Example #7
 def __init__(self,
              hyper_parameters=None,
              last_day_predictor=None,
              params=None):
     get_time()
     self.data = get_data(features_data_path,
                          True)  # data created during feature engineering
     # TODO: get specific feature from specific model.
     self.features = list(decide_feature_name(feature_path).keys())
     self.params = hyper_parameters  # get hyper parameters for model: hyper_parameters.json
     self.model_params = params
     self.train, self.test = None, None
     self.X = None
     self.model_e_iso_f = None
     self.last_day_predictor = last_day_predictor  # splitting data indicator
 def learning_process_iso_f(self):
     print("isolation forest train process is initialized!!")
     get_time()
     self.train_test_split()
     self.get_x_values(is_for_prediction=False)
     self.model_iso = IsolationForest(
         n_estimators=self.params['num_of_trees'],
         max_samples='auto',
         contamination=self.params['contamination'],
         bootstrap=False,
         n_jobs=-1,
         random_state=42,
         verbose=1).fit(self.X)
     self.model_from_to_pickle(True)
     print("Isolation Forest Model Train Process Done!")
def insert_image_record(*args, **kwargs):

	db_connection = connect_to_database()
	db = db_connection.cursor()

	name = kwargs.get('name')	
	image_location = kwargs.get('image_location')
	thumb_location = kwargs.get('thumb_location')
	date_added = get_time()
	date_taken = kwargs.get('date_taken')
	caption = kwargs.get('caption')
	width = kwargs.get('width')
	height = kwargs.get('height')

	try:
		db.execute('INSERT INTO images VALUES (?,?,?,?,?,?,?,?,?)',
		           (None, name, image_location, thumb_location, date_added, date_taken, caption, width, height))
		last_row = db.lastrowid
		db_connection.commit()
		db_connection.close()
		return last_row
	except Exception as err:
		tryToCloseDB(db_connection)
		log("Database: Unable to insert image record - " + str(err), "DATABASE", "MEDIUM")
 def compute_features(self):
     print("*" * 20, "Feature Engineering Process", "*" * 20)
     get_time()
     self.deciding_computing_features()
     self.features_data_arrange()
     for f in self.features:
         print("Feature :", f)
         if self.features[f]['args']['num_of_transaction_removing']:
             self.data = self.features[f]['args']['noisy_data_remover'](
                 self.data,
                 self.features[f]['args']['num_of_transaction_removing'],
                 self.features[f]['args']['num_of_days_removing'],
             )
         self.data = self.features[f]['calling'](self.data, f)
         print("data sample size :", len(self.data))
     self.assign_last_day_label()
     write_to_csv(self.data, features_data_path)
     print("*" * 20, "Feature Engineering Process Has Done", "*" * 20)
Example #11
 def __init__(self,
              hyper_parameters=None,
              last_day_predictor=None,
              params=None):
     get_time()
     self.data = get_data(features_data_path, True)
     self.features = list(decide_feature_name(feature_path).keys())
     self.params = hyper_parameters
     self.last_day_predictor = last_day_predictor
     self.model_params = params
     self.train, self.test = None, None
     self.X, self.y_pred, self.y = None, None, None
     self.input, self.fr_output = None, None
     self.model_ae, self.model_ae_l, self.model_u = None, None, None
     self.gpu_devices = [
         d for d in device_lib.list_local_devices()
         if d.device_type == "GPU"
     ] if run_gpu else []
Example #12
 def compute_features(self):
     get_time()
     self.features_data_arrange()
     for f in self.features:
         print("Feature :", f)
         self.check_features_existed(
             self.features[f]['args']['feature'],
             self.features[f]['args']['related_columns'])
         if self.features[f]['args']['num_of_transaction_removing']:
             self.data = self.features[f]['args']['noisy_data_remover'](
                 self.data,
                 self.features[f]['args']['num_of_transaction_removing'],
                 self.features[f]['args']['num_of_days_removing'],
             )
         self.data = self.features[f]['calling'](self.data, f)
         self.labeling_anormalities(f)
         print("data sample size :", len(self.data))
     self.assign_target_variable()
     self.assign_last_day_label()
     write_to_csv(self.data, features_data_path)
Example #13
 def process(self):
     print("*" *
           20, "Train Process", "*" * 20) if not self.is_pred else print(
               "*" * 20, "Prediction Process", "*" * 20)
     get_time()
     self.define_train_args()
     for m in self.models:
         if self.models[m]['args']['py_file'] in self.files:
             if self.params['run_model'] in ('all', m):
                 print("Model :", self.models[m]['name'])
                 _file_path = join(dirname(__file__),
                                   self.models[m]['args']['py_file'])
                 model_py = callfunc(_file_path)
                 model = [
                     o[1] for o in getmembers(model_py)
                     if o[0] == self.models[m]['args']['calling']
                 ][0]
                 model = model(
                     hyper_parameters=self.hyper_parameters[m],
                     last_day_predictor=self.params['is_training_with_c_of_last_transactions'],
                     params=self.models[m])
                 _module = (self.models[m]['args']['prediction']
                            if self.is_pred else self.models[m]['args']['train'])
                 model_process = [
                     o[1] for o in getmembers(model) if o[0] == _module
                 ][0]
                 model_process()
                 if self.is_pred:  # in prediction mode, concatenate each model's output into pred_data
                     self.get_pred_concat(m, model.test)
         else:
             print("Pls add .py file for model :", m)
     if self.is_pred:  # export the merged prediction data
         self.pred_data.to_csv(pred_data_path, index=False)
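
callfunc(_file_path) above loads a model's .py file so getmembers can find the class named in the configs. A plausible sketch of such a loader, assuming it is built on importlib (the registered module name is arbitrary):

import importlib.util

def callfunc(file_path):
    spec = importlib.util.spec_from_file_location('model_module', file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # execute the file so its classes are defined
    return module
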
Example #14
import sys

from feature_engineering import CreateFeatures
from configs import is_local_run, sample_args, pred_data_path
from model_train_iso_f import ModelTrainIsolationForest
from dashboard import create_dahboard
import logger
from data_access import model_from_to_json, get_data
from model_processor import trainModel

if __name__ == "__main__":
    logger.get_time()
    if is_local_run:
        sys.argv = sample_args
    sys.stdout = logger.Logger()
    print("*" * 3, " args :", sys.argv)
    if len(sys.argv) > 1:
        if sys.argv[1] == 'feature_engineering':
            """
            run from terminal: python main.py feature_engineering all
            all: create all features which are at features.json
            Ex: 'python main.py feature_engineering c_m_ratios' create only 'c_m_ratios' adds to features set.
            """
            create_feature = CreateFeatures(model_deciding=sys.argv[2])
            create_feature.compute_features()

        if sys.argv[1] == 'train_process':
            """
            run from terminal: python main.py train_process 0
            0/1: 0; test data splits from date
                 1: test data is last day of each customer
def create_test_data():
	
	db_connection = connect_to_database()
	db = db_connection.cursor()

	isave = locations.image_save_location()
	tsave = locations.thumbnail_save_location()

	# Build the data to insert
	imageData = [
		(None, 'Callie Hanging out', os.path.join(isave, '1.jpg'), os.path.join(tsave, '1.jpg'),get_time(), get_time(), "Callie hanging out"),
		(None, 'Callie Christmas', os.path.join(isave, '2.jpg'), os.path.join(tsave, '2.jpg'),get_time(), get_time(), ""),
		(None, 'Boop', os.path.join(isave, '3.jpg'), os.path.join(tsave, '3.jpg'),get_time(), get_time(), ""),
		(None, 'Squeak', os.path.join(isave, '4.jpg'), os.path.join(tsave, '4.jpg'),get_time(), get_time(), ""),		
	]		

	tagData = [
		(None, 0, "Kids"),
		(None, 0, "Josh"),
		(None, 0, "Linz"),				
		(None, 0, "Family"),		
		(None, 0, "Holidays"),
		(None, 0, "Friends")
	]

	subTagData = [
		(None, 0, "Kids", "Callie"),

		(None, 0, "Josh", "Portraits"),
		(None, 0, "Josh", "With Callie"),
		(None, 0, "Josh", "Photography"),
		(None, 0, "Josh", "Birthdays"),

		(None, 0, "Linz", "Portraits"),
		(None, 0, "Linz", "With Callie"),
		(None, 0, "Linz", "Photography"),
		(None, 0, "Linz", "Birthdays"),

		(None, 0, "Family", "Johnson"),
		(None, 0, "Family", "Zamudio"),
		(None, 0, "Family", "Brownell"),
		(None, 0, "Family", "Murello"),
		(None, 0, "Family", "Williams"),
		(None, 0, "Family", "Puppies"),

		(None, 0, "Holidays", "Christmas"),
		(None, 0, "Holidays", "New Years"),
		(None, 0, "Holidays", "Easter"),
		(None, 0, "Holidays", "Valentines Day"),
		(None, 0, "Holidays", "Dragon Day"),
		(None, 0, "Holidays", "Thanksgiving"),
		(None, 0, "Holidays", "4th of July"),
		(None, 0, "Holidays", "Halloween"),
		
		(None, 0, "Friends", "Childhood"),
		(None, 0, "Friends", "Adult"),
	]

	eventTagData = [		
		(None, 0, "Kids", "Callie", "Growing Girl"),
		(None, 0, "Kids", "Callie", "Birthdays"),				
		(None, 0, "Kids", "Callie", "Silly"),
		(None, 0, "Kids", "Callie", "Portraits"),

		(None, 0, "Family", "Zamudio", "Brandon and Dakota"),
		(None, 0, "Family", "Zamudio", "Roger"),
		(None, 0, "Family", "Zamudio", "Richie and Angie"),
		(None, 0, "Family", "Johnson", "I I I"),
		(None, 0, "Family", "Johnson", "Larry and Annette"),
		(None, 0, "Family", "Johnson", "Misty"),
		(None, 0, "Family", "Johnson", "Anthony and Latonya"),
		(None, 0, "Family", "Williams", "Teresa"),
		(None, 0, "Family", "Brownell", "Christine"),
						
		(None, 0, "Holidays", "Halloween", "2013"),
		(None, 0, "Holidays", "Halloween", "2014"),
		(None, 0, "Holidays", "Christmas", "2013"),
		(None, 0, "Holidays", "Christmas", "2014"),
		(None, 0, "Holidays", "New Years", "2013"),
		(None, 0, "Holidays", "New Years", "2014"),
		(None, 0, "Holidays", "Easter", "2013"),
		(None, 0, "Holidays", "Easter", "2014"),
		(None, 0, "Holidays", "Valentines Day", "2013"),
		(None, 0, "Holidays", "Valentines Day", "2014"),
		(None, 0, "Holidays", "Dragon Day", "2013"),
		(None, 0, "Holidays", "Dragon Day", "2014"),
		(None, 0, "Holidays", "Thanksgiving", "2013"),
		(None, 0, "Holidays", "Thanksgiving", "2014"),
		(None, 0, "Holidays", "4th of July", "2013"),
		(None, 0, "Holidays", "4th of July", "2014")

	]

	alertData = [
		(None, "Outage Coming up on the 4th", "active", time.time(), time.time() + 2),
		(None, "Callies Birthday Coming up!!", "active", time.time(), time.time() + 0.5)

	]

	#db.executemany('INSERT INTO images VALUES (?,?,?,?,?,?,?)', imageData)
	db.executemany('INSERT INTO tags VALUES (?,?,?)', tagData)
	db.executemany('INSERT INTO sub_tags VALUES (?,?,?,?)', subTagData)
	db.executemany('INSERT INTO event_tags VALUES (?,?,?,?,?)', eventTagData)
	#db.executemany('INSERT INTO alerts VALUES (?,?,?,?,?)', alertData)

	try:
		db_connection.commit()
		db_connection.close()
		log("DataBase: Inserted test data into db", "DATABASE", "INFO")
	except Exception as err:
		tryToCloseDB(db_connection)
		log("DataBase: Unable to insert test data - " + str(err), "DATABASE", "INFO")
 def learning_process_dbscan(self):
     print("DBSCAN train process is initialized!!")
     get_time()
     print("KMeans Finding Best Centroids process is started!!")
     self.find_optimum_centroids_with_kmeans()
     print("Parameter Tuning For Epsilon and Min_Samples!!")
     self.optimum_min_samples()
     self.optimum_epsilon()
     print("number of data for DBSCAN :", len(self.po_data))
     print({
         'eps': self.o_epsilon,
         'min_samples': self.o_min_sample,
         'centroids': set(self.centroids)
     })
     print("Optimum Centriod Divison is Initialized!!!")
     cal_divs = []
     for div in range(2, self.params['centroid_divide_range']):
         print("divide :", div)
         self.get_x_values(div)
         print(len(self.po_data) - self.o_min_sample)
         self.po_data['label_dbscan'] = DBSCAN(
             eps=self.o_epsilon,
             min_samples=len(self.po_data) - self.o_min_sample,
             n_jobs=-1).fit(self.X).labels_
         cal_divs.append({
             "cal": np.mean(np.abs(np.sum([
                 self.get_distance_of_outliers("label_dbscan != -1", max),
                 np.multiply(self.get_distance_of_outliers("label_dbscan == -1", min), -1)
             ]))),
             "div": div
         })
     print("optimum centriod distance to outliers results :")
     print(cal_divs)
     self.o_devision = list(
         pd.DataFrame(cal_divs).sort_values(by='cal', ascending=False)['div'])[0]
     print("optimum division :", self.o_devision)
     print({
         'eps': self.o_epsilon,
         'min_samples': self.o_min_sample,
         'centroids': set(self.centroids),
         "div": self.o_devision
     })
     model_from_to_json(
         main_data_path + self.model_params['args']['model_file'], {
             'eps': self.o_epsilon,
             'min_samples': self.o_min_sample,
             'centroids': dict(zip(self.features, self.centroids)),
             'optimum_divison': self.o_devision
         }, True)
     print("DBSCAN Train Process Done!")