Example #1
 def get_connection(self):
     config = conf('config')
     if self.data_source in ['postgresql', 'awsredshift', 'mysql']:
         cfg = config['db_connection']
         server, db, user, pw, port = (str(cfg['server']), str(cfg['db']),
                                       str(cfg['user']), str(cfg['password']),
                                       int(cfg['port']))
     if self.data_source == 'mysql':
         from mysql import connector
         self.conn = connector.connect(host=server,
                                       database=db,
                                       user=user,
                                       password=pw)
     if self.data_source in ['postgresql', 'awsredshift']:
         import psycopg2
         self.conn = psycopg2.connect(user=user,
                                      password=pw,
                                      host=server,
                                      port=port,
                                      database=db)
     if self.data_source == 'googlebigquery':
         from google.cloud.bigquery.client import Client
         os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = join(
             conf('data_main_path'), "", config['db_connection']['db'])
         self.conn = Client()
     print("db connection is done!")
Example #2
 def down_web(self):
     try:
         from configs import conf
     except Exception as e:
         from .configs import conf
     request_url(url='http://' + conf('web_host') + ':' +
                 str(conf('web_port')) + '/shutdown')
Example #3
 def job_init():
     exception = ''
     req = dict(request.form)
     jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
     process = read_yaml(conf('log_main_path'), 'process.yaml')
     job_names = list(jobs.keys())
     dates = {j: jobs[j]['job_start_date'] for j in job_names}
     current_active_status = {j: jobs[j]['active'] for j in job_names}
     log_infos = get_logs(job_names, dates, current_active_status, process)
     log_infos = update_logs(jobs, log_infos, req)
     return render_template(
         "ml_execute.html",
         train_process=log_infos['train']['process'],
         prediction_process=log_infos['prediction']['process'],
         tuning_process=log_infos['parameter_tuning']['process'],
         train_status=log_infos['train']['status'],
         prediction_status=log_infos['prediction']['status'],
         tuning_status=log_infos['parameter_tuning']['status'],
         train_precent=str(log_infos['train']['percent']),  # note the 'precent' spelling – renaming it would require updating the template too
         prediction_precent=str(log_infos['prediction']['percent']),
         tuning_percent=str(log_infos['parameter_tuning']['percent']),
         train_date=str(log_infos['train']['start_time'])[0:16],
         prediction_date=str(log_infos['prediction']['start_time'])[0:16],
         tuning_date=str(log_infos['parameter_tuning']['start_time'])[0:16],
         exception=exception)
Example #4
 def show_dash():
     jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
     connection = check_available_data_for_dashboard(jobs)
     return render_template('dashboard.html',
                            connection=connection,
                            dash_url="http://" + conf('web_host') + ":" +
                            str(conf('config')['web_port']) + "/dash/")
Example #5
def start_job(job):
    Logger('ml_execute_' + job)
    print("received :", {
        'job': job,
        'process': 'start'
    }, " time :", get_time())
    j = CreateJobs(read_yaml(conf('docs_main_path'), 'ml_execute.yaml'), job)
    if j.job['day'] == 'only once':
        j.job_that_executes_once()
        return 'done!!!!'
    if j.job['day'] in ['Monthly', 'Every 2 Weeks']:
        while True:
            jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
            j.job_that_executes_monthly_weekly()
            print("job is working - ", job)
            if jobs[job]['active'] is False:
                print("job is stopped !!")
                break
            time.sleep(60)
    elif j.job['day'] not in ['only once', 'Monthly', 'Every 2 Weeks']:
        j.job_schedule()
        while True:
            schedule.run_pending()
            jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
            print("job is working - ", job)
            if jobs[job]['active'] is False:
                print("job is stopped !!")
                break
            time.sleep(10)
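The scheduled branch above depends on the schedule library; a self-contained sketch of that run_pending() pattern, with a hypothetical job body standing in for whatever j.job_schedule() registers:

import time
import schedule

def run_job():
    print("executing job")

schedule.every().day.at("10:30").do(run_job)
while True:
    schedule.run_pending()
    time.sleep(10)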
Example #6
def start_job_and_update_job_active(jobs, job):
    jobs[job]['active'] = True
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
    ml_execute_api = read_yaml(conf('docs_main_path'),
                               'apis.yaml')['ml_execute']
    url = get_api_url(ml_execute_api['host'], ml_execute_api['port'],
                      ml_execute_api['api_name'])
    request_url(url, {'job': job})
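get_api_url is not shown on this page; judging from the URLs printed in run_platform (Example #23), it presumably builds something like the following (an assumption, not the project's actual code):

def get_api_url(host, port, api_name):
    return 'http://' + host + ':' + str(port) + '/' + api_name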
Example #7
def get_results(date_col):
    results = []
    for f in listdir(dirname(join(conf('data_main_path'), ""))):
        f_splits = f.split(conf('result_file'))
        if f_splits[0] == "":
            results += pd.read_csv(join(conf('data_main_path'), "", f)).to_dict('records')  # 'records' is the valid orient
    results = pd.DataFrame(results)
    if len(results) >= 1000:
        results = results.sort_values(by=date_col, ascending=True)[-1000:]
    return results
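to_dict('records') returns one dict per row, which is exactly what the results list accumulates before being rebuilt into a DataFrame; a quick illustration with throwaway data:

import pandas as pd

df = pd.DataFrame({'date': ['2021-01-01'], 'value': [3.5]})
print(df.to_dict('records'))  # [{'date': '2021-01-01', 'value': 3.5}]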
Example #8
 def home():
     jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
     configs = read_yaml(conf('docs_main_path'), 'configs.yaml')
     model_configuration = read_yaml(conf('model_main_path'),
                                     'model_configuration.yaml')
     process = read_yaml(conf('log_main_path'), 'process.yaml')
     req = dict(request.form)
     if 'save' in req.keys():
         if bool(req['save']):
             ml_execute_reset(jobs)
             db_connection_reset(configs)
             models_reset(model_configuration)
             logs_reset(process)
     return render_template("home.html", reset_script=reset_script)
Example #9
 def get_time():
     try:
         print("browser time: ", request.args['time'])
         print("server time : ", time.strftime('%A %B, %d %Y %H:%M:%S'))
         jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
         for j in jobs:
             jobs[j]['browser_time'] = str(
                 datetime.datetime.strptime(
                     " ".join(request.args['time'].split()[0:5]),
                     "%a %b %d %Y %H:%M:%S"))[0:13]
         write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
     except Exception as e:
         print(e)
     return "Done"
Example #10
def get_filters(data):
    jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
    #model_conf = read_yaml(conf('model_main_path'), 'model_configuration.yaml')
    model_infos = jobs[list(jobs.keys())[0]]['execute'][0]['params']
    groups, date_col, feature = split_groups(model_infos['groups']), model_infos['time_indicator'], model_infos['feature']
    #t_dimensions = model_conf['infos']['time_groups'].split("*")
    #data = check_for_time_part_groups_on_data(data, t_dimensions, date_col)
    #groups += t_dimensions
    if groups not in ['None', None, []]:
        if len(groups) > 3:
            groups = random.sample(groups, 3)
    else:
        groups = list(set(list(data.columns)) - set([date_col, feature, 'Unnamed: 0.1', 'Unnamed: 0']))
        if len(groups) >= 4:  # compare the count, not the list itself
            groups = list(filter(lambda col: type(col) == str, groups))
            if len(groups) >= 4:
                groups = random.sample(groups, 3) if len(groups) > 3 else [date_col]
    print("filters :", groups)
    num_f_p = len(groups)
    filter_datas = []
    for g in groups:
        filter_datas.append(list(data[data[g] == data[g]][g].unique()) + ['ALL'])
    filter_ids = groups

    filter_sizes = [30] * num_f_p
    multiple_selection = [False] * num_f_p
    values = ['ALL'] * num_f_p
    filters = list(zip(filter_ids, filter_datas, filter_sizes, multiple_selection, values))
    hover_data = [{date_col: min(data[model_infos['time_indicator']])}] * 3
    return num_f_p, filters, hover_data, groups, filter_ids, date_col, feature, data
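The expression data[data[g] == data[g]] is a NaN filter: NaN compares unequal to itself, so the mask is False exactly on missing values. A tiny demonstration:

import numpy as np
import pandas as pd

s = pd.Series(['a', np.nan, 'b'])
print(list(s[s == s]))     # ['a', 'b'] – NaN rows dropped
print(list(s[s.notna()]))  # the equivalent, more explicit form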
Example #11
def ml_execute_update(**update_dict):
    keys = [
        'jobs', 'description', 'data_query_path', 'data_source', 'groups',
        'dates', 'time_indicator', 'feature', 'days'
    ]
    infos = {k: update_dict.get(k, None) for k in keys}
    jobs = infos['jobs']
    for j in jobs:
        jobs[j]['description'] = infos['description']
        jobs[j]['day'] = infos['days'][j] if infos['days'] else None
        jobs[j]['job_start_date'] = str(
            infos['dates'][j][0])[0:16] if infos['dates'] else None
        jobs[j]['job_end_date'] = str(
            infos['dates'][j][1])[0:16] if infos['dates'] and infos['dates'][j][1] else None
        e2 = []
        for e in jobs[j]['execute']:
            for p in e['params']:
                if p in infos.keys():
                    e['params'][p] = str(infos[p])
                if p == 'time_period':
                    e['params'][p] = infos['days'][j] if infos['days'] else None
            e2.append(e)
        jobs[j]['execute'] = e2
    print("ml_execute.yaml is updated!!")
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
Example #12
def db_connection_update(**args):
    configs = read_yaml(conf('docs_main_path'), 'configs.yaml')
    configs['db_connection']['data_source'] = args['data_source']
    configs['db_connection']['is_from_db'] = args['data_source'] not in [
        'csv', 'json', 'pickle'
    ]
    infos = {
        'db': args.get('db_name', None),
        'password': args.get('pw', None),
        'port': args.get('port', None),
        'server': args.get('host', None),
        'user': args.get('user', None)
    }
    for i in infos:
        configs['db_connection'][i] = infos[i]
    write_yaml(conf('docs_main_path'), "configs.yaml", configs)
Example #13
 def create_task():
     exception = ""
     job = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
     params = get_model_arguments(job)
     data, cols, connection = get_sample_data(params,
                                              connection=True,
                                              create_sample_data=False)
     if bool(request.args['messages']) and params['data_source'] and connection:
         if dict(request.form) != {}:
             update_dict = get_request_values(job, params, request)
             if update_dict['feature'] and update_dict['time_indicator'] \
                     and dict(request.form)['date1_prediction'] != '':
                 ml_execute_update(**update_dict)
             else:
                 exception = "Please make sure you have entered the Anomaly Feature and Date Indicator!"
             if get_dash(request):
                 return redirect(url_for('show_dash'))
             else:
                 return render_template("configs_data2.html",
                                        cols=cols,
                                        exception=exception)
         else:
             return render_template("configs_data2.html",
                                    cols=cols,
                                    exception=exception)
     else:
         return redirect(url_for('get_data'))
Example #14
 def date_dimension_deciding(self):
     if self.job != 'prediction':
         self.calculate_date_parts()
         info = {'infos': {'min_date': str(min(list(self.data[self.time_indicator])))[0:19],
                           'max_date': str(max(list(self.data[self.time_indicator])))[0:19],
                           'time_groups': "*".join(self.time_groups)}
                 }
         write_yaml(conf('model_main_path'), 'model_configuration.yaml', info)
     else:
         self.time_groups = read_yaml(conf('model_main_path'), "model_configuration.yaml")['infos'][
             'time_groups'].split("*")
         for t_dimension in self.time_groups:
             if t_dimension not in self.groups:
                 self.data[t_dimension] = self.data[self.date].apply(lambda x: date_part(x, t_dimension))
         if self.time_groups != ['']:
             self.groups += self.time_groups
     print("time parts : ", self.time_groups)
Example #15
def save_model_configurations(job, data, time_indicator, time_groups):
    info = {}
    if job == 'prediction':
        info = {'infos': {'min_date': min(list(data[time_indicator])), 'max_date': max(list(data[time_indicator])),
                          'time_groups': time_groups}
                }
        with open(join(conf('model_main_path'), "model_configuration.yaml"), 'w') as file:
            yaml.dump(info, file)
Example #16
    def query_data_source(self):
        self.check_data_with_filtering()

        # import data via pandas
        if self.data_source in ['mysql', 'postgresql', 'awsredshift']:
            self.get_connection()

            query = self.query + " LIMIT " + str(self.nrows) if self.nrows else self.query
            self.data = pd.read_sql(query, self.conn)

        # import data via google
        if self.data_source == 'googlebigquery':
            self.get_connection()
            query = self.query + " LIMIT " + str(self.nrows) if self.nrows else self.query
            self.data = self.conn.query(query).to_dataframe()

        # import via pandas
        if self.data_source == 'csv':
            try:
                for sep in [',', ';', ':']:

                    self.data = pd.read_csv(filepath_or_buffer=join(
                        conf('data_main_path'), self.data_query_path),
                                            error_bad_lines=False,  # removed in pandas 2.x; use on_bad_lines='skip' there
                                            encoding="ISO-8859-1",
                                            sep=sep,
                                            nrows=self.nrows)
                    if len(self.data.columns) > 1:
                        break
            except Exception as e:
                print(e)

        if self.data_source == 'json':
            self.data = read_write_to_json(conf('directory'),
                                           self.data_query_path,
                                           None,
                                           is_writing=False)

        if self.data_source == 'yaml':
            self.data = read_yaml(conf('data_main_path'), self.data_query_path)

        if self.data_source in ('json', 'yaml', 'csv'):
            self.data = self.query(self.data)  # for flat files, self.query is presumably a callable filter
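The csv branch probes separators by re-reading the file until more than one column appears; the standard library's csv.Sniffer can do the same detection in one pass. A sketch, not the project's code ('sample.csv' is a placeholder):

import csv

with open('sample.csv', encoding='ISO-8859-1') as f:
    dialect = csv.Sniffer().sniff(f.read(4096), delimiters=',;:')
print(dialect.delimiter)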
Example #17
 def anomaly_prediction(self):
     self.model = model_from_to_json(
         path=join(conf('model_main_path'),
                   model_path(self.comb, self.groups, 'lstm')))
     self.prediction = self.scale.inverse_transform(
         self.model.predict(self.prediction))
     self.result = self.result[(len(self.result) - len(self.prediction)):]
     self.result['predict'] = self.prediction.ravel().tolist()  # flatten to a 1-D list
Example #18
def get_reset_script():
    jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
    configs = read_yaml(conf('docs_main_path'), 'configs.yaml')
    reset_script = ""
    if configs['db_connection']['data_source'] not in ['', None]:
        reset_script += "You have a data source connection from " + configs[
            'db_connection']['data_source'] + "."
        active_jobs = []
        for j in jobs:
            if jobs[j]['active'] is True:
                active_jobs.append([j, jobs[j]['day']])
        if len(active_jobs) == 1:
            reset_script += " You also have an active job, " + active_jobs[0][
                0] + ", running " + active_jobs[0][1] + "."
        if len(active_jobs) > 1:
            reset_script += " You also have active jobs, " + ", ".join(
                [j[0] for j in active_jobs]) + ", running " + ", ".join(
                    [j[1] for j in active_jobs]) + ". "
    reset_script += " Would you like to reset them all?"
    return reset_script
Example #19
 def parameter_tuning(self):
     if len(self.levels) == 0:
         self.optimized_parameters = self.parameter_tuning_threading(
             has_comb=False)
     else:
         for self.comb in self.levels:
             self.optimized_parameters[
                 self.get_param_key()] = self.parameter_tuning_threading()
             if not check_request_stoped(self.job):
                 break
     print("updating model parameters")
     pt_config = read_yaml(conf('docs_main_path'), 'parameter_tunning.yaml')
     pt_config['has_param_tuning_first_run']['lstm'] = True
     _key = 'hyper_parameters' if len(self.levels) == 0 else 'combination_params'
     pt_config[_key]['lstm'] = self.optimized_parameters
     write_yaml(conf('docs_main_path'),
                "parameter_tunning.yaml",
                pt_config,
                ignoring_aliases=True)
     self.params = hyper_conf('lstm')
     self.combination_params = hyper_conf('lstm_cp')
Example #20
 def __init__(self, jobs, job_name):
     self.job_name = job_name
     self.jobs_yaml = jobs
     self.job = self.jobs_yaml[job_name]
     self.api_infos = read_yaml(conf('docs_main_path'), 'apis.yaml')
     self.api_info = None
     self.url = None
     self.logger = LoggerProcess(job=self.job_name)
     self.browser_time, self.diff, self.start_time, self.time = ml_execute_times(
         self.job)
     self.schedule = None
     self.total_minutes_in_month = 30 * 24 * 60    # minutes in a 30-day month
     self.total_minutes_in_2_weeks = 15 * 24 * 60  # minutes in 15 days (the code's 'Every 2 Weeks' window)
Example #21
 def stop_job(self, request=True):
     if self.job['active'] is True:  # only an active job needs ml_execute.yaml updated
         self.logger.regenerate_file()
         self.jobs_yaml[self.job_name]['active'] = False
         write_yaml(conf('docs_main_path'), "ml_execute.yaml",
                    self.jobs_yaml)
         for j in self.job['execute']:
             self.api_info = self.api_infos['model_' + j['params']['model']]
             self.url = get_api_url(host=self.api_info['host'],
                                    port=self.api_info['port'],
                                    api_name=self.api_info['api_name'])
             if request:
                 request_url(self.url, self.job['stop_job'])
Example #22
 def train_model(self, save_model=True):
     self.model = IsolationForest(n_estimators=self._p['num_of_trees'],
                                  max_samples='auto',
                                  contamination=self._p['contamination'],
                                  bootstrap=False,
                                  n_jobs=-1,
                                  random_state=42,
                                  verbose=1).fit(self.train[[self.feature]].values)
     if save_model:
         model_from_to_pkl(directory=conf('model_main_path'),
                           path=model_path(self.comb, self.groups, 'iso_f'),
                           model=self.model,
                           is_writing=True)
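A standalone sketch of the same IsolationForest call on synthetic data, with literal hyper-parameter values standing in for self._p:

import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.RandomState(42).normal(size=(1000, 1))
model = IsolationForest(n_estimators=100, max_samples='auto', contamination=0.01,
                        bootstrap=False, n_jobs=-1, random_state=42).fit(X)
print(model.predict(X[:5]))  # 1 = inlier, -1 = anomaly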
Example #23
 def run_platform(self):
     self.platform = BuildPlatform(conf=self.conf,
                                   environment=self.env,
                                   master_node=self.conf.master_node)
     self.platform.initialize()
     if self.conf.master_node:
         try:
             from configs import conf
         except Exception as e:
             from .configs import conf
         self.web_port = conf('web_port')
         self.web_host = conf('web_host')
         print("platform is up!!!")
         print("*" * 5, " WEB APPLICATION ", "*" * 5)
         print('Running on ',
               'http://' + self.web_host + ':' + str(self.web_port) + '/')
     else:
         print("platform is up!!!")
         print("Running Services:")
         for name, api in self.platform.api_file.items():  # api_file is a dict – iterate items, not keys
             print(
                 name, " :", 'http://' + api['host'] + ':' + str(api['port']) +
                 '/' + api['api_name'] + '/')
Example #24
 def learning_process(self, save_model=True):
     self.model.fit(self.train['x_train'],
                    self.train['y_train'],
                    batch_size=self._p['batch_size'],
                    epochs=self._p['epochs'],
                    verbose=0,
                    validation_data=(self.train['x_test'],
                                     self.train['y_test']),
                    shuffle=False)
     if save_model:
         model_from_to_json(path=join(conf('model_main_path'),
                                      model_path(self.comb, self.groups, 'lstm')),
                            model=self.model,
                            is_writing=True)
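A self-contained sketch of the same fit call on a toy LSTM; the input shapes are assumptions, since self.train and self._p are built elsewhere:

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

x = np.random.rand(100, 10, 1)  # (samples, timesteps, features) – assumed layout
y = np.random.rand(100, 1)
model = Sequential([LSTM(8, input_shape=(10, 1)), Dense(1)])
model.compile(optimizer='adam', loss='mse')
model.fit(x, y, batch_size=16, epochs=2, verbose=0,
          validation_data=(x, y), shuffle=False)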
Example #25
def ml_execute_reset(jobs):
    for j in jobs:
        jobs[j]['description'] = None
        jobs[j]['day'] = None
        jobs[j]['job_start_date'] = None
        jobs[j]['job_end_date'] = None
        e2 = []
        for e in jobs[j]['execute']:
            for p in e['params']:
                if p not in ['model', 'job']:
                    e['params'][p] = None
            e2.append(e)
        jobs[j]['execute'] = e2
    print("reset ml-execute.yaml !!!")
    write_yaml(conf('docs_main_path'), "ml_execute.yaml", jobs)
Example #26
def data_source():
    jobs = read_yaml(conf('docs_main_path'), 'ml_execute.yaml')
    model_infos = jobs[list(jobs.keys())[0]]['execute'][0]['params']
    try:
        source = GetData(data_query_path="sample_data.csv",
                         data_source="csv",
                         time_indicator=model_infos['time_indicator'],
                         feature=model_infos['feature'], test=1000)
        source.query_data_source()
        source.convert_feature()
        data = source.data
    except Exception as e:
        data = pd.DataFrame()
        print("no data is available:", e)
    return data
Example #27
 def update_api_file(self, apis=None):
     try:
         from configs import conf
     except Exception as e:
         from .configs import conf
     self.api_file = read_yaml(conf('docs_main_path'), "apis.yaml")
     if apis is not None:
         if not self.master_node:
             self.api_file = {a: self.api_file[a] for a in apis}
         if type(apis) == dict:
             for a in apis:
                 for p in apis[a]:
                     self.api_file[a][p] = apis[a][p]
         else:
             self.api_file = self.api_file[apis]  # a single api name – the result is that entry, not a dict
Example #28
def get_sample_data(params, connection, create_sample_data=True):
    data, cols = None, None
    try:
        sample_size = 10 if not create_sample_data else 1000
        d = GetData(data_query_path=params['data_query_path'],
                    data_source=params['data_source'],
                    test=sample_size)
        d.query_data_source()
        cols = d.data.columns.values
        # data = d.data.to_html(classes=["table table-bordered table-striped table-hover table-sm"])
        if create_sample_data:
            d.data.to_csv(join(conf('data_main_path'), 'sample_data.csv'))
    except Exception as e:
        print(e)
        connection = False
    return data, cols, connection
Example #29
 def prediction_execute(self):
     for self.comb in self.levels:
         print(
             "*" * 4, "ISO FOREST - ",
             self.get_query().replace(" and ", "; ").replace(" == ", " - "),
             "*" * 4)
         if check_model_exists(model_path(self.comb, self.groups, 'iso_f'),
                               conf('model_main_path')):
             self.f_w_data = self.data.query(
                 self.get_query()).sort_values(by=self.date)
             self.split_data(is_prediction=True)
             print("prediction size :", len(self.prediction))
             self.detect_anomalies()
         self.logger.counter()
         if not check_request_stoped(self.job):
             break
     self.anomaly = DataFrame(self.anomaly)
Example #30
 def check_for_ports(self, service_count):
     """
     checks for available ports. It picks prosts from the range between 6000 - 7000.
     :param service_count: number of service which cpecifically assigned for this configuration.
                           By defaults finds available port for each services.
     """
     try:
         from configs import conf
     except Exception as e:
         from .configs import conf
     if self.cd.check_for_directory():
         count = 0
         available_ports = conf('available_ports')
         while len(self.ports) != service_count:
             if not is_port_in_use(available_ports[count]):
                 self.ports.append(int(available_ports[count]))
             count += 1
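is_port_in_use is not defined on this page; a common implementation it may correspond to (an assumption, not necessarily the project's code):

import socket

def is_port_in_use(port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(('localhost', port)) == 0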