def create_model(current_game_name):
    """Regenerate the input model class of a game inside ``db_models.py``.

    The game record is fetched by ``game_name``.  When a model is already
    registered under the game's ``input_model`` name, its table is dropped
    first.  The models file is then rewritten: every line from the first one
    mentioning the model name up to (but not including) the next blank line
    is left out, and a freshly generated class with an ``id`` column and one
    ``nr_<n>`` column per number of the game is appended.  Finally
    ``BASE.metadata.create_all`` is run against the database engine.

    If anything fails while the file is being rewritten, the previous file
    content is written back and the exception is re-raised.
    """
    models = 'db_models.py'

    ldb = LotteryDatabase(True)
    curr_game = ldb.fetchone(LottoGame, {'game_name': current_game_name})

    if ldb.check_model_exist_by_table_name(curr_game.input_model):
        ldb.set_model_by_table_name(curr_game.input_model).__table__.drop(ldb.engine)

    # Keep the untouched content around so it can be restored on failure.
    with open(models, 'r') as models_file:
        models_code = models_file.readlines()

    try:
        with open(models, 'w') as models_file:
            skipping = False
            for code_line in models_code:
                if curr_game.input_model in code_line:
                    skipping = True
                if skipping and code_line.strip() == '':
                    # A blank line ends the old class definition.
                    skipping = False
                if skipping:
                    continue
                models_file.write(code_line)

            header_lines = [
                'class {}(BASE):'.format(curr_game.input_model) + '\n',
                " __tablename__ = '{}'".format('INPUT_' + curr_game.game_table) + '\n',
                " id = Column('id', Integer, primary_key=True)" + '\n',
            ]
            column_lines = []
            for n in range(1, curr_game.game_len + 1):
                column_lines.append(
                    " {} = Column('{}', Integer)".format('NR_'.lower() + str(n), 'NR_' + str(n)) + '\n')

            models_file.write('\n\n')
            models_file.writelines(header_lines + column_lines)

        BASE.metadata.create_all(bind=ldb.engine)

    except Exception:
        # Roll the file back to what it was before this call.
        with open(models, 'w') as models_file:
            models_file.writelines(models_code)
        raise
class ConvertModel:
    """Turn stored lottery draws into feature rows for training and prediction.

    An instance is bound to a worker object.  From it the class reads the
    target table name, the main window widgets (feature list, sample-size
    combo box, win/loss check box) and the signals used to report status and
    progress.
    """

    def __init__(self, worker, initial_table=True, last_draw=None, limit=0):
        """Resolve the current game and optionally rebuild the target table.

        Args:
            worker: Object exposing ``table_name``, ``window`` and the
                ``signal_status`` / ``signal_progress_bar`` signals.
            initial_table: When true, the table named ``worker.table_name`` is
                dropped and recreated with one column per selected feature.
            last_draw: Numbers each prediction sample is compared against to
                compute its label; a built-in default is used when omitted.
            limit: Subtracted from the input record count, so that many of the
                latest input records are left out of the loops.
        """
        # class initialize
        self.worker = worker
        self.table_name = self.worker.table_name
        self.game_name = self.worker.window.line_current_game.text()

        # The game record and counters are needed by every public method, so
        # they are resolved even when the target table is not rebuilt.
        self.ldb = self.worker.window.ldb
        self.curr_game = self.ldb.fetchone(LottoGame, {'name': self.game_name})
        self.input_table = 'INPUT_' + self.curr_game.input_table

        if initial_table:
            self.__rebuild_table()

        self.input_record_count = self.ldb.get_table_length(
            DYNAMIC_CLASS[self.curr_game.input_model]) + 1 - limit

        # variables
        if last_draw is None:
            self.last_draw = [13, 18, 29, 32, 37]
        else:
            self.last_draw = last_draw

        self.total_game_numbers = self.curr_game.total_numbers + 1
        self.training_size_limit = int(self.worker.window.combo_test_size.currentText())
        self.single_draw_len = self.curr_game.length + 1
        self.labels = [0, 12, 34, 56]
        self.win = {'LABEL': 1}
        self.loss = {'LABEL': 0}
        self.rash_one = 5
        self.rash_two = 5
        self.rash_three = 5
        self.rash_default = 5

    def __rebuild_table(self):
        """Drop and recreate ``self.table_name`` with the selected feature columns."""
        # features
        table_headers = {'__tablename__': self.table_name,
                         'id': Column('id', Integer, primary_key=True),
                         'DRAFT_ID': Column('DRAFT_ID', Integer)}

        for feature in self.curr_game.model_features:
            # Only features present in the window's feature list get columns.
            match_items = self.worker.window.list_model.findItems(feature.name, Qt.MatchExactly)
            if len(match_items) > 0:
                for n in range(1, feature.length + 1):
                    column_name = feature.header + str(n)
                    table_headers[column_name] = Column(column_name, Integer)

        table_headers['LABEL'] = Column('LABEL', Integer)

        self.ldb.delete_table(self.table_name)
        self.ldb.delete_model_by_table_name(self.table_name)
        self.ldb.create_class_model(self.curr_game.training_model, table_headers)
        self.ldb.meta_create_all()

    @staticmethod
    def __append_drawn(current_array, set_of_six):
        """Return the entries of ``current_array`` at the 1-based positions in ``set_of_six``."""
        return [current_array[drawn - 1] for drawn in set_of_six]

    def __append_rash_group(self, sample):
        """Return the group id (1, 2 or 3) of every number in ``sample`` that belongs to a group."""
        # NOTE(review): curr_game is accessed with attributes everywhere else in
        # this class; the subscript access here presumably predates that and may
        # not be supported by the fetched record - confirm.
        new_set = []
        for num in sample:
            if num in self.curr_game['groups']['first_group']:
                new_set += [1]
            elif num in self.curr_game['groups']['second_group']:
                new_set += [2]
            elif num in self.curr_game['groups']['third_group']:
                new_set += [3]
        return new_set

    def __append_alpha_group(self, sample):
        """Return one 0/1 flag per (number, alphabetic group) pair, 1 when the number is in the group."""
        # NOTE(review): same subscript access on curr_game as in
        # __append_rash_group - confirm it is supported.
        alpha_group = []
        for n in sample:
            for g in self.curr_game['alphabetic_groups'].items():
                if n in g[1]:
                    alpha_group += [1]
                else:
                    alpha_group += [0]
        return alpha_group

    def __map_last_draw(self, curr_draw):
        """Map ``IN_LAST_<n>`` keys to the leading entries of ``curr_draw``."""
        return {'IN_LAST_' + str(n): i for n, i in zip(range(1, self.single_draw_len), curr_draw)}

    def __append_in_last_draw(self, sample, curr_draw):
        """Return a 0/1 list marking which game numbers are in ``sample``.

        When ``curr_draw`` holds no zero at all (for example when it is still
        empty), an all-zero list is returned instead.
        """
        if curr_draw.count(0) == 0:
            last_draw = [0 for _ in range(1, self.total_game_numbers)]
        else:
            last_draw = [1 if n in sample else 0 for n in range(1, self.total_game_numbers)]
        return last_draw

    def __append_numbers_cycle(self, sample, curr_cycle):
        """Return the ``CYCLE_<n>`` mapping updated with the numbers in ``sample``.

        When ``curr_cycle`` holds no zero value (empty, or every number already
        seen) the cycle restarts with all zeros; otherwise numbers found in
        ``sample`` are set to 1 and the others keep their current value.
        """
        if sum(map((0).__eq__, curr_cycle.values())) == 0:
            numbers_cycle = {'CYCLE_' + str(n): 0 for n in range(1, self.total_game_numbers)}
        else:
            numbers_cycle = {'CYCLE_' + str(n): 1 if n in sample else curr_cycle['CYCLE_' + str(n)]
                             for n in range(1, self.total_game_numbers)}
        return numbers_cycle

    def append_hot_cold_warm_cool(self, top_numbers, n_top, large_small):
        """Flag the ``n_top`` most ('L') or least (anything else) frequent numbers.

        Args:
            top_numbers: Mapping of number (as string) to its occurrence count.
            n_top: How many numbers to select.
            large_small: ``'L'`` selects the largest counts, any other value the
                smallest.

        Returns:
            A 0/1 list with one entry per game number.
        """
        if large_small == 'L':
            selected = nlargest(n_top, top_numbers, key=top_numbers.get)
        else:
            selected = nsmallest(n_top, top_numbers, key=top_numbers.get)
        return [1 if str(n) in selected else 0 for n in range(1, self.total_game_numbers)]

    @staticmethod
    def __append_count_label(sample, count_list):
        """Return how many times the values of ``count_list`` occur in ``sample``."""
        count = Counter(sample)
        return sum(count[x] for x in count_list)

    def __map_number_map(self, sample):
        """Map ``MAP_<n>`` to 1 for every game number present in ``sample``, else 0."""
        return {'MAP_' + str(n): 1 if n in sample else 0 for n in range(1, self.total_game_numbers)}

    def __map_original_numbers(self, sample):
        """Map ``ORIGINAL_<n>`` keys to the numbers of ``sample`` in order."""
        return {'ORIGINAL_' + str(n): i for n, i in zip(range(1, self.single_draw_len), sample)}

    def __append_rash(self, array):
        """Return a percentage per grouped number, based on the ``rash_*`` counters."""
        # NOTE(review): ``data`` is hard-wired to 1 (the lookups are commented
        # out), so the ``is None`` branches can never run; kept as found.
        rash_group = []
        for num in array:
            if num in self.curr_game['groups']['first_group']:
                data = 1  # first_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_one) * 100, 2)
                else:
                    per = round(float(data / self.rash_one) * 100, 2)
                rash_group.append(per)
            elif num in self.curr_game['groups']['second_group']:
                data = 1  # second_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_two) * 100, 2)
                else:
                    per = round(float(data / self.rash_two) * 100, 2)
                rash_group.append(per)
            elif num in self.curr_game['groups']['third_group']:
                data = 1  # third_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_three) * 100, 2)
                else:
                    per = round(float(data / self.rash_three) * 100, 2)
                rash_group.append(per)
        return rash_group

    def __append_db(self, params):
        """Add one record (column name -> value) to the training model table."""
        self.ldb.add_record(DYNAMIC_CLASS[self.curr_game.training_model], params)

    def __selected_features(self):
        """Return the texts of all items in the window's feature list, in order."""
        list_model = self.worker.window.list_model
        return [list_model.item(i).text() for i in range(list_model.count())]

    @staticmethod
    def __iter_samples(numbers):
        """Yield every strictly increasing 5-number sample from sorted ``numbers``.

        For a sorted input this yields the same samples, in the same order, as
        five nested loops guarded by ``a < b < c < d < e``.
        """
        for combo in combinations(numbers, 5):
            # Equal neighbours can only come from duplicated input numbers.
            if all(x < y for x, y in zip(combo, combo[1:])):
                yield list(combo)

    def __build_features(self, names, sample_array, cycle_numbers, curr_cycle, curr_draw, top_numbers):
        """Merge the columns of every selected feature into one row dict.

        Args:
            names: Selected feature names, in list order.
            sample_array: The sample the row is built for.
            cycle_numbers: Numbers used to update the ``number_cycles`` feature.
            curr_cycle: Current ``CYCLE_<n>`` mapping.
            curr_draw: Current "in last draw" flags.
            top_numbers: Occurrence counts used by the hot/cold/warm/cool features.
        """
        # NOTE(review): the rash/alphabetic/hot/cold/warm/cool helpers return
        # lists, which cannot be unpacked with ``**``; selecting one of those
        # features raises TypeError here - column naming needs to be decided.
        combined_set = {}
        for name in names:
            if name == 'number_map':
                combined_set = {**combined_set, **self.__map_number_map(sample_array)}
            elif name == 'number_cycles':
                combined_set = {**combined_set, **self.__append_numbers_cycle(cycle_numbers, curr_cycle)}
            elif name == 'original_numbers':
                combined_set = {**combined_set, **self.__map_original_numbers(sample_array)}
            elif name == 'in_last_draw':
                combined_set = {**combined_set, **self.__map_last_draw(curr_draw)}
            elif name == 'rash_group':
                combined_set = {**combined_set, **self.__append_rash_group(sample_array)}
            elif name == 'alphabetic_group':
                combined_set = {**combined_set, **self.__append_alpha_group(sample_array)}
            elif name == 'hot numbers':
                combined_set = {**combined_set, **self.append_hot_cold_warm_cool(top_numbers, 10, 'L')}
            elif name == 'cold numbers':
                combined_set = {**combined_set, **self.append_hot_cold_warm_cool(top_numbers, 10, 'S')}
            elif name == 'warm numbers':
                combined_set = {**combined_set, **self.append_hot_cold_warm_cool(top_numbers, 20, 'L')}
            elif name == 'cool numbers':
                combined_set = {**combined_set, **self.append_hot_cold_warm_cool(top_numbers, 20, 'S')}
        return combined_set

    def create_prediction_model(self, input_array):
        """Store one feature row per 5-number sample built from ``input_array``.

        Args:
            input_array: Space separated integers typed by the user.  Random
                numbers are added until the configured sample size is reached.
        """
        self.ldb.create_new_session()

        my_list = list(map(int, input_array.split(" ")))
        my_list = self.__add_random(my_list)

        names = self.__selected_features()
        top_numbers = self.__create_top_numbers(self.input_record_count - 100)
        number_cycles = self.__get_latest_number_cycle()
        curr_draw = self.__get_latest_draw()

        # Progress is measured against the number of samples actually produced.
        total_combinations = sum(1 for _ in self.__iter_samples(my_list))

        ids = 1
        for sample_array in self.__iter_samples(my_list):
            combined_set = self.__build_features(
                names, sample_array, sample_array, number_cycles, curr_draw, top_numbers)
            label = self.__append_count_label(sample_array, self.last_draw)

            # Same row layout as create_training_model; prediction rows carry
            # draft id 0.
            self.__append_db({**{'DRAFT_ID': 0}, **combined_set, **{'LABEL': label}})

            self.worker.signal_progress_bar.emit((ids / total_combinations) * 100)
            ids += 1

        self.worker.signal_progress_bar.emit(0)
        # NOTE(review): other methods use session_commit(); confirm db_commit()
        # is still provided by the database wrapper.
        self.ldb.db_commit()

    def convert_to_original(self):
        """Export the prediction table and its outputs to a CSV file under ``archived/``."""
        self.ldb = LotteryDatabase()
        combo_predict = self.worker.window.combo_predict_model
        self.table_name = 'PREDICT_' + combo_predict.currentText()

        now = datetime.datetime.now()
        file_name = str.format('{} {}', combo_predict.currentText(), now.strftime("%Y-%m-%d %H %M %S"))
        export_columns = ['FIRST', 'SECOND', 'THIRD', 'FOURTH', 'FIFTH', 'SIXTH', 'LABEL', 'OUTPUT']

        with open('archived/' + file_name + '.csv', 'a', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(export_columns)

            for o in range(1, self.input_record_count):
                # NOTE(review): fetchone is called with (table name, row number)
                # here but with (model, filter dict) elsewhere - confirm.
                fetch_one = list(self.ldb.fetchone(self.table_name, o))
                fetch_output = list(self.ldb.fetchone('OUTPUT_prediction', o))

                originals = fetch_one[1:50]
                label_column = [fetch_one[-1]]
                output_column = [fetch_output[-1]]

                # Positions flagged with 1 are converted back to 1-based numbers.
                output_list = [n + 1 for n in range(0, len(originals)) if originals[n] == 1]
                output_list = output_list + label_column + output_column

                writer.writerow(output_list)
                self.worker.signal_status.emit(
                    'Export in progress: {} of {}.'.format(o, self.input_record_count - 1))

        self.worker.signal_status.emit('')

    def __get_latest_number_cycle(self):
        """Replay all input records and return the resulting ``CYCLE_<n>`` mapping."""
        # The cycle must start as a dict: __append_numbers_cycle reads .values().
        curr_cycle, fetch_one = {}, []

        for o in range(1, self.input_record_count):
            curr_cycle = self.__append_numbers_cycle(fetch_one[1:self.single_draw_len], curr_cycle)
            # NOTE(review): confirm db_commit() exists; see create_prediction_model.
            self.ldb.db_commit()
            fetch_one = list(self.ldb.fetchone(self.input_table, o))

        curr_cycle = self.__append_numbers_cycle(fetch_one[1:self.single_draw_len], curr_cycle)
        return curr_cycle

    def __get_latest_draw(self):
        """Replay all input records and return the "in last draw" flags of the newest one."""
        curr_draw, fetch_one = [], []

        for o in range(1, self.input_record_count):
            curr_draw = self.__append_in_last_draw(fetch_one[1:self.single_draw_len], curr_draw)
            self.ldb.session_commit()
            fetch_one = list(self.ldb.fetchone(self.input_table, o))

        curr_draw = self.__append_in_last_draw(fetch_one[1:self.single_draw_len], curr_draw)
        return curr_draw

    def __create_top_numbers(self, offset):
        """Count how often each number occurs in the records selected by ``offset``.

        Returns:
            Mapping of number (as string) to occurrence count.
        """
        top_numbers = {}
        last = self.ldb.limit_offset_query(DYNAMIC_CLASS[self.curr_game.input_model], offset, offset - 200)

        for sample in last:
            for position in range(1, self.single_draw_len):
                number = str(getattr(sample, 'NR_' + str(position)))
                top_numbers[number] = top_numbers.get(number, 0) + 1

        return top_numbers

    def get_latest_pairs(self, pair_size):
        """Return the 100 most frequent ``pair_size``-combinations of recent input rows."""
        pairs = {}
        sql_ct = str.format("SELECT * FROM {} limit {} offset {}",
                            self.input_table, 366, self.input_record_count - 367)
        self.ldb.execute(sql_ct)
        last = self.ldb.c.fetchmany(self.input_record_count)

        for sample in last:
            for c in combinations(sample, pair_size):
                pairs[c] = pairs.get(c, 0) + 1

        return nlargest(100, pairs, key=pairs.get)

    def get_latest_top(self):
        """Return the occurrence counts used for the hot/cold features of the latest records."""
        return self.__create_top_numbers(self.input_record_count - 100)

    def __add_random(self, o_num, limit=True):
        """Return a sorted copy of ``o_num`` padded with random unused game numbers.

        Args:
            o_num: Numbers that must be part of the result.
            limit: When true the target size is the configured training sample
                size, otherwise ``total_game_numbers + 1``.

        The target size is capped at the number of distinct values that can
        exist, and input that already reaches it is returned (sorted) as is, so
        the loop always terminates.
        """
        if limit:
            sample_size = self.training_size_limit
        else:
            sample_size = self.total_game_numbers + 1

        numbers = list(o_num)
        available = len(set(numbers) | set(range(1, self.total_game_numbers)))
        target = min(sample_size, available)

        while len(numbers) < target:
            r = random.randrange(1, self.total_game_numbers)
            if r not in numbers:
                numbers.append(r)

        numbers.sort()
        return numbers

    def create_training_model(self):
        """Build the training table from every stored draw.

        For each input record the drawn numbers are padded with random numbers,
        every 5-number sample of that set becomes one row, and the row is
        labelled by how many of its numbers were actually drawn.

        Returns:
            ``(win_count, loss_count)`` when the win/loss check box is ticked,
            otherwise ``(zero, one, two, three, four)`` class counters.
        """
        self.ldb.create_new_session()

        # Read the widgets once so every row of this run uses the same layout.
        names = self.__selected_features()
        use_win_loss = self.worker.window.check_win_loss.isChecked()

        win_count, loss_count = 0, 0
        # ``four`` is never incremented: four and five hits share LABEL 3.
        zero, one, two, three, four = 0, 0, 0, 0, 0
        curr_cycle = {}
        fetch_one, curr_draw = [], []
        start_time = time.time()

        for o in range(1, self.input_record_count):
            curr_cycle = self.__append_numbers_cycle(fetch_one, curr_cycle)
            curr_draw = self.__append_in_last_draw(fetch_one, curr_draw)
            top_numbers = self.__create_top_numbers(o)

            record_set = self.ldb.fetchone(DYNAMIC_CLASS[self.curr_game.input_model], {'id': o})
            fetch_one = [getattr(record_set, 'NR_' + str(i)) for i in range(1, self.single_draw_len)]
            my_list = self.__add_random(fetch_one)

            # Average seconds per processed draw, projected on the remaining draws.
            avg_time = (time.time() - start_time) / o
            eta = avg_time * (self.input_record_count - o)

            self.worker.signal_status.emit(self.__print_run_time(eta))
            self.worker.signal_progress_bar.emit(((o + 1) / self.input_record_count) * 100)

            for sample_array in self.__iter_samples(my_list):
                combined_set = self.__build_features(
                    names, sample_array, fetch_one, curr_cycle, curr_draw, top_numbers)
                hits = self.__append_count_label(sample_array, fetch_one)

                if use_win_loss:
                    if hits < 3:
                        self.__append_db({**{'DRAFT_ID': o}, **combined_set, **self.loss})
                        loss_count += 1
                    else:
                        self.__append_db({**{'DRAFT_ID': o}, **combined_set, **self.win})
                        win_count += 1
                elif hits in (0, 1):
                    self.__append_db({**{'DRAFT_ID': o}, **combined_set, **{'LABEL': 0}})
                    zero += 1
                elif hits == 2:
                    self.__append_db({**{'DRAFT_ID': o}, **combined_set, **{'LABEL': 1}})
                    one += 1
                elif hits == 3:
                    self.__append_db({**{'DRAFT_ID': o}, **combined_set, **{'LABEL': 2}})
                    two += 1
                elif hits in (4, 5):
                    self.__append_db({**{'DRAFT_ID': o}, **combined_set, **{'LABEL': 3}})
                    three += 1

        self.ldb.session_commit()
        self.ldb.session_close()

        if use_win_loss:
            return win_count, loss_count
        return zero, one, two, three, four

    @staticmethod
    def __print_run_time(seconds):
        """Format ``seconds`` as an ``Estimate time remaining: HH:MM:SS`` message."""
        seconds = int(seconds)
        hours = seconds // 3600
        minutes = (seconds - 3600 * hours) // 60
        seconds = seconds - 3600 * hours - 60 * minutes
        return 'Estimate time remaining: {:02}:{:02}:{:02}'.format(hours, minutes, seconds)

    def __create_rash_group(self):
        """Update the ``rash_*`` counters from the input table.

        NOTE(review): this method is not called anywhere in the class, its
        computed ``rash_group`` list is discarded, ``new_set`` is never reset
        between rows and ``d`` may be ``None``; it is kept as found and should
        be reviewed before use.
        """
        self.ldb = LotteryDatabase()
        fetch_a = self.ldb.execute(self.input_table)

        first_data_group = defaultdict(int)
        second_data_group = defaultdict(int)
        third_data_group = defaultdict(int)
        new_set = []

        for line in fetch_a:
            line = line[1:7]

            for num in line:
                if num in self.curr_game['groups']['first_group']:
                    new_set.append(1)
                elif num in self.curr_game['groups']['second_group']:
                    new_set.append(2)
                elif num in self.curr_game['groups']['third_group']:
                    new_set.append(3)

            count = Counter(new_set)

            if count[1] == 4:
                self.rash_one += 1
                for x, y in zip(line, new_set):
                    if y == 1:
                        first_data_group[x] += 1
            elif count[2] == 4:
                self.rash_two += 1
                for x, y in zip(line, new_set):
                    if y == 2:
                        second_data_group[x] += 1
            elif count[3] == 4:
                self.rash_three += 1
                for x, y in zip(line, new_set):
                    if y == 3:
                        third_data_group[x] += 1

            rash_group = []

            for num in line:
                if num in self.curr_game['groups']['first_group']:
                    d = first_data_group.get(num)
                    if d == "":
                        per = round(float(1 / self.rash_one) * 10, 2)
                    else:
                        per = round(float(d / self.rash_one) * 10, 2)
                    rash_group.append(per)
                elif num in self.curr_game['groups']['second_group']:
                    d = second_data_group.get(num)
                    if d == "":
                        per = round(float(1 / self.rash_two) * 10, 2)
                    else:
                        per = round(float(d / self.rash_two) * 10, 2)
                    rash_group.append(per)
                elif num in self.curr_game['groups']['third_group']:
                    d = third_data_group.get(num)
                    if d == "":
                        per = round(float(1 / self.rash_three) * 10, 2)
                    else:
                        per = round(float(d / self.rash_three) * 10, 2)
                    rash_group.append(per)

        self.ldb.__del__()
def run(self):
    """Execute the job named by ``self.window.select_thread``.

    Supported job names are ``process_input``, ``update_la_jolla``,
    ``create_model``, ``process_embedded``, ``sklearn_ml``, ``keras_ml``,
    ``export_to_csv`` and ``import_data``; any other value does nothing.
    Results and errors are reported through ``signal_infobox``; progress bars
    are reset through ``signal_progress_bar``.
    """
    # Imported locally so the polling delay below does not depend on the
    # module-level imports.
    import time

    process_name = self.window.select_thread

    if process_name == "process_input":

        if self.window.input_line.text() == "":
            self.signal_infobox.emit(
                'Missing input', 'No input numbers to proceed. Please try again.')
        else:
            self.table_name = 'PREDICT_' + self.window.combo_predict_model.currentText()
            try:
                convert = ConvertModel(self)
                ml = MachineLearning(self)
                convert.create_prediction_model(self.window.input_line.text())
                ml.random_forest_predict()
                self.signal_progress_bar.emit(0)
                self.signal_infobox.emit('Completed', 'Prediction model created!')
            except Exception as exc:
                self.signal_infobox.emit('Error', 'Something went wrong!! ' + str(exc))
                self.signal_progress_bar.emit(0)

    elif process_name == 'update_la_jolla':

        try:
            ldb = LotteryDatabase()
            imported, rejected = ldb.import_la_jolla()
            self.signal_infobox.emit(
                'Completed',
                str.format('Lottery data imported! \n Imported: {} \n Rejected: {}',
                           imported, rejected))
        except Exception as exc:
            self.signal_infobox.emit('Error', 'Something went wrong!! ' + str(exc))
            self.signal_progress_bar.emit(0)

    elif process_name == "create_model":

        if self.window.combo_db.currentText() != '':
            self.table_name = 'MODEL_' + self.window.combo_db.currentText()

            # Drop any answer left over from an earlier question, then ask.
            self.window.response = None
            self.signal_qbox.emit('Create', 'Do you want create new model?')

            # The answer is stored on the window by the question-box slot;
            # sleep between polls instead of spinning at full speed.
            while self.window.response is None:
                time.sleep(0.05)

            # Consume the answer so that a "No" cannot be reused next time.
            response = self.window.response
            self.window.response = None

            if response == QMessageBox.Yes:
                try:
                    convert = ConvertModel(self)
                    if self.window.check_win_loss.isChecked():
                        win, loss = convert.create_training_model()
                        self.signal_progress_bar.emit(0)
                        self.signal_infobox.emit(
                            'Completed',
                            'Training model created! \n' +
                            'Win Classification: ' + str(win) + '\n' +
                            'Loss Classification: ' + str(loss))
                    else:
                        zero, one, two, three, four = convert.create_training_model()
                        self.signal_progress_bar.emit(0)
                        self.signal_infobox.emit(
                            'Completed',
                            'Training model created! \n' +
                            'First Classification: ' + str(zero) + '\n' +
                            'Second Classification: ' + str(one) + '\n' +
                            'Third Classification: ' + str(two) + '\n' +
                            'Fourth Classification: ' + str(three) + '\n' +
                            'Fifth Classification: ' + str(four))
                except Exception as exc:
                    self.signal_infobox.emit('Error', 'Something went wrong!! ' + str(exc))
                    self.signal_progress_bar.emit(0)
        else:
            self.signal_infobox.emit('Missing', 'Select model first!')

    elif process_name == "process_embedded":

        if self.window.input_line.text() == "" and not self.window.check_latest.isChecked():
            self.signal_infobox.emit(
                'Missing input', 'No input numbers to proceed. Please try again.')

        elif self.window.check_latest.isChecked():
            ldb = LotteryDatabase()
            ldb_original = 'INPUT_' + CONFIG['games']['mini_lotto']['database']
            original_len = ldb.get_table_length(ldb_original)

            # Walk backwards over the 31 most recent input records.
            for i in range(1, 32):
                try:
                    fetch_one = list(ldb.fetchone(ldb_original, original_len - i + 1))

                    # Run the embedded learning once per entry of the model combo box.
                    for j in range(self.window.combo_db.count()):
                        self.window.combo_db.setCurrentIndex(j)
                        self.table_name = 'MODEL_' + self.window.combo_db.currentText()
                        self.currentThread().__name__ = "MainThread"
                        ml = MachineLearning(self)
                        _ = ml.embedded_learning(
                            " ".join(map(str, fetch_one[1:6])), i, fetch_one[0])
                except Exception as exc:
                    self.signal_infobox.emit('Error', 'Something went wrong!! \n' + str(exc))

            self.signal_infobox.emit('Done', 'Finished!!')

        else:
            self.table_name = 'MODEL_' + self.window.combo_db.currentText()
            self.currentThread().__name__ = "MainThread"
            ml = MachineLearning(self)
            try:
                output = ml.embedded_learning(self.window.input_line.text())
                self.signal_infobox.emit(
                    'Completed', 'Embedded Training finished! \n' + output)
            except Exception as exc:
                self.signal_infobox.emit('Error', 'Something went wrong!! \n' + str(exc))

    elif process_name == "sklearn_ml":

        if len(self.window.list_ml.selectedItems()) > 0:
            self.table_name = 'MODEL_' + self.window.combo_db.currentText()
            self.currentThread().__name__ = "MainThread"
            ml = MachineLearning(self)
            try:
                ml.sklearn_model_train()
                self.signal_infobox.emit('Completed', 'Training model finished!')
            except Exception as exc:
                self.signal_infobox.emit('Error', 'Something went wrong!! \n' + str(exc))
        else:
            self.signal_infobox.emit('Missing', 'Algorithm has not been selected!')

    elif process_name == "keras_ml":

        self.table_name = 'MODEL_' + self.window.combo_db.currentText()
        self.currentThread().__name__ = "MainThread"
        ml = MachineLearning(self)
        try:
            ml.keras_model_train()
            self.signal_infobox.emit('Completed', 'Training model finished!')
        except Exception as exc:
            self.signal_infobox.emit('Error', 'Something went wrong!! \n' + str(exc))
            print(traceback.format_exc())

    elif process_name == "export_to_csv":

        export_to = ConvertModel(self, False)
        try:
            export_to.convert_to_original()
            self.signal_progress_bar.emit(0)
            self.signal_infobox.emit('Completed', 'Export complete!')
        except Exception as exc:
            self.signal_infobox.emit('Error', 'Something went wrong!! \n' + str(exc))
            self.signal_progress_bar.emit(0)

    elif process_name == "import_data":

        # NOTE(review): if run() executes in a worker thread this file dialog is
        # opened outside the GUI thread - confirm this is supported/intended.
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        file_name, _ = QFileDialog.getOpenFileName(
            self.window, "Import file", "",
            "All Files (*);;Text Files (*.txt)", options=options)

        if file_name:
            ldb = LotteryDatabase()
            curr_game = ldb.fetchone(
                LottoGame, {'name': self.window.line_current_game.text()})

            # Create the input table model on first import.
            if not ldb.check_model_exist_by_table_name('INPUT_' + curr_game.input_table):
                input_params = {
                    '__tablename__': 'INPUT_' + curr_game.input_table,
                    'id': Column('id', Integer, primary_key=True)
                }
                for i in range(1, curr_game.length + 1):
                    input_params['NR_' + str(i)] = Column('NR_' + str(i), Integer)

                ldb.create_class_model(curr_game.input_model, input_params)
                ldb.meta_create_all()

            imported, rejected = ldb.import_file(
                'INPUT_' + curr_game.input_table, file_name, curr_game.length + 1)

            self.signal_infobox.emit(
                'Completed',
                str.format('Lottery data imported! \n '
                           'Imported: {} \n '
                           'Rejected: {}', imported, rejected))
class DatabaseManagerDialog(QtBaseDbClass, Ui_DbDialog):
    """Dialog for adding and removing the model / prediction tables of the current game.

    Changes are collected in ``created`` and ``deleted`` while the dialog is
    open and only written to the database by ``save_database``.  Both dicts
    map the full table name (``MODEL_<name>`` / ``PREDICT_<name>``) to a kind
    flag: 0 for model tables, 1 for prediction tables.
    """

    MODEL_PREFIX = 'MODEL_'
    PREDICT_PREFIX = 'PREDICT_'

    def __init__(self, window, parent=None):
        """Build the dialog, load the existing tables and wire the buttons.

        Args:
            window: Main window; its current game and combo boxes are used.
            parent: Optional parent widget passed to the Qt base class.
        """
        super(DatabaseManagerDialog, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.window = window
        self.ldb = LotteryDatabase()
        self.db_manager_init()

        # variables
        self.deleted = {}
        self.created = {}

        # buttons
        self.btn_add_model.clicked.connect(self.add_model)
        self.btn_delete_model.clicked.connect(self.delete_model)
        self.btn_add_predict.clicked.connect(self.add_predict)
        self.btn_delete_predict.clicked.connect(self.delete_predict)
        self.btn_save.clicked.connect(self.save_database)
        self.btn_cancel.clicked.connect(self.close_db_manager)

    def db_manager_init(self):
        """Fill both list widgets with the user tables of the current game."""
        curr_game = self.ldb.fetchone(
            LottoGame, {'name': self.window.line_current_game.text()})

        for table in curr_game.user_tables:
            name = table.database_name
            # Strip only the leading prefix; the rest of the name is kept intact.
            if name.startswith(self.MODEL_PREFIX):
                self.list_model_db.addItem(name[len(self.MODEL_PREFIX):])
            elif name.startswith(self.PREDICT_PREFIX):
                self.list_predict_db.addItem(name[len(self.PREDICT_PREFIX):])

    def _add_entry(self, line_edit, list_widget, prefix, kind):
        """Validate the typed name and queue it for creation."""
        text = line_edit.text().strip()
        if text == '':
            return

        if ' ' in text:
            QMessageBox.information(
                self, 'Whitespace',
                'Your database name contain whitespaces. Please check!')
            return

        if list_widget.findItems(text, QtCore.Qt.MatchExactly):
            QMessageBox.information(
                self, 'Already exist',
                'Your database name already exist. Try again!')
            return

        full_name = prefix + text
        if full_name in self.deleted:
            # Re-adding an entry removed in this session cancels the removal.
            del self.deleted[full_name]
        else:
            self.created[full_name] = kind

        list_widget.addItem(text)
        line_edit.clear()

    def _delete_entry(self, list_widget, prefix, kind):
        """Remove the selected name from the list and queue it for deletion."""
        if len(list_widget.selectedItems()) == 0:
            return

        full_name = prefix + list_widget.currentItem().text()
        if full_name in self.created:
            # Never saved, so there is nothing to delete from the database.
            del self.created[full_name]
        else:
            self.deleted[full_name] = kind

        list_widget.takeItem(list_widget.currentRow())

    def add_model(self):
        """Queue the name typed in the model line edit as a new model table."""
        self._add_entry(self.line_model, self.list_model_db, self.MODEL_PREFIX, 0)

    def add_predict(self):
        """Queue the name typed in the predict line edit as a new prediction table."""
        self._add_entry(self.line_predict, self.list_predict_db, self.PREDICT_PREFIX, 1)

    def delete_model(self):
        """Queue the selected model table for deletion."""
        self._delete_entry(self.list_model_db, self.MODEL_PREFIX, 0)

    def delete_predict(self):
        """Queue the selected prediction table for deletion."""
        self._delete_entry(self.list_predict_db, self.PREDICT_PREFIX, 1)

    def _refresh_window_combos(self):
        """Make the main window combo boxes mirror the two list widgets."""
        pairs = ((self.window.combo_db, self.list_model_db),
                 (self.window.combo_predict_model, self.list_predict_db))
        for combo, list_widget in pairs:
            combo.clear()
            for row in range(list_widget.count()):
                combo.addItem(list_widget.item(row).text())

    def save_database(self):
        """Write the queued deletions and creations, refresh the combos and close."""
        game = self.window.line_current_game.text()

        self.ldb.create_new_session()

        for full_name in self.deleted:
            self.ldb.delete_record(
                DatabaseModels, {'game': game, 'database_name': full_name})

        for full_name in self.created:
            self.ldb.add_record(
                DatabaseModels, {'game': game, 'database_name': full_name})

        self.ldb.session_commit()
        self.ldb.session_close()

        # Existing entries must stay selectable, so the combos are rebuilt from
        # the complete lists rather than from the new names only.
        self._refresh_window_combos()
        self.close_db_manager()

    def close_db_manager(self):
        """Close the dialog."""
        self.close()
class Window(QtBaseClass, Ui_MainWindow):
    """Main application window.

    Owns the worker thread object, the database wrapper and the widgets the
    worker reads.  Each action stores the job name in ``select_thread`` and
    starts the worker; results come back through the worker's signals.
    """

    def __init__(self, parent=None):
        """Set up the UI, load the stored user settings and connect signals."""
        super(Window, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.worker = ThreadClass(self)
        self.ldb = LotteryDatabase(True)
        self.update_algorithms()
        self.update_combobox_ml()
        self.get_user_settings()

        # sys.stdout = OutLog(self.stdout_text, sys.stdout, QtGui.QColor(255, 255, 255))
        # sys.stderr = OutLog(self.stdout_text, sys.stderr, QtGui.QColor(255, 255, 255))

        # variables
        self.select_thread = None
        self.response = None

        # signals
        # self.ldb.signal_db_error.connect(self.info_box)
        self.worker.signal_progress_bar.connect(self.update_progress_bar)
        self.worker.signal_infobox.connect(self.info_box)
        self.worker.signal_status.connect(self.update_status_bar)
        self.worker.signal_qbox.connect(self.question_box)

        # buttons
        self.push_delete.clicked.connect(self.delete_feature)
        self.push_create.clicked.connect(self.create_model)
        self.push_predict.clicked.connect(self.process_input)
        self.push_embedded.clicked.connect(self.process_embedded)
        self.push_add.clicked.connect(self.load_add_ui)
        self.push_ml.clicked.connect(self.sklearn_ml)
        self.push_knn.clicked.connect(self.keras_ml)

        # menu bar actions
        self.actionDatabase_Manager.triggered.connect(self.load_db_manager)
        self.actionExit_Program.triggered.connect(self.close_app)
        self.actionImport_from_file.triggered.connect(self.import_data)
        self.actionVersion.triggered.connect(self.program_version)
        self.actionExport_to.triggered.connect(self.export_to_csv)
        self.actionImport_La_Jolla.triggered.connect(self.update_la_jolla)

        # tooltips
        self.check_add_random.setToolTip('Add random numbers to each sample.')
        self.combo_test_size.setToolTip('Determine testing size for each sample.')

    # ------------------------------------------------------------------
    # user settings
    # ------------------------------------------------------------------

    def _settings_filter(self):
        """Return the filter identifying the settings row of the current game."""
        return {
            'user_parent': 'default',
            'line_current_game': self.line_current_game.text()
        }

    def _feature_list_text(self):
        """Return the feature list items joined with ``|`` for storage."""
        return '|'.join(
            str(self.list_model.item(i).text()) for i in range(self.list_model.count()))

    def save_user_settings(self):
        """Store the state of the check boxes, combo boxes and feature list."""
        user_config = {
            'check_win_loss': self.check_win_loss.isChecked(),
            'check_add_random': self.check_add_random.isChecked(),
            'check_latest': self.check_latest.isChecked(),
            'check_sampling': self.check_sampling.isChecked(),
            'check_keras': self.check_keras.isChecked(),
            'combo_predict_model': self.combo_predict_model.currentText(),
            'combo_predict_ml': self.combo_predict_ml.currentText(),
            'combo_db': self.combo_db.currentText(),
            'combo_test_size': self.combo_test_size.currentText(),
            'combo_scoring': self.combo_scoring.currentText(),
            'list_model': self._feature_list_text()
        }

        self.ldb.update_record(UserSettings, self._settings_filter(), user_config)

    def get_user_settings(self):
        """Restore the widgets from the stored settings of the current game."""
        user_config = self.ldb.fetchone(UserSettings, self._settings_filter())

        if user_config is None:
            # Nothing stored for this game yet; keep the UI defaults.
            return

        self.check_win_loss.setChecked(user_config.check_win_loss)
        self.check_add_random.setChecked(user_config.check_add_random)
        self.check_latest.setChecked(user_config.check_latest)
        self.check_sampling.setChecked(user_config.check_sampling)
        self.check_keras.setChecked(user_config.check_keras)

        self.combo_predict_model.setCurrentText(user_config.combo_predict_model)
        self.combo_predict_ml.setCurrentText(user_config.combo_predict_ml)
        self.combo_db.setCurrentText(user_config.combo_db)
        self.combo_test_size.setCurrentText(user_config.combo_test_size)
        self.combo_scoring.setCurrentText(user_config.combo_scoring)

        # An empty (or missing) value means no features were stored.
        if user_config.list_model:
            for each_key in user_config.list_model.split("|"):
                self.list_model.addItem(each_key)

    # ------------------------------------------------------------------
    # dialogs and feature list
    # ------------------------------------------------------------------

    def load_add_ui(self):
        """Open the feature dialog, announcing the features already selected."""
        feature_dialog = ModelAddDialog(self)
        feature_dialog.signal_add_to_model.connect(self.update_add_feature_list)

        for i in range(self.list_model.count()):
            feature_dialog.signal_add_to_list.emit(self.list_model.item(i).text())

        feature_dialog.exec_()

    def load_db_manager(self):
        """Open the database manager dialog."""
        db_manager = DatabaseManagerDialog(self)
        db_manager.exec_()

    def delete_feature(self):
        """Remove the current feature from the list and persist the new list."""
        self.list_model.takeItem(self.list_model.currentRow())

        self.ldb.update_record(
            UserSettings, self._settings_filter(),
            {'list_model': self._feature_list_text()})

    def update_add_feature_list(self, item):
        """Append ``item`` to the feature list."""
        self.list_model.addItem(item)

    def update_algorithms(self):
        """Fill the algorithm list with the supported estimator names."""
        for algorithm in ('RandomForestClassifier', 'RandomForestRegressor',
                          'LogisticRegression', 'SGDClassifier'):
            self.list_ml.addItem(algorithm)

    def update_combobox_ml(self):
        """Rebuild the model, prediction, algorithm and test-size combo boxes."""
        tables = self.ldb.fetchall(DatabaseModels, {})

        self.combo_predict_model.clear()
        self.combo_db.clear()
        self.combo_test_size.clear()
        self.combo_predict_ml.clear()

        for alg in range(self.list_ml.count()):
            self.combo_predict_ml.addItem(self.list_ml.item(alg).text())

        for table in tables:
            if table.database_name.startswith('MODEL_'):
                self.combo_db.addItem(table.database_name.replace('MODEL_', ''))
            elif table.database_name.startswith('PREDICT_'):
                self.combo_predict_model.addItem(
                    table.database_name.replace('PREDICT_', ''))

        for n in range(6, 13):
            self.combo_test_size.addItem(str(n))

    # ------------------------------------------------------------------
    # worker feedback
    # ------------------------------------------------------------------

    def update_status_bar(self, status):
        """Show ``status`` in the status bar."""
        self.statusBar().showMessage(status)

    def update_progress_bar(self, progress_val):
        """Route ``progress_val`` to the progress bar of the running job."""
        # The worker computes percentages with true division; setValue takes an int.
        value = int(progress_val)

        if self.select_thread == "process_input":
            self.progress_predict.setValue(value)
        elif self.select_thread == "create_model":
            self.progress_create.setValue(value)
        else:
            self.progress_ml.setValue(value)

    # ------------------------------------------------------------------
    # job launchers
    # ------------------------------------------------------------------

    def _start_worker(self, process_name):
        """Record which job the worker should run and start it."""
        self.select_thread = process_name
        self.worker.start()

    def sklearn_ml(self):
        """Start the ``sklearn_ml`` job."""
        self._start_worker('sklearn_ml')

    def keras_ml(self):
        """Start the ``keras_ml`` job."""
        self._start_worker('keras_ml')

    def create_model(self):
        """Start the ``create_model`` job."""
        self._start_worker('create_model')

    def import_data(self):
        """Start the ``import_data`` job."""
        self._start_worker('import_data')

    def process_input(self):
        """Start the ``process_input`` job."""
        self._start_worker('process_input')

    def export_to_csv(self):
        """Start the ``export_to_csv`` job."""
        self._start_worker('export_to_csv')

    def process_embedded(self):
        """Start the ``process_embedded`` job."""
        self._start_worker('process_embedded')

    def update_la_jolla(self):
        """Start the ``update_la_jolla`` job."""
        self._start_worker('update_la_jolla')

    # ------------------------------------------------------------------
    # message boxes and exit handling
    # ------------------------------------------------------------------

    def info_box(self, info_head, info_text):
        """Show an information box with the given title and text."""
        QMessageBox.information(self, info_head, info_text)

    def question_box(self, question_head, question_text):
        """Ask a Yes/No question and store the answer in ``self.response``."""
        self.response = QMessageBox.question(self, question_head, question_text,
                                             QMessageBox.Yes | QMessageBox.No,
                                             QMessageBox.No)

    def _confirm_exit(self):
        """Ask whether to exit; return True when the user answers Yes."""
        odp = QMessageBox.question(self, 'Exit', "Are you sure you want exit?",
                                   QMessageBox.Yes | QMessageBox.No,
                                   QMessageBox.No)
        return odp == QMessageBox.Yes

    def closeEvent(self, event):
        """Confirm closing; on Yes save the settings and exit the program."""
        if self._confirm_exit():
            self.save_user_settings()
            event.accept()
            sys.exit()
        else:
            event.ignore()

    def keyPressEvent(self, event):
        """Treat the Escape key like a close request."""
        if event.key() == QtCore.Qt.Key_Escape:
            if self._confirm_exit():
                self.save_user_settings()
                event.accept()
                sys.exit()
            else:
                event.ignore()

    def close_app(self):
        """Menu action: confirm, save the settings and exit the program."""
        if self._confirm_exit():
            self.save_user_settings()
            sys.exit()

    def program_version(self):
        """Show the program version."""
        self.info_box('Program Version', VERSION)
class MachineLearning:
    """Train, tune and apply the models selected in the GUI.

    Training rows are read through the ``tempView`` database view created
    by :meth:`generate_df_pieces`; its columns are ``DRAFT_ID``, the
    feature columns in ``table_headers`` and ``LABEL``, in that order.
    """

    def __init__(self, worker):
        """Bind the worker and derive the feature columns from the GUI.

        Args:
            worker: Object exposing ``table_name``, ``window`` (the main
                window and its widgets) and the ``signal_status`` /
                ``signal_infobox`` signals used by the other methods.
        """
        self.worker = worker
        self.ldb = LotteryDatabase()

        # variables
        self.x = None
        self.y = None
        self.x_train = None
        self.x_validation = None
        self.y_train = None
        self.y_validation = None

        self.N_FOLDS = 5
        self.MAX_EVALS = 20
        self.RANDOM_STATE = 42

        self.training_size = 15
        self.n_increment = 10

        self.curr_game = CONFIG['games']['mini_lotto']

        # features
        self.table_headers = []
        self.table_name = self.worker.table_name
        self.features = self.curr_game['features']

        list_model = self.worker.window.list_model
        for i in range(list_model.count()):
            feature = self.features[list_model.item(i).text()]
            feature_len = feature['length'] + 1
            feature_header = feature['header']
            # One "<header><n> INTEGER" entry per position of the feature.
            self.table_headers += [
                feature_header + str(n) + ' INTEGER'
                for n in range(1, feature_len)
            ]

    def generate_df_pieces(self, connection, chunk_size, offset, ids):
        """Yield the rows of ``tempView`` as DataFrames of ``chunk_size`` rows.

        The view is dropped and re-created over ``self.table_name`` on
        every call.

        Args:
            connection: Connection handed to ``pd.read_sql_query``.
            chunk_size: Maximum number of rows per yielded DataFrame.
            offset: Row offset of the first chunk.
            ids: Only rows with ``DRAFT_ID <= ids`` are selected.

        Yields:
            pandas.DataFrame: Consecutive non-empty chunks.
        """
        last_row = self.ldb.get_table_length(self.table_name)
        # Only used for the progress message.
        chunks = int(math.ceil(last_row / chunk_size))
        n_chunk = 1

        self.ldb.delete_view('tempView')
        self.ldb.create_view(
            'tempView',
            ",".join(['DRAFT_ID'] + self.table_headers + ['LABEL']),
            self.table_name)

        while True:
            self.worker.signal_status.emit(
                str.format('Collecting data from database... {} of {}',
                           n_chunk, chunks))
            # All three values are formatted with %d, i.e. as integers.
            sql_ct = "SELECT * FROM tempView WHERE DRAFT_ID <= %d limit %d offset %d" % (
                ids, chunk_size, offset)
            df_piece = pd.read_sql_query(sql_ct, connection)

            if not df_piece.shape[0]:
                break
            yield df_piece

            # A short chunk means the result set is exhausted.
            if df_piece.shape[0] < chunk_size:
                break

            offset += chunk_size
            n_chunk += 1

        self.worker.signal_status.emit('')

    def _timestamped_file_name(self, info):
        """Return ``'<predict model> <YYYY-mm-dd HH MM SS>' + info``."""
        now = datetime.datetime.now()
        return str.format(
            '{} {}',
            self.worker.window.combo_predict_model.currentText(),
            now.strftime("%Y-%m-%d %H %M %S")) + info

    @staticmethod
    def _append_prediction_column(source_path, target_path, header,
                                  predictions):
        """Copy a CSV to ``target_path`` with one extra column per row.

        The header row of ``source_path`` is replaced by ``header``; each
        data row gets the matching entry of ``predictions`` appended
        (rows beyond the shorter of the two are dropped, as with
        ``zip``).  ``source_path`` is removed afterwards.

        The output is staged in a temporary file so the call also works
        when ``source_path == target_path``, which happens when two file
        names are generated within the same second.
        """
        tmp_path = target_path + '.tmp'
        with open(source_path, 'r', newline='') as read_csv_file, \
                open(tmp_path, 'w', newline='') as csv_file:
            csv_input = csv.reader(read_csv_file)
            next(csv_input)  # skip the old header
            writer = csv.writer(csv_file)
            writer.writerow(header)
            for row, o in zip(csv_input, predictions):
                writer.writerow(row + [o])
        os.remove(source_path)
        os.replace(tmp_path, target_path)

    def _export_prediction(self, model, output_x, file_name_r,
                           export_columns):
        """Predict ``output_x`` and append the result as a new CSV column.

        Returns the name (without directory and extension) of the newly
        written file under ``archived/``.
        """
        prediction = model['model'].predict(output_x)
        combined_set = list(map(str, prediction))
        file_name_w = self._timestamped_file_name(model['info'])
        self._append_prediction_column(
            'archived/' + file_name_r + '.csv',
            'archived/' + file_name_w + '.csv',
            export_columns, combined_set)
        return file_name_w

    def embedded_learning(self, input_array, limit=0, draft_id=0):
        """Train on the stored model table and export scored predictions.

        Args:
            input_array: Space-separated integers (the draw handed to
                ``ConvertModel`` as ``last_draw``).
            limit: Subtracted from the input-table length to bound the
                ``DRAFT_ID`` values used for training.
            draft_id: Value written to the first column of every exported
                row.

        Returns:
            str: Currently always an empty string.
        """
        window = self.worker.window
        n_features = len(self.table_headers)
        # Build the model that matches the branch taken further down.
        # The previous unconditional ``keras=True`` handed a Keras model
        # (with ``info=None``) to the scikit-learn branches as well.
        use_keras = window.check_keras.isChecked()

        original_len = self.ldb.get_table_length(
            'INPUT_' + self.curr_game['database'])
        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 100000, 0,
                                    original_len - limit))
        array = dataset.values

        # Column 0 is DRAFT_ID, the last column is LABEL.
        self.x = array[:, 1:n_features + 1]
        self.y = array[:, n_features + 1]

        tb._SYMBOLIC_SCOPE.value = True
        model = self.choose_model(keras=use_keras)

        self.worker.table_name = 'PREDICT_' + window.combo_predict_model.currentText()
        # Keyword arguments: the second positional parameter of
        # ConvertModel is ``initial_table``, not ``last_draw``.
        convert = ConvertModel(
            self.worker,
            last_draw=list(map(int, input_array.split(" "))),
            limit=limit)
        convert.create_prediction_model(input_array)

        self.table_name = 'PREDICT_' + window.combo_predict_model.currentText()
        output_dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 10000, 0, 0))
        output_array = output_dataset.values
        output_x = output_array[:, 1:n_features + 1]

        original_len = self.ldb.get_table_length(self.worker.table_name) + 1

        file_name_r = self._timestamped_file_name(model['info'])

        export_columns = ['DRAFT_NR', 'FIRST', 'SECOND', 'THIRD', 'FOURTH', 'FIFTH',
                          'ODD_EVEN', 'LOW_HIGH', 'LA_JOLLA', 'SCORE_ALL',
                          'SCORE_TOP', 'SCORE_LESS', 'SCORE_2', 'SCORE_3', 'LABEL'] + \
                         ['OUTPUT ' + str(n) for n in range(1, self.training_size+1)]

        with open('archived/' + file_name_r + '.csv', 'a',
                  newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(export_columns)

            score_all, score_2, score_3, score_top, score_less = 0, 0, 0, 0, 0

            pairs_two = convert.get_latest_pairs(2)
            pairs_three = convert.get_latest_pairs(3)
            latest_numbers = convert.get_latest_top()

            top_numbers = nlargest(20, latest_numbers,
                                   key=latest_numbers.get)
            less_numbers = nsmallest(20, latest_numbers,
                                     key=latest_numbers.get)

            head = ','.join(['LA_JOLLA_' + str(n) for n in range(1, 6)])
            # self.ldb.db_delete_view('LA_JOLLA_VIEW')
            # self.ldb.db_create_view('LA_JOLLA_VIEW', head, 'LA JOLLA')
            # self.ldb.db_execute('SELECT * FROM LA_JOLLA_VIEW')
            # NOTE(review): the SELECT above is commented out, so this
            # fetches whatever the cursor currently holds -- confirm.
            la_jolla_db = self.ldb.c.fetchall()
            # Rows are normalised to tuples: a list never compares equal
            # to a tuple, so ``output_list in la_jolla_db`` could not
            # match tuple rows.
            la_jolla_rows = {tuple(row) for row in la_jolla_db}

            for o in range(1, original_len):
                fetch_one = list(self.ldb.fetchone(self.table_name, o))
                originals = fetch_one[2:self.curr_game['length'] + 2]
                label_column = [fetch_one[-1]]

                # 1-based positions of the flags that are set.
                output_list = [
                    n + 1 for n in range(0, len(originals))
                    if originals[n] == 1
                ]

                odd_count = len(
                    list(filter(lambda x: (x % 2 != 0), output_list)))
                even_count = len(
                    list(filter(lambda x: (x % 2 == 0), output_list)))

                if even_count > odd_count:
                    odd_even_check = 1
                else:
                    odd_even_check = 0

                high_low = sum(x > 21 for x in output_list)

                # NOTE(review): the weight is assumed to shrink with every
                # ranked entry (not only on hits); the source's nesting
                # was lost -- confirm.
                decrease = 0
                for top in top_numbers:
                    if int(top) in output_list:
                        score_all += (1 - decrease)
                        score_top += (1 - decrease)
                    decrease += 0.05

                decrease = 0
                for top in less_numbers:
                    if int(top) in output_list:
                        # score_all += (1 - decrease)
                        score_less += (1 - decrease)
                    decrease += 0.05

                output_counter = Counter(combinations(output_list, 2))
                decrease = 0
                for pair in pairs_two:
                    if pair in output_counter:
                        score_all += (1 - decrease)
                        score_2 += (1 - decrease)
                    decrease += 0.01

                output_counter = Counter(combinations(output_list, 3))
                decrease = 0
                for pair in pairs_three:
                    if pair in output_counter:
                        score_all += (1 - decrease)
                        score_3 += (1 - decrease)
                    decrease += 0.01

                if tuple(output_list) in la_jolla_rows:
                    la_jolla = 1
                else:
                    la_jolla = 0

                output_list = [draft_id] + output_list + [odd_even_check] + [high_low] + [la_jolla] +\
                              [score_all] + [score_top] + [score_less] + [score_2] + [score_3] + label_column

                writer.writerow(output_list)
                score_all, score_2, score_3, score_top, score_less = 0, 0, 0, 0, 0

        self.worker.signal_status.emit('')

        if use_keras:
            model['model'].fit(self.x,
                               self.y,
                               nb_epoch=100,
                               batch_size=212,
                               verbose=2)
            file_name_r = self._export_prediction(
                model, output_x, file_name_r, export_columns)

        elif window.combo_predict_ml.currentText() in (
                'LogisticRegression', 'SGDClassifier'):
            model['model'].fit(self.x, self.y)
            file_name_r = self._export_prediction(
                model, output_x, file_name_r, export_columns)

        else:
            # Forest estimators: grow the ensemble step by step and export
            # one prediction column per step.
            for t in range(self.training_size):
                self.worker.signal_status.emit(
                    str.format('Training in process... {} of {}', t + 1,
                               self.training_size))

                model['model'].n_estimators += self.n_increment
                model['model'].fit(self.x, self.y)
                file_name_r = self._export_prediction(
                    model, output_x, file_name_r, export_columns)

        self.worker.signal_status.emit('')

        msg = ''
        # msg = "Algorithm: RandomForestClassifier" + '\n' + \
        #     "Number of estimators: {}".format(forest.n_estimators) + '\n' + \
        #     "Accuracy on training set: {:.3f}".format(forest.score(x_train, y_train)) + '\n' + \
        #     "Accuracy on test set: {:.3f}".format(forest.score(x_validation, y_validation))

        return msg

    def choose_model(self, params=None, fresh=False, keras=False):
        """Build the estimator selected in the GUI.

        Args:
            params: Optional keyword arguments for the scikit-learn
                estimator; takes precedence over ``fresh``.
            fresh: When true (and ``params`` is None) build the estimator
                with its library defaults.
            keras: When true build the fixed Keras network instead of the
                estimator selected in ``list_ml``; requires ``self.x``.

        Returns:
            dict: ``{'model': estimator, 'info': file-name suffix}``; both
            are ``None`` when the selected name is not recognised.
        """
        model, info = None, None

        if keras:
            model = Sequential()
            model.add(
                Dense(output_dim=220,
                      kernel_initializer='uniform',
                      input_dim=int(self.x.shape[1])))
            model.add(Activation(activation='relu'))
            model.add(Dropout(0.27208339620963506))
            model.add(
                Dense(output_dim=205, kernel_initializer="glorot_uniform"))
            model.add(Activation(activation='relu'))
            model.add(Dropout(0.29152160619480066))
            model.add(Dense(1))
            model.add(Activation('sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='rmsprop')
            # Callers concatenate ``info`` into a file name, so it must be
            # a string for the Keras model too.
            info = ' KERAS ' + self.worker.window.combo_db.currentText()
            return {'model': model, 'info': info}

        algorithm = self.worker.window.list_ml.currentItem().text()

        if algorithm == 'RandomForestClassifier':
            if params is not None:
                model = RandomForestClassifier(**params)
            elif fresh:
                model = RandomForestClassifier()
            else:
                model = RandomForestClassifier(
                    warm_start=True,
                    n_estimators=1,
                    n_jobs=-1,
                    random_state=self.RANDOM_STATE)
            info = ' RFC ' + self.worker.window.combo_db.currentText()

        elif algorithm == 'RandomForestRegressor':
            if params is not None:
                model = RandomForestRegressor(**params)
            elif fresh:
                model = RandomForestRegressor()
            else:
                model = RandomForestRegressor(
                    warm_start=True,
                    n_estimators=1,
                    n_jobs=-1,
                    random_state=self.RANDOM_STATE)
            info = ' RFR ' + self.worker.window.combo_db.currentText()

        elif algorithm == 'LogisticRegression':
            if params is not None:
                model = linear_model.LogisticRegression(**params)
            elif fresh:
                model = linear_model.LogisticRegression()
            else:
                model = linear_model.LogisticRegression(C=50,
                                                        solver='liblinear')
            info = ' LR ' + self.worker.window.combo_db.currentText()

        elif algorithm == 'SGDClassifier':
            if params is not None:
                model = linear_model.SGDClassifier(**params)
            elif fresh:
                model = linear_model.SGDClassifier()
            else:
                model = linear_model.SGDClassifier(
                    class_weight='balanced',
                    loss='hinge',
                    max_iter=2426,
                    tol=1.6246894453989777e-05,
                    warm_start=True)
            info = ' SGD ' + self.worker.window.combo_db.currentText()

        return {'model': model, 'info': info}

    def choose_space(self, keras=False):
        """Return the hyperopt search space for the selected algorithm.

        Args:
            keras: When true return the space of the Keras network instead
                of the one for the estimator selected in ``list_ml``.

        Returns:
            dict: hyperopt space; empty when the name is not recognised.
        """
        space = {}

        if keras:
            return {
                'choice':
                hp.choice('num_layers', [{
                    'layers': 'two',
                }, {
                    'layers': 'three',
                    'units3': hp.uniform('units3', 64, 1024),
                    'dropout3': hp.uniform('dropout3', .25, .75)
                }]),
                'units1': hp.uniform('units1', 64, 1024),
                'units2': hp.uniform('units2', 64, 1024),
                'dropout1': hp.uniform('dropout1', .25, .75),
                'dropout2': hp.uniform('dropout2', .25, .75),
                'batch_size': hp.uniform('batch_size', 28, 128),
                'nb_epochs': 100,
                'optimizer':
                hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
                'activation': 'relu'
            }

        algorithm = self.worker.window.list_ml.currentItem().text()

        if algorithm == 'RandomForestClassifier':
            space = {
                'n_estimators':
                hp.choice('n_estimators', range(100, 1500)),
                'class_weight':
                hp.choice('class_weight',
                          ['balanced', 'balanced_subsample', None]),
                'max_features':
                hp.choice('max_features', ['auto', 'sqrt', 'log2']),
                'bootstrap':
                hp.choice('bootstrap', [True, False]),
                'max_depth':
                hp.choice('max_depth', [None, 1, 3]),
                'criterion':
                hp.choice('criterion', ['gini', 'entropy'])
            }

        elif algorithm == 'RandomForestRegressor':
            # No 'class_weight' here: RandomForestRegressor does not
            # accept that argument, so choose_model(params) would raise.
            space = {
                'n_estimators': hp.choice('n_estimators', range(10, 150)),
                'warm_start': hp.choice('warm_start', [True, False]),
                'max_features': hp.choice('max_features', ['auto', 'sqrt']),
                'bootstrap': hp.choice('bootstrap', [True, False]),
                'max_depth': hp.choice('max_depth', [None, 1, 2, 3]),
                'min_samples_split': hp.choice('min_samples_split', [2, 3]),
                'min_samples_leaf': hp.choice('min_samples_leaf', [1, 2])
            }

        elif algorithm == 'LogisticRegression':
            space = {
                'solver': hp.choice('solver', ['newton-cg', 'lbfgs', 'sag']),
                'warm_start': hp.choice('warm_start', [True, False]),
                'class_weight': hp.choice('class_weight', ['balanced', None]),
                'tol': hp.uniform('tol', 0.00001, 0.0001),
                'C': hp.uniform('C', 1.0, 50.0),
                'fit_intercept': hp.choice('fit_intercept', [True, False]),
                'max_iter': hp.choice('max_iter', range(100, 3000))
            }

        elif algorithm == 'SGDClassifier':
            space = {
                'class_weight':
                hp.choice('class_weight', [None, 'balanced']),
                'warm_start':
                hp.choice('warm_start', [True, False]),
                'fit_intercept':
                hp.choice('fit_intercept', [True, False]),
                'tol':
                hp.uniform('tol', 0.00001, 0.0001),
                'loss':
                hp.choice('loss', ['hinge', 'log', 'squared_hinge',
                                   'modified_huber']),
                'max_iter':
                hp.choice('max_iter', range(500, 3000))
            }

        return space

    def sklearn_model_train(self):
        """Tune the selected estimator with hyperopt and report its scores.

        The best parameters are refitted on an 80/20 split; the scores go
        to the info box and the confusion matrix is plotted.
        """
        print(self.worker.window.list_ml.currentItem().text())

        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn,
                                    100000,
                                    offset=0,
                                    ids=5000))
        array = dataset.values

        # Column 0 is DRAFT_ID and the last column is LABEL (see
        # generate_df_pieces); the previous slices used DRAFT_ID as a
        # feature and the last feature as the label.
        n_features = len(self.table_headers)
        self.x = array[:, 1:n_features + 1]
        self.y = array[:, n_features + 1]

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            self.x, self.y, test_size=0.2, random_state=self.RANDOM_STATE)

        space = self.choose_space()
        bayes_trials = Trials()

        best = fmin(fn=self.sklearn_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=self.MAX_EVALS,
                    trials=bayes_trials)

        print(best)

        for bt in bayes_trials:
            print(bt['result']['loss'])
            print(bt['result']['params'])

        model = self.choose_model(space_eval(space, best))
        model['model'].fit(x_train, y_train)
        y_pred = model['model'].predict(x_validation)

        msg = 'Accuracy Score : ' + str(accuracy_score(y_validation, y_pred)) + '\n' + \
              'Precision Score : ' + str(precision_score(y_validation, y_pred)) + '\n' + \
              'Recall Score : ' + str(recall_score(y_validation, y_pred)) + '\n' + \
              'F1 Score : ' + str(f1_score(y_validation, y_pred)) + '\n' + \
              'ROC_AUC Score:' + str(roc_auc_score(y_validation, y_pred))
        # 'Confusion Matrix : \n' + str(confusion_matrix(y_validation, y_pred))

        plt.figure()
        # Request the matrix in the same label order as the tick labels;
        # by default confusion_matrix orders labels ascending (0, 1).
        classes = [1, 0]
        self.plot_confusion_matrix(
            confusion_matrix(y_validation, y_pred, labels=classes),
            classes=classes)

        self.worker.signal_status.emit('')
        self.worker.signal_infobox.emit("Completed", msg)

        plt.show()

    def sklearn_objective(self, params, n_folds=5):
        """hyperopt objective: ``1 - best cross-validated f1_macro``.

        Args:
            params: Estimator keyword arguments sampled by hyperopt.
            n_folds: Number of cross-validation folds.

        Returns:
            dict: ``loss``, the evaluated ``params`` and ``status``.
        """
        # Evaluate the sampled parameters; building a default estimator
        # here made every trial score the same model.
        clf = self.choose_model(params=params)

        scores = model_selection.cross_val_score(clf['model'],
                                                 self.x,
                                                 self.y,
                                                 cv=n_folds,
                                                 scoring='f1_macro')

        best_score = max(scores)
        loss = 1 - best_score

        return {'loss': loss, 'params': params, 'status': STATUS_OK}
def keras_model_train(self): dataset = pd.concat( self.generate_df_pieces(self.ldb.conn, 100000, offset=0, ids=5000)) array = dataset.values self.x = array[:, :len(self.table_headers)] self.y = array[:, len(self.table_headers)] self.x_train, self.x_validation, self.y_train, self.y_validation = model_selection.train_test_split( self.x, self.y, test_size=0.2, random_state=self.RANDOM_STATE) space = self.choose_space(keras=True) tb._SYMBOLIC_SCOPE.value = True bayes_trials = Trials() best = fmin(fn=self.keras_objective, space=space, algo=tpe.suggest, max_evals=self.MAX_EVALS, trials=bayes_trials) print(best) for bt in bayes_trials: print(bt['result']['loss']) print(bt['result']['params']) def keras_objective(self, params): model = Sequential() model.add( Dense(output_dim=params['units1'], input_dim=self.x_train.shape[1])) model.add(Activation(params['activation'])) model.add(Dropout(params['dropout1'])) model.add(Dense(output_dim=params['units2'], init="glorot_uniform")) model.add(Activation(params['activation'])) model.add(Dropout(params['dropout2'])) if params['choice']['layers'] == 'three': model.add( Dense(output_dim=params['choice']['units3'], init="glorot_uniform")) model.add(Activation(params['activation'])) model.add(Dropout(params['choice']['dropout3'])) model.add(Dense(1)) model.add(Activation('sigmoid')) model.compile(loss='binary_crossentropy', optimizer=params['optimizer']) model.fit(self.x_train, self.y_train, nb_epoch=params['nb_epochs'], batch_size=params['batch_size'], verbose=0) pred_auc = model.predict_proba(self.x_validation, batch_size=128, verbose=0) acc = roc_auc_score(self.y_validation, pred_auc) # print('AUC:', acc) # sys.stdout.flush() return {'loss': -acc, 'params': params} @staticmethod def plot_confusion_matrix(cm, classes, normalize=False, cmap=plt.cm.Blues): plt.imshow(cm, interpolation='nearest', cmap=cmap) plt.title('Confusion matrix') plt.colorbar() tick_marks = np.arange(len(classes)) plt.xticks(tick_marks, classes, rotation=45) 
plt.yticks(tick_marks, classes) fmt = '.2f' if normalize else 'd' thresh = cm.max() / 2. for i, j in product(range(cm.shape[0]), range(cm.shape[1])): plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") plt.ylabel('True label') plt.xlabel('Predicted label') plt.tight_layout() def validate_estimators(self, x, y): x_train, x_validation, y_train, y_validation = model_selection.train_test_split( x, y, test_size=0.3, random_state=0) n_estimators = [] train_results = [] test_results = [] rf = RandomForestRegressor(warm_start=True, n_estimators=0, n_jobs=-1) # rf = RandomForestClassifier(warm_start=True, n_estimators=0, n_jobs=-1) for t in range(self.training_size): rf.n_estimators += 3 # rf.n_iter += 2 # n_estimators += [rf.n_iter] n_estimators += [rf.n_estimators] self.worker.signal_status.emit( 'Validating estimators: {} of {}. Current estimator: {}'. format(t + 1, self.training_size, rf.n_estimators)) rf.fit(x_train, y_train) train_pred = rf.predict(x_train) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_train, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) train_results.append(roc_auc) y_pred = rf.predict(x_validation) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_validation, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) test_results.append(roc_auc) line1, = plt.plot(n_estimators, train_results, 'b', label="Train AUC") line2, = plt.plot(n_estimators, test_results, 'r', label="Test AUC") plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('n_estimators') plt.show() def validate_max_depth(self, x, y): x_train, x_validation, y_train, y_validation = model_selection.train_test_split( x, y, test_size=0.3, random_state=0) max_depths = np.linspace(1, 32, 32, endpoint=True) train_results = [] test_results = [] for max_depth in max_depths: rf = RandomForestClassifier(warm_start=True, 
n_estimators=10, max_depth=max_depth, n_jobs=-1) self.worker.signal_status.emit( 'Validating max depth: {} of {}.'.format( max_depth, len(max_depths))) rf.fit(x_train, y_train) train_pred = rf.predict(x_train) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_train, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) train_results.append(roc_auc) y_pred = rf.predict(x_validation) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_validation, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) test_results.append(roc_auc) line1, = plt.plot(max_depths, train_results, 'b', label="Train AUC") line2, = plt.plot(max_depths, test_results, 'r', label="Test AUC") plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('Tree depth') plt.show() def validate_min_sample_split(self, x, y): x_train, x_validation, y_train, y_validation = model_selection.train_test_split( x, y, test_size=0.3, random_state=0) min_samples_splits = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0] train_results = [] test_results = [] for min_samples_split in min_samples_splits: rf = RandomForestClassifier(warm_start=True, n_estimators=10, min_samples_split=min_samples_split, n_jobs=-1) self.worker.signal_status.emit( 'Validating min sample split: {} of {}.'.format( min_samples_split, len(min_samples_splits))) rf.fit(x_train, y_train) train_pred = rf.predict(x_train) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_train, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) train_results.append(roc_auc) y_pred = rf.predict(x_validation) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_validation, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) test_results.append(roc_auc) line1, = plt.plot(min_samples_splits, train_results, 'b', label="Train AUC") line2, = plt.plot(min_samples_splits, test_results, 'r', label="Test AUC") 
plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('min samples split') plt.show() def validate_min_sample_leaf(self, x, y): x_train, x_validation, y_train, y_validation = model_selection.train_test_split( x, y, test_size=0.3, random_state=0) min_samples_leafs = [1, 2, 3, 4, 5] train_results = [] test_results = [] for min_samples_leaf in min_samples_leafs: rf = RandomForestClassifier(warm_start=True, n_estimators=10, min_samples_leaf=min_samples_leaf, n_jobs=-1) self.worker.signal_status.emit( 'Validating min sample leaf: {} of {}.'.format( min_samples_leaf, len(min_samples_leafs))) rf.fit(x_train, y_train) train_pred = rf.predict(x_train) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_train, train_pred) roc_auc = auc(false_positive_rate, true_positive_rate) train_results.append(roc_auc) y_pred = rf.predict(x_validation) false_positive_rate, true_positive_rate, thresholds = roc_curve( y_validation, y_pred) roc_auc = auc(false_positive_rate, true_positive_rate) test_results.append(roc_auc) line1, = plt.plot(min_samples_leafs, train_results, 'b', label="Train AUC") line2, = plt.plot(min_samples_leafs, test_results, 'r', label="Test AUC") plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel('AUC score') plt.xlabel('min samples leaf') plt.show() def validate_max_features(self): pass def random_forest_predict(self): output_headers = ",".join(['ID INTEGER PRIMARY KEY'] + ['OUTPUT_LABEL INTEGER']) self.ldb.delete_table('OUTPUT_prediction') self.ldb.create_table('OUTPUT_prediction', output_headers) ids = 1 dataset = pd.concat( self.generate_df_pieces(self.ldb.conn, 1000, offset=0)) array = dataset.values x = array[:, :len(self.table_headers)] filename = 'random_forest.sav'