Beispiel #1
0
    def __init__(self, worker):
        """Initialise state and build the column definitions for the selected features.

        Args:
            worker: Object providing ``table_name`` and ``window.list_model``;
                each list entry's text is used as a key into the game's
                ``features`` configuration.
        """
        self.worker = worker
        self.ldb = LotteryDatabase()

        # variables
        self.x = None
        self.y = None

        self.x_train = None
        self.x_validation = None
        self.y_train = None
        self.y_validation = None

        self.N_FOLDS = 5
        self.MAX_EVALS = 20
        self.RANDOM_STATE = 42
        self.training_size = 15
        self.n_increment = 10
        self.curr_game = CONFIG['games']['mini_lotto']

        # features
        self.table_headers = []
        self.table_name = self.worker.table_name
        self.features = self.curr_game['features']

        # One "<header><n> INTEGER" entry per position of every listed feature.
        for row in range(self.worker.window.list_model.count()):
            spec = self.features[self.worker.window.list_model.item(row).text()]
            feature_len = spec['length'] + 1
            feature_header = spec['header']
            for n in range(1, feature_len):
                self.table_headers.append(feature_header + str(n) + ' INTEGER')
Beispiel #2
0
    def convert_to_original(self):
        """Write the rows of the selected ``PREDICT_*`` table to a CSV file.

        The file is created under ``archived/`` and named after the selected
        prediction model plus the current timestamp.  For every record id
        from 1 up to ``self.input_record_count - 1`` the 1-based positions of
        the columns 1..49 that hold ``1`` are written, followed by the last
        column of the prediction row and the last column of the matching
        ``OUTPUT_prediction`` row.  Progress is reported via
        ``self.worker.signal_status``.
        """
        self.ldb = LotteryDatabase()
        model_box = self.worker.window.combo_predict_model
        self.table_name = 'PREDICT_' + model_box.currentText()

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H %M %S")
        file_name = '{} {}'.format(model_box.currentText(), timestamp)
        header_row = ['FIRST', 'SECOND', 'THIRD', 'FOURTH', 'FIFTH', 'SIXTH', 'LABEL', 'OUTPUT']

        with open('archived/' + file_name + '.csv', 'a', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(header_row)

            for o in range(1, self.input_record_count):
                predict_row = list(self.ldb.fetchone(self.table_name, o))
                output_row = list(self.ldb.fetchone('OUTPUT_prediction', o))

                # Turn the flag columns back into the numbers they stand for.
                drawn = [position
                         for position, flag in enumerate(predict_row[1:50], start=1)
                         if flag == 1]

                writer.writerow(drawn + [predict_row[-1]] + [output_row[-1]])

                self.worker.signal_status.emit(
                    'Export in progress: {} of {}.'.format(o, self.input_record_count - 1))

        # Clear the status message once the export is finished.
        self.worker.signal_status.emit('')
Beispiel #3
0
    def __init__(self):
        """Initialise state and build column definitions for a fixed feature set.

        The features ``number_map``, ``number_cycles`` and ``cool numbers``
        are looked up in the ``mini_lotto`` game configuration; each one
        contributes ``length`` entries of the form ``"<header><n> INTEGER"``.
        """
        self.ldb = LotteryDatabase(CONFIG['database'])

        self.x = None
        self.y = None

        self.x_train = None
        self.x_validation = None
        self.y_train = None
        self.y_validation = None

        self.curr_game = CONFIG['games']['mini_lotto']

        self.table_headers = []
        self.features = self.curr_game['features']

        feat = ['number_map', 'number_cycles', 'cool numbers']

        for feature_name in feat:
            feature_len = self.features[feature_name]['length'] + 1
            feature_header = self.features[feature_name]['header']
            for n in range(1, feature_len):
                self.table_headers.append(feature_header + str(n) + ' INTEGER')
Beispiel #4
0
def create_model(current_game_name):
    """Regenerate the input model class of a game inside ``db_models.py``.

    Steps:
      1. Fetch the game via ``ldb.fetchone(LottoGame, {'game_name': ...})``.
      2. If a model is registered for ``curr_game.input_model``, drop its table.
      3. Rewrite ``db_models.py`` without the old class definition and append
         a new class with an ``id`` primary key plus one ``NR_<n>`` integer
         column for ``n`` in ``1..curr_game.game_len``.
      4. Call ``BASE.metadata.create_all`` on the database engine.

    Args:
        current_game_name: Value used for the ``game_name`` lookup.

    Raises:
        Exception: Any error raised while rewriting the file or creating the
            tables is re-raised after the previous file content has been
            written back.
    """
    import re

    models = 'db_models.py'

    ldb = LotteryDatabase(True)

    curr_game = ldb.fetchone(LottoGame, {'game_name': current_game_name})

    if ldb.check_model_exist_by_table_name(curr_game.input_model):
        model_class = ldb.set_model_by_table_name(curr_game.input_model)
        model_class.__table__.drop(ldb.engine)

    with open(models, 'r') as models_file:
        models_code = models_file.readlines()

    # Only the class statement of exactly this model may start the region to
    # drop.  A plain substring test would also fire on any other line that
    # merely mentions the name (for example a class whose name contains it)
    # and silently delete unrelated code.
    class_header = re.compile(
        r'class\s+{}\b'.format(re.escape(curr_game.input_model)))

    try:
        with open(models, 'w') as models_file:
            skipping = False
            for code_line in models_code:
                if class_header.match(code_line):
                    skipping = True
                elif skipping and code_line.strip() == '':
                    # The first blank line ends the old class body; the blank
                    # line itself is kept.
                    skipping = False
                if not skipping:
                    models_file.write(code_line)

            model_list = [
                'class {}(BASE):\n'.format(curr_game.input_model),
                "    __tablename__ = '{}'\n".format(
                    'INPUT_' + curr_game.game_table),
                "    id = Column('id', Integer, primary_key=True)\n",
            ]

            model_list += [
                "    nr_{0} = Column('NR_{0}', Integer)\n".format(n)
                for n in range(1, curr_game.game_len + 1)
            ]

            models_file.write('\n\n')
            models_file.writelines(model_list)

        BASE.metadata.create_all(bind=ldb.engine)

    except Exception:
        # Put the file back exactly as it was before re-raising.
        with open(models, 'w') as models_file:
            models_file.writelines(models_code)
        raise
Beispiel #5
0
    def __init__(self, parent=None):
        """Set up the main window and connect its signals, buttons and menu actions.

        Args:
            parent: Optional parent forwarded to the base-class initialiser.
        """
        super(QtBaseClass, self).__init__(parent)

        #  class initialize
        self.setupUi(self)
        self.worker = ThreadClass(self)
        self.ldb = LotteryDatabase(True)
        self.update_algorithms()
        self.update_combobox_ml()
        self.get_user_settings()

        # sys.stdout = OutLog(self.stdout_text, sys.stdout, QtGui.QColor(255, 255, 255))
        # sys.stderr = OutLog(self.stdout_text, sys.stderr, QtGui.QColor(255, 255, 255))

        #  variables
        self.select_thread = None
        self.response = None

        # signals
        # self.ldb.signal_db_error.connect(self.info_box)
        worker_signals = (
            (self.worker.signal_progress_bar, self.update_progress_bar),
            (self.worker.signal_infobox, self.info_box),
            (self.worker.signal_status, self.update_status_bar),
            (self.worker.signal_qbox, self.question_box),
        )
        for signal, slot in worker_signals:
            signal.connect(slot)

        # buttons
        button_handlers = (
            (self.push_delete, self.delete_feature),
            (self.push_create, self.create_model),
            (self.push_predict, self.process_input),
            (self.push_embedded, self.process_embedded),
            (self.push_add, self.load_add_ui),
            (self.push_ml, self.sklearn_ml),
            (self.push_knn, self.keras_ml),
        )
        for button, handler in button_handlers:
            button.clicked.connect(handler)

        # menu bar actions
        menu_handlers = (
            (self.actionDatabase_Manager, self.load_db_manager),
            (self.actionExit_Program, self.close_app),
            (self.actionImport_from_file, self.import_data),
            (self.actionVersion, self.program_version),
            (self.actionExport_to, self.export_to_csv),
            (self.actionImport_La_Jolla, self.update_la_jolla),
        )
        for action, handler in menu_handlers:
            action.triggered.connect(handler)

        # tooltips
        self.check_add_random.setToolTip('Add random numbers to each sample.')
        self.combo_test_size.setToolTip(
            'Determine testing size for each sample.')
Beispiel #6
0
    def __init__(self, window, parent=None):
        """Set up the add-feature dialog and connect its signal and buttons.

        Args:
            window: Object stored as ``self.window`` for later use.
            parent: Optional parent forwarded to the base-class initialiser.
        """
        super(ModelAddDialog, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.window = window
        self.ldb = LotteryDatabase()
        self.list_add_available_init()

        #  signals
        self.signal_add_to_list.connect(self.list_add_selected)

        #  buttons
        button_handlers = (
            (self.push_add_ok, self.add_feature),
            (self.push_add_cancel, self.close_dialog),
            (self.feature_add, self.list_add_selected),
            (self.feature_remove, self.list_remove_selected),
            (self.feature_sortUp, self.move_item_up),
            (self.feature_sortDown, self.move_item_down),
        )
        for button, handler in button_handlers:
            button.clicked.connect(handler)
Beispiel #7
0
    def __init__(self, window, parent=None):
        """Set up the database manager dialog and connect its buttons.

        Args:
            window: Object stored as ``self.window`` for later use.
            parent: Optional parent forwarded to the base-class initialiser.
        """
        super(DatabaseManagerDialog, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.window = window
        self.ldb = LotteryDatabase()
        self.db_manager_init()

        # variables
        self.deleted = {}
        self.created = {}

        # buttons
        button_handlers = (
            (self.btn_add_model, self.add_model),
            (self.btn_delete_model, self.delete_model),
            (self.btn_add_predict, self.add_predict),
            (self.btn_delete_predict, self.delete_predict),
            (self.btn_save, self.save_database),
            (self.btn_cancel, self.close_db_manager),
        )
        for button, handler in button_handlers:
            button.clicked.connect(handler)
Beispiel #8
0
def declarative_change():
    """Create two ``BASE`` subclasses named ``Class1`` in a row and print the second.

    The first class is bound to table ``test``, the second to ``test1``;
    both declare a single ``id`` primary-key column.  A ``LotteryDatabase``
    is instantiated first and kept alive for the duration of the call.
    """
    ldb = LotteryDatabase(True)

    class1 = None
    # Between the two definitions the original carried a disabled
    # `unregister_class(class1)` call; nothing is unregistered here either.
    for table_name in ('test', 'test1'):
        class1 = type('Class1', (BASE, ), {
            '__tablename__': table_name,
            'id': Column('id', Integer, primary_key=True)
        })

    print(class1)
Beispiel #9
0
    def __create_rash_group(self):
        """Count group-dominated draws and derive per-number rates for each draw.

        Every row returned by ``self.ldb.execute(self.input_table)`` is
        reduced to its columns 1..6.  Each number is tagged with the id
        (1, 2 or 3) of the first configured group that contains it.  When
        exactly four numbers of a draw carry the same tag, the matching
        ``self.rash_one`` / ``self.rash_two`` / ``self.rash_three`` counter
        is incremented together with the hit count of each of those numbers.
        Finally a rate ``round(hits / rash_counter * 10, 2)`` is computed for
        every grouped number of the draw.

        Returns:
            None.
        """
        self.ldb = LotteryDatabase()
        fetch_a = self.ldb.execute(self.input_table)

        groups = self.curr_game['groups']
        first_data_group = defaultdict(int)
        second_data_group = defaultdict(int)
        third_data_group = defaultdict(int)

        for line in fetch_a:
            line = line[1:7]

            # The tags are rebuilt for every draw and kept index-aligned with
            # `line` (0 = member of no group).  A list shared across draws
            # would make the Counter include earlier draws and make zip()
            # pair numbers with tags that belong to other numbers.
            new_set = []
            for num in line:
                if num in groups['first_group']:
                    new_set.append(1)
                elif num in groups['second_group']:
                    new_set.append(2)
                elif num in groups['third_group']:
                    new_set.append(3)
                else:
                    new_set.append(0)

            count = Counter(new_set)

            if count[1] == 4:
                self.rash_one += 1
                for x, y in zip(line, new_set):
                    if y == 1:
                        first_data_group[x] += 1

            elif count[2] == 4:
                self.rash_two += 1
                for x, y in zip(line, new_set):
                    if y == 2:
                        second_data_group[x] += 1

            elif count[3] == 4:
                self.rash_three += 1
                for x, y in zip(line, new_set):
                    if y == 3:
                        third_data_group[x] += 1

            # NOTE(review): rash_group is rebuilt for every draw and is
            # neither stored nor returned - confirm where it should end up.
            rash_group = []

            for num, tag in zip(line, new_set):
                if tag == 1:
                    d, total = first_data_group.get(num), self.rash_one
                elif tag == 2:
                    d, total = second_data_group.get(num), self.rash_two
                elif tag == 3:
                    d, total = third_data_group.get(num), self.rash_three
                else:
                    continue

                # .get() yields None (not "") for a number that has not been
                # counted yet; treat that as a single hit instead of failing
                # on None / int.
                if d is None:
                    d = 1

                rash_group.append(round(float(d / total) * 10, 2))

        # NOTE(review): the finaliser is invoked explicitly here, as before;
        # confirm that LotteryDatabase offers no dedicated close method.
        self.ldb.__del__()
Beispiel #10
0
class ConvertModel:

    def __init__(self, worker, initial_table=True, last_draw=None, limit=0):

        # class initialize
        self.worker = worker
        self.table_name = self.worker.table_name
        self.game_name = self.worker.window.line_current_game.text()

        if initial_table:

            self.ldb = self.worker.window.ldb

            self.curr_game = self.ldb.fetchone(LottoGame, {'name': self.game_name})
            self.input_table = 'INPUT_' + self.curr_game.input_table

            # features
            table_headers = {'__tablename__': self.table_name,
                             'id': Column('id', Integer, primary_key=True),
                             'DRAFT_ID': Column('DRAFT_ID', Integer)}

            for feature in self.curr_game.model_features:
                match_items = self.worker.window.list_model.findItems(feature.name, Qt.MatchExactly)
                if len(match_items) > 0:
                    feature_len = feature.length + 1
                    feature_header = feature.header
                    for n in range(1, feature_len):
                        table_headers[feature_header + str(n)] = Column(feature_header + str(n), Integer)

            table_headers['LABEL'] = Column('LABEL', Integer)

            self.ldb.delete_table(self.table_name)
            self.ldb.delete_model_by_table_name(self.table_name)
            self.ldb.create_class_model(self.curr_game.training_model, table_headers)
            self.ldb.meta_create_all()
            self.input_record_count = self.ldb.get_table_length(DYNAMIC_CLASS[self.curr_game.input_model]) + 1 - limit

            # variables
            if last_draw is None:
                self.last_draw = [13, 18, 29, 32, 37]
            else:
                self.last_draw = last_draw

            self.total_game_numbers = self.curr_game.total_numbers + 1
            self.training_size_limit = int(self.worker.window.combo_test_size.currentText())
            self.single_draw_len = self.curr_game.length + 1

            self.labels = [0, 12, 34, 56]
            self.win = {'LABEL': 1}
            self.loss = {'LABEL': 0}

            self.rash_one = 5
            self.rash_two = 5
            self.rash_three = 5
            self.rash_default = 5

    @staticmethod
    def __append_drawn(current_array, set_of_six):
        new_drawn = []
        for drawn in set_of_six:
            index = drawn - 1
            new_drawn.append(current_array[index])

        return new_drawn

    def __append_rash_group(self, sample):
        new_set = []
        for num in sample:
            if num in self.curr_game['groups']['first_group']:
                new_set += [1]
            elif num in self.curr_game['groups']['second_group']:
                new_set += [2]
            elif num in self.curr_game['groups']['third_group']:
                new_set += [3]

        return new_set

    def __append_alpha_group(self, sample):
        alpha_group = []
        for n in sample:
            for g in self.curr_game['alphabetic_groups'].items():
                if n in g[1]:
                    alpha_group += [1]
                else:
                    alpha_group += [0]

        return alpha_group

    def __map_last_draw(self, curr_draw):
        return {'IN_LAST_' + str(n): i for n, i in zip(range(1, self.single_draw_len), curr_draw)}

    def __append_in_last_draw(self, sample, curr_draw):
        if curr_draw.count(0) == 0:
            last_draw = [0 for _ in range(1, self.total_game_numbers)]
        else:
            last_draw = [1 if n in sample else 0 for n in range(1, self.total_game_numbers)]

        return last_draw

    def __append_numbers_cycle(self, sample, curr_cycle):
        if sum(map((0).__eq__, curr_cycle.values())) == 0:
            numbers_cycle = {'CYCLE_' + str(n): 0 for n in range(1, self.total_game_numbers)}
        else:
            numbers_cycle = {'CYCLE_' + str(n): 1 if n in sample else curr_cycle['CYCLE_' + str(n)]
                             for n in range(1, self.total_game_numbers)}
        return numbers_cycle

    def append_hot_cold_warm_cool(self, top_numbers, n_top, large_small):
        if large_small == 'L':
            hw = nlargest(n_top, top_numbers, key=top_numbers.get)
            hc_wc = [1 if str(n) in hw else 0 for n in range(1, self.total_game_numbers)]
        else:
            cc = nsmallest(n_top, top_numbers, key=top_numbers.get)
            hc_wc = [1 if str(n) in cc else 0 for n in range(1, self.total_game_numbers)]

        return hc_wc

    @staticmethod
    def __append_count_label(sample, count_list):
        label = 0
        count = Counter(sample)
        for x in count_list:
            label = label + count[x]
        return label

    def __map_number_map(self, sample):
        return {'MAP_' + str(n): 1 if n in sample else 0 for n in range(1, self.total_game_numbers)}

    def __map_original_numbers(self, sample):
        return {'ORIGINAL_' + str(n): i for n, i in zip(range(1, self.single_draw_len), sample)}

    def __append_rash(self, array):
        rash_group = []
        for num in array:
            if num in self.curr_game['groups']['first_group']:
                data = 1  # first_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_one) * 100, 2)
                else:
                    per = round(float(data / self.rash_one) * 100, 2)
                rash_group.append(per)

            elif num in self.curr_game['groups']['second_group']:
                data = 1  # second_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_two) * 100, 2)
                else:
                    per = round(float(data / self.rash_two) * 100, 2)
                rash_group.append(per)

            elif num in self.curr_game['groups']['third_group']:
                data = 1  # third_data_group.get(num)
                if data is None:
                    per = round(float(1 / self.rash_three) * 100, 2)
                else:
                    per = round(float(data / self.rash_three) * 100, 2)
                rash_group.append(per)

        return rash_group

    def __append_db(self, params):
        """Hand ``params`` to ``self.ldb.add_record`` for the game's training model class."""
        training_model = DYNAMIC_CLASS[self.curr_game.training_model]
        self.ldb.add_record(training_model, params)

    def create_prediction_model(self, input_array):
        """Build one record per ascending 5-number combination of the input numbers.

        ``input_array`` is a space-separated string of integers.  It is
        parsed, passed through ``__add_random`` and every combination
        ``a < b < c < d < e`` of the resulting list is turned into a feature
        row according to the entries of the window's feature list.  Progress
        is emitted via ``self.worker.signal_progress_bar``.

        NOTE(review): this method appears to be out of sync with its helpers -
        see the notes at the marked lines below before relying on it.
        """

        self.ldb.create_new_session()
        list_model = self.worker.window.list_model

        my_list = list(map(int, input_array.split(" ")))
        my_list = self.__add_random(my_list)

        ids = 1
        combined_set = []
        top_numbers = self.__create_top_numbers(self.input_record_count - 100)
        number_cycles = self.__get_latest_number_cycle()
        curr_draw = self.__get_latest_draw()
        # Fixed at C(42, 5); it does not depend on len(my_list).
        # NOTE(review): the progress percentage below is therefore only exact
        # when my_list holds 42 numbers - confirm the intent.
        total_combinations = int(math.factorial(42)/(math.factorial(5)*(math.factorial(42-5))))

        # Five nested loops enumerate every strictly ascending 5-tuple.
        for a in my_list:
            # if 1 <= a <= 11:
            for b in my_list:
                if a < b:  # and 5 <= b <= 23:
                    for c in my_list:
                        if b < c:  # and 11 <= c <= 33:
                            for d in my_list:
                                if c < d:  # and 20 <= d <= 40:
                                    for e in my_list:
                                        # diff = e-a
                                        # sample_sum = a+b+c+d+e
                                        if d < e:  # and 29 <= e <= 42: # and 17 < diff < 40 and 69 < sample_sum < 151:
                                            # for f in my_list:
                                            #     if e < f:

                                            sample_array = [a, b, c, d, e]

                                            # Append the values of each feature in list order.
                                            for i in range(self.worker.window.list_model.count()):

                                                # NOTE(review): __map_number_map and
                                                # __append_numbers_cycle return dicts; `+=` on a
                                                # list adds their keys, not their values.
                                                if list_model.item(i).text() == 'number_map':
                                                    combined_set += self.__map_number_map(sample_array)

                                                elif list_model.item(i).text() == 'number_cycles':
                                                    combined_set += self.__append_numbers_cycle(sample_array,
                                                                                                number_cycles)

                                                elif list_model.item(i).text() == 'original_numbers':
                                                    combined_set += sample_array

                                                elif list_model.item(i).text() == 'in_last_draw':
                                                    combined_set += curr_draw

                                                elif list_model.item(i).text() == 'rash_group':
                                                    combined_set += self.__append_rash_group(sample_array)

                                                elif list_model.item(i).text() == 'alphabetic_group':
                                                    combined_set += self.__append_alpha_group(sample_array)

                                                elif list_model.item(i).text() == 'hot numbers':
                                                    combined_set += self.append_hot_cold_warm_cool(
                                                        top_numbers, 10, 'L')

                                                elif list_model.item(i).text() == 'cold numbers':
                                                    combined_set += self.append_hot_cold_warm_cool(
                                                        top_numbers, 10, 'S')

                                                elif list_model.item(i).text() == 'warm numbers':
                                                    combined_set += self.append_hot_cold_warm_cool(
                                                        top_numbers, 20, 'L')

                                                elif list_model.item(i).text() == 'cool numbers':
                                                    combined_set += self.append_hot_cold_warm_cool(
                                                        top_numbers, 20, 'S')

                                            # Label = how many of the sample's numbers occur in self.last_draw.
                                            label = [self.__append_count_label(sample_array,
                                                                               self.last_draw)]

                                            # NOTE(review): __append_db is declared with a single
                                            # `params` argument, but four positional arguments
                                            # are passed here.
                                            self.__append_db(ids, [0], combined_set, label)

                                            combined_set = []

                                            self.worker.signal_progress_bar.emit((ids/total_combinations)*100)

                                            ids += 1

        # Reset the progress bar, then commit.
        self.worker.signal_progress_bar.emit(0)
        self.ldb.db_commit()

    def convert_to_original(self):
        """Write the rows of the selected ``PREDICT_*`` table to a CSV file.

        The file is created under ``archived/`` and named after the selected
        prediction model plus the current timestamp.  For every record id
        from 1 up to ``self.input_record_count - 1`` the 1-based positions of
        the columns 1..49 that hold ``1`` are written, followed by the last
        column of the prediction row and the last column of the matching
        ``OUTPUT_prediction`` row.  Progress is reported via
        ``self.worker.signal_status``.
        """
        self.ldb = LotteryDatabase()
        model_box = self.worker.window.combo_predict_model
        self.table_name = 'PREDICT_' + model_box.currentText()

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H %M %S")
        file_name = '{} {}'.format(model_box.currentText(), timestamp)
        header_row = ['FIRST', 'SECOND', 'THIRD', 'FOURTH', 'FIFTH', 'SIXTH', 'LABEL', 'OUTPUT']

        with open('archived/' + file_name + '.csv', 'a', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(header_row)

            for o in range(1, self.input_record_count):
                predict_row = list(self.ldb.fetchone(self.table_name, o))
                output_row = list(self.ldb.fetchone('OUTPUT_prediction', o))

                # Turn the flag columns back into the numbers they stand for.
                drawn = [position
                         for position, flag in enumerate(predict_row[1:50], start=1)
                         if flag == 1]

                writer.writerow(drawn + [predict_row[-1]] + [output_row[-1]])

                self.worker.signal_status.emit(
                    'Export in progress: {} of {}.'.format(o, self.input_record_count - 1))

        # Clear the status message once the export is finished.
        self.worker.signal_status.emit('')

    def __get_latest_number_cycle(self):

        curr_cycle, fetch_one = [], []
        for o in range(1, self.input_record_count):
            curr_cycle = self.__append_numbers_cycle(fetch_one[1:self.single_draw_len], curr_cycle)

            self.ldb.db_commit()
            fetch_one = list(self.ldb.fetchone(self.input_table, o))

        curr_cycle = self.__append_numbers_cycle(fetch_one[1:self.single_draw_len], curr_cycle)
        return curr_cycle

    def __get_latest_draw(self):

        curr_draw, fetch_one = [], []
        for o in range(1, self.input_record_count):
            curr_draw = self.__append_in_last_draw(fetch_one[1:self.single_draw_len], curr_draw)

            self.ldb.session_commit()
            fetch_one = list(self.ldb.fetchone(self.input_table, o))

        curr_draw = self.__append_in_last_draw(fetch_one[1:self.single_draw_len], curr_draw)
        return curr_draw

    def __create_top_numbers(self, offset):
        """Count how often each number occurs in a window of input records.

        The records come from ``self.ldb.limit_offset_query`` called with the
        game's input model class, ``offset`` and ``offset - 200``.

        Returns:
            dict: Maps ``str(number)`` to its occurrence count over the
            attributes ``NR_1 .. NR_<single_draw_len - 1>`` of the records.
        """
        input_model = DYNAMIC_CLASS[self.curr_game.input_model]
        records = self.ldb.limit_offset_query(input_model, offset, offset-200)

        counts = {}
        for record in records:
            for position in range(1, self.single_draw_len):
                key = str(getattr(record, 'NR_' + str(position)))
                counts[key] = counts.get(key, 0) + 1

        return counts

    def get_latest_pairs(self, pair_size):

        pairs = {}
        sql_ct = str.format("SELECT * FROM {} limit {} offset {}", self.input_table, 366, self.input_record_count - 367)
        self.ldb.execute(sql_ct)
        last = self.ldb.c.fetchmany(self.input_record_count)
        for sample in last:
            comb = combinations(sample, pair_size)
            for c in comb:
                if c not in pairs:
                    pairs[c] = 1
                else:
                    pairs[c] += 1

        pairs_largest = nlargest(100, pairs, key=pairs.get)
        return pairs_largest

    def get_latest_top(self):
        """Return the result of ``__create_top_numbers`` for offset ``input_record_count - 100``."""
        offset = self.input_record_count - 100
        return self.__create_top_numbers(offset)

    def __add_random(self, o_num, limit=True):

        if limit:
            sample_size = self.training_size_limit
        else:
            sample_size = self.total_game_numbers + 1

        while True:
            r = random.randrange(1, self.total_game_numbers)
            if r not in o_num:
                o_num = o_num + [r]
                if len(o_num) == sample_size:
                    o_num.sort()
                    break
        return o_num

    def create_training_model(self):

        self.ldb.create_new_session()
        list_model = self.worker.window.list_model

        ids = 1
        avg_time = 0
        win_count, loss_count = 0, 0
        zero, one, two, three, four = 0, 0, 0, 0, 0

        combined_set, curr_cycle = {}, {}
        fetch_one, curr_draw = [], []
        start_time = time.time()
        
        for o in range(1, self.input_record_count):

            curr_cycle = self.__append_numbers_cycle(fetch_one, curr_cycle)
            curr_draw = self.__append_in_last_draw(fetch_one, curr_draw)
            top_numbers = self.__create_top_numbers(o)

            record_set = self.ldb.fetchone(DYNAMIC_CLASS[self.curr_game.input_model], {'id': o})
            fetch_one = [getattr(record_set, 'NR_' + str(i)) for i in range(1, self.single_draw_len)]

            my_list = self.__add_random(fetch_one)

            end_time = time.time()
            avg_time = (avg_time + (end_time - start_time)) / o
            eta = avg_time * self.input_record_count - avg_time * o

            self.worker.signal_status.emit(self.__print_run_time(eta))
            self.worker.signal_progress_bar.emit(((o + 1) / self.input_record_count) * 100)

            for a in my_list:
                for b in my_list:
                    if a < b:
                        for c in my_list:
                            if b < c:
                                for d in my_list:
                                    if c < d:
                                        for e in my_list:
                                            if d < e:

                                                sample_array = [a, b, c, d, e]

                                                for i in range(self.worker.window.list_model.count()):

                                                    if list_model.item(i).text() == 'number_map':
                                                        combined_set = {**combined_set,
                                                                        **self.__map_number_map(sample_array)}

                                                    elif list_model.item(i).text() == 'number_cycles':
                                                        combined_set = {**combined_set,
                                                                        **self.__append_numbers_cycle(
                                                                            fetch_one, curr_cycle)}

                                                    elif list_model.item(i).text() == 'original_numbers':
                                                        combined_set = {**combined_set,
                                                                        **self.__map_original_numbers(sample_array)}

                                                    elif list_model.item(i).text() == 'in_last_draw':
                                                        combined_set = {**combined_set,
                                                                        **self.__map_last_draw(curr_draw)}

                                                    elif list_model.item(i).text() == 'rash_group':
                                                        combined_set = {**combined_set,
                                                                        **self.__append_rash_group(sample_array)}

                                                    elif list_model.item(i).text() == 'alphabetic_group':
                                                        combined_set = {**combined_set,
                                                                        **self.__append_alpha_group(sample_array)}

                                                    elif list_model.item(i).text() == 'hot numbers':
                                                        combined_set = {**combined_set,
                                                                        **self.append_hot_cold_warm_cool(
                                                                            top_numbers, 10, 'L')}

                                                    elif list_model.item(i).text() == 'cold numbers':
                                                        combined_set = {**combined_set,
                                                                        **self.append_hot_cold_warm_cool(
                                                                            top_numbers, 10, 'S')}

                                                    elif list_model.item(i).text() == 'warm numbers':
                                                        combined_set = {**combined_set,
                                                                        **self.append_hot_cold_warm_cool(
                                                                            top_numbers, 20, 'L')}

                                                    elif list_model.item(i).text() == 'cool numbers':
                                                        combined_set = {**combined_set,
                                                                        **self.append_hot_cold_warm_cool(
                                                                            top_numbers, 20, 'S')}

                                                label = [self.__append_count_label(sample_array, fetch_one)]

                                                if self.worker.window.check_win_loss.isChecked():

                                                    if label < [3]:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **self.loss})
                                                        loss_count += 1
                                                    else:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **self.win})
                                                        win_count += 1

                                                    ids += 1

                                                else:

                                                    if label[0] in [0, 1]:  # and number_limit[0] < 25:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **{'LABEL': 0}})
                                                        # number_limit[0] = number_limit[0] + 1
                                                        zero += 1
                                                        ids += 1
                                                    elif label[0] == 2:  # and number_limit[1] < 25:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **{'LABEL': 1}})
                                                        # number_limit[1] = number_limit[1] + 1
                                                        one += 1
                                                        ids += 1
                                                    elif label[0] == 3:  # and number_limit[1] < 25:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **{'LABEL': 2}})
                                                        # number_limit[1] = number_limit[1] + 1
                                                        two += 1
                                                        ids += 1
                                                    elif label[0] in [4, 5]:  # and number_limit[2] < 25:
                                                        self.__append_db({**{'DRAFT_ID': o},
                                                                          **combined_set,
                                                                          **{'LABEL': 3}})
                                                        # number_limit[2] = number_limit[2] + 1
                                                        three += 1
                                                        ids += 1

                                                combined_set = {}

        self.ldb.session_commit()
        self.ldb.session_close()
        if self.worker.window.check_win_loss.isChecked():
            return win_count, loss_count
        else:
            return zero, one, two, three, four

    @staticmethod
    def __print_run_time(seconds):
        """Build the ``Estimate time remaining: HH:MM:SS`` status text.

        ``seconds`` is truncated to a whole number with ``int`` and split into
        hours, minutes and seconds; each field is zero-padded to two digits.
        """
        whole_seconds = int(seconds)
        hours, leftover = divmod(whole_seconds, 3600)
        minutes, secs = divmod(leftover, 60)
        fields = ['{:02}'.format(part) for part in (hours, minutes, secs)]
        return 'Estimate time remaining: ' + ':'.join(fields)

    def __create_rash_group(self):
        """Tally "rash" draws per number group and score each drawn number.

        Every row returned by ``self.ldb.execute(self.input_table)`` is reduced
        to its columns 1-6 and each of those numbers is labelled with the group
        (first, second or third, taken from ``self.curr_game['groups']``) that
        contains it.  A draw in which exactly four numbers share one group
        increments the matching ``self.rash_one`` / ``self.rash_two`` /
        ``self.rash_three`` counter and the per-number hit tally of that group.

        Returns:
            None.  Only the ``self.rash_*`` counters are updated.
        """

        def group_of(num):
            # Groups are looked up lazily and in the fixed order first, second,
            # third; a number that belongs to none of them is labelled None.
            groups = self.curr_game['groups']
            if num in groups['first_group']:
                return 1
            if num in groups['second_group']:
                return 2
            if num in groups['third_group']:
                return 3
            return None

        self.ldb = LotteryDatabase()

        try:
            fetch_a = self.ldb.execute(self.input_table)

            # Per group: how often each number took part in a rash draw.
            hits = {1: defaultdict(int), 2: defaultdict(int), 3: defaultdict(int)}

            for line in fetch_a:
                line = line[1:7]

                # The labels are rebuilt for every draw and stay aligned with
                # ``line`` (one label per number).  Carrying them over from
                # earlier draws made both the Counter and the zip() below work
                # on stale data.
                labels = [group_of(num) for num in line]
                count = Counter(labels)

                if count[1] == 4:
                    self.rash_one += 1
                    rash_label = 1
                elif count[2] == 4:
                    self.rash_two += 1
                    rash_label = 2
                elif count[3] == 4:
                    self.rash_three += 1
                    rash_label = 3
                else:
                    rash_label = None

                if rash_label is not None:
                    for num, label in zip(line, labels):
                        if label == rash_label:
                            hits[rash_label][num] += 1

                totals = {1: self.rash_one, 2: self.rash_two, 3: self.rash_three}

                # NOTE(review): rash_group is built per draw but never stored or
                # returned in the code visible here - confirm what the caller
                # is supposed to receive.
                rash_group = []

                for num, label in zip(line, labels):
                    if label is None:
                        continue

                    total = totals[label]
                    if not total:
                        # No rash draw recorded for this group yet, so there is
                        # nothing to divide by.
                        per = 0.0
                    else:
                        # A number without a recorded hit counts as 1;
                        # dict.get() yields None (not "") for a missing key.
                        seen = hits[label].get(num, 1)
                        per = round(float(seen / total) * 10, 2)

                    rash_group.append(per)
        finally:
            # Release the database handle even when a row fails to process.
            self.ldb.__del__()
Beispiel #11
0
    def run(self):
        """Run the task named by ``self.window.select_thread``.

        Recognised task names are ``process_input``, ``update_la_jolla``,
        ``create_model``, ``process_embedded``, ``sklearn_ml``, ``keras_ml``,
        ``export_to_csv`` and ``import_data``; any other value does nothing.
        Results and failures are reported through ``signal_infobox``; several
        tasks also reset the progress bar through ``signal_progress_bar``.
        """

        process_name = self.window.select_thread

        if process_name == "process_input":

            if self.window.input_line.text() == "":
                self.signal_infobox.emit(
                    'Missing input',
                    'No input numbers to proceed. Please try again.')
            else:
                self.table_name = ('PREDICT_' +
                                   self.window.combo_predict_model.currentText())

                try:
                    convert = ConvertModel(self)
                    ml = MachineLearning(self)

                    convert.create_prediction_model(
                        self.window.input_line.text())
                    ml.random_forest_predict()
                    self.signal_progress_bar.emit(0)
                    self.signal_infobox.emit('Completed',
                                             'Prediction model created!')
                except Exception as exc:
                    self.signal_infobox.emit(
                        'Error', 'Something went wrong!! ' + str(exc))
                    self.signal_progress_bar.emit(0)

        elif process_name == 'update_la_jolla':

            try:
                ldb = LotteryDatabase()
                imported, rejected = ldb.import_la_jolla()
                self.signal_infobox.emit(
                    'Completed',
                    str.format(
                        'Lottery data imported! \n Imported: {} \n Rejected: {}',
                        imported, rejected))

            except Exception as exc:

                self.signal_infobox.emit('Error',
                                         'Something went wrong!! ' + str(exc))
                self.signal_progress_bar.emit(0)

        elif process_name == "create_model":

            if self.window.combo_db.currentText() != '':

                self.table_name = 'MODEL_' + self.window.combo_db.currentText()

                # Discard any answer left over from an earlier confirmation so
                # the wait below really waits for this question.
                self.window.response = None
                self.signal_qbox.emit('Create',
                                      'Do you want create new model?')

                # The answer is stored on the window by the question-box slot.
                # Sleep between polls instead of spinning at full speed.
                while self.window.response is None:
                    self.msleep(50)

                # Consume the answer whatever it was; previously a "No" stayed
                # on the window and was reused by the next run.
                answer = self.window.response
                self.window.response = None

                if answer == QMessageBox.Yes:

                    try:
                        convert = ConvertModel(self)

                        if self.window.check_win_loss.isChecked():

                            win, loss = convert.create_training_model()
                            self.signal_progress_bar.emit(0)
                            self.signal_infobox.emit(
                                'Completed', 'Training model created! \n' +
                                'Win Classification: ' + str(win) + '\n' +
                                'Loss Classification: ' + str(loss))

                        else:

                            zero, one, two, three, four = (
                                convert.create_training_model())
                            self.signal_progress_bar.emit(0)
                            self.signal_infobox.emit(
                                'Completed', 'Training model created! \n' +
                                'First Classification: ' + str(zero) + '\n' +
                                'Second Classification: ' + str(one) + '\n' +
                                'Third Classification: ' + str(two) + '\n' +
                                'Fourth Classification: ' + str(three) + '\n' +
                                'Fifth Classification: ' + str(four))

                    except Exception as exc:
                        self.signal_infobox.emit(
                            'Error', 'Something went wrong!! ' + str(exc))
                        self.signal_progress_bar.emit(0)

            else:
                self.signal_infobox.emit('Missing', 'Select model first!')

        elif process_name == "process_embedded":

            if (self.window.input_line.text() == ""
                    and not self.window.check_latest.isChecked()):
                self.signal_infobox.emit(
                    'Missing input',
                    'No input numbers to proceed. Please try again.')
            elif self.window.check_latest.isChecked():

                ldb = LotteryDatabase()
                ldb_original = 'INPUT_' + CONFIG['games']['mini_lotto'][
                    'database']
                original_len = ldb.get_table_length(ldb_original)

                # Walk backwards over the 31 highest row ids of the input table.
                for i in range(1, 32):

                    try:

                        fetch_one = list(
                            ldb.fetchone(ldb_original, original_len - i + 1))

                        # Train against every model listed in the combo box.
                        for j in range(self.window.combo_db.count()):

                            self.window.combo_db.setCurrentIndex(j)

                            self.table_name = ('MODEL_' +
                                               self.window.combo_db.currentText())
                            self.currentThread().__name__ = "MainThread"
                            ml = MachineLearning(self)
                            _ = ml.embedded_learning(
                                " ".join(map(str, fetch_one[1:6])), i,
                                fetch_one[0])

                    except Exception as exc:
                        self.signal_infobox.emit(
                            'Error', 'Something went wrong!! \n' + str(exc))

                self.signal_infobox.emit('Done', 'Finished!!')

            else:

                self.table_name = 'MODEL_' + self.window.combo_db.currentText()
                self.currentThread().__name__ = "MainThread"
                ml = MachineLearning(self)

                try:
                    output = ml.embedded_learning(
                        self.window.input_line.text())
                    self.signal_infobox.emit(
                        'Completed', 'Embedded Training finished! \n' + output)
                except Exception as exc:
                    self.signal_infobox.emit(
                        'Error', 'Something went wrong!! \n' + str(exc))

        elif process_name == "sklearn_ml":

            if len(self.window.list_ml.selectedItems()) > 0:
                self.table_name = 'MODEL_' + self.window.combo_db.currentText()
                self.currentThread().__name__ = "MainThread"
                ml = MachineLearning(self)

                try:
                    ml.sklearn_model_train()
                    self.signal_infobox.emit('Completed',
                                             'Training model finished!')
                except Exception as exc:
                    self.signal_infobox.emit(
                        'Error', 'Something went wrong!! \n' + str(exc))
            else:
                self.signal_infobox.emit('Missing',
                                         'Algorithm has not been selected!')

        elif process_name == "keras_ml":

            self.table_name = 'MODEL_' + self.window.combo_db.currentText()
            self.currentThread().__name__ = "MainThread"
            ml = MachineLearning(self)

            try:
                ml.keras_model_train()
                self.signal_infobox.emit('Completed',
                                         'Training model finished!')
            except Exception as exc:
                self.signal_infobox.emit(
                    'Error', 'Something went wrong!! \n' + str(exc))
                print(traceback.format_exc())

        elif process_name == "export_to_csv":

            export_to = ConvertModel(self, False)

            try:
                export_to.convert_to_original()
                self.signal_progress_bar.emit(0)
                self.signal_infobox.emit('Completed', 'Export complete!')
            except Exception as exc:
                self.signal_infobox.emit(
                    'Error', 'Something went wrong!! \n' + str(exc))
                self.signal_progress_bar.emit(0)

        elif process_name == "import_data":

            # NOTE(review): this opens a file dialog from inside run(); if
            # run() executes on the worker thread, the dialog is created
            # outside the GUI thread - confirm and consider moving it.
            options = QFileDialog.Options()
            options |= QFileDialog.DontUseNativeDialog
            file_name, _ = QFileDialog.getOpenFileName(
                self.window,
                "Import file",
                "",
                "All Files (*);;Text Files (*.txt)",
                options=options)
            if file_name:

                # Report failures like every other task does instead of letting
                # the exception escape from run().
                try:
                    ldb = LotteryDatabase()
                    curr_game = ldb.fetchone(
                        LottoGame,
                        {'name': self.window.line_current_game.text()})

                    # Create the INPUT_ table for this game on first import.
                    if not ldb.check_model_exist_by_table_name(
                            'INPUT_' + curr_game.input_table):

                        input_params = {
                            '__tablename__': 'INPUT_' + curr_game.input_table,
                            'id': Column('id', Integer, primary_key=True)
                        }

                        for i in range(1, curr_game.length + 1):
                            input_params['NR_' + str(i)] = Column(
                                'NR_' + str(i), Integer)
                        ldb.create_class_model(curr_game.input_model,
                                               input_params)
                        ldb.meta_create_all()

                    imported, rejected = ldb.import_file(
                        'INPUT_' + curr_game.input_table, file_name,
                        curr_game.length + 1)

                    self.signal_infobox.emit(
                        'Completed',
                        str.format(
                            'Lottery data imported! \n '
                            'Imported: {} \n '
                            'Rejected: {}', imported, rejected))

                except Exception as exc:
                    self.signal_infobox.emit(
                        'Error', 'Something went wrong!! \n' + str(exc))
Beispiel #12
0
class DatabaseManagerDialog(QtBaseDbClass, Ui_DbDialog):
    """Dialog for adding and removing the current game's user tables.

    Two list widgets show the ``MODEL_`` and ``PREDICT_`` tables without their
    prefix.  Additions and removals are only queued in ``self.created`` and
    ``self.deleted`` (full table name -> 0 for a model table, 1 for a
    prediction table) and are written to the database by ``save_database``.
    """

    def __init__(self, window, parent=None):
        super(DatabaseManagerDialog, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.window = window
        self.ldb = LotteryDatabase()

        # variables: pending changes, keyed by the full (prefixed) table name
        # so a model and a prediction table may share the same short name
        self.deleted = {}
        self.created = {}

        self.db_manager_init()

        # buttons
        self.btn_add_model.clicked.connect(self.add_model)
        self.btn_delete_model.clicked.connect(self.delete_model)

        self.btn_add_predict.clicked.connect(self.add_predict)
        self.btn_delete_predict.clicked.connect(self.delete_predict)

        self.btn_save.clicked.connect(self.save_database)
        self.btn_cancel.clicked.connect(self.close_db_manager)

    def db_manager_init(self):
        """Fill both list widgets from the current game's ``user_tables``."""

        curr_game = self.ldb.fetchone(
            LottoGame, {'name': self.window.line_current_game.text()})

        for table in curr_game.user_tables:
            name = table.database_name
            # Strip only the leading prefix; str.replace() would also remove
            # the same text from the middle of a name.
            if name.startswith('MODEL_'):
                self.list_model_db.addItem(name[len('MODEL_'):])
            elif name.startswith('PREDICT_'):
                self.list_predict_db.addItem(name[len('PREDICT_'):])

    def _queue_add(self, line_edit, list_widget, prefix, kind):
        """Validate the name typed into *line_edit* and queue its creation."""

        text = line_edit.text().strip()

        if text == '':
            return

        if ' ' in text:
            QMessageBox.information(
                self, 'Whitespace',
                'Your database name contain whitespaces. Please check!')
            return

        if list_widget.findItems(text, QtCore.Qt.MatchExactly):
            QMessageBox.information(
                self, 'Already exist',
                'Your database name already exist. Try again!')
            return

        self.created[prefix + text] = kind
        list_widget.addItem(text)
        line_edit.clear()

    def _queue_delete(self, list_widget, prefix, kind):
        """Remove the current entry of *list_widget* and queue its deletion."""

        if len(list_widget.selectedItems()) > 0:
            table_name = prefix + list_widget.currentItem().text()

            if table_name in self.created:
                # The entry was added in this dialog session and never saved:
                # forgetting the pending creation is all that is needed.
                del self.created[table_name]
            else:
                self.deleted[table_name] = kind

            list_widget.takeItem(list_widget.currentRow())

    def add_model(self):
        """Queue a new model table named after the model line edit."""
        self._queue_add(self.line_model, self.list_model_db, 'MODEL_', 0)

    def add_predict(self):
        """Queue a new prediction table named after the predict line edit."""
        self._queue_add(self.line_predict, self.list_predict_db, 'PREDICT_', 1)

    def delete_model(self):
        """Queue removal of the selected model table."""
        self._queue_delete(self.list_model_db, 'MODEL_', 0)

    def delete_predict(self):
        """Queue removal of the selected prediction table."""
        self._queue_delete(self.list_predict_db, 'PREDICT_', 1)

    @staticmethod
    def _refill_combo(combo, list_widget):
        """Replace the entries of *combo* with the rows of *list_widget*."""
        combo.clear()
        for row in range(list_widget.count()):
            combo.addItem(list_widget.item(row).text())

    def save_database(self):
        """Write the queued changes and refresh the main window's combo boxes."""

        self.ldb.create_new_session()
        game = self.window.line_current_game.text()

        # Deletions go first so a table that was removed and re-added in the
        # same session ends up present.
        for table_name in self.deleted:
            self.ldb.delete_record(DatabaseModels, {
                'game': game,
                'database_name': table_name
            })

        for table_name in self.created:
            self.ldb.add_record(DatabaseModels, {
                'game': game,
                'database_name': table_name
            })

        self.ldb.session_commit()
        self.ldb.session_close()

        # The list widgets hold the final state (kept + newly created), so the
        # combo boxes are rebuilt from them; re-adding only the created names
        # dropped every table that already existed.
        self._refill_combo(self.window.combo_db, self.list_model_db)
        self._refill_combo(self.window.combo_predict_model,
                           self.list_predict_db)

        self.close_db_manager()

    def close_db_manager(self):
        """Close the dialog."""
        self.close()
Beispiel #13
0
class ModelAddDialog(QtBaseAddClass, Ui_AddDialog):
    """Dialog for choosing and ordering the features of the main window.

    ``list_add_available`` offers the features stored for the current game and
    ``list_feature_order`` holds the chosen ones in order.  Confirming stores
    the order in the user settings and emits ``signal_add_to_model`` once per
    chosen feature.
    """

    signal_add_to_list = QtCore.pyqtSignal(str)
    signal_add_to_model = QtCore.pyqtSignal(str)

    def __init__(self, window, parent=None):
        super(ModelAddDialog, self).__init__(parent)

        # class initialize
        self.setupUi(self)
        self.window = window
        self.ldb = LotteryDatabase()
        self.list_add_available_init()

        #  signals
        self.signal_add_to_list.connect(self.list_add_selected)

        #  buttons
        self.push_add_ok.clicked.connect(self.add_feature)
        self.push_add_cancel.clicked.connect(self.close_dialog)
        self.feature_add.clicked.connect(self.list_add_selected)
        self.feature_remove.clicked.connect(self.list_remove_selected)

        self.feature_sortUp.clicked.connect(self.move_item_up)
        self.feature_sortDown.clicked.connect(self.move_item_down)

    def list_add_selected(self):
        """Append the selected available features that are not yet chosen."""
        for item in self.list_add_available.selectedItems():
            if not self.list_feature_order.findItems(item.text(),
                                                     QtCore.Qt.MatchExactly):
                self.list_feature_order.addItem(item.text())

    def list_remove_selected(self):
        """Remove the current row from the chosen-feature list."""
        self.list_feature_order.takeItem(self.list_feature_order.currentRow())

    def _shift_current_item(self, offset):
        """Move the current chosen feature by *offset* rows and keep it selected."""
        row = self.list_feature_order.currentRow()
        target = row + offset
        item = self.list_feature_order.takeItem(row)
        self.list_feature_order.insertItem(target, item)
        self.list_feature_order.setCurrentRow(target)
        self.list_feature_order.item(target).setSelected(True)

    def move_item_up(self):
        """Move the current chosen feature one row up, if it is not the first."""
        if self.list_feature_order.currentRow() > 0:
            self._shift_current_item(-1)

    def move_item_down(self):
        """Move the current chosen feature one row down, if it is not the last."""
        row = self.list_feature_order.currentRow()
        # currentRow() is -1 when nothing is current; without the lower bound
        # that case passed the check and tried to take row -1.
        if 0 <= row < self.list_feature_order.count() - 1:
            self._shift_current_item(1)

    def list_add_available_init(self):
        """Fill both lists: stored features and the window's current choice."""
        features = self.ldb.fetchall(
            ModelFeatures, {'game': self.window.line_current_game.text()})
        for feature in features:
            self.list_add_available.addItem(feature.name)
        for i in range(self.window.list_model.count()):
            self.list_feature_order.addItem(
                self.window.list_model.item(i).text())

    def add_feature(self):
        """Persist the chosen order, publish it to the window and close."""

        self.window.list_model.clear()

        feature_names = [
            str(self.list_feature_order.item(i).text())
            for i in range(self.list_feature_order.count())
        ]

        # The settings store the order as one '|'-separated string.
        user_config = {'list_model': '|'.join(feature_names)}

        self.ldb.update_record(
            UserSettings, {
                'user_parent': 'default',
                'line_current_game': self.window.line_current_game.text()
            }, user_config)

        # Emit from the list itself: splitting the joined string turned an
        # empty selection into a single empty feature name.
        for name in feature_names:
            self.signal_add_to_model.emit(name)

        self.close_dialog()

    def close_dialog(self):
        """Close the dialog."""
        self.close()
Beispiel #14
0
class Window(QtBaseClass, Ui_MainWindow):
    """Main application window.

    Connects the widgets to a single ``ThreadClass`` worker: each action stores
    the task name in ``self.select_thread`` and starts the worker, and the
    worker's signals drive the progress bars, status bar and message boxes.
    User settings are read on start-up and written back on exit.
    """

    def __init__(self, parent=None):

        # Start the cooperative __init__ chain at this class, as the dialog
        # classes in this file do.
        super(Window, self).__init__(parent)

        #  class initialize
        self.setupUi(self)
        self.worker = ThreadClass(self)
        self.ldb = LotteryDatabase(True)
        self.update_algorithms()
        self.update_combobox_ml()
        self.get_user_settings()

        # sys.stdout = OutLog(self.stdout_text, sys.stdout, QtGui.QColor(255, 255, 255))
        # sys.stderr = OutLog(self.stdout_text, sys.stderr, QtGui.QColor(255, 255, 255))

        #  variables
        self.select_thread = None  # name of the task the worker should run
        self.response = None  # answer of the last question box

        # signals
        # self.ldb.signal_db_error.connect(self.info_box)

        self.worker.signal_progress_bar.connect(self.update_progress_bar)
        self.worker.signal_infobox.connect(self.info_box)
        self.worker.signal_status.connect(self.update_status_bar)
        self.worker.signal_qbox.connect(self.question_box)

        # buttons
        self.push_delete.clicked.connect(self.delete_feature)
        self.push_create.clicked.connect(self.create_model)
        self.push_predict.clicked.connect(self.process_input)
        self.push_embedded.clicked.connect(self.process_embedded)
        self.push_add.clicked.connect(self.load_add_ui)
        self.push_ml.clicked.connect(self.sklearn_ml)
        self.push_knn.clicked.connect(self.keras_ml)

        # menu bar actions
        self.actionDatabase_Manager.triggered.connect(self.load_db_manager)
        self.actionExit_Program.triggered.connect(self.close_app)
        self.actionImport_from_file.triggered.connect(self.import_data)
        self.actionVersion.triggered.connect(self.program_version)
        self.actionExport_to.triggered.connect(self.export_to_csv)
        self.actionImport_La_Jolla.triggered.connect(self.update_la_jolla)

        # tooltips
        self.check_add_random.setToolTip('Add random numbers to each sample.')
        self.combo_test_size.setToolTip(
            'Determine testing size for each sample.')

    # ------------------------------------------------------------------
    # user settings
    # ------------------------------------------------------------------

    def _settings_key(self):
        """Return the filter identifying the settings row of the current game."""
        return {
            'user_parent': 'default',
            'line_current_game': self.line_current_game.text()
        }

    def _joined_feature_names(self):
        """Return the entries of ``list_model`` joined with ``'|'``."""
        return '|'.join([
            str(self.list_model.item(i).text())
            for i in range(self.list_model.count())
        ])

    def save_user_settings(self):
        """Store the state of the option widgets in the user settings."""

        user_config = {
            'check_win_loss': self.check_win_loss.isChecked(),
            'check_add_random': self.check_add_random.isChecked(),
            'check_latest': self.check_latest.isChecked(),
            'check_sampling': self.check_sampling.isChecked(),
            'check_keras': self.check_keras.isChecked(),
            'combo_predict_model': self.combo_predict_model.currentText(),
            'combo_predict_ml': self.combo_predict_ml.currentText(),
            'combo_db': self.combo_db.currentText(),
            'combo_test_size': self.combo_test_size.currentText(),
            'combo_scoring': self.combo_scoring.currentText(),
            'list_model': self._joined_feature_names()
        }

        self.ldb.update_record(UserSettings, self._settings_key(), user_config)

    def get_user_settings(self):
        """Apply the stored user settings to the option widgets."""

        user_config = self.ldb.fetchone(UserSettings, self._settings_key())

        # presumably fetchone() yields None when no row matches - verify
        # against LotteryDatabase; in that case the widgets keep their defaults
        if user_config is None:
            return

        self.check_win_loss.setChecked(user_config.check_win_loss)
        self.check_add_random.setChecked(user_config.check_add_random)
        self.check_latest.setChecked(user_config.check_latest)
        self.check_sampling.setChecked(user_config.check_sampling)
        self.check_keras.setChecked(user_config.check_keras)

        self.combo_predict_model.setCurrentText(
            user_config.combo_predict_model)
        self.combo_predict_ml.setCurrentText(user_config.combo_predict_ml)
        self.combo_db.setCurrentText(user_config.combo_db)
        self.combo_test_size.setCurrentText(user_config.combo_test_size)
        self.combo_scoring.setCurrentText(user_config.combo_scoring)

        # Skip both an empty string and a missing value.
        if user_config.list_model:
            for each_key in user_config.list_model.split("|"):
                self.list_model.addItem(each_key)

    # ------------------------------------------------------------------
    # dialogs and feature list
    # ------------------------------------------------------------------

    def load_add_ui(self):
        """Open the feature dialog and take over the features it publishes."""
        feature_dialog = ModelAddDialog(self)
        feature_dialog.signal_add_to_model.connect(
            self.update_add_feature_list)

        for i in range(self.list_model.count()):

            feature_dialog.signal_add_to_list.emit(
                self.list_model.item(i).text())

        feature_dialog.exec_()

    def load_db_manager(self):
        """Open the database manager dialog."""
        db_manager = DatabaseManagerDialog(self)
        db_manager.exec_()

    def delete_feature(self):
        """Remove the current feature and store the remaining order."""
        self.list_model.takeItem(self.list_model.currentRow())

        user_config = {'list_model': self._joined_feature_names()}

        self.ldb.update_record(UserSettings, self._settings_key(), user_config)

    def update_add_feature_list(self, item):
        """Append *item* to the feature list."""
        self.list_model.addItem(item)

    def update_algorithms(self):
        """Fill the algorithm list with the supported estimator names."""
        self.list_ml.addItem('RandomForestClassifier')
        self.list_ml.addItem('RandomForestRegressor')
        self.list_ml.addItem('LogisticRegression')
        self.list_ml.addItem('SGDClassifier')

    def update_combobox_ml(self):
        """Rebuild the model, prediction, algorithm and test-size combo boxes."""

        tables = self.ldb.fetchall(DatabaseModels, {})

        self.combo_predict_model.clear()
        self.combo_db.clear()
        self.combo_test_size.clear()
        self.combo_predict_ml.clear()

        for alg in range(self.list_ml.count()):
            self.combo_predict_ml.addItem(self.list_ml.item(alg).text())

        for table in tables:
            name = table.database_name
            # Strip only the leading prefix; str.replace() would also remove
            # the same text from the middle of a name.
            if name.startswith('MODEL_'):
                self.combo_db.addItem(name[len('MODEL_'):])
            elif name.startswith('PREDICT_'):
                self.combo_predict_model.addItem(name[len('PREDICT_'):])

        for n in range(6, 13):
            self.combo_test_size.addItem(str(n))

    # ------------------------------------------------------------------
    # worker feedback
    # ------------------------------------------------------------------

    def update_status_bar(self, status):
        """Show *status* in the status bar."""
        self.statusBar().showMessage(status)

    def update_progress_bar(self, progress_val):
        """Route *progress_val* to the progress bar of the selected task."""

        if self.select_thread == "process_input":

            self.progress_predict.setValue(progress_val)

        elif self.select_thread == "create_model":

            self.progress_create.setValue(progress_val)

        else:

            self.progress_ml.setValue(progress_val)

    # ------------------------------------------------------------------
    # worker tasks
    # ------------------------------------------------------------------

    def _start_worker(self, task_name):
        """Select *task_name* and start the worker thread.

        Nothing happens while the worker is still running: starting it again
        would have no effect, and overwriting ``select_thread`` would send the
        running task's progress to the wrong progress bar.
        """
        if self.worker.isRunning():
            return
        self.select_thread = task_name
        self.worker.start()

    def sklearn_ml(self):
        """Start the ``sklearn_ml`` task."""
        self._start_worker("sklearn_ml")

    def keras_ml(self):
        """Start the ``keras_ml`` task."""
        self._start_worker("keras_ml")

    def create_model(self):
        """Start the ``create_model`` task."""
        self._start_worker("create_model")

    def import_data(self):
        """Start the ``import_data`` task."""
        self._start_worker("import_data")

    def process_input(self):
        """Start the ``process_input`` task."""
        self._start_worker("process_input")

    def export_to_csv(self):
        """Start the ``export_to_csv`` task."""
        self._start_worker("export_to_csv")

    def process_embedded(self):
        """Start the ``process_embedded`` task."""
        self._start_worker("process_embedded")

    def update_la_jolla(self):
        """Start the ``update_la_jolla`` task."""
        self._start_worker("update_la_jolla")

    # ------------------------------------------------------------------
    # message boxes
    # ------------------------------------------------------------------

    def info_box(self, info_head, info_text):
        """Show an information box with the given title and text."""
        QMessageBox.information(self, info_head, info_text)

    def question_box(self, question_head, question_text):
        """Ask a Yes/No question and keep the answer in ``self.response``."""

        self.response = QMessageBox.question(self, question_head,
                                             question_text,
                                             QMessageBox.Yes | QMessageBox.No,
                                             QMessageBox.No)

    # ------------------------------------------------------------------
    # shutdown
    # ------------------------------------------------------------------

    def _confirm_exit(self):
        """Ask whether to quit; on "Yes" save the settings and return True."""
        odp = QMessageBox.question(self, 'Exit', "Are you sure you want exit?",
                                   QMessageBox.Yes | QMessageBox.No,
                                   QMessageBox.No)

        if odp == QMessageBox.Yes:
            self.save_user_settings()
            return True
        return False

    def closeEvent(self, event):
        """Confirm before closing; exit the program when confirmed."""

        if self._confirm_exit():
            event.accept()
            sys.exit()
        else:
            event.ignore()

    def keyPressEvent(self, event):
        """Treat Escape as an exit request; pass every other key on."""

        if event.key() == QtCore.Qt.Key_Escape:

            if self._confirm_exit():
                event.accept()
                sys.exit()
            else:
                event.ignore()

        else:
            # Keys that are not handled here go to the base class.
            super(Window, self).keyPressEvent(event)

    def close_app(self):
        """Confirm and exit the program (menu action)."""
        if self._confirm_exit():
            sys.exit()

    def program_version(self):
        """Show the program version."""
        self.info_box('Program Version', VERSION)
Beispiel #15
0
class MachineLearning:
    def __init__(self, worker):
        """Bind the worker, open the database and derive the feature columns.

        Every feature currently listed in the window's ``list_model`` widget
        contributes ``<header><n> INTEGER`` entries to ``table_headers`` for
        ``n`` from 1 up to the feature's configured ``length``.
        """
        self.worker = worker
        self.ldb = LotteryDatabase()

        # data matrices, filled in by the training methods
        self.x, self.y = None, None
        self.x_train, self.x_validation = None, None
        self.y_train, self.y_validation = None, None

        # tuning constants
        self.N_FOLDS = 5
        self.MAX_EVALS = 20
        self.RANDOM_STATE = 42
        self.training_size = 15
        self.n_increment = 10
        self.curr_game = CONFIG['games']['mini_lotto']

        # feature bookkeeping
        self.table_name = self.worker.table_name
        self.features = self.curr_game['features']
        self.table_headers = []

        selected_features = self.worker.window.list_model
        for row in range(selected_features.count()):
            feature = self.features[selected_features.item(row).text()]
            prefix = feature['header']
            self.table_headers.extend(
                prefix + str(n) + ' INTEGER'
                for n in range(1, feature['length'] + 1))

    def generate_df_pieces(self, connection, chunk_size, offset, ids):
        """Yield the ``tempView`` rows as DataFrames of at most ``chunk_size`` rows.

        The view is dropped and recreated over ``self.table_name`` with the
        columns ``DRAFT_ID``, the entries of ``self.table_headers`` and
        ``LABEL``. Only rows whose ``DRAFT_ID`` is not greater than ``ids`` are
        read, starting at ``offset``. A progress text is emitted on
        ``worker.signal_status`` before each query and cleared once the
        generator is exhausted.
        """
        total_rows = self.ldb.get_table_length(self.table_name)
        total_chunks = int(math.ceil(total_rows / chunk_size))

        view_columns = ",".join(['DRAFT_ID'] + self.table_headers + ['LABEL'])
        self.ldb.delete_view('tempView')
        self.ldb.create_view('tempView', view_columns, self.table_name)

        query = "SELECT * FROM tempView WHERE DRAFT_ID <= %d limit %d offset %d"
        chunk_no = 1

        while True:
            progress = 'Collecting data from database... {} of {}'.format(
                chunk_no, total_chunks)
            self.worker.signal_status.emit(progress)

            frame = pd.read_sql_query(query % (ids, chunk_size, offset),
                                      connection)
            row_count = frame.shape[0]

            if row_count == 0:
                break
            yield frame

            # a short page means the data is exhausted
            if row_count < chunk_size:
                break

            offset += chunk_size
            chunk_no += 1

        self.worker.signal_status.emit('')

    def embedded_learning(self, input_array, limit=0, draft_id=0):
        """Train on the stored drafts and score the candidates for ``input_array``.

        Steps:

        1. Load the training view (``DRAFT_ID`` up to the length of the game's
           ``INPUT_`` table minus ``limit``) into ``self.x`` / ``self.y``.
        2. Let ``ConvertModel`` build the ``PREDICT_<model>`` table.
        3. Write one CSV row per candidate (numbers, parity, high/low and
           frequency scores) into ``archived/``.
        4. Fit the model and append its prediction(s) as extra CSV columns.

        The model is built according to the ``check_keras`` checkbox, which is
        also what selects the training branch. Previously the Keras network
        was always built, so the scikit-learn branches could not work.
        NOTE(review): the scikit-learn estimator comes from ``list_ml`` (via
        ``choose_model``) while the training branch is picked from
        ``combo_predict_ml``; the two widgets are expected to agree.

        Args:
            input_array: Space-separated integers describing the draft.
            limit: Subtracted from the ``INPUT_`` table length to get the
                highest ``DRAFT_ID`` used for training; also passed to
                ``ConvertModel``.
            draft_id: Value written to the ``DRAFT_NR`` column of every row.

        Returns:
            An empty string (the summary message is currently disabled).
        """
        window = self.worker.window
        use_keras = window.check_keras.isChecked()
        n_features = len(self.table_headers)

        input_len = self.ldb.get_table_length('INPUT_' +
                                              self.curr_game['database'])

        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 100000, 0,
                                    input_len - limit))
        array = dataset.values

        # Column 0 is DRAFT_ID, the features follow and LABEL comes last.
        self.x = array[:, 1:n_features + 1]
        self.y = array[:, n_features + 1]

        tb._SYMBOLIC_SCOPE.value = True
        model = self.choose_model(keras=use_keras)
        estimator = model['model']
        # 'info' can be None; it must not break the file-name concatenation.
        info = model['info'] or ''

        predict_table = 'PREDICT_' + window.combo_predict_model.currentText()
        self.worker.table_name = predict_table

        convert = ConvertModel(self.worker,
                               list(map(int, input_array.split(" "))), limit)
        convert.create_prediction_model(input_array)

        self.table_name = predict_table

        output_dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 10000, 0, 0))
        output_x = output_dataset.values[:, 1:n_features + 1]

        row_stop = self.ldb.get_table_length(self.worker.table_name) + 1

        file_name_r = self._archive_file_name(info)

        export_columns = [
            'DRAFT_NR', 'FIRST', 'SECOND', 'THIRD', 'FOURTH', 'FIFTH',
            'ODD_EVEN', 'LOW_HIGH', 'LA_JOLLA', 'SCORE_ALL', 'SCORE_TOP',
            'SCORE_LESS', 'SCORE_2', 'SCORE_3', 'LABEL'
        ] + ['OUTPUT ' + str(n) for n in range(1, self.training_size + 1)]

        with open('archived/' + file_name_r + '.csv', 'a',
                  newline='') as csv_file:

            writer = csv.writer(csv_file)
            writer.writerow(export_columns)

            pairs_two = convert.get_latest_pairs(2)
            pairs_three = convert.get_latest_pairs(3)

            latest_numbers = convert.get_latest_top()
            top_numbers = nlargest(20, latest_numbers, key=latest_numbers.get)
            less_numbers = nsmallest(20,
                                     latest_numbers,
                                     key=latest_numbers.get)

            # NOTE(review): the LA JOLLA view query is disabled, so this reads
            # whatever rows the shared cursor still holds, and the membership
            # test below compares a list against those rows - confirm intent.
            la_jolla_db = self.ldb.c.fetchall()

            for o in range(1, row_stop):
                fetch_one = list(self.ldb.fetchone(self.table_name, o))

                originals = fetch_one[2:self.curr_game['length'] + 2]
                label_column = [fetch_one[-1]]

                # positions (1-based) flagged with 1 are the drawn numbers
                numbers = [
                    pos + 1 for pos, flag in enumerate(originals) if flag == 1
                ]

                odd_count = sum(1 for x in numbers if x % 2 != 0)
                even_count = len(numbers) - odd_count
                odd_even_check = 1 if even_count > odd_count else 0

                high_low = sum(x > 21 for x in numbers)

                score_all, score_2, score_3, score_top, score_less = 0, 0, 0, 0, 0

                # every hit is worth a little less the further down the
                # ranking it appears
                decrease = 0
                for top in top_numbers:
                    if int(top) in numbers:
                        score_all += (1 - decrease)
                        score_top += (1 - decrease)
                    decrease += 0.05

                decrease = 0
                for top in less_numbers:
                    if int(top) in numbers:
                        score_less += (1 - decrease)
                    decrease += 0.05

                number_pairs = set(combinations(numbers, 2))

                decrease = 0
                for pair in pairs_two:
                    if pair in number_pairs:
                        score_all += (1 - decrease)
                        score_2 += (1 - decrease)
                    decrease += 0.01

                number_triples = set(combinations(numbers, 3))

                decrease = 0
                for pair in pairs_three:
                    if pair in number_triples:
                        score_all += (1 - decrease)
                        score_3 += (1 - decrease)
                    decrease += 0.01

                la_jolla = 1 if numbers in la_jolla_db else 0

                writer.writerow(
                    [draft_id] + numbers +
                    [odd_even_check, high_low, la_jolla, score_all, score_top,
                     score_less, score_2, score_3] + label_column)

            self.worker.signal_status.emit('')

        if use_keras:

            estimator.fit(self.x,
                          self.y,
                          epochs=100,
                          batch_size=212,
                          verbose=2)

            file_name_r = self._append_prediction_column(
                file_name_r, export_columns, estimator.predict(output_x), info)

        elif window.combo_predict_ml.currentText() in ('LogisticRegression',
                                                       'SGDClassifier'):

            estimator.fit(self.x, self.y)

            file_name_r = self._append_prediction_column(
                file_name_r, export_columns, estimator.predict(output_x), info)

        else:

            # warm-started forest: grow it step by step, one column per step
            for t in range(self.training_size):
                self.worker.signal_status.emit(
                    str.format('Training in process... {} of {}', t + 1,
                               self.training_size))
                estimator.n_estimators += self.n_increment
                estimator.fit(self.x, self.y)

                file_name_r = self._append_prediction_column(
                    file_name_r, export_columns, estimator.predict(output_x),
                    info)

        self.worker.signal_status.emit('')

        return ''

    def _archive_file_name(self, info):
        """Return ``'<predict model> <timestamp><info>'`` for an archive CSV."""
        stamp = datetime.datetime.now().strftime("%Y-%m-%d %H %M %S")
        return str.format(
            '{} {}', self.worker.window.combo_predict_model.currentText(),
            stamp) + info

    def _append_prediction_column(self, file_name_r, export_columns,
                                  prediction, info):
        """Rewrite the archive CSV with ``prediction`` appended to every row.

        The rows are read completely before the new file is opened: the file
        name only has one-second resolution, so the new name can equal the old
        one, and writing while still reading would truncate the source.

        Returns:
            The name (without directory and extension) of the file written.
        """
        source_path = 'archived/' + file_name_r + '.csv'

        with open(source_path, 'r', newline='') as read_csv_file:
            csv_input = csv.reader(read_csv_file)
            next(csv_input)  # skip the header, it is written again below
            rows = list(csv_input)

        file_name_w = self._archive_file_name(info)
        target_path = 'archived/' + file_name_w + '.csv'

        with open(target_path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(export_columns)

            for row, o in zip(rows, map(str, prediction)):
                writer.writerow(row + [o])

        # never delete the file that has just been written
        if target_path != source_path:
            os.remove(source_path)

        return file_name_w

    def choose_model(self, params=None, fresh=False, keras=False):
        """Build the estimator to train together with a short file-name tag.

        Args:
            params: Optional keyword arguments forwarded to the scikit-learn
                estimator; takes precedence over ``fresh``. Not used for the
                Keras network.
            fresh: When true (and ``params`` is None) the estimator is built
                with the library defaults instead of the tuned defaults below.
            keras: When true the fixed two-hidden-layer Keras network is
                returned instead of the estimator selected in ``list_ml``.
                ``self.x`` must already be loaded because its column count
                defines the input size.

        Returns:
            ``{'model': estimator, 'info': tag}``. Both values are ``None``
            when the ``list_ml`` selection is not one of the supported
            estimators.
        """
        window = self.worker.window

        if keras:

            model = Sequential()
            model.add(
                Dense(units=220,
                      kernel_initializer='uniform',
                      input_dim=int(self.x.shape[1])))
            model.add(Activation(activation='relu'))
            model.add(Dropout(0.27208339620963506))

            model.add(Dense(units=205, kernel_initializer="glorot_uniform"))
            model.add(Activation(activation='relu'))
            model.add(Dropout(0.29152160619480066))

            model.add(Dense(1))
            model.add(Activation('sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='rmsprop')

            # Callers concatenate 'info' into file names, so it must be a str.
            return {
                'model': model,
                'info': ' KERAS ' + window.combo_db.currentText()
            }

        # list_ml entry -> (estimator class, tag, tuned default arguments)
        catalogue = {
            'RandomForestClassifier':
            (RandomForestClassifier, ' RFC ',
             dict(warm_start=True,
                  n_estimators=1,
                  n_jobs=-1,
                  random_state=self.RANDOM_STATE)),
            'RandomForestRegressor':
            (RandomForestRegressor, ' RFR ',
             dict(warm_start=True,
                  n_estimators=1,
                  n_jobs=-1,
                  random_state=self.RANDOM_STATE)),
            'LogisticRegression':
            (linear_model.LogisticRegression, ' LR ',
             dict(C=50, solver='liblinear')),
            'SGDClassifier':
            (linear_model.SGDClassifier, ' SGD ',
             dict(class_weight='balanced',
                  loss='hinge',
                  max_iter=2426,
                  tol=1.6246894453989777e-05,
                  warm_start=True)),
        }

        selected = window.list_ml.currentItem().text()
        if selected not in catalogue:
            return {'model': None, 'info': None}

        estimator_cls, tag, tuned_defaults = catalogue[selected]

        if params is not None:
            model = estimator_cls(**params)
        elif fresh:
            model = estimator_cls()
        else:
            model = estimator_cls(**tuned_defaults)

        return {'model': model, 'info': tag + window.combo_db.currentText()}

    def choose_space(self, keras=False):
        """Return the hyperopt search space for the model being tuned.

        Args:
            keras: When true return the space for the Keras network;
                otherwise the space for the estimator selected in ``list_ml``.

        Returns:
            A dict of hyperopt expressions whose keys are the keyword
            arguments of the estimator (or, for Keras, the keys read by
            ``keras_objective``). An empty dict is returned for an unsupported
            ``list_ml`` selection.
        """
        if keras:

            return {
                'choice':
                hp.choice('num_layers',
                          [{
                              'layers': 'two',
                          }, {
                              'layers': 'three',
                              'units3': hp.uniform('units3', 64, 1024),
                              'dropout3': hp.uniform('dropout3', .25, .75)
                          }]),
                'units1':
                hp.uniform('units1', 64, 1024),
                'units2':
                hp.uniform('units2', 64, 1024),
                'dropout1':
                hp.uniform('dropout1', .25, .75),
                'dropout2':
                hp.uniform('dropout2', .25, .75),
                'batch_size':
                hp.uniform('batch_size', 28, 128),
                'nb_epochs':
                100,
                'optimizer':
                hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
                'activation':
                'relu'
            }

        selected = self.worker.window.list_ml.currentItem().text()

        if selected == 'RandomForestClassifier':

            return {
                'n_estimators':
                hp.choice('n_estimators', range(100, 1500)),
                'class_weight':
                hp.choice('class_weight',
                          ['balanced', 'balanced_subsample', None]),
                'max_features':
                hp.choice('max_features', ['auto', 'sqrt', 'log2']),
                'bootstrap':
                hp.choice('bootstrap', [True, False]),
                'max_depth':
                hp.choice('max_depth', [None, 1, 3]),
                'criterion':
                hp.choice('criterion', ['gini', 'entropy'])
            }

        if selected == 'RandomForestRegressor':

            # No 'class_weight' here: RandomForestRegressor does not accept
            # it, so every sampled configuration would fail on construction.
            return {
                'n_estimators':
                hp.choice('n_estimators', range(10, 150)),
                'warm_start':
                hp.choice('warm_start', [True, False]),
                'max_features':
                hp.choice('max_features', ['auto', 'sqrt']),
                'bootstrap':
                hp.choice('bootstrap', [True, False]),
                'max_depth':
                hp.choice('max_depth', [None, 1, 2, 3]),
                'min_samples_split':
                hp.choice('min_samples_split', [2, 3]),
                'min_samples_leaf':
                hp.choice('min_samples_leaf', [1, 2])
            }

        if selected == 'LogisticRegression':

            return {
                'solver': hp.choice('solver', ['newton-cg', 'lbfgs', 'sag']),
                'warm_start': hp.choice('warm_start', [True, False]),
                'class_weight': hp.choice('class_weight', ['balanced', None]),
                'tol': hp.uniform('tol', 0.00001, 0.0001),
                'C': hp.uniform('C', 1.0, 50.0),
                'fit_intercept': hp.choice('fit_intercept', [True, False]),
                'max_iter': hp.choice('max_iter', range(100, 3000))
            }

        if selected == 'SGDClassifier':

            return {
                'class_weight':
                hp.choice('class_weight', [None, 'balanced']),
                'warm_start':
                hp.choice('warm_start', [True, False]),
                'fit_intercept':
                hp.choice('fit_intercept', [True, False]),
                'tol':
                hp.uniform('tol', 0.00001, 0.0001),
                'loss':
                hp.choice('loss',
                          ['hinge', 'log', 'squared_hinge', 'modified_huber']),
                'max_iter':
                hp.choice('max_iter', range(500, 3000))
            }

        return {}

    def sklearn_model_train(self):
        """Tune the selected scikit-learn estimator and report validation scores.

        Loads the rows with ``DRAFT_ID`` up to 5000, runs a hyperopt TPE search
        (``self.MAX_EVALS`` evaluations of ``sklearn_objective``), refits the
        best configuration on an 80 % training split and reports accuracy,
        precision, recall, F1 and ROC AUC on the remaining 20 % through
        ``worker.signal_infobox``. A confusion-matrix plot is shown last.
        """
        print(self.worker.window.list_ml.currentItem().text())

        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 100000, offset=0, ids=5000))
        array = dataset.values

        # The view is DRAFT_ID, features..., LABEL: skip the id column so it
        # is not used as a feature and LABEL (not the last feature) is the
        # target.
        n_features = len(self.table_headers)
        self.x = array[:, 1:n_features + 1]
        self.y = array[:, n_features + 1]

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            self.x, self.y, test_size=0.2, random_state=self.RANDOM_STATE)

        space = self.choose_space()

        bayes_trials = Trials()

        best = fmin(fn=self.sklearn_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=self.MAX_EVALS,
                    trials=bayes_trials)

        print(best)

        for bt in bayes_trials:
            print(bt['result']['loss'])
            print(bt['result']['params'])

        model = self.choose_model(space_eval(space, best))
        model['model'].fit(x_train, y_train)
        y_pred = model['model'].predict(x_validation)

        msg = '\n'.join([
            'Accuracy Score : ' + str(accuracy_score(y_validation, y_pred)),
            'Precision Score : ' + str(precision_score(y_validation, y_pred)),
            'Recall Score : ' + str(recall_score(y_validation, y_pred)),
            'F1 Score : ' + str(f1_score(y_validation, y_pred)),
            'ROC_AUC Score:' + str(roc_auc_score(y_validation, y_pred)),
        ])

        plt.figure()
        # confusion_matrix orders its rows/columns by ascending label, so the
        # tick labels have to be given in that order as well.
        self.plot_confusion_matrix(confusion_matrix(y_validation, y_pred),
                                   classes=[0, 1])

        self.worker.signal_status.emit('')

        self.worker.signal_infobox.emit("Completed", msg)
        plt.show()

    def sklearn_objective(self, params, n_folds=5):
        """Hyperopt objective: cross-validate the estimator built from ``params``.

        Args:
            params: Keyword arguments sampled from the search space; they are
                forwarded to the estimator so each trial really evaluates its
                own configuration.
            n_folds: Number of cross-validation folds.

        Returns:
            A hyperopt result dict whose loss is ``1 - best fold f1_macro``.
        """
        clf = self.choose_model(params)

        scores = model_selection.cross_val_score(clf['model'],
                                                 self.x,
                                                 self.y,
                                                 cv=n_folds,
                                                 scoring='f1_macro')

        # NOTE(review): the best fold is used, not the mean - confirm intent.
        loss = 1 - max(scores)

        return {'loss': loss, 'params': params, 'status': STATUS_OK}

    def keras_model_train(self):
        """Run a hyperopt TPE search over the Keras network's hyper-parameters.

        Loads the rows with ``DRAFT_ID`` up to 5000, stores an 80/20
        train/validation split on the instance for ``keras_objective`` and
        prints the best configuration followed by every trial's loss and
        parameters.
        """
        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 100000, offset=0, ids=5000))
        array = dataset.values

        # The view is DRAFT_ID, features..., LABEL: skip the id column so it
        # is not used as a feature and LABEL (not the last feature) is the
        # target.
        n_features = len(self.table_headers)
        self.x = array[:, 1:n_features + 1]
        self.y = array[:, n_features + 1]

        self.x_train, self.x_validation, self.y_train, self.y_validation = model_selection.train_test_split(
            self.x, self.y, test_size=0.2, random_state=self.RANDOM_STATE)

        space = self.choose_space(keras=True)

        tb._SYMBOLIC_SCOPE.value = True

        bayes_trials = Trials()

        best = fmin(fn=self.keras_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=self.MAX_EVALS,
                    trials=bayes_trials)

        print(best)

        for bt in bayes_trials:
            print(bt['result']['loss'])
            print(bt['result']['params'])

    def keras_objective(self, params):
        """Hyperopt objective: train one Keras network and score it by ROC AUC.

        Args:
            params: One sample of the Keras search space. ``units*`` and
                ``batch_size`` are sampled as floats and are truncated to
                integers here because layer widths and batch sizes must be
                whole numbers.

        Returns:
            A hyperopt result dict; the loss is the negated validation ROC AUC
            so that minimising it maximises the AUC. ``status`` is included
            because hyperopt requires it in every result.
        """
        activation = params['activation']
        extra_layer = params['choice']

        model = Sequential()
        model.add(
            Dense(units=int(params['units1']),
                  input_dim=self.x_train.shape[1]))
        model.add(Activation(activation))
        model.add(Dropout(params['dropout1']))

        model.add(
            Dense(units=int(params['units2']),
                  kernel_initializer="glorot_uniform"))
        model.add(Activation(activation))
        model.add(Dropout(params['dropout2']))

        if extra_layer['layers'] == 'three':
            model.add(
                Dense(units=int(extra_layer['units3']),
                      kernel_initializer="glorot_uniform"))
            model.add(Activation(activation))
            model.add(Dropout(extra_layer['dropout3']))

        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer=params['optimizer'])

        model.fit(self.x_train,
                  self.y_train,
                  epochs=params['nb_epochs'],
                  batch_size=int(params['batch_size']),
                  verbose=0)

        # the sigmoid output already is the positive-class probability
        pred_auc = model.predict(self.x_validation, batch_size=128, verbose=0)
        acc = roc_auc_score(self.y_validation, pred_auc)

        return {'loss': -acc, 'params': params, 'status': STATUS_OK}

    @staticmethod
    def plot_confusion_matrix(cm, classes, normalize=False, cmap=plt.cm.Blues):
        """Draw ``cm`` as an annotated heat map on the current pyplot figure.

        Args:
            cm: Square confusion matrix (rows: true labels, columns:
                predicted labels, matching the axis labels set below).
            classes: Tick labels, in the same order as the rows of ``cm``.
            normalize: When true every row is divided by its sum so the cells
                show fractions; rows that sum to zero stay zero.
            cmap: Matplotlib colour map for the image.
        """
        cm = np.asarray(cm)

        if normalize:
            row_totals = cm.sum(axis=1, keepdims=True)
            cm = np.divide(cm,
                           row_totals,
                           out=np.zeros(cm.shape, dtype=float),
                           where=row_totals != 0)

        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title('Confusion matrix')
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        # integer counts print as-is, anything else with two decimals
        fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
        thresh = cm.max() / 2.
        for i, j in product(range(cm.shape[0]), range(cm.shape[1])):
            # white text on the dark cells, black on the light ones
            plt.text(j,
                     i,
                     format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()

    def validate_estimators(self, x, y):
        """Plot train and validation ROC AUC while a warm-started forest grows.

        The data is split 70/30; the forest gains three trees in each of the
        ``self.training_size`` steps and is refitted and scored on both parts
        after every step.
        """

        def area_under_roc(y_true, y_score):
            fpr, tpr, _ = roc_curve(y_true, y_score)
            return auc(fpr, tpr)

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            x, y, test_size=0.3, random_state=0)

        forest = RandomForestRegressor(warm_start=True,
                                       n_estimators=0,
                                       n_jobs=-1)
        tree_counts, train_scores, test_scores = [], [], []

        for step in range(1, self.training_size + 1):
            forest.n_estimators += 3
            tree_counts.append(forest.n_estimators)

            self.worker.signal_status.emit(
                'Validating estimators: {} of {}. Current estimator: {}'.
                format(step, self.training_size, forest.n_estimators))

            forest.fit(x_train, y_train)

            train_scores.append(
                area_under_roc(y_train, forest.predict(x_train)))
            test_scores.append(
                area_under_roc(y_validation, forest.predict(x_validation)))

        train_line, = plt.plot(tree_counts,
                               train_scores,
                               'b',
                               label="Train AUC")
        plt.plot(tree_counts, test_scores, 'r', label="Test AUC")

        plt.legend(handler_map={train_line: HandlerLine2D(numpoints=2)})
        plt.ylabel('AUC score')
        plt.xlabel('n_estimators')
        plt.show()

    def validate_max_depth(self, x, y):
        """Plot train and validation ROC AUC for tree depths 1 to 32.

        The data is split 70/30 and a new ten-tree classifier is fitted for
        every depth. Depths are plain integers because ``max_depth`` is
        documented as an integer (or ``None``).
        """

        def area_under_roc(y_true, y_score):
            fpr, tpr, _ = roc_curve(y_true, y_score)
            return auc(fpr, tpr)

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            x, y, test_size=0.3, random_state=0)

        max_depths = list(range(1, 33))

        train_results = []
        test_results = []

        for step, max_depth in enumerate(max_depths, start=1):

            rf = RandomForestClassifier(warm_start=True,
                                        n_estimators=10,
                                        max_depth=max_depth,
                                        n_jobs=-1)

            self.worker.signal_status.emit(
                'Validating max depth: {} of {}.'.format(
                    step, len(max_depths)))

            rf.fit(x_train, y_train)

            train_results.append(area_under_roc(y_train, rf.predict(x_train)))
            test_results.append(
                area_under_roc(y_validation, rf.predict(x_validation)))

        line1, = plt.plot(max_depths, train_results, 'b', label="Train AUC")
        plt.plot(max_depths, test_results, 'r', label="Test AUC")

        plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
        plt.ylabel('AUC score')
        plt.xlabel('Tree depth')
        plt.show()

    def validate_min_sample_split(self, x, y):
        """Plot train and validation ROC AUC for ``min_samples_split`` fractions.

        The data is split 70/30 and a new ten-tree classifier is fitted for
        every fraction. A float ``min_samples_split`` is a fraction of the
        samples and must lie in (0.0, 1.0], so the grid stops at 1.0.
        """

        def area_under_roc(y_true, y_score):
            fpr, tpr, _ = roc_curve(y_true, y_score)
            return auc(fpr, tpr)

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            x, y, test_size=0.3, random_state=0)

        min_samples_splits = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

        train_results = []
        test_results = []

        for step, min_samples_split in enumerate(min_samples_splits, start=1):

            rf = RandomForestClassifier(warm_start=True,
                                        n_estimators=10,
                                        min_samples_split=min_samples_split,
                                        n_jobs=-1)

            self.worker.signal_status.emit(
                'Validating min sample split: {} of {}.'.format(
                    step, len(min_samples_splits)))

            rf.fit(x_train, y_train)

            train_results.append(area_under_roc(y_train, rf.predict(x_train)))
            test_results.append(
                area_under_roc(y_validation, rf.predict(x_validation)))

        line1, = plt.plot(min_samples_splits,
                          train_results,
                          'b',
                          label="Train AUC")
        plt.plot(min_samples_splits, test_results, 'r', label="Test AUC")

        plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
        plt.ylabel('AUC score')
        plt.xlabel('min samples split')
        plt.show()

    def validate_min_sample_leaf(self, x, y):
        """Plot train and validation ROC AUC for ``min_samples_leaf`` 1 to 5.

        The data is split 70/30 and a new ten-tree classifier is fitted and
        scored on both parts for every leaf size.
        """

        def area_under_roc(y_true, y_score):
            fpr, tpr, _ = roc_curve(y_true, y_score)
            return auc(fpr, tpr)

        x_train, x_validation, y_train, y_validation = model_selection.train_test_split(
            x, y, test_size=0.3, random_state=0)

        leaf_sizes = [1, 2, 3, 4, 5]
        train_scores, test_scores = [], []

        for leaf_size in leaf_sizes:
            forest = RandomForestClassifier(warm_start=True,
                                            n_estimators=10,
                                            min_samples_leaf=leaf_size,
                                            n_jobs=-1)

            self.worker.signal_status.emit(
                'Validating min sample leaf: {} of {}.'.format(
                    leaf_size, len(leaf_sizes)))

            forest.fit(x_train, y_train)

            train_scores.append(
                area_under_roc(y_train, forest.predict(x_train)))
            test_scores.append(
                area_under_roc(y_validation, forest.predict(x_validation)))

        train_line, = plt.plot(leaf_sizes,
                               train_scores,
                               'b',
                               label="Train AUC")
        plt.plot(leaf_sizes, test_scores, 'r', label="Test AUC")

        plt.legend(handler_map={train_line: HandlerLine2D(numpoints=2)})
        plt.ylabel('AUC score')
        plt.xlabel('min samples leaf')
        plt.show()

    def validate_max_features(self):
        """Placeholder: no ``max_features`` validation is implemented."""
        return None

    def random_forest_predict(self):
        """Recreate the ``OUTPUT_prediction`` table and load the feature matrix.

        Only the preparation steps are present in this method: nothing is
        predicted, stored or returned by the code below.
        """

        # two columns: an integer primary key and the predicted label
        output_headers = ",".join(['ID INTEGER PRIMARY KEY'] +
                                  ['OUTPUT_LABEL INTEGER'])

        self.ldb.delete_table('OUTPUT_prediction')
        self.ldb.create_table('OUTPUT_prediction', output_headers)

        ids = 1

        # NOTE(review): generate_df_pieces has a required ``ids`` parameter
        # that is not passed here, so this call raises TypeError; the local
        # ``ids`` above is unused - presumably it was meant to be passed,
        # confirm.
        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 1000, offset=0))

        array = dataset.values

        # first len(table_headers) columns of the loaded rows
        x = array[:, :len(self.table_headers)]

        filename = 'random_forest.sav'
Beispiel #16
0
class TestTF:
    """Tune a small Keras binary classifier with hyperopt.

    Rows are read from the ``MODEL_ml`` table through a temporary view,
    split into training and validation sets, and a TPE search is run over
    the network hyper-parameters using validation ROC AUC as the score.
    """

    def __init__(self):
        """Open the database and build the list of feature column headers."""
        self.ldb = LotteryDatabase(CONFIG['database'])

        # Full feature matrix and label vector; populated by main_tf().
        self.x = None
        self.y = None

        # Train/validation split; populated by main_tf().
        self.x_train = None
        self.x_validation = None
        self.y_train = None
        self.y_validation = None

        self.curr_game = CONFIG['games']['mini_lotto']

        self.table_headers = []
        self.features = self.curr_game['features']

        feat = ['number_map', 'number_cycles', 'cool numbers']

        # Each feature expands to `length` columns named <header>1..<header>N.
        for name in feat:
            feature = self.features[name]
            self.table_headers += [
                feature['header'] + str(n) + ' INTEGER'
                for n in range(1, feature['length'] + 1)
            ]

    def main_tf(self):
        """Load the data, split it 80/20 and run a 10-evaluation TPE search.

        Prints the best parameter set found by ``fmin`` followed by the loss
        and parameters of every trial. The search space is the module-level
        ``space`` object.
        """
        dataset = pd.concat(
            self.generate_df_pieces(self.ldb.conn, 100000, offset=0, ids=5000))

        # tempView is created with DRAFT_ID ahead of the feature columns (it
        # is only needed for the WHERE filter). Left in place, the positional
        # slices below would treat DRAFT_ID as a feature and pick the last
        # feature column as the label instead of LABEL.
        dataset = dataset.drop(columns=['DRAFT_ID'], errors='ignore')
        array = dataset.values

        n_features = len(self.table_headers)
        self.x = array[:, :n_features]
        self.y = array[:, n_features]

        (self.x_train, self.x_validation,
         self.y_train, self.y_validation) = model_selection.train_test_split(
             self.x, self.y, test_size=0.2, random_state=42)

        bayes_trials = Trials()

        best = fmin(fn=self.keras_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=10,
                    trials=bayes_trials)

        print(best)

        for bt in bayes_trials:
            print(bt['result']['loss'])
            print(bt['result']['params'])

    def generate_df_pieces(self, connection, chunk_size, offset, ids):
        """Yield rows of ``tempView`` as DataFrames of at most *chunk_size* rows.

        The view is dropped and recreated over ``MODEL_ml`` with the columns
        ``DRAFT_ID``, ``self.table_headers`` and ``LABEL``. Only rows with
        ``DRAFT_ID <= ids`` are returned.

        Args:
            connection: connection object handed to ``pd.read_sql_query``.
            chunk_size: maximum number of rows per yielded DataFrame.
            offset: number of matching rows to skip before the first chunk.
            ids: highest ``DRAFT_ID`` to include.

        Yields:
            Non-empty DataFrames; iteration stops after the first chunk that
            is empty or shorter than *chunk_size*.

        Raises:
            ValueError: if *chunk_size* is not positive (raised when
                iteration starts, because this is a generator).
        """
        # A zero chunk size would divide by zero below; a negative one never
        # satisfies the short-chunk stop condition.
        if chunk_size <= 0:
            raise ValueError('chunk_size must be a positive integer')

        # The progress total is based on the whole table, not on the rows
        # that pass the DRAFT_ID filter, so it is an upper bound.
        last_row = self.ldb.get_table_length('MODEL_ml')
        chunks = int(math.ceil(last_row / chunk_size))
        n_chunk = 1

        self.ldb.delete_view('tempView')
        self.ldb.create_view(
            'tempView',
            ",".join(['DRAFT_ID'] + self.table_headers + ['LABEL']),
            'MODEL_ml')

        while True:
            print(
                str.format('Collecting data from database... {} of {}',
                           n_chunk, chunks))
            sql_ct = "SELECT * FROM tempView WHERE DRAFT_ID <= %d limit %d offset %d" % (
                ids, chunk_size, offset)
            df_piece = pd.read_sql_query(sql_ct, connection)

            if not df_piece.shape[0]:
                break
            yield df_piece

            # A short chunk means the result set is exhausted.
            if df_piece.shape[0] < chunk_size:
                break

            offset += chunk_size
            n_chunk += 1

    def keras_objective(self, params):
        """Train one network for a hyperopt trial and score it by ROC AUC.

        Args:
            params: sampled hyper-parameters. Keys read here: ``units1``,
                ``units2``, ``dropout1``, ``dropout2``, ``activation``,
                ``optimizer``, ``epochs``, ``batch_size`` and ``choice``
                (a dict with ``layers`` and, when ``layers == 'three'``,
                ``units3`` and ``dropout3``).

        Returns:
            A hyperopt result dict. ``loss`` is the negated validation AUC,
            so minimising the loss maximises the AUC.
        """
        model = Sequential()

        # `units` / `epochs` are the Keras 2 keyword names; the legacy
        # `output_dim` / `nb_epoch` spellings are rejected by newer releases.
        model.add(
            Dense(units=params['units1'],
                  kernel_initializer='uniform',
                  input_dim=int(self.x_train.shape[1])))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['dropout1']))

        model.add(
            Dense(units=params['units2'],
                  kernel_initializer="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['dropout2']))

        # Optional third hidden layer, selected by the search space.
        if params['choice']['layers'] == 'three':
            model.add(
                Dense(units=params['choice']['units3'],
                      kernel_initializer="glorot_uniform"))
            model.add(Activation(params['activation']))
            model.add(Dropout(params['choice']['dropout3']))

        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer=params['optimizer'])

        model.fit(self.x_train,
                  self.y_train,
                  epochs=params['epochs'],
                  batch_size=params['batch_size'],
                  verbose=2)

        # With a sigmoid output, predict() already returns probabilities;
        # predict_proba() was removed from Sequential in newer Keras.
        pred_auc = model.predict(self.x_validation,
                                 batch_size=params['batch_size'],
                                 verbose=2)
        acc = roc_auc_score(self.y_validation, pred_auc)
        print('AUC:', acc)
        sys.stdout.flush()

        return {'loss': -acc, 'params': params, 'status': STATUS_OK}