# Code example #1
# 0
def _to_counts(values):
    """Round every value in *values* and clamp negatives to 0, returning ints."""
    return [int(max(0, round(val))) for val in values]


def main_14():
    """Train an ensemble of regression models and fill blanks in a CSV file.

    Steps performed, in order:

    1. Create ``./outputs`` and ``./models`` if they do not exist.
    2. Load the data with ``regression_preprocessing()``, drop column 1 of
       the outputs, shuffle inputs/outputs together and split them into
       train / validation / test sets.
    3. Build and train models until 11 of them reach a score of at least
       0.90 from ``evaluate_lists`` on the test split.  Every accepted model
       is saved as ``./models/14_model_<n>.h5``; a diagram of it is written
       next to it on a best-effort basis.
    4. Copy ``./outputs/13_output.csv`` to ``./outputs/14_output.csv``.  For
       rows with index 1-10, 31-35 and 46 onwards every model predicts the
       four output values, the per-output result is chosen with
       ``majority`` and written into columns 2, 4, 5 and 6 where the cell
       is an empty string.

    NOTE: the training loop has no attempt limit; a model that fails the
    threshold is discarded and a new one is trained until enough pass.

    Returns:
        None.  Results are written to disk and progress is printed.
    """
    os.makedirs('./outputs', exist_ok=True)
    os.makedirs('./models', exist_ok=True)

    # ---- preprocessing ----
    # Per the original notes: categorical data list, people-count data list,
    # and the dictionary used to vectorize input values.
    _inputs, _outputs, input_dict = regression_preprocessing()

    # Drop the casualty-count column from the people-count data.
    _outputs = deleteColumn(_outputs, [1])

    # Shuffle inputs and outputs together so that the rows are evenly mixed.
    _inputs, _outputs = shuffleList(_inputs, _outputs)

    # Split both lists into test / train / validation parts.
    (inputs, outputs, input_test, input_train, input_val,
     output_test, output_train, output_val) = separate_set_one_output(
         _inputs, _outputs)

    # ---- ensemble training ----
    num_models = 11
    models = []
    cnt = 0  # number of accepted models so far

    while cnt < num_models:
        model = createModel(inputs, outputs)
        model.compile(loss="mse", optimizer="adam", metrics=['accuracy'])

        # Stop training once val_loss has not improved for 10 epochs.
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=10,
                                       verbose=0)

        # The training history returned by fit() is not used here.
        model.fit([np.array(i) for i in input_train],
                  np.array(output_train),
                  epochs=1000,
                  batch_size=pow(2, 13),
                  validation_data=([np.array(i) for i in input_val],
                                   np.array(output_val)),
                  callbacks=[early_stopping],
                  verbose=0)

        # ---- test ----
        # Predictions are rounded and negative values are replaced by 0
        # before they are compared with the expected outputs.
        raw_preds = model.predict([np.array(i) for i in input_test])
        preds = [_to_counts(raw) for raw in raw_preds]
        res = evaluate_lists(preds, output_test)

        # Threshold: discard the model and train a new one below 0.90.
        if res < 0.90:
            print('fail    : model %d: %.2f%%' % (cnt, res * 100))
            continue

        print('complete: model %d: %.2f%%' % (cnt, res * 100))

        # ---- save model ----
        model_stem = './models/14_model_' + str(cnt)
        model.save(model_stem + '.h5')

        # The diagram is optional: a failure here must not abort training,
        # but interrupts (KeyboardInterrupt/SystemExit) still propagate.
        try:
            plot_model(model,
                       to_file=model_stem + '.png',
                       show_shapes=True,
                       show_layer_names=True)
        except Exception as err:
            print('warn    : model %d: plot skipped (%s)' % (cnt, err))

        models.append(model)
        cnt += 1

    # ---- predict ----
    # Columns of the CSV row that receive the 1st..4th predicted value.
    target_columns = (2, 4, 5, 6)

    # `with` guarantees both files are closed even if prediction fails.
    with open('./outputs/13_output.csv', 'r', newline='') as f1, \
            open('./outputs/14_output.csv', 'w', newline='') as f2:
        r1 = csv.reader(f1)
        r2 = csv.writer(f2)

        for i, row in enumerate(r1):
            # Only these row ranges are treated as rows with blanks;
            # every other row is copied unchanged.
            if not (0 < i < 11 or 30 < i < 36 or 45 < i):
                r2.writerow(row)
                continue

            # Model input is the row without columns C-G (indices 2..6).
            _row = row[:2] + row[7:]

            input_pred = []
            for j, elem in enumerate(_row):
                if isinstance(elem, int):
                    input_pred.append(elem)
                    continue
                try:
                    input_pred.append(input_dict[j][elem])
                except (LookupError, TypeError):
                    # Value has no entry in the vectorization dictionary.
                    input_pred.append('')

            # One list of rounded, non-negative predictions per model.
            preds = [
                _to_counts(
                    model.predict([np.array([j]) for j in input_pred])[0])
                for model in models
            ]

            # Transpose: collect[n] holds every model's n-th output value.
            collect = [list(votes) for votes in zip(*preds)]

            for k, column in enumerate(target_columns):
                val, rate = majority(collect[k])
                print('save    : row  %2d: %d_val = %.2f%%' %
                      (i + 1, k + 1, rate * 100))

                # Only blank cells are filled; existing values are kept.
                if row[column] == '':
                    row[column] = val

            r2.writerow(row)
# Code example #2
# 0
def _merge_count_columns(values):
    """Return *values* with elements 1..4 replaced by one normalized list.

    Element 0 is kept, elements 1-4 are gathered and passed to
    ``norm_list``; its result is inserted right before element 5, and all
    remaining elements follow unchanged.  If *values* has fewer than six
    elements the gathered values are never emitted (same as the inline
    loops this helper replaces).
    """
    merged = []
    nums = []
    for idx, elem in enumerate(values):
        if 1 <= idx < 5:
            nums.append(elem)
            continue
        if idx == 5:
            merged.append(norm_list(nums))
        merged.append(elem)
    return merged


def main_2():
    """Train an ensemble of classifiers and fill column 11 of a CSV file.

    Steps performed, in order:

    1. Create ``./outputs`` and ``./models`` if they do not exist.
    2. Load the data with ``category_L_preprocessing()``, drop input
       columns 1, 3 and 10, merge input elements 1..4 into one normalized
       list, shuffle inputs/outputs together and split them into
       train / validation / test sets.
    3. Build and train models until 11 of them reach ``score[1] >= 0.72``
       from ``model.evaluate`` on the test split.  Every accepted model is
       saved as ``./models/2_model_<n>.h5``; a diagram of it is written
       next to it on a best-effort basis.
    4. Copy ``./outputs/1_output.csv`` to ``./outputs/2_output.csv``.  For
       rows with index 11-20 and 43-45 every model predicts a class, the
       result is chosen with ``majority`` and written into column 11.

    NOTE: the training loop has no attempt limit; a model that fails the
    threshold is discarded and a new one is trained until enough pass.

    Returns:
        None.  Results are written to disk and progress is printed.
    """
    os.makedirs('./outputs', exist_ok=True)
    os.makedirs('./models', exist_ok=True)

    # ---- preprocessing ----
    # Per the original notes: categorical data list, people-count data list
    # and the vectorization dictionaries for inputs and outputs.
    _inputs, _outputs, input_dict, output_dict = category_L_preprocessing()

    # Drop day of week, casualty count and column K.
    _inputs = deleteColumn(_inputs, [1, 3, 10])

    # Merge the numeric columns of every input row into one normalized list.
    merged_inputs = [_merge_count_columns(_input) for _input in _inputs]

    # Shuffle inputs and outputs together so that the rows are evenly mixed.
    _inputs, _outputs = shuffleList(merged_inputs, _outputs)

    # Split both lists into test / train / validation parts.
    (inputs, outputs, input_test, input_train, input_val,
     output_test, output_train, output_val) = separate_set_multiple_outputs(
         _inputs, _outputs)

    # ---- ensemble training ----
    num_models = 11
    models = []
    cnt = 0  # number of accepted models so far

    while cnt < num_models:
        model = createModel(inputs, outputs)
        model.compile(loss="categorical_crossentropy",
                      optimizer="adam",
                      metrics=['accuracy'])

        # Stop training once val_loss has not improved for 10 epochs.
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=10,
                                       verbose=0)

        # The training history returned by fit() is not used here.
        model.fit([np.array(i) for i in input_train],
                  [np.array(i) for i in output_train],
                  epochs=1000,
                  batch_size=pow(2, 13),
                  validation_data=([np.array(i) for i in input_val],
                                   [np.array(i) for i in output_val]),
                  callbacks=[early_stopping],
                  verbose=0)

        # ---- test ----
        score = model.evaluate([np.array(i) for i in input_test],
                               [np.array(i) for i in output_test],
                               verbose=0)

        # Threshold: discard the model and train a new one below 0.72.
        if score[1] < 0.72:
            print('fail    : model %d: %s = %.2f%%' %
                  (cnt, model.metrics_names[1], score[1] * 100))
            continue

        print('complete: model %d: %s = %.2f%%' %
              (cnt, model.metrics_names[1], score[1] * 100))

        # ---- save model ----
        model_stem = './models/2_model_' + str(cnt)
        model.save(model_stem + '.h5')

        # The diagram is optional: a failure here must not abort training,
        # but interrupts (KeyboardInterrupt/SystemExit) still propagate.
        try:
            plot_model(model,
                       to_file=model_stem + '.png',
                       show_shapes=True,
                       show_layer_names=True)
        except Exception as err:
            print('warn    : model %d: plot skipped (%s)' % (cnt, err))

        models.append(model)
        cnt += 1

    # ---- predict ----
    # `with` guarantees both files are closed even if prediction fails.
    with open('./outputs/1_output.csv', 'r', newline='') as f1, \
            open('./outputs/2_output.csv', 'w', newline='') as f2:
        r1 = csv.reader(f1)
        r2 = csv.writer(f2)

        for i, row in enumerate(r1):
            # Only these row ranges are treated as rows with a blank;
            # every other row is copied unchanged.  (The original spelled
            # this with `&`, which only worked because `i & i == i`.)
            if not (10 < i < 21 or 42 < i < 46):
                r2.writerow(row)
                continue

            # Work on a copy without column L (index 11), the target cell.
            _row = row[:]
            del _row[11]

            # Columns 2..6 are converted from text to int.
            for j, elem in enumerate(_row):
                if 1 < j < 7:
                    _row[j] = int(elem)

            encoded = []
            for j, elem in enumerate(_row):
                if isinstance(elem, int):
                    encoded.append(elem)
                    continue
                try:
                    encoded.append(input_dict[j][elem])
                except (LookupError, TypeError):
                    # Value has no entry in the vectorization dictionary.
                    encoded.append('')

            # Drop day of week, casualty count and column K (indices 1, 3
            # and 10); deleting from the back keeps the indices valid.
            for idx in (10, 3, 1):
                del encoded[idx]

            input_pred = _merge_count_columns(encoded)

            # Every model votes with the key mapped from its arg-max class.
            pred = []
            for model in models:
                _pred = list(
                    model.predict([np.array([j]) for j in input_pred])[0])
                pred.append(
                    value_key_map(
                        output_dict, 0,
                        oneHotEncoding(len(_pred), _pred.index(max(_pred)))))

            val, rate = majority(pred)
            print('save    : row  %2d: val = %.2f%%' % (i + 1, rate * 100))

            row[11] = val
            r2.writerow(row)