コード例 #1
0
def main():
    """Parse the command-line options and train a sentiment classifier.

    Recognised options:
        --build_data     flag; when set, a ``DataLoader`` is created and its
                         ``build_data()`` is called before anything is loaded.
        --alg            required; one of ``CNN``, ``BiLSTM`` or ``BiRNN``.
        --small_subsets  flag; forwarded to ``train_classifier``.
        --outfile        string; forwarded to ``train_classifier`` (``None``
                         when the option is omitted).

    After the optional build step a fresh ``DataLoader`` is created, its
    ``load_data()`` is called, and the loader is handed to
    ``train_classifier`` together with the parsed options.
    """
    # Declared as a table so that the registration order stays obvious.
    option_specs = (
        ('--build_data', {
            'action': 'store_true',
            'help': 'build and save data sets (only needed for the first time).',
        }),
        ('--alg', {
            'choices': ['CNN', 'BiLSTM', 'BiRNN'],
            'required': True,
            'help': 'algorithm to train the sentiment classifier',
        }),
        ('--small_subsets', {
            'action': 'store_true',
            'help': 'train and evaluate on smaller subsets of the data.',
        }),
        ('--outfile', {
            'type': str,
            'help': 'output file name to save trained model.',
        }),
    )

    cli = argparse.ArgumentParser(
        description='Sentiment classifier argument parser.')
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    # Optional one-off step: build and save the data sets.
    if options.build_data:
        DataLoader().build_data()

    # A separate loader instance is always used for loading from file.
    loader = DataLoader()
    loader.load_data()

    train_classifier(
        alg=options.alg,
        data_loader=loader,
        small_subsets=options.small_subsets,
        outfile=options.outfile,
    )
コード例 #2
0
def _train_and_test(x_train, y_train, x_test, y_test):
    """Train a fresh ``Model`` on one split and return its test output.

    Args:
        x_train: Training features passed to ``Model.train_batch``.
        y_train: Training labels; also used to derive the positive-sample
            count and total count given to the ``Model`` constructor.
        x_test: Features passed to ``Model.test``.
        y_test: Labels passed to ``Model.test``.

    Returns:
        The value returned by ``Model.test``; callers unpack it as
        ``(y_pred, y)``.
    """
    net = Model(FEATURES_NUM,
                hidden_shape=config.hidden_layer,
                classes=config.classes,
                positive_num=DataLoader.get_positive_count(y_train),
                all_num=len(y_train))
    net.train_batch(x_train, y_train, config)
    return net.test(x_test, y_test, config)


def main():
    """Run the train/evaluate experiment for every project and save results.

    For each ``project`` in ``dataset_names`` a nested result dictionary
    ``{project: {step: {algo: {measure: []}}}}`` is created for ``STEPS``
    steps.  Every pass of the loop obtains one split from
    ``dl.build_data(project)`` and fills up to two steps:

    * step ``counter``: train on the train part, test on the test part, once
      per entry of ``algo_names``;
    * step ``counter + 1``: the same split with train and test swapped.

    A pass that raises an ordinary exception is reported on stdout and
    retried with a new split.  When all steps are filled the result is
    written to ``./results/algo--final--<STEPS>--<project>.json``.
    """
    s = time.time()
    dl = DataLoader()
    print(time.time() - s)  # seconds spent constructing the DataLoader

    for project in dataset_names:

        result = {
            project: {
                i: {
                    algo: {measure: []
                           for measure in measure_names}
                    for algo in algo_names
                }
                for i in range(STEPS)
            }
        }

        counter = 0

        # `<` rather than `!=`: the counter advances by two per successful
        # pass, so with an odd STEPS it would step over the bound and the
        # loop would never terminate.
        while counter < STEPS:

            # Forward fold.  Catch Exception, not BaseException, so that
            # Ctrl-C / SystemExit can still stop the otherwise endless retry.
            try:
                x_train_scaled, x_test_scaled, y_train, y_test = dl.build_data(
                    project)

                for algo in algo_names:
                    y_pred, y = _train_and_test(x_train_scaled, y_train,
                                                x_test_scaled, y_test)

                    result = Evaluator.evaluate(measure_names, y_pred, y,
                                                result, project, algo, counter)

                print('This is the ' + str(counter + 1) + 'times')

            except Exception as err:
                print('The' + str(counter) + 'times loop error')
                print(err)
                continue  # retry the same step with a new split

            counter += 1

            # With an odd STEPS the last forward fold fills the final slot;
            # there is no result entry left for a reverse fold.
            if counter >= STEPS:
                break

            # Reverse fold: swap the roles of the train and test parts.
            try:
                y_pred, y = _train_and_test(x_test_scaled, y_test,
                                            x_train_scaled, y_train)

                # NOTE(review): `algo` is the value left over from the loop
                # above, so the reverse fold is recorded only under the last
                # entry of algo_names -- confirm whether that is intended.
                result = Evaluator.evaluate(measure_names, y_pred, y, result,
                                            project, algo, counter)

            except Exception as err:
                print('The' + str(counter) + 'times loop error')
                print(err)
                # Step back to the forward fold's index so that the whole
                # pair is redone on the next pass.
                counter -= 1
            else:
                counter += 1

        with open(
                './results/algo--final--' + str(STEPS) + '--' + project +
                '.json', 'w') as f:
            json.dump(result, f)