Example #1
import joblib

from feature import Feature  # project-local feature extractor; import path assumed


def main():
    feature = Feature()

    # Convert each input sentence into a numeric feature vector
    tests = []
    with open('./data/basis.txt') as p:
        for test_line in p:
            tests.append(feature.convert_sentence(test_line))

    max_width = 35  # TODO: determine this automatically
    # Pad with zeros (or truncate) so every feature vector has exactly max_width elements
    tests = [(test + [0] * (max_width - len(test)))[:max_width]
             for test in tests]
    # Load the trained classifier and predict a label for each sentence
    svc = joblib.load('./model/svc.pkl')
    prediction = svc.predict(tests)

    # Write one prediction per line: '1' for positive ('+1') labels, '-1' otherwise
    with open('./data/output', mode='w') as p:
        p.write("\n".join([
            '1' if result == '+1' else '-1' for result in prediction.tolist()
        ]))
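
Both examples depend on a project-local Feature class whose convert_sentence method is assumed to return a list of numeric values for a sentence. The class itself is not shown; a minimal hypothetical stand-in, useful only to illustrate the expected interface, could look like this:

class Feature:
    """Hypothetical stand-in; the real extractor is project-specific."""

    def convert_sentence(self, sentence):
        # Assumption: one numeric value per whitespace-separated token,
        # here simply the token length.
        return [float(len(token)) for token in sentence.split()]
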
Example #2
import statistics

import numpy

from feature import Feature  # project-local feature extractor; import path assumed


def main():
    feature = Feature()

    trains, labels = [], []
    with open('data/train.list') as p:
        for train_line in p:
            # Each line: label, an ignored field, then the sentence text
            label, _, train_txt = train_line.split(maxsplit=2)
            trains.append(feature.convert_sentence(train_txt))
            labels.append(label)

    # Fix the dimensionality at twice the mean feature-vector length
    max_width = int(statistics.mean([len(train) for train in trains]) * 2)
    # Pad with zeros (or truncate) so every feature vector has exactly max_width elements
    trains = [(train + [0] * (max_width - len(train)))[:max_width] for train in trains]

    assert statistics.mean([len(train) for train in trains]) == max_width, 'Feature vectors are not all the same length'
    assert len(trains) == len(labels), 'Number of training samples and labels does not match'

    # Save the padded feature matrix and labels for the training step
    numpy.savez('data/dataset.npz',
                trains=numpy.array(trains, dtype=float),
                labels=numpy.array(labels, dtype=str))
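
Example #2 writes data/dataset.npz, while Example #1 loads a trained model from ./model/svc.pkl; the training step that connects the two is not shown. A minimal sketch of that step, assuming scikit-learn's SVC and the file paths used above (the train function name is made up here), might look like this:

import joblib
import numpy
from sklearn.svm import SVC


def train():
    # Load the padded feature matrix and string labels produced by Example #2
    dataset = numpy.load('data/dataset.npz')
    trains, labels = dataset['trains'], dataset['labels']

    # Fit a support vector classifier and persist it where Example #1 expects it
    svc = SVC()
    svc.fit(trains, labels)
    joblib.dump(svc, './model/svc.pkl')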