예제 #1
0
파일: main.py 프로젝트: njnuzpy/CWS
def train(iterations, train_file, beam_size):
    """Train the segmentation model and save one snapshot per iteration.

    For every line of the training data the gold segmentation is taken to be
    ``line.split()`` and the predicted segmentation is whatever
    ``decoder.beamSearch(line)`` returns.  Whenever the two differ,
    ``feature.update_weight(gold, predicted)`` is called.  The predicted
    segmentation of every line is appended to a per-iteration text file, and
    after each pass over the data the model is saved to a ``.pkl`` file.

    Args:
        iterations: Number of passes to make over the training data.
        train_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width handed to ``Decoder``; also part of the model
            file name.
    """
    data = prepare_data.read_file(train_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    # The data is not modified while training, so its size is loop-invariant.
    data_size = len(data)

    for t in range(iterations):
        # NOTE: the blank inside 'train-seg-data_ model-' is kept on purpose so
        # that output lands in the same files as before.
        seg_data_file = ('/home/xzt/CWS/train_seg_data/train-seg-data_ model-'
                         + str(t) + '.txt')

        # Open the output once per iteration instead of once per sentence; the
        # context manager closes it even if decoding raises part-way through.
        with open(seg_data_file, 'a') as seg_out:
            for count, line in enumerate(data, 1):
                y = line.split()
                z = decoder.beamSearch(line)
                if z != y:
                    feature.update_weight(y, z)

                seg_out.write(' '.join(z) + '\n')

                if count % 1000 == 0:
                    print("iter %d , finish %.2f%%" %
                          (t, (count / data_size) * 100))

        model_path = ("/home/xzt/CWS/model_result/model-" + str(t) +
                      "_beam-size-" + str(beam_size) + '.pkl')
        with open(model_path, 'wb') as model_file:
            feature.save_model(model_file)

        print("segment with model-%d finish" % t)
        print("iteration %d finish" % t)
예제 #2
0
파일: main.py 프로젝트: njnuzpy/CWS
def test_avg(iterations, test_file, beam_size):
    """Segment the test data with the saved averaged model.

    Loads the model stored under
    ``avg-model_beam-size-<beam_size>.pkl``, decodes every line of the test
    data with ``decoder.beamSearch`` and appends the space-joined result, one
    line per input line, to the matching ``avg-test-seg-data`` text file.

    Args:
        iterations: Unused; kept so existing callers keep working.
        test_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width handed to ``Decoder``; also selects which model
            file is loaded and which output file is written.
    """
    data = prepare_data.read_file(test_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    data_size = len(data)

    model_path = ('/home/xzt/CWS/model_result/avg-model_beam-size-' +
                  str(beam_size) + '.pkl')
    # NOTE(review): the '.pkl' suffix suggests load_model unpickles the file;
    # if so, only model files from a trusted source should be loaded here.
    with open(model_path, 'rb') as model_file:
        feature.load_model(model_file)

    seg_data_file = ('/home/xzt/CWS/test_seg_data/avg-test-seg-data' +
                     '_beam-size-' + str(beam_size) + '.txt')
    # One handle for the whole run rather than re-opening per sentence; no
    # trailing close() is needed (the old one failed on empty input because
    # the handle name was never bound).
    with open(seg_data_file, 'a') as seg_out:
        for count, line in enumerate(data, 1):
            z = decoder.beamSearch(line)
            seg_out.write(' '.join(z) + '\n')
            if count % 1000 == 0:
                print("segment with avg-model, finish %.2f%%" %
                      ((count / data_size) * 100))

    print("segment with avg model finish")
예제 #3
0
파일: main.py 프로젝트: njnuzpy/CWS
def train_avg(iterations, train_file, beam_size):
    """Train with the averaged-weight update and save the resulting models.

    For every line of the training data the gold segmentation is
    ``line.split()`` and the prediction is ``decoder.beamSearch(line)``.
    When they differ, ``feature.update_avgWeight`` is called with the running
    example counter, the iteration index and the data size.  A model snapshot
    is saved after each iteration.  After the last iteration
    ``feature.last_update`` and ``feature.cal_avg_weight`` are invoked and the
    model is saved once more as the averaged model.

    Args:
        iterations: Number of passes to make over the training data.
        train_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width handed to ``Decoder``; also part of the model
            file names.
    """
    data = prepare_data.read_file(train_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    # Computed up front: it is loop-invariant, and the finalisation calls
    # below need it even when ``iterations`` is 0 and the loop never runs.
    data_size = len(data)

    n = 0  # examples seen so far, counted across all iterations
    for t in range(iterations):
        for count, line in enumerate(data, 1):
            n += 1
            y = line.split()
            z = decoder.beamSearch(line)
            if z != y:
                feature.update_avgWeight(y, z, n, t, data_size)

            if count % 1000 == 0:
                print("iter %d , finish %.2f%%" %
                      (t, (count / data_size) * 100))

        model_path = ("/home/xzt/CWS/model_result/model-" + str(t) +
                      "_beam-size-" + str(beam_size) + '.pkl')
        with open(model_path, 'wb') as model_file:
            feature.save_model(model_file)
        print("segment with model-%d finish" % t)
        print("iteration %d finish" % t)

    feature.last_update(iterations, data_size)
    feature.cal_avg_weight(iterations, data_size)

    avg_model_path = ("/home/xzt/CWS/model_result/avg-model_beam-size-" +
                      str(beam_size) + '.pkl')
    with open(avg_model_path, 'wb') as avg_model:
        feature.save_model(avg_model)
    print("segment with avg-model finish")