def train(iterations, train_file, beam_size, base_dir='/home/xzt/CWS'):
    """Train with beam-search decoding and save one model per iteration.

    For every iteration each line of the training data is decoded with
    ``decoder.beamSearch``.  When the decoded result differs from
    ``line.split()`` (used as the reference), the weights are updated via
    ``feature.update_weight``.  The decoded line is appended to a per-iteration
    segmentation file, and the model is saved once the pass is complete.

    Args:
        iterations: Number of passes over the training data.
        train_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width passed to ``Decoder``; also part of the model
            file name.
        base_dir: Root directory for the output files.  The sub-directories
            ``train_seg_data`` and ``model_result`` must already exist.
            Defaults to the location that was previously hard-coded.
    """
    data = prepare_data.read_file(train_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    data_size = len(data)

    for t in range(iterations):
        # NOTE(review): the space in "train-seg-data_ model-" looks accidental
        # but is kept so existing output file names do not change.
        seg_data_file = (base_dir + '/train_seg_data/train-seg-data_ model-'
                         + str(t) + '.txt')
        count = 0

        # Open the output once per iteration instead of once per line; the
        # context manager closes it, so no explicit close() is required (the
        # former trailing close() failed with NameError on empty data).
        with open(seg_data_file, 'a') as seg_out:
            for line in data:
                y = line.split()
                z = decoder.beamSearch(line)
                if z != y:
                    feature.update_weight(y, z)

                seg_out.write(' '.join(z) + '\n')

                count += 1
                if count % 1000 == 0:
                    print("iter %d , finish %.2f%%"
                          % (t, (count / data_size) * 100))

        model_path = (base_dir + "/model_result/model-" + str(t)
                      + "_beam-size-" + str(beam_size) + '.pkl')
        # "with" guarantees the handle is released even if save_model raises.
        with open(model_path, 'wb') as model_file:
            feature.save_model(model_file)

        print("segment with model-%d finish" % t)
        print("iteration %d finish" % t)
def test_avg(iterations, test_file, beam_size, base_dir='/home/xzt/CWS'):
    """Segment the test data with the saved averaged model.

    Loads ``avg-model_beam-size-<beam_size>.pkl`` from ``model_result``,
    decodes every line of the test data with ``decoder.beamSearch`` and
    appends the space-joined result to the test segmentation file.

    Args:
        iterations: Unused; kept so existing callers keep working.
        test_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width passed to ``Decoder``; also selects which model
            file is loaded and names the output file.
        base_dir: Root directory containing ``model_result`` and
            ``test_seg_data``.  Defaults to the location that was previously
            hard-coded.
    """
    data = prepare_data.read_file(test_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    data_size = len(data)

    model_path = (base_dir + '/model_result/avg-model_beam-size-'
                  + str(beam_size) + '.pkl')
    # NOTE(review): the ".pkl" suffix suggests load_model unpickles the file;
    # if so, only load model files from a trusted source.
    with open(model_path, 'rb') as model_file:
        feature.load_model(model_file)

    seg_data_file = (base_dir + '/test_seg_data/avg-test-seg-data'
                     + '_beam-size-' + str(beam_size) + '.txt')
    count = 0

    # Open the output once rather than once per line; the context manager
    # closes it, so the former trailing close() (NameError on empty data)
    # is gone.
    with open(seg_data_file, 'a') as seg_out:
        for line in data:
            z = decoder.beamSearch(line)
            seg_out.write(' '.join(z) + '\n')

            count += 1
            if count % 1000 == 0:
                print("segment with avg-model, finish %.2f%%"
                      % ((count / data_size) * 100))

    print("segment with avg model finish")
def train_avg(iterations, train_file, beam_size, base_dir='/home/xzt/CWS'):
    """Train with averaged weights and save per-iteration and averaged models.

    For every iteration each line of the training data is decoded with
    ``decoder.beamSearch``.  When the decoded result differs from
    ``line.split()`` (used as the reference), ``feature.update_avgWeight`` is
    called with the running example counter, the iteration index and the data
    size.  The current model is saved after each pass.  After the last pass
    ``feature.last_update`` and ``feature.cal_avg_weight`` are applied and the
    resulting model is saved as the averaged model.

    Args:
        iterations: Number of passes over the training data.
        train_file: Path handed to ``prepare_data.read_file``.
        beam_size: Beam width passed to ``Decoder``; also part of the model
            file names.
        base_dir: Root directory for the output files.  The sub-directory
            ``model_result`` must already exist.  Defaults to the location
            that was previously hard-coded.
    """
    data = prepare_data.read_file(train_file)
    feature = Feature()
    decoder = Decoder(beam_size, feature.get_score)
    # Computed up front so it is defined even when iterations == 0; it was
    # previously bound only inside the loop but is used after it.
    data_size = len(data)
    model_dir = base_dir + "/model_result/"

    n = 0  # running count of processed examples across all iterations
    for t in range(iterations):
        count = 0
        for line in data:
            n += 1
            y = line.split()
            z = decoder.beamSearch(line)
            if z != y:
                feature.update_avgWeight(y, z, n, t, data_size)

            count += 1
            if count % 1000 == 0:
                print("iter %d , finish %.2f%%"
                      % (t, (count / data_size) * 100))

        model_path = (model_dir + "model-" + str(t)
                      + "_beam-size-" + str(beam_size) + '.pkl')
        # "with" guarantees the handle is released even if save_model raises.
        with open(model_path, 'wb') as model_file:
            feature.save_model(model_file)

        print("segment with model-%d finish" % t)
        print("iteration %d finish" % t)

    feature.last_update(iterations, data_size)
    feature.cal_avg_weight(iterations, data_size)

    avg_model_path = (model_dir + "avg-model_beam-size-"
                      + str(beam_size) + '.pkl')
    with open(avg_model_path, 'wb') as avg_model:
        feature.save_model(avg_model)

    print("segment with avg-model finish")