def test(data, batch_size=64, filename='roc.png', **kwargs):
    """Evaluate a restored RNNModel on *data* and plot an ROC curve.

    Restores model weights from ``args.checkpoint`` (required), runs batched
    prediction over every sample that has a sequence, attaches the score to
    each sample as ``.prediction``, and writes an ROC plot to *filename*.

    Args:
        data: sequence of samples exposing ``feature_dim``, ``seq``,
            ``label`` and accepting a ``prediction`` attribute.
        batch_size: number of samples per prediction batch.
        filename: output path for the ROC plot.
        **kwargs: forwarded to the ``RNNModel`` constructor.

    Raises:
        ValueError: if ``args.checkpoint`` is not set.
    """
    global args
    # Was an assert; asserts are stripped under ``python -O``, so validate
    # explicitly — evaluation without a checkpoint is meaningless.
    if args.checkpoint is None:
        raise ValueError("args.checkpoint must be set for evaluation")
    model = RNNModel(feature_dims=data[0].feature_dim,
                     model_dir=args.output_dir, **kwargs)
    model.restore(args.checkpoint)
    # Samples without a sequence cannot be featurized; drop them up front.
    data = [d for d in data if d.seq is not None]
    for start in tqdm(range(0, len(data), batch_size)):
        batch = data[start:start + batch_size]
        x, y, length = get_feature_label(batch, length_limit=10000)
        predictions = model.predict(x, length)
        for sample, pred in zip(batch, predictions):
            sample.prediction = pred
    predictions = [d.prediction for d in data]
    labels = [d.label for d in data]
    plot_roc(predictions, labels, filename=filename)
def train(train_data, val_data, steps=6000, val_per_steps=300,
          checkpoint_per_steps=100, batch_size=64, learning_rate=0.01,
          **kwargs):
    """Train an RNNModel, periodically validating and checkpointing.

    Builds the model (optionally warm-started from ``args.checkpoint``),
    then runs *steps* optimization steps on batches drawn from
    ``batch_data_provider``. Every ``val_per_steps`` global steps it runs
    validation, every ``checkpoint_per_steps`` it saves a checkpoint, and
    every 2000 global steps it decays the learning rate by 5x.

    Args:
        train_data: training samples exposing ``feature_dim``.
        val_data: validation samples passed to ``val``.
        steps: number of training iterations to run.
        val_per_steps: validation interval, in global steps.
        checkpoint_per_steps: checkpoint interval, in global steps.
        batch_size: batch size for the data provider.
        learning_rate: initial learning rate (decayed during training).
        **kwargs: forwarded to the ``RNNModel`` constructor.
    """
    global args
    model = RNNModel(feature_dims=train_data[0].feature_dim,
                     model_dir=args.output_dir, **kwargs)
    # Optionally resume from a previous checkpoint.
    if args.checkpoint is not None:
        model.restore(args.checkpoint)
    data_provider = batch_data_provider(train_data, batch_size=batch_size)
    for _ in range(steps):
        x, y, length = get_feature_label(next(data_provider),
                                         length_limit=1000)
        result = model.train(x, y, length, learning_rate)
        logging.info("step = {}: {}".format(model.global_step, result))
        if model.global_step % val_per_steps == 0:
            result = val(model, val_data)
            # Reset streaming metrics so validation stats don't leak into
            # the next window.
            model.init_streaming()
            logging.info("validation for step = {}: {}".format(
                model.global_step, result))
        if model.global_step % checkpoint_per_steps == 0:
            model.save_checkpoint()
            logging.info("save checkpoint at {}".format(model.global_step))
        # Step decay: shrink the learning rate 5x every 2000 global steps.
        if model.global_step % 2000 == 0:
            learning_rate *= 0.2
            logging.info("current learning rate = {}".format(learning_rate))