def eval(tag_path, corpus_path):
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    # Restore the trained BiLSTM classifier (5 output classes).
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags])

    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences)):
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            # Track the running accuracy after every prediction.
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)

    assert len(acc_list) == len(sentences)
    final_acc = acc

    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
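A minimal usage sketch for eval(), assuming the path constants used by the training script further below (TRAIN_TAG_PATH, TRAIN_CORPUS_PATH, TEST_TAG_PATH, TEST_CORPUS_PATH) are defined in the same module; note each call pops up a matplotlib window.

if __name__ == '__main__':
    # Sketch only: evaluate the saved checkpoint on the train and test splits.
    train_acc = eval(TRAIN_TAG_PATH, TRAIN_CORPUS_PATH)
    test_acc = eval(TEST_TAG_PATH, TEST_CORPUS_PATH)
    print('train acc: {}%, test acc: {}%'.format(train_acc, test_acc))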
def run_test(args):
    log = args.logfile
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)

    print("Loading data...")
    train_data, val_data = load_train_data()
    test_data = load_test_data()

    print("Loading model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data, quiet=True)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=0,
    )

    exe = fluid.Executor(place)
    paddle.enable_static()
    fluid.io.load_inference_model(args.model_path_base, exe)

    test_reader = fluid.io.batch(reader_creator(args, test_data, seq_vocab,
                                                bracket_vocab, test=True),
                                 batch_size=args.batch_size)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    predictions = network(seq, dot)

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[seq, dot])

    test_results = []
    kase = 0
    for data in test_reader():
        pred, = exe.run(test_program,
                        feed=test_feeder.feed(data),
                        fetch_list=[predictions.name],
                        return_numpy=False)
        pred = list(np.array(pred))
        kase += 1
        # Write one prediction file per test case.
        with open(str(kase) + '.predict.txt', "w") as f:
            for x in pred:
                f.write(str(x))
                f.write('\n')

    # Bundle all per-case prediction files into result.zip and clean up.
    with ZipFile("result.zip", "w") as myzip:
        for i in range(kase):
            myzip.write(str(i + 1) + '.predict.txt')
            os.remove(str(i + 1) + '.predict.txt')
def run_test(args):
    log = args.logfile
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)

    print("Loading data...")
    train_data, val_data = load_train_data()
    test_data = load_test_data()

    print("Loading model...")
    seq_vocab, bracket_vocab, mixture_vocab = process_vocabulary(
        args, train_data, quiet=True)
    network = Network(
        seq_vocab,
        bracket_vocab,
        mixture_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=0,
    )

    exe = fluid.Executor(place)
    paddle.enable_static()
    fluid.io.load_inference_model(args.model_path_base, exe)

    test_reader = fluid.io.batch(reader_creator(args, test_data, seq_vocab,
                                                bracket_vocab, mixture_vocab,
                                                test=True),
                                 batch_size=args.batch_size)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    mix = fluid.data(name="mix", shape=[None], dtype="int64", lod_level=1)
    predictions = network(seq, dot, mix)

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[seq, dot, mix])

    test_results = []
    for data in test_reader():
        pred, = exe.run(test_program,
                        feed=test_feeder.feed(data),
                        fetch_list=[predictions.name],
                        return_numpy=False)
        pred = list(np.array(pred))
        test_results.append(pred)
        # Log the predictions for this batch.
        out(log, " ".join([str(x) for x in pred]))
def loaddataset():
    params = {
        'TrainFile': '../data/train.csv',
        'TestFile': '../data/test.csv',
        'TrainSize': 0.9
    }

    df = dataset.load_train_data(params)
    train_data = df.values
    # Start at the Pclass column; we will not be using the PassengerId.
    X_train = train_data[:, 2:]
    Y_train = train_data[:, 0].astype(int)

    # Partition the training data into train/validation splits.
    trainSize = int(params['TrainSize'] * np.size(Y_train))
    x_train, x_valid = X_train[:trainSize, :], X_train[trainSize:, :]
    y_train, y_valid = Y_train[:trainSize], Y_train[trainSize:]

    return [x_train, y_train, x_valid, y_valid]
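A minimal sketch of consuming the split returned by loaddataset() above; it assumes the relative '../data' CSV paths configured in the function exist.

# Sketch only: unpack the 90/10 train/validation split.
x_train, y_train, x_valid, y_valid = loaddataset()
print('train:', x_train.shape, 'valid:', x_valid.shape)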
def run_train(args):
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile

    train_data, val_data = load_train_data()
    out(log, "# Training set contains {} Sequences.".format(len(train_data)))
    out(log, "# Validation set contains {} Sequences.".format(len(val_data)))

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    paddle.enable_static()
    out(log, "# Paddle: Using device: {}".format(place))

    out(log, "# Initializing model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=args.dropout,
    )

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    current_processed, total_processed = 0, 0
    check_every = math.floor(len(train_data) / args.checks_per_epoch)
    best_dev_loss, best_dev_model_path = np.inf, None
    start_time = time.time()
    out(
        log,
        "# Checking validation {} times an epoch (every {} batches)".format(
            args.checks_per_epoch, check_every))
    patience = check_every * args.checks_per_epoch * 2
    batches_since_dev_update = 0

    train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, train_data, seq_vocab, bracket_vocab), buf_size=500),
        batch_size=args.batch_size)
    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
        batch_size=1)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")

    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)
    test_program = main_program.clone(for_test=True)
    feeder = paddle.fluid.DataFeeder(place=place, feed_list=[seq, dot, y])

    # Adam with L1 weight decay.
    learning_rate = 1e-4
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-08
    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                     beta1=beta1,
                                     beta2=beta2,
                                     epsilon=epsilon,
                                     regularization=L1Decay(0.01))
    optimizer.minimize(avg_loss)

    exe.run(startup_program)
    exe_test = fluid.Executor(place)

    start_epoch_index = 1
    for epoch in itertools.count(start=start_epoch_index):
        if epoch >= args.epochs + 1:
            break
        # Re-shuffle the training data every epoch.
        train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
            args, train_data, seq_vocab, bracket_vocab), buf_size=500),
            batch_size=args.batch_size)
        out(log, "# Epoch {} starting.".format(epoch))
        epoch_start_time = time.time()
        for batch_index, batch in enumerate(train_reader()):
            batch_loss, pred_values = exe.run(
                main_program,
                feed=feeder.feed(batch),
                fetch_list=[avg_loss.name, predictions.name],
                return_numpy=False)
            batch_loss = np.array(batch_loss)
            pred_values = np.array(pred_values)
            total_processed += len(batch)
            current_processed += len(batch)
            batches_since_dev_update += 1
            out(
                log, "epoch {:,} "
                "batch {:,} "
                "processed {:,} "
                "batch-loss {:.4f} "
                "epoch-elapsed {} "
                "total-elapsed {} "
                "".format(
                    epoch,
                    batch_index + 1,
                    total_processed,
                    float(batch_loss),
                    format_elapsed(epoch_start_time),
                    format_elapsed(start_time),
                ))
            if math.isnan(float(batch_loss[0])):
                sys.exit("got NaN loss, training failed.")

            if current_processed >= check_every:
                current_processed -= check_every
                # Run a validation pass.
                val_results = []
                for data in val_reader():
                    loss, pred = exe.run(
                        test_program,
                        feed=feeder.feed(data),
                        fetch_list=[avg_loss.name, predictions.name],
                        return_numpy=False)
                    loss = np.array(loss)
                    val_results.append(loss[0])
                val_loss = sum(val_results) / len(val_results)
                out(
                    log,
                    "# Dev Average Loss: {:5.3f} (MSE) -> {:5.3f} (RMSD)".
                    format(float(val_loss), math.sqrt(float(val_loss))))

                if val_loss < best_dev_loss:
                    batches_since_dev_update = 0
                    # Remove the previously saved best model, if any.
                    if best_dev_model_path is not None:
                        path = "{}/{}_dev={:.4f}".format(
                            args.model_path_base, args.model_path_base,
                            best_dev_loss)
                        print("\t\t", best_dev_model_path, os.path.exists(path))
                        if os.path.exists(path):
                            out(
                                log,
                                "* Removing previous model file {}...".format(
                                    path))
                            shutil.rmtree(path)
                    best_dev_loss = val_loss
                    best_dev_model_path = "{}_dev={:.4f}".format(
                        args.model_path_base, val_loss)
                    out(
                        log,
                        "* Saving new best model to {}...".format(
                            best_dev_model_path))
                    if not os.path.exists(args.model_path_base):
                        os.mkdir(args.model_path_base)
                    fluid.io.save_inference_model(
                        args.model_path_base + "/" + best_dev_model_path,
                        ['seq', 'dot'], [predictions], exe)
def run_test_withlabel(args):
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)

    out(log, "Loading data...")
    train_data, val_data = load_train_data()
    test_data = load_test_label_data()

    out(log, "Loading model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=0,
    )

    exe = fluid.Executor(place)
    paddle.enable_static()
    fluid.io.load_inference_model(args.model_path_base, exe)

    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
        batch_size=args.batch_size)
    test_reader = fluid.io.batch(reader_creator(args, test_data, seq_vocab,
                                                bracket_vocab),
                                 batch_size=args.batch_size)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")

    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)
    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)
    feeder = fluid.DataFeeder(place=place, feed_list=[seq, dot, y])

    # Report loss on the validation set.
    val_results = []
    for data in val_reader():
        loss, pred = exe.run(test_program,
                             feed=feeder.feed(data),
                             fetch_list=[avg_loss.name, predictions.name],
                             return_numpy=False)
        loss = np.array(loss)
        val_results.append(loss[0])
    val_loss = sum(val_results) / len(val_results)
    out(
        log, "# Dev Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(val_loss), math.sqrt(float(val_loss))))

    # Report loss on the labelled test set.
    test_results = []
    avg_losses = []
    for data in test_reader():
        loss, pred, gold = exe.run(
            test_program,
            feed=feeder.feed(data),
            fetch_list=[avg_loss.name, predictions.name, y.name],
            return_numpy=False)
        loss = np.array(loss)
        test_results.append(loss[0])
        pred = list(np.array(pred))
        gold = list(np.array(gold))
        """
        print("PRED", ["{:5.3f}".format(x) for x in pred[:20]], "...")
        print("GOLD", ["{:5.3f}".format(x) for x in gold[:20]], "...")
        MSE = []
        for p, g in zip(pred, gold):
            mse = (p - g) ** 2
            MSE.append(mse)
        avg_mse = sum(MSE) / len(MSE)
        print("MSE ", ["{:5.3f}".format(x) for x in MSE[:20]], "...")
        print("AVG LOSS:", avg_mse)
        print()
        avg_losses.append(avg_mse)
        """
    test_loss = sum(test_results) / len(test_results)
    out(
        log, "# Test Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(test_loss), math.sqrt(float(test_loss))))
with tf.Session() as sess:
    sess.run(init)

    # Restore the latest checkpoint if one exists.
    checkpoint = tf.train.get_checkpoint_state(checkpoint_path)
    if checkpoint:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("checkpoint loaded")
    else:
        print("failed to load checkpoint")

    # Test the model.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_x, test_y = dataset.load_train_data("test.csv")
    print("Accuracy", accuracy.eval({x: test_x, y: test_y}))

    pred_list = tf.argmax(pred, 1).eval({x: test_x})
    true_list = tf.argmax(y, 1).eval({y: test_y})
    caculate_p(true_list, pred_list)

    # Run predictions over every csv file in the data directory.
    data_dir = "data"
    path_dir = os.listdir(data_dir)
    for all_file in path_dir:
        csv_path = os.path.join('%s/%s' % (data_dir, all_file))
        print(csv_path)
        test_list = dataset.read_real_data(csv_path)
        # test_list = dataset.read_real_data("data/daht_c001_04_15.csv")
def predict_labels(probs):
    # Threshold the predicted probabilities at 0.5 to get binary labels.
    labels = []
    for prob in probs:
        if prob > 0.5:
            labels.append(1)
        else:
            labels.append(0)
    return labels


if __name__ == '__main__':
    args = parse()
    root_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                            '../../')

    # Load the training data.
    train_ids, train_data, train_labels, med, mean, std = load_train_data(
        os.path.join(root_dir, 'data', 'train.csv'))

    # Hold out the first val_num examples for validation.
    val_data = train_data[:args.val_num]
    val_labels = train_labels[:args.val_num]
    train_data = train_data[args.val_num:]
    train_labels = train_labels[args.val_num:]

    # Configure the model.
    model = TitanicModel(hidden_ch=args.hidden_ch)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=args.lr),
                  loss=tf.losses.log_loss,
                  metrics=['accuracy'])

    # Training
    ckpt_path = os.path.join(root_dir, 'ckpt/titanic')
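A quick sanity check of the predict_labels() helper defined above; the probabilities are made-up illustration values, and the strict > 0.5 comparison means exactly 0.5 maps to 0.

# Sketch only: binarize a few example probabilities.
print(predict_labels([0.2, 0.7, 0.5]))  # -> [0, 1, 0]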
def main():
    print 'Running', __file__, '...'
    params = {
        'Model': 'neuralnetwork',
        'TrainFile': '../data/train.csv',
        'TestFile': '../data/test.csv',
        'n_fold': 5,
        'TrainSize': .9
    }

    # 1. Generate data
    df = dataset.load_train_data(params)
    train_data = df.values
    # Skip the passengerid
    X_train = train_data[:, 2:]
    Y_train = train_data[:, 0].astype(int)

    # 2. Partition training data
    trainSize = int(params['TrainSize'] * np.size(Y_train))
    x_train, x_valid = X_train[:trainSize, :], X_train[trainSize:, :]
    y_train, y_valid = Y_train[:trainSize], Y_train[trainSize:]

    df = dataset.load_test_data(params)
    X_test = df.values
    x_test_index = X_test[:, 0]
    x_test = X_test[:, 1:]

    print 'Analyzing training data ', params['Model'], \
        'datapoints=', x_train.shape[0], 'features=', x_train.shape[1]

    rng = np.random.RandomState(5000)
    classifier = N.NeuralNetwork()
    param_grid = dict(
        network=[[9, 18, 18, 1], [9, 24, 1], [9, 45, 1]],
        connection_rate=[.6, .7],
        learning_rate=[.07, .1],
        learning_momentum=[.005, .05],
        initial_weight=[.73, .82],
        desired_error=[0.0001],
        epoch=[100],
        hidden_activation=[N.SIGMOID, N.SIGMOID_STEPWISE, N.SIGMOID_SYMMETRIC],
        output_activation=[N.SIGMOID_SYMMETRIC],
        training_algorithm=[N.TRAIN_RPROP],
        show=[500])

    # 3. Search for the best estimator
    cv_ = cv.StratifiedShuffleSplit(y_train,
                                    n_iter=params['n_fold'],
                                    train_size=params['TrainSize'],
                                    random_state=rng)
    grid = grid_search.GridSearchCV(classifier, param_grid=param_grid, cv=cv_)
    grid.fit(x_train, y_train)
    best_estimator = grid.best_estimator_
    print 'Best estimator:', best_estimator

    scores = cv.cross_val_score(best_estimator, x_train, y_train,
                                cv=params['n_fold'])
    print('Train: (folds=%d) Score for %s accuracy=%0.5f (+/- %0.5f)' %
          (params['n_fold'], params['Model'], scores.mean(), scores.std()))

    y_valid_pred = best_estimator.predict(x_valid)
    print "Valid: Score for %s accuracy=%0.5f rmse=%0.5f" % \
        (params['Model'], metrics.accuracy_score(y_valid, y_valid_pred),
         np.sqrt(metrics.mean_squared_error(y_valid, y_valid_pred)))

    # 4. Run found estimator on the test data.
    print 'Analyzing test data ', params['Model'], \
        'datapoints=', x_test.shape[0], 'features=', x_test.shape[1]
    process_test_data(params, best_estimator, x_test_index, x_test)
def main(argv=None):
    transform = Compose([
        Resize(cons.IMAGE_SIZE, cons.IMAGE_SIZE),
        Normalize(mean=(0.5, 0.5, 0.5),
                  std=(0.5, 0.5, 0.5),
                  max_pixel_value=255.0)
    ])
    valid_loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                   train_labels_path=FLAGS.train_labels_path,
                                   batch_size=FLAGS.batch_size,
                                   num_worker=FLAGS.num_worker,
                                   valid=True,
                                   nfold=FLAGS.nfold,
                                   transform=transform)

    model = models.get_model(model_name=FLAGS.model_name,
                             num_classes=cons.NUM_CLASSES)
    model.cuda()
    # model = torch.nn.DataParallel(model)

    DIR = '/' + FLAGS.case + '/' + FLAGS.model_name + '/fold' + str(FLAGS.nfold)
    RESULT_PATH = ''
    if FLAGS.confidence_border is not None:
        DIR = DIR + '/with_pseudo_labeling'
        RESULT_PATH = RESULT_PATH + FLAGS.result_path
        if FLAGS.result_case is not None:
            RESULT_PATH = RESULT_PATH + '/' + FLAGS.result_case
        RESULT_PATH = RESULT_PATH + '/inference_with_c.csv'
    PARAM_DIR = FLAGS.params_path + DIR
    os.makedirs(PARAM_DIR, exist_ok=True)
    PARAM_NAME = PARAM_DIR + '/' + FLAGS.case

    # Optionally restart from a previously saved checkpoint.
    if FLAGS.executed_epoch > 0:
        TRAINED_PARAM_PATH = FLAGS.restart_param_path + '/' + FLAGS.case + str(
            FLAGS.executed_epoch)
        restart_epoch = FLAGS.executed_epoch + 1
        if FLAGS.restart_from_final:
            TRAINED_PARAM_PATH = TRAINED_PARAM_PATH + '_final'
        TRAINED_PARAM_PATH = TRAINED_PARAM_PATH + '.pth'
        model.load_state_dict(torch.load(TRAINED_PARAM_PATH))
    else:
        restart_epoch = 0

    optimizer = optim.Adam(model.parameters(), lr=cons.start_lr)
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=FLAGS.opt_level)

    # Optionally weight the loss by the per-class label counts.
    if FLAGS.add_class_weight:
        loader = load_train_data(train_images_path=FLAGS.train_images_path,
                                 train_labels_path=FLAGS.train_labels_path,
                                 batch_size=FLAGS.batch_size,
                                 num_worker=FLAGS.num_worker,
                                 nfold=FLAGS.nfold)
        count_label = np.zeros(10, dtype=np.int64)
        for feed in loader:
            _, labels = feed
            count_label += np.sum(labels.numpy().astype(np.int64), axis=0)
        weight = torch.from_numpy(count_label).cuda()
    else:
        weight = None
    criterion = nn.BCEWithLogitsLoss(weight=weight)

    writer = SummaryWriter(log_dir=FLAGS.logs_path + DIR + '/tensorboardX/')
    best_acc = 0

    if FLAGS.augmentation and FLAGS.aug_decrease:
        # Decay the probability of part of the augmentations over the epochs.
        p = 0.5
        for e in range(restart_epoch, FLAGS.final_epoch):
            p_partical = p * (FLAGS.final_epoch - e) / FLAGS.final_epoch
            lr = set_lr.cosine_annealing(optimizer, cons.start_lr, e, 100)
            writer.add_scalar('LearningRate', lr, e)
            train_loader = load_train_data(
                train_images_path=FLAGS.train_images_path,
                train_labels_path=FLAGS.train_labels_path,
                batch_size=FLAGS.batch_size,
                num_worker=FLAGS.num_worker,
                nfold=FLAGS.nfold,
                confidence_border=FLAGS.confidence_border,
                result_path=RESULT_PATH,
                test_images_path=FLAGS.test_images_path,
                over_sampling=FLAGS.over_sampling,
                transform_aug=Compose([
                    aug.HueSaturationValue(p=p_partical),
                    aug.RandomBrightnessContrast(p=p_partical),
                    aug.CLAHE(p=p_partical),
                    aug.JpegCompression(p=p_partical),
                    aug.GaussNoise(p=p),
                    aug.MedianBlur(p=p),
                    aug.ElasticTransform(p=p_partical),
                    aug.HorizontalFlip(p=p),
                    aug.Rotate(p=p),
                    aug.CoarseDropout(p=p_partical),
                    aug.RandomSizedCrop(p=p)
                ]),
                mixup=FLAGS.mixup,
                transform=transform)
            train_loss = train_loop(model, train_loader, criterion, optimizer)
            writer.add_scalar('train_loss', train_loss, e)
            valid_loss, valid_acc = valid_loop(model, valid_loader, criterion)
            writer.add_scalar('valid_loss', valid_loss, e)
            writer.add_scalar('valid_acc', valid_acc, e)
            print(
                'Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Accuracy:{:.2f}'
                .format(e + 1, train_loss, valid_loss, valid_acc))
            if e % 10 == 0:
                torch.save(model.state_dict(),
                           PARAM_NAME + '_' + str(e) + '.pth')
            if valid_acc > best_acc:
                best_acc = valid_acc
                torch.save(model.state_dict(), PARAM_NAME + '_best.pth')
    else:
        if FLAGS.augmentation and not FLAGS.augmix:
            transform_aug = Compose([
                aug.HueSaturationValue(),
                aug.RandomBrightnessContrast(),
                aug.CLAHE(),
                aug.JpegCompression(),
                aug.GaussNoise(),
                aug.MedianBlur(),
                aug.ElasticTransform(),
                aug.HorizontalFlip(),
                aug.Rotate(),
                aug.CoarseDropout(),
                aug.RandomSizedCrop()
            ])
        else:
            transform_aug = None
        train_loader = load_train_data(
            train_images_path=FLAGS.train_images_path,
            train_labels_path=FLAGS.train_labels_path,
            batch_size=FLAGS.batch_size,
            num_worker=FLAGS.num_worker,
            valid=False,
            nfold=FLAGS.nfold,
            over_sampling=FLAGS.over_sampling,
            transform_aug=transform_aug,
            augmix=FLAGS.augmix,
            mixup=FLAGS.mixup,
            transform=transform)
        total_time = 0
        for e in range(restart_epoch, FLAGS.final_epoch):
            start = time.time()
            lr = set_lr.cosine_annealing(optimizer, cons.start_lr, e, 100)
            writer.add_scalar('LearningRate', lr, e)
            train_loss = train_loop(model, train_loader, criterion, optimizer)
            writer.add_scalar('train_loss', train_loss, e)
            valid_loss, valid_acc = valid_loop(model, valid_loader, criterion)
            writer.add_scalar('valid_loss', valid_loss, e)
            writer.add_scalar('valid_acc', valid_acc, e)
            print(
                'Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}, Valid Accuracy:{:.2f}'
                .format(e + 1, train_loss, valid_loss, valid_acc))
            if e % 10 == 0:
                torch.save(model.state_dict(),
                           PARAM_NAME + '_' + str(e) + '.pth')
            if valid_acc > best_acc:
                best_acc = valid_acc
                torch.save(model.state_dict(), PARAM_NAME + '_best.pth')
            total_time = total_time + (time.time() - start)
            print('average time: {}[sec]'.format(total_time / (e + 1)))
        torch.save(
            model.state_dict(),
            PARAM_NAME + '_' + str(FLAGS.final_epoch - 1) + '_final.pth')
# Parameters
current_iter = tf.Variable(0)
training_epochs = 150000
batch_size = 64
display_step = 5
checkpoint_path = 'checkpoint'

# Network parameters
n_hidden_1 = 512  # 1st layer number of features
n_hidden_2 = 512  # 2nd layer number of features
n_input = 51      # data input
n_classes = 2     # the number of classes

# Generate the data set for train and test (80/20 split).
all_x, all_y = dataset.load_train_data("train.csv")
clip_num = int(len(all_y) * 0.8)
input_x, input_y = all_x[:clip_num], all_y[:clip_num]
test_x, test_y = all_x[clip_num:], all_y[clip_num:]

# tf graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
def train():
    logging.basicConfig(level=logging.INFO,
                        filename='log.txt',
                        format='%(message)s')
    tag_path = TRAIN_TAG_PATH
    corpus_path = TRAIN_CORPUS_PATH
    save_model_name = MODEL_NAME
    best_model_name = BEST_NAME
    load_model_path = None
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    train_epoch = TRAIN_EPOCH
    word_to_ix = WORD_TO_IX
    start_epoch = 0
    best_score = 0.
    loss_info, train_avg_info, test_avg_info = [], [], []

    sentences, tags = load_train_data(tag_path, corpus_path)
    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    label = torch.tensor([[tag_to_ix[tag]] for tag in tags])

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim, dropout=0.3)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Optionally resume from a saved checkpoint.
    if load_model_path is not None:
        checkpoints = torch.load(load_model_path)
        model.load_state_dict(checkpoints['model_state_dict'])
        optimizer.load_state_dict(checkpoints['optim_state_dict'])
        start_epoch = checkpoints['epoch']

    start_time = time.time()
    logging.info('----------------------')
    for epoch in range(start_epoch, train_epoch):
        running_loss = 0.0
        for i, sen in enumerate(tqdm(sentences)):
            optimizer.zero_grad()
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            loss = criterion(output, label[i])
            running_loss += loss.item()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()

        # Save the latest checkpoint and evaluate on the train/test splits.
        torch.save(
            {
                'model_state_dict': model.state_dict(),
                'optim_state_dict': optimizer.state_dict(),
                'epoch': epoch + 1
            }, save_model_name)
        train_avg = eval(TRAIN_TAG_PATH, TRAIN_CORPUS_PATH)
        test_avg = eval(TEST_TAG_PATH, TEST_CORPUS_PATH)
        loss_info.append(running_loss)
        train_avg_info.append(train_avg)
        test_avg_info.append(test_avg)
        logging.info('********')
        logging.info('epoch: {}'.format(epoch + 1))
        logging.info('loss: {}'.format(running_loss))
        logging.info('train avg: {}'.format(train_avg))
        logging.info('test avg: {}'.format(test_avg))

        # Keep the model with the best test accuracy.
        if test_avg > best_score:
            torch.save({
                'model_state_dict': model.state_dict(),
            }, best_model_name)
            best_score = test_avg
            print('save best')

    print('training time:', time.time() - start_time)
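loss_info, train_avg_info and test_avg_info are collected in train() above but not used further in this snippet; a hedged sketch of how they could be plotted at the end of train(), reusing the matplotlib import that eval() already needs:

    # Sketch only: plot the per-epoch curves gathered during training.
    epochs = list(range(1, len(loss_info) + 1))
    plt.plot(epochs, train_avg_info, label='train avg acc')
    plt.plot(epochs, test_avg_info, label='test avg acc')
    plt.xlabel('epoch')
    plt.ylabel('accuracy / %')
    plt.legend()
    plt.show()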
    # Tail of prepare_model(): compile the assembled Keras model.
    optimizer = keras.optimizers.Adam(lr=0.0003)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    return model, top_model_dense


print("Preparing model...")
# 'model' is used to train. 'top_model_dense' is only used for checkpointing.
model, top_model_dense = prepare_model()
print("prepared model")

print("Loading data...")
train_indices, train_segments, train_results, val_indices, val_segments, val_results = \
    dataset.load_train_data(TRAIN_SAMPLES, VAL_SAMPLES)
print("Loaded data")

tensorboard = TensorBoard(log_dir=LOG_PATH + "/{}".format(time()))


# Prints some predicted vs actual results, called after each epoch.
def run_validation(epoch, logs):
    MAX_PRINT = 100
    global model
    a = model.predict([val_indices[:MAX_PRINT],
                       val_segments[:MAX_PRINT]]).reshape(
                           min(VAL_SAMPLES, MAX_PRINT))
    b = val_results[:MAX_PRINT]
    combined = np.array([a, b]).transpose()
    print("Predicted vs actual: ", combined.tolist())


val_cb = keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: run_validation(epoch, logs))