# Assumed module-level context (not shown here): import os, tensorflow as tf,
# plus FLAGS, create_dataset, and get_num_classes from the surrounding project.
def test():
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(
            os.path.join(FLAGS.saved_path, 'char_level_cnn.meta'))
        g = tf.get_default_graph()
        # input tensors recovered from the saved graph
        handle = g.get_tensor_by_name('Placeholder:0')
        keep_prob = g.get_tensor_by_name('dropout_prob:0')
        test_set, num_test_iters = create_dataset(
            FLAGS.test_set, FLAGS.alphabet, FLAGS.max_length,
            FLAGS.batch_size, False)
        test_iterator = test_set.make_initializable_iterator()
        test_handle = sess.run(test_iterator.string_handle())
        iterator = tf.data.Iterator.from_string_handle(
            test_handle, test_set.output_types, test_set.output_shapes)
        texts, labels = iterator.get_next()
        num_classes = get_num_classes(FLAGS.test_set)
        # graph output tensor fc3/dense
        output = g.get_tensor_by_name('fc3/dense:0')
        acc, acc_op = tf.metrics.accuracy(labels=tf.cast(labels, tf.int64),
                                          predictions=tf.argmax(output, 1))
        sess.run(test_iterator.initializer)
        # tf.metrics.* accumulators live in local variables
        sess.run(tf.local_variables_initializer())
        saver.restore(sess, os.path.join(FLAGS.saved_path, 'char_level_cnn'))
        counter = 0
        while True:
            counter += 1
            try:
                print('calculating accuracy for batch {}/{}...'.format(
                    counter, num_test_iters))
                out = sess.run([acc_op],
                               feed_dict={handle: test_handle, keep_prob: 1.0})
                print('accuracy so far: {:.2f}'.format(out[0]))
            except (tf.errors.OutOfRangeError, StopIteration):
                break
        print('Accuracy of model: {:.2f}%'.format(sess.run(acc) * 100))
        exit()
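# A minimal sketch (not part of the original project) of the
# tf.metrics.accuracy pattern test() relies on: the call returns a
# (value_tensor, update_op) pair, and the metric's counters are *local*
# variables, which is why test() runs tf.local_variables_initializer().
# All names below (labels_ph, preds_ph) are illustrative.
def _metrics_accuracy_sketch():
    labels_ph = tf.placeholder(tf.int64, [None])
    preds_ph = tf.placeholder(tf.int64, [None])
    acc, acc_op = tf.metrics.accuracy(labels=labels_ph, predictions=preds_ph)
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())  # reset the metric counters
        sess.run(acc_op, feed_dict={labels_ph: [0, 1, 1], preds_ph: [0, 1, 0]})
        sess.run(acc_op, feed_dict={labels_ph: [1, 0], preds_ph: [1, 0]})
        return sess.run(acc)                        # 4 correct of 5 -> 0.8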
# Assumed module-level context: pandas as pd, numpy as np, plotly.offline as
# pyo, plus project helpers (preprocess, normalize, denormalize,
# get_model_weight, draw_graph, utils).
def predict(BATCH_SIZE, TIME_STEPS, SUBJECT, LSTM_UNITS, FEATURES_COUNT,
            EPOCH, ITERATIONS, options='figure'):
    csv = pd.read_csv('stock/{s}/{s}.csv'.format(s=SUBJECT)).sort_index()
    stock = preprocess(csv)
    denorm_stock = stock.copy()
    denorm_stock.index = pd.to_datetime(denorm_stock.index)
    stock = normalize(stock)
    stock = stock.fillna(method='ffill')
    TEST_NUM = BATCH_SIZE * 10 + TIME_STEPS
    test = stock[-TEST_NUM:]
    # trim the head so the number of windows divides evenly into batches
    cut = (len(stock) - TIME_STEPS) % BATCH_SIZE
    _stock = stock[cut:]
    _denorm_stock = denorm_stock[cut:]
    x_test, y_test = utils.create_dataset(test.to_numpy(), TIME_STEPS, FEATURES_COUNT)
    x_data, y_data = utils.create_dataset(_stock.to_numpy(), TIME_STEPS, FEATURES_COUNT)
    model = get_model_weight(BATCH_SIZE, TIME_STEPS, EPOCH, ITERATIONS,
                             SUBJECT, LSTM_UNITS)
    pred = model.predict(x_data, batch_size=BATCH_SIZE)
    future = model.predict(x_test[-BATCH_SIZE:], batch_size=BATCH_SIZE)
    if options == 'future':
        return future
    _y_hat = np.array(list(stock['Close'][:TIME_STEPS]) + list(pred) + list(future))
    y_hat = [i[0] if isinstance(i, (np.ndarray, np.generic)) else i for i in _y_hat]
    denorm_pred = np.array(denormalize(y_hat, denorm_stock['Close'])).reshape(-1, 1)
    pred_candle = pd.DataFrame(denorm_pred, columns=['Close'])
    y_candle = _denorm_stock.sort_index()
    batch_days = utils.batch_workdays(denorm_stock.index[-1], BATCH_SIZE)
    pred_candle.index = y_candle.index.append(pd.to_datetime(batch_days))
    fig = draw_graph(y_candle, pred_candle)
    name = '{4}_bs{3}ts{0}ep{1}it{2}lstm{5}'.format(
        TIME_STEPS, EPOCH, ITERATIONS, BATCH_SIZE, SUBJECT, LSTM_UNITS)
    pyo.plot(fig, filename='stock/{0}/{1}.html'.format(SUBJECT, name),
             auto_open=False)
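# Example invocation of predict() (all hyperparameter values and the ticker
# below are illustrative assumptions, not values from the original project):
#
#     predict(BATCH_SIZE=32, TIME_STEPS=20, SUBJECT='AAPL', LSTM_UNITS=64,
#             FEATURES_COUNT=5, EPOCH=100, ITERATIONS=10, options='figure')
#
# With options='future' the function returns the next BATCH_SIZE normalized
# predictions instead of writing the HTML chart.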
def test_create_dataset():
    # `path` and `true_result` are assumed to be defined at module level
    # (e.g. as test fixtures or constants).
    utils.create_dataset(path)
    json_path = f"{path}/dataset.json"
    with open(json_path) as json_file:
        result = json.load(json_file)
    assert result == true_result
# Assumed module-level context: os, shutil, numpy as np, tensorflow as tf,
# plus FLAGS, create_dataset, get_num_classes, and the Char_level_cnn model
# class from the surrounding project.
def train():
    num_classes = get_num_classes(FLAGS.train_set)
    model = Char_level_cnn(batch_size=FLAGS.batch_size,
                           num_classes=num_classes,
                           feature=FLAGS.feature)
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        training_set, num_training_iters = create_dataset(
            FLAGS.train_set, FLAGS.alphabet, FLAGS.max_length,
            FLAGS.batch_size, True)
        test_set, num_test_iters = create_dataset(
            FLAGS.test_set, FLAGS.alphabet, FLAGS.max_length,
            FLAGS.batch_size, False)
        train_iterator = training_set.make_initializable_iterator()
        test_iterator = test_set.make_initializable_iterator()
        handle = tf.placeholder(tf.string, shape=[])
        keep_prob = tf.placeholder(tf.float32, name='dropout_prob')
        iterator = tf.data.Iterator.from_string_handle(
            handle, training_set.output_types, training_set.output_shapes)
        texts, labels = iterator.get_next()

        logits = model.forward(texts, keep_prob)
        loss = model.loss(logits, labels)
        loss_summary = tf.summary.scalar("loss", loss)
        accuracy = model.accuracy(logits, labels)
        accuracy_summary = tf.summary.scalar("accuracy", accuracy)
        batch_size = tf.unstack(tf.shape(texts))[0]
        confusion = model.confusion_matrix(logits, labels)
        global_step = tf.Variable(0, name="global_step", trainable=False)

        if FLAGS.optimizer == "sgd":
            # halve the learning rate every 3 epochs' worth of iterations
            values = [FLAGS.lr]
            boundaries = []
            for i in range(1, 10):
                values.append(FLAGS.lr / pow(2, i))
                boundaries.append(3 * num_training_iters * i)
            learning_rate = tf.train.piecewise_constant(global_step,
                                                        boundaries, values)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
        else:
            optimizer = tf.train.AdamOptimizer(FLAGS.lr)
        train_op = optimizer.minimize(loss, global_step=global_step)
        merged = tf.summary.merge([loss_summary, accuracy_summary])
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        if os.path.isdir(FLAGS.log_path):
            shutil.rmtree(FLAGS.log_path)
        os.makedirs(FLAGS.log_path)
        if os.path.isdir(FLAGS.saved_path):
            shutil.rmtree(FLAGS.saved_path)
        os.makedirs(FLAGS.saved_path)
        output_file = open(FLAGS.saved_path + os.sep + "logs.txt", "w")
        output_file.write("Model's parameters: {}".format(FLAGS.flag_values_dict()))
        best_loss = 1e5
        best_epoch = 0

        with tf.Session(config=session_conf) as sess:
            train_writer = tf.summary.FileWriter(
                FLAGS.log_path + os.sep + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(FLAGS.log_path + os.sep + 'test')
            sess.run(init)
            for epoch in range(FLAGS.num_epochs):
                sess.run(train_iterator.initializer)
                sess.run(test_iterator.initializer)
                train_handle = sess.run(train_iterator.string_handle())
                test_handle = sess.run(test_iterator.string_handle())
                train_iter = 0
                while True:
                    try:
                        _, tr_loss, tr_accuracy, summary, step = sess.run(
                            [train_op, loss, accuracy, merged, global_step],
                            feed_dict={handle: train_handle,
                                       keep_prob: FLAGS.dropout})
                        print("Epoch: {}/{}, Iteration: {}/{}, Loss: {}, Accuracy: {}".format(
                            epoch + 1, FLAGS.num_epochs, train_iter + 1,
                            num_training_iters, tr_loss, tr_accuracy))
                        train_writer.add_summary(summary, step)
                        train_iter += 1
                    except (tf.errors.OutOfRangeError, StopIteration):
                        break
                if epoch % FLAGS.test_interval == 0:
                    # evaluate on the full test set, weighting by batch size
                    loss_ls = []
                    loss_summary = tf.Summary()
                    accuracy_ls = []
                    accuracy_summary = tf.Summary()
                    confusion_matrix = np.zeros([num_classes, num_classes], np.int32)
                    num_samples = 0
                    while True:
                        try:
                            test_loss, test_accuracy, test_confusion, samples = sess.run(
                                [loss, accuracy, confusion, batch_size],
                                feed_dict={handle: test_handle, keep_prob: 1.0})
                            loss_ls.append(test_loss * samples)
                            accuracy_ls.append(test_accuracy * samples)
                            confusion_matrix += test_confusion
                            num_samples += samples
                        except (tf.errors.OutOfRangeError, StopIteration):
                            break
                    mean_test_loss = sum(loss_ls) / num_samples
                    loss_summary.value.add(tag='loss', simple_value=mean_test_loss)
                    test_writer.add_summary(loss_summary, epoch)
                    mean_test_accuracy = sum(accuracy_ls) / num_samples
                    accuracy_summary.value.add(tag='accuracy',
                                               simple_value=mean_test_accuracy)
                    test_writer.add_summary(accuracy_summary, epoch)
                    output_file.write(
                        "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \n"
                        "Test confusion matrix: \n{}\n\n".format(
                            epoch + 1, FLAGS.num_epochs, mean_test_loss,
                            mean_test_accuracy, confusion_matrix))
                    print("Epoch: {}/{}, Final loss: {}, Final accuracy: {}".format(
                        epoch + 1, FLAGS.num_epochs, mean_test_loss,
                        mean_test_accuracy))
                    # keep the checkpoint with the lowest test loss
                    if mean_test_loss + FLAGS.es_min_delta < best_loss:
                        best_loss = mean_test_loss
                        best_epoch = epoch
                        saver.save(sess, FLAGS.saved_path + os.sep + "char_level_cnn")
                    # early stopping
                    if epoch - best_epoch > FLAGS.es_patience > 0:
                        print("Stop training at epoch {}. The lowest loss achieved is {}".format(
                            epoch, best_loss))
                        break
        output_file.close()
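# Illustration of the piecewise-constant SGD schedule built in train() above,
# assuming (hypothetically) FLAGS.lr = 0.01 and num_training_iters = 1000:
#   values     = [0.01, 0.005, 0.0025, ..., 0.01 / 2**9]
#   boundaries = [3000, 6000, ..., 27000]
# i.e. the learning rate halves after every 3 * num_training_iters global steps.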
# Assumed module-level context: os, shutil, numpy as np, tensorflow as tf,
# plus FLAGS, create_dataset, get_num_classes, and the Very_deep_cnn model
# class from the surrounding project.
def train():
    num_classes = get_num_classes(FLAGS.train_set)
    model = Very_deep_cnn(batch_size=FLAGS.batch_size,
                          num_classes=num_classes,
                          depth=FLAGS.depth,
                          num_embedding=len(FLAGS.alphabet))
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        training_set, num_training_iters = create_dataset(
            FLAGS.train_set, FLAGS.alphabet, FLAGS.max_length,
            FLAGS.batch_size, True)
        test_set, num_test_iters = create_dataset(
            FLAGS.test_set, FLAGS.alphabet, FLAGS.max_length,
            FLAGS.batch_size, False)
        train_iterator = training_set.make_initializable_iterator()
        test_iterator = test_set.make_initializable_iterator()
        handle = tf.placeholder(tf.string, shape=[])
        is_training = tf.placeholder(tf.bool, name='is_training')
        iterator = tf.data.Iterator.from_string_handle(
            handle, training_set.output_types, training_set.output_shapes)
        texts, labels = iterator.get_next()

        logits = model.forward(texts, is_training)
        loss = model.loss(logits, labels)
        loss_summary = tf.summary.scalar("loss", loss)
        accuracy = model.accuracy(logits, labels)
        accuracy_summary = tf.summary.scalar("accuracy", accuracy)
        batch_size = tf.unstack(tf.shape(texts))[0]
        confusion = model.confusion_matrix(logits, labels)
        global_step = tf.Variable(0, name="global_step", trainable=False)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            lr = tf.train.exponential_decay(
                FLAGS.lr, global_step,
                FLAGS.num_epochs * num_training_iters, 0.96, staircase=True)
            optimizer = tf.train.MomentumOptimizer(lr, FLAGS.momentum)
            # clip gradients jointly by their global norm before applying
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            train_op = optimizer.apply_gradients(zip(gradients, variables),
                                                 global_step=global_step)
        merged = tf.summary.merge([loss_summary, accuracy_summary])
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        if os.path.isdir(FLAGS.log_path):
            shutil.rmtree(FLAGS.log_path)
        os.makedirs(FLAGS.log_path)
        if os.path.isdir(FLAGS.saved_path):
            shutil.rmtree(FLAGS.saved_path)
        os.makedirs(FLAGS.saved_path)
        output_file = open(FLAGS.saved_path + os.sep + "logs.txt", "w")
        output_file.write("Model's parameters: {}".format(FLAGS.flag_values_dict()))
        best_loss = 1e5
        best_epoch = 0

        with tf.Session(config=session_conf) as sess:
            train_writer = tf.summary.FileWriter(
                FLAGS.log_path + os.sep + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(FLAGS.log_path + os.sep + 'test')
            sess.run(init)
            for epoch in range(FLAGS.num_epochs):
                sess.run(train_iterator.initializer)
                sess.run(test_iterator.initializer)
                train_handle = sess.run(train_iterator.string_handle())
                test_handle = sess.run(test_iterator.string_handle())
                train_iter = 0
                while True:
                    try:
                        _, tr_loss, tr_accuracy, summary, step = sess.run(
                            [train_op, loss, accuracy, merged, global_step],
                            feed_dict={handle: train_handle, is_training: True})
                        print("Epoch: {}/{}, Iteration: {}/{}, Loss: {}, Accuracy: {}".format(
                            epoch + 1, FLAGS.num_epochs, train_iter + 1,
                            num_training_iters, tr_loss, tr_accuracy))
                        train_writer.add_summary(summary, step)
                        train_iter += 1
                    except (tf.errors.OutOfRangeError, StopIteration):
                        break
                if epoch % FLAGS.test_interval == 0:
                    # evaluate on the full test set, weighting by batch size
                    loss_ls = []
                    loss_summary = tf.Summary()
                    accuracy_ls = []
                    accuracy_summary = tf.Summary()
                    confusion_matrix = np.zeros([num_classes, num_classes], np.int32)
                    num_samples = 0
                    while True:
                        try:
                            test_loss, test_accuracy, test_confusion, samples = sess.run(
                                [loss, accuracy, confusion, batch_size],
                                feed_dict={handle: test_handle, is_training: False})
                            loss_ls.append(test_loss * samples)
                            accuracy_ls.append(test_accuracy * samples)
                            confusion_matrix += test_confusion
                            num_samples += samples
                        except (tf.errors.OutOfRangeError, StopIteration):
                            break
                    mean_test_loss = sum(loss_ls) / num_samples
                    loss_summary.value.add(tag='loss', simple_value=mean_test_loss)
                    test_writer.add_summary(loss_summary, epoch)
                    mean_test_accuracy = sum(accuracy_ls) / num_samples
                    accuracy_summary.value.add(tag='accuracy',
                                               simple_value=mean_test_accuracy)
                    test_writer.add_summary(accuracy_summary, epoch)
                    output_file.write(
                        "Epoch: {}/{} \nTest loss: {} Test accuracy: {} \n"
                        "Test confusion matrix: \n{}\n\n".format(
                            epoch + 1, FLAGS.num_epochs, mean_test_loss,
                            mean_test_accuracy, confusion_matrix))
                    print("Epoch: {}/{}, Final loss: {}, Final accuracy: {}".format(
                        epoch + 1, FLAGS.num_epochs, mean_test_loss,
                        mean_test_accuracy))
                    # keep the checkpoint with the lowest test loss
                    if mean_test_loss + FLAGS.es_min_delta < best_loss:
                        best_loss = mean_test_loss
                        best_epoch = epoch
                        saver.save(sess, "{}/char_level_cnn".format(FLAGS.saved_path))
                    # early stopping
                    if epoch - best_epoch > FLAGS.es_patience > 0:
                        print("Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(
                            epoch, best_loss, best_epoch))
                        break
        output_file.close()
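# Note on the tf.control_dependencies(update_ops) wrapper above: in TF 1.x,
# batch-normalization layers (which this model presumably uses, given its
# is_training placeholder) register their moving-average updates under
# tf.GraphKeys.UPDATE_OPS; building the train op inside the control-dependency
# block guarantees those updates run on every training step.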
item_features.rename(columns={'product_id': 'item_id'}, inplace=True)
user_features.rename(columns={'household_key': 'user_id'}, inplace=True)

# data filtering
data = prefilter_items(data, item_features=item_features, take_n_popular=5000)

candidates = pd.DataFrame(data['user_id'].unique())
candidates = candidates.rename(columns={0: 'user_id'})

recommender = MainRecommender(data)

# Recommendations based on BM25 weighting
candidates['candidates'] = candidates['user_id'].apply(
    lambda x: recommender.get_bm25_recommendations(x, N=100))

# Build a dataframe with the target variable and add user and item features
targets = create_dataset(data=data, data_candidates=candidates,
                         users_info=user_features, items_info=item_features)

# Generate new features
targets = dataset_processing(dataset=targets, data=data, items_info=item_features)

# Split the dataset
X_train = targets.drop('target', axis=1)
y_train = targets[['target']]

cat_feats = [
    'user_id', 'item_id', 'department', 'commodity_desc',
    'sub_commodity_desc', 'curr_size_of_product',