# NOTE(review): this chunk arrived whitespace-mangled (whole script on one line);
# reformatted for readability with tokens unchanged. `tf`, `np` and `os` are used
# below but not imported in this view — presumably imported earlier in the file;
# confirm. Uses the TensorFlow 1.x `tf.flags` API.
import processor
from model import Model

# Command-line parameters
tf.flags.DEFINE_boolean("eval_all", False, "Evaluate on all data (default: false)")
tf.flags.DEFINE_string("dir", "", "Directory to read network from")
tf.flags.DEFINE_string("data_file", "data/features.csv", "File to read data from (default: 'data/features.csv')")
F = tf.flags.FLAGS
# NOTE(review): _parse_flags() is a private TF1 API, called here to force flag
# parsing so F.* values are populated; removed in later TF versions.
F._parse_flags()

# Get data
print("Loading data...")
x, y = processor.load_data(F.data_file)
print("Data loaded.")

print("Preparing data...")
if F.eval_all:
    # Evaluate on the full dataset.
    x = np.array(x)
    y = np.array(y)
else:
    # Evaluate only on the last 10% of the data (presumably the held-out
    # split used at training time — confirm against the training script).
    amount = int(0.1 * len(x))
    x = np.array(x)[-amount:]
    y = np.array(y)[-amount:]
print("Data prepared.")

print("Initializing model...")
# Locate the most recent checkpoint under <dir>/checkpoints.
checkpoint_file = tf.train.latest_checkpoint(os.path.join(F.dir, "checkpoints"))
print("Reading checkpoint from {}".format(checkpoint_file))
# Training run configuration flags (TensorFlow 1.x `tf.flags` API).
# NOTE(review): `tf`, `np`, `learn` (tf.contrib.learn) and `processor` are used
# below but not imported in this view — presumably imported earlier in the file;
# confirm.
tf.flags.DEFINE_integer("max_data", -1, "Maximum number of data points to use")
tf.flags.DEFINE_integer("batch_size", 10, "Batch Size (default: 10)")
# FIX: help strings below previously contradicted the actual defaults
# (said 100/5/20 for defaults of 20/50/100).
tf.flags.DEFINE_integer("num_epochs", 20, "Number of training epochs (default: 20)")
tf.flags.DEFINE_integer("evaluate_every", 50, "Evaluate model on dev set after this many steps (default: 50)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

F = tf.flags.FLAGS
# Private TF1 API: forces flag parsing so F.* values are populated.
F._parse_flags()

# Data
print("Loading data...")
input_text, input_extra, y = processor.load_data(F.max_data)
print("Data loaded.")

# Build vocabulary: map each token to an integer id, padding every document
# to the length of the longest one (whitespace tokenization).
print("Building vocabulary...")
max_document_length = max(len(t.split(" ")) for t in input_text)
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
text = np.array(list(vocab_processor.fit_transform(input_text)))
print("Vocabulary built.")

# Shuffle data — apply the SAME permutation to text, extra features and labels
# so the three arrays stay aligned row-for-row.
print("Preparing data...")
shuffle_indices = np.random.permutation(np.arange(len(y)))
text_shuffled = text[shuffle_indices]
# BUG FIX: the original indexed an undefined name `extra` (NameError at runtime);
# the extra features returned by processor.load_data are bound to `input_extra`.
extra_shuffled = np.array(input_extra)[shuffle_indices]
y_shuffled = np.array(y)[shuffle_indices]
# NOTE(review): this chunk arrived whitespace-mangled (whole script on one line);
# reformatted for readability with tokens unchanged. `tf` and `np` are used below
# but not imported in this view — presumably imported earlier in the file; confirm.
# The chunk is also truncated mid-call (the `Model(` constructor arguments
# continue beyond this view).

# Network / training hyper-parameters (TensorFlow 1.x `tf.flags` API).
tf.flags.DEFINE_string("hidden_layer_sizes", "9", "Comma-separated list of hidden layer sizes (default: '9')")
tf.flags.DEFINE_float("learning_rate", 0.0001, "Learning rate (default: 0.0001)")
tf.flags.DEFINE_float("beta", 0.001, "Beta value for L2 regularization (default: 0.001)")
tf.flags.DEFINE_integer("max_data", -1, "Maximum number of data points to use")
tf.flags.DEFINE_integer("batch_size", 40, "Batch size (default: 40)")
tf.flags.DEFINE_integer("num_epochs", 80, "Number of training epochs (default: 80)")
tf.flags.DEFINE_integer("evaluate_every", 50, "Evaluate model on dev set after this many steps (default: 50)")
tf.flags.DEFINE_string("data_file", "data/features.csv", "File to read data from (default: 'data/features.csv')")
F = tf.flags.FLAGS
# NOTE(review): _parse_flags() is a private TF1 API, called here to force flag
# parsing so F.* values are populated; removed in later TF versions.
F._parse_flags()

# Get data
print("Loading data...")
x, y = processor.load_data(F.data_file, F.max_data)
print("Data loaded.")

print("Preparing data...")
x = np.array(x)
y = np.array(y)

# Split train vs test: the last 10% of the data is held out for evaluation.
amount = int(0.1 * len(x))
x_train, x_eval = x[:-amount], x[-amount:]
y_train, y_eval = y[:-amount], y[-amount:]
print("Data prepared.")

with tf.Session() as session:
    print("Initializing model...")
    # NOTE(review): constructor arguments truncated in this view — continues
    # beyond the visible chunk.
    network = Model(