from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split


def train(data, test=False):
    batch_loader = BatchLoader(data)
    with batch_loader as batch:
        print('Preparing data...')
        if test:
            # Hold out 20% of the data for evaluation.
            X_train, X_test, y_train, y_test = train_test_split(
                batch.features, batch.labels, test_size=0.2, random_state=0)
        else:
            X_train = batch.features
            y_train = batch.labels
        print('Training...')
        text_classifier = RandomForestClassifier(n_estimators=10, random_state=0)
        text_classifier.fit(X_train, y_train)
        print('Training finished!')
        if test:
            predictions = text_classifier.predict(X_test)
            print(confusion_matrix(y_test, predictions))
            print(classification_report(y_test, predictions))
            print(accuracy_score(y_test, predictions))
    return text_classifier, batch_loader.vectorizer
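# Usage sketch (not in the source): run an evaluation pass first, then fit the
# final model on everything. 'data/train.csv' is a hypothetical placeholder
# path, standing in for whatever BatchLoader accepts.
_ = train('data/train.csv', test=True)        # prints confusion matrix, report, accuracy
model, vectorizer = train('data/train.csv')   # final model, fitted on the full dataset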
def __init__(self, config, rng):
    self.config = config
    self.rng = rng
    self.task = config.task
    self.model_dir = config.model_dir
    self.gpu_memory_fraction = config.gpu_memory_fraction
    self.checkpoint_secs = config.checkpoint_secs
    self.log_step = config.log_step
    self.num_epoch = config.num_epochs

    ## import data loader ##
    data_dir = config.data_dir
    dataset_name = config.task
    batch_size = config.batch_size
    num_time_steps = config.num_time_steps
    num_node = config.num_node
    self.data_loader = BatchLoader(data_dir, dataset_name, batch_size,
                                   num_time_steps, num_node)

    ## Need to think about how we construct the adjacency matrix (W)
    W = self.data_loader.adj
    laplacian = W / W.max()
    laplacian = scipy.sparse.csr_matrix(laplacian, dtype=np.float32)
    lmax = graph.lmax(laplacian)

    # idx2char = batchLoader_.idx2char
    # char2idx = batchLoader_.char2idx
    # batch_x, batch_y = batchLoader_.next_batch(0)  # 0: train, 1: valid, 2: test
    # batchLoader_.reset_batch_pointer(0)

    ## define model ##
    self.model = Model(config, laplacian, lmax)

    ## model saver / summary writer ##
    self.saver = tf.train.Saver()
    self.model_saver = tf.train.Saver(self.model.model_vars)
    self.summary_train_writer = tf.summary.FileWriter(self.model_dir + '/train')
    self.summary_test_writer = tf.summary.FileWriter(self.model_dir + '/test')

    sv = tf.train.Supervisor(logdir=self.model_dir,
                             is_chief=True,
                             saver=self.saver,
                             summary_op=None,
                             summary_writer=self.summary_train_writer,
                             save_summaries_secs=300,
                             save_model_secs=self.checkpoint_secs,
                             global_step=self.model.model_step)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self.gpu_memory_fraction,
        allow_growth=True)  # allow_growth seems not to take effect here
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)
    self.sess = sv.prepare_or_wait_for_session(config=sess_config)
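# A sketch (not in the source) of the train() method this setup typically
# drives. train_op, loss, summary_op, rnn_input, and rnn_target on the model,
# and num_batches on the loader, are assumptions; next_batch(0) follows the
# split convention noted in the commented-out lines above.
def train(self):
    for step in range(self.num_epoch * self.data_loader.num_batches):
        batch_x, batch_y = self.data_loader.next_batch(0)  # 0: train split
        _, loss, summary = self.sess.run(
            [self.model.train_op, self.model.loss, self.model.summary_op],
            feed_dict={self.model.rnn_input: batch_x,
                       self.model.rnn_target: batch_y})
        if step % self.log_step == 0:
            self.summary_train_writer.add_summary(summary, step)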
import pandas


def predict_to_csv(model, vectorizer, data, dest='prediction.csv'):
    batch_loader = BatchLoader(data, vectorizer, has_labels=False)
    with batch_loader as batch:
        predictions = model.predict(batch.features)
        df = pandas.DataFrame(predictions, columns=['Category'], index=batch.ids)
        df.index.name = 'Id'
        df.to_csv(dest, index=True, header=True)
    print("Predictions exported to {}.".format(dest))
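# Hypothetical end-to-end run combining the two helpers above; the file paths
# are placeholders, not from the source.
model, vectorizer = train('data/train.csv', test=True)
predict_to_csv(model, vectorizer, 'data/test.csv', dest='prediction.csv')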
def __init__(self, config, rng):
    self.config = config
    self.rng = rng
    self.task = config.task
    self.model_dir = config.model_dir
    self.gpu_memory_fraction = config.gpu_memory_fraction
    self.checkpoint_secs = config.checkpoint_secs
    self.log_step = config.log_step
    self.num_epoch = config.num_epochs

    ## import data loader ##
    data_dir = config.data_dir
    dataset_name = config.task
    batch_size = config.batch_size
    num_time_steps = config.num_time_steps
    self.data_loader = BatchLoader(data_dir, dataset_name, batch_size,
                                   num_time_steps)

    ## Need to think about how we construct the adjacency matrix (W)
    W = self.data_loader.adj
    laplacian = W / W.max()  # normalize
    laplacian = scipy.sparse.csr_matrix(laplacian, dtype=np.float32)  # store in CSR format
    lmax = graph.lmax(laplacian)  # Q: purpose unknown; see the sketch below

    ## define model ##
    self.model = Model(config, laplacian, lmax)

    ## model saver / summary writer ##
    self.saver = tf.train.Saver()
    self.model_saver = tf.train.Saver(self.model.model_vars)
    self.summary_writer = tf.summary.FileWriter(self.model_dir)

    sv = tf.train.Supervisor(logdir=self.model_dir,
                             is_chief=True,
                             saver=self.saver,
                             summary_op=None,
                             summary_writer=self.summary_writer,
                             save_summaries_secs=300,
                             save_model_secs=self.checkpoint_secs,
                             global_step=self.model.model_step)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self.gpu_memory_fraction,
        allow_growth=True)  # allow_growth seems not to take effect here
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)
    self.sess = sv.prepare_or_wait_for_session(config=sess_config)
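# Answering the "purpose unknown" question above: in Chebyshev graph
# convolutions (Defferrard et al.), lmax is the largest eigenvalue of the
# Laplacian, used to rescale its spectrum into [-1, 1] before applying
# polynomial filters. A sketch of what graph.lmax presumably computes (an
# assumption; the graph module itself isn't shown in the source):
import numpy as np
import scipy.sparse
import scipy.sparse.linalg


def lmax_sketch(laplacian):
    # Largest-magnitude eigenvalue of a sparse symmetric matrix, via Lanczos.
    return scipy.sparse.linalg.eigsh(
        laplacian, k=1, which='LM', return_eigenvectors=False)[0]


def rescale_laplacian(laplacian, lmax):
    # Chebyshev filters expect eigenvalues in [-1, 1]: L_scaled = 2 L / lmax - I.
    n = laplacian.shape[0]
    return 2.0 * laplacian / lmax - scipy.sparse.identity(n, dtype=np.float32)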
#!/usr/bin/env python
from utils import BatchLoader

# Quick smoke test: load the EEG training set (batch_size=5, timesteps=2,
# matching the loader's positional arguments used elsewhere in this repo).
loader = BatchLoader('data/training_eeg.csv', 5, 2)
loader.next_batch()
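# Sketch of inspecting the result (assumes next_batch returns a
# (features, labels) pair, which the line above discards):
x, y = loader.next_batch()
print(x.shape, y.shape)  # expected (assumption): (5, 2, num_features) and (5,)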
import logging

import tensorflow as tf

from utils import BatchLoader

# learning_rate and epochs are defined earlier (not shown in this snippet).
batch_size = 100
display_step = 10

# Network parameters
num_features = 10       # number of dimensions in the tangent space produced by pyriemann
timesteps = 6           # number of EEG epochs per sequence
num_hidden = 2048       # hidden layer number of neurons
num_classes = 2         # distracted or concentrated
num_layers = 1          # number of hidden layers
input_keep_prob = 1     # portion of incoming connections to keep
output_keep_prob = 0.5  # portion of outgoing connections to keep

logging.info("LR = " + str(learning_rate) + " Epochs = " + str(epochs))

# Initialize data feed
train_loader = BatchLoader('data/training_eeg.csv', batch_size, timesteps,
                           num_features, num_classes, train=True)
valid_loader = BatchLoader('data/valid_eeg.csv', batch_size, timesteps,
                           num_features, num_classes, train=False)

# tf Graph input
X = tf.placeholder("float", [batch_size, timesteps, num_features])
Y = tf.placeholder("int64", [batch_size])

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}
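# The single 'out' projection suggests an LSTM stack classified from its last
# time step. A sketch of the graph that typically follows this setup (the
# structure is an assumption, not from the source; the parameter names are the
# ones defined above, and all APIs are standard TF1):
def build_rnn(X):
    cells = []
    for _ in range(num_layers):
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
        # Dropout on incoming/outgoing connections, per the keep probs above.
        cell = tf.nn.rnn_cell.DropoutWrapper(cell,
                                             input_keep_prob=input_keep_prob,
                                             output_keep_prob=output_keep_prob)
        cells.append(cell)
    stack = tf.nn.rnn_cell.MultiRNNCell(cells)
    # outputs: [batch_size, timesteps, num_hidden]
    outputs, _ = tf.nn.dynamic_rnn(stack, X, dtype=tf.float32)
    # Classify from the last time step only.
    return tf.matmul(outputs[:, -1, :], weights['out']) + biases['out']


logits = build_rnn(X)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits))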
def __init__(self, config, rng):
    self.config = config
    self.rng = rng
    self.model_dir = config.model_dir
    self.gpu_memory_fraction = config.gpu_memory_fraction
    self.checkpoint_secs = config.checkpoint_secs
    self.log_step = config.log_step
    self.num_epoch = config.num_epochs
    self.stop_win_size = config.stop_win_size
    self.stop_early = config.stop_early

    ## import data loader ##
    batch_size = config.batch_size
    server_name = config.server_name
    mode = config.mode
    target = config.target
    sample_rate = config.sample_rate
    win_size = config.win_size
    hist_range = config.hist_range
    s_month = config.s_month
    e_month = config.e_month
    e_date = config.e_date
    s_date = config.s_date
    data_rm = config.data_rm
    coarsening_level = config.coarsening_level
    cnn_mode = config.conv
    is_coarsen = config.is_coarsen
    self.data_loader = BatchLoader(server_name, mode, target, sample_rate,
                                   win_size, hist_range, s_month, s_date,
                                   e_month, e_date, data_rm, batch_size,
                                   coarsening_level, cnn_mode, is_coarsen)
    actual_node = self.data_loader.adj.shape[0]

    if config.conv == 'gcnn':
        graphs = self.data_loader.graphs
        if config.is_coarsen:
            L = [graph.laplacian(A, normalized=config.normalized)
                 for A in graphs]
        else:
            L = [graph.laplacian(self.data_loader.adj,
                                 normalized=config.normalized)] * len(graphs)
    elif config.conv == 'cnn':
        # Halve the node count once per pooling level.
        L = [actual_node]
        tmp_node = actual_node
        while tmp_node > 0:
            tmp_node = int(tmp_node / 2)
            L.append(tmp_node)
    else:
        raise ValueError("Unsupported config.conv {}".format(config.conv))

    tf.reset_default_graph()

    ## define model ##
    self.model = Model(config, L, actual_node)

    ## model saver / summary writer ##
    self.saver = tf.train.Saver()
    self.model_saver = tf.train.Saver(self.model.model_vars)
    self.summary_writer = tf.summary.FileWriter(self.model_dir)

    # Checkpoint files:
    # - meta file: describes the saved graph structure (GraphDef, SaverDef,
    #   and so on); tf.train.import_meta_graph('/tmp/model.ckpt.meta')
    #   restores the Saver and the Graph.
    # - index file: an immutable string-string table
    #   (tensorflow::table::Table). Each key is a tensor name and its value
    #   is a serialized BundleEntryProto, which records which "data" file
    #   holds the tensor's content, the offset into that file, a checksum,
    #   and some auxiliary data.
    # - data file: a TensorBundle collection holding the values of all
    #   variables.
    sv = tf.train.Supervisor(logdir=self.model_dir,
                             is_chief=True,
                             saver=self.saver,
                             summary_op=None,
                             summary_writer=self.summary_writer,
                             save_summaries_secs=300,
                             save_model_secs=self.checkpoint_secs,
                             global_step=self.model.model_step)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self.gpu_memory_fraction,
        allow_growth=True)  # allow_growth seems not to take effect here
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)
    # self.sess = sv.prepare_or_wait_for_session(config=sess_config)
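# config carries stop_early and stop_win_size but the check itself isn't shown;
# a minimal sketch of the windowed early stopping they presumably drive (the
# logic here is an assumption, not the source's implementation):
from collections import deque


class EarlyStopper:
    """Stop when validation loss hasn't improved within the last win_size checks."""

    def __init__(self, win_size):
        self.losses = deque(maxlen=win_size)

    def should_stop(self, val_loss):
        self.losses.append(val_loss)
        window_full = len(self.losses) == self.losses.maxlen
        # No entry in the window beat the oldest one -> no recent improvement.
        return window_full and min(self.losses) == self.losses[0]


# Hypothetical use inside a validation loop:
#   stopper = EarlyStopper(config.stop_win_size)
#   if config.stop_early and stopper.should_stop(val_loss):
#       break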