Exemple #1
0
def train(data, test=False):
    """Fit a random-forest classifier on the batch loaded from ``data``.

    Args:
        data: Forwarded unchanged to ``BatchLoader``.
        test: When true, 20% of the samples are held out (fixed seed) and
            the confusion matrix, classification report and accuracy score
            for that hold-out split are printed after fitting. When false,
            every sample is used for fitting and nothing is evaluated.

    Returns:
        A ``(classifier, vectorizer)`` tuple holding the fitted
        ``RandomForestClassifier`` and ``batch_loader.vectorizer``.
    """
    batch_loader = BatchLoader(data)
    with batch_loader as batch:
        print('Preparing data...')

        features, labels = batch.features, batch.labels
        if test:
            # Fixed random_state keeps the split reproducible across runs.
            X_train, X_test, y_train, y_test = train_test_split(
                features, labels, test_size=0.2, random_state=0)
        else:
            X_train, y_train = features, labels

        print('Train...')
        text_classifier = RandomForestClassifier(n_estimators=10,
                                                 random_state=0)
        text_classifier.fit(X_train, y_train)
        print('Training finished!')

        if test:
            predictions = text_classifier.predict(X_test)
            # Each metric is printed on its own, in this order.
            for metric in (confusion_matrix,
                           classification_report,
                           accuracy_score):
                print(metric(y_test, predictions))

        return text_classifier, batch_loader.vectorizer
Exemple #2
0
    def __init__(self, config, rng):
        """Create the data loader, model, savers and TensorFlow session.

        Args:
            config: Settings object. This method reads ``task``,
                ``model_dir``, ``gpu_memory_fraction``, ``checkpoint_secs``,
                ``log_step``, ``num_epochs``, ``data_dir``, ``batch_size``,
                ``num_time_steps`` and ``num_node`` from it, and also hands
                the whole object to ``Model``.
            rng: Kept on the instance as ``self.rng``; not otherwise used
                in this method.
        """
        # Settings kept on the instance.
        self.config = config
        self.rng = rng
        self.task = config.task
        self.model_dir = config.model_dir
        self.gpu_memory_fraction = config.gpu_memory_fraction
        self.checkpoint_secs = config.checkpoint_secs
        self.log_step = config.log_step
        self.num_epoch = config.num_epochs

        # Data loader; the task name doubles as the dataset name.
        self.data_loader = BatchLoader(config.data_dir,
                                       config.task,
                                       config.batch_size,
                                       config.num_time_steps,
                                       config.num_node)

        # The adjacency matrix is divided by its largest entry and stored as
        # a float32 CSR matrix. That scaled adjacency (not a separately
        # derived Laplacian) is what the model receives as its "laplacian".
        # How the adjacency matrix itself should be built is still open.
        adjacency = self.data_loader.adj
        scaled_adjacency = scipy.sparse.csr_matrix(adjacency / adjacency.max(),
                                                   dtype=np.float32)
        # NOTE(review): graph.lmax presumably returns the largest eigenvalue
        # of the matrix -- confirm against the graph module.
        lmax = graph.lmax(scaled_adjacency)

        # The model must exist before the savers are created.
        self.model = Model(config, scaled_adjacency, lmax)

        # One saver without an explicit variable list, one restricted to
        # the model's own variables.
        self.saver = tf.train.Saver()
        self.model_saver = tf.train.Saver(self.model.model_vars)

        # Separate summary directories for training and test runs.
        self.summary_train_writer = tf.summary.FileWriter(
            self.model_dir + '/train')
        self.summary_test_writer = tf.summary.FileWriter(
            self.model_dir + '/test')

        # summary_op=None is passed explicitly; only the training writer is
        # handed to the supervisor.
        supervisor = tf.train.Supervisor(
            logdir=self.model_dir,
            is_chief=True,
            saver=self.saver,
            summary_op=None,
            summary_writer=self.summary_train_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.model.model_step)

        # The original author noted that allow_growth "seems to be not
        # working" here.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=self.gpu_memory_fraction,
                allow_growth=True))

        self.sess = supervisor.prepare_or_wait_for_session(config=sess_config)
Exemple #3
0
def predict_to_csv(model, vectorizer, data, dest='prediction.csv'):
    """Predict a category for each sample in ``data`` and write them to CSV.

    Args:
        model: Object exposing ``predict(features)``.
        vectorizer: Forwarded to ``BatchLoader`` together with ``data``.
        data: Unlabelled input handed to ``BatchLoader``
            (``has_labels=False``).
        dest: Path of the CSV file to write. The file has an ``Id`` index
            column taken from ``batch.ids`` and a ``Category`` column
            holding the predictions.
    """
    with BatchLoader(data, vectorizer, has_labels=False) as batch:
        predicted = model.predict(batch.features)

        table = pandas.DataFrame(predicted,
                                 columns=['Category'],
                                 index=batch.ids)
        table.index.name = 'Id'
        table.to_csv(dest, index=True, header=True)

    print("Predictions exported to {}.".format(dest))
Exemple #4
0
    def __init__(self, config, rng):
        """Set up the data loader, model, savers and TensorFlow session.

        Args:
            config: Settings object. This method reads ``task``,
                ``model_dir``, ``gpu_memory_fraction``, ``checkpoint_secs``,
                ``log_step``, ``num_epochs``, ``data_dir``, ``batch_size``
                and ``num_time_steps`` from it, and also passes the whole
                object to ``Model``.
            rng: Stored as ``self.rng``; not otherwise used in this method.
        """
        # Settings kept on the instance.
        self.config = config
        self.rng = rng
        self.task = config.task
        self.model_dir = config.model_dir
        self.gpu_memory_fraction = config.gpu_memory_fraction
        self.checkpoint_secs = config.checkpoint_secs
        self.log_step = config.log_step
        self.num_epoch = config.num_epochs

        # Data loader; the task name is reused as the dataset name.
        self.data_loader = BatchLoader(config.data_dir,
                                       config.task,
                                       config.batch_size,
                                       config.num_time_steps)

        # How the adjacency matrix should be constructed is still an open
        # question. For now it is normalised by its largest entry and stored
        # in CSR format as float32; that scaled adjacency is what the model
        # is given as its "laplacian".
        adjacency = self.data_loader.adj
        scaled_adjacency = scipy.sparse.csr_matrix(adjacency / adjacency.max(),
                                                   dtype=np.float32)
        # The original author marked the purpose of lmax as unknown.
        # NOTE(review): graph.lmax presumably returns the largest eigenvalue
        # of the matrix -- confirm against the graph module.
        lmax = graph.lmax(scaled_adjacency)

        # The model must exist before the savers are created.
        self.model = Model(config, scaled_adjacency, lmax)

        # One saver without an explicit variable list, one restricted to
        # the model's own variables, and a single summary writer.
        self.saver = tf.train.Saver()
        self.model_saver = tf.train.Saver(self.model.model_vars)
        self.summary_writer = tf.summary.FileWriter(self.model_dir)

        # summary_op=None is passed explicitly to the supervisor.
        supervisor = tf.train.Supervisor(
            logdir=self.model_dir,
            is_chief=True,
            saver=self.saver,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.model.model_step)

        # The original author noted that allow_growth "seems to be not
        # working" here.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=self.gpu_memory_fraction,
                allow_growth=True))

        self.sess = supervisor.prepare_or_wait_for_session(config=sess_config)
Exemple #5
0
#!/bin/python


from utils import BatchLoader

# Smoke run: build a loader over the training EEG CSV and request one batch.
# NOTE(review): the positional arguments 5 and 2 are presumably the batch
# size and the number of timesteps per sequence -- confirm against the
# signature of utils.BatchLoader.
loader = BatchLoader('data/training_eeg.csv', 5, 2)
# next_batch() is called once; whatever it returns is discarded.
loader.next_batch()

Exemple #6
0
# Training-loop settings.
# batch_size also fixes the leading dimension of the X and Y placeholders
# defined further down.
batch_size = 100
# NOTE(review): presumably the interval (in steps) between progress reports;
# it is not used in this part of the script -- confirm where it is consumed.
display_step = 10

# Network Parameters
num_features = 10 # Number of dimensions in tangent space produced by pyriemann
timesteps = 6 # Number of eeg epochs per sequence
num_hidden = 2048 # hidden layer num of neurons
num_classes = 2 # distracted or concentrated
num_layers = 1 # number of hidden layers
input_keep_prob = 1 # portion of incoming connections to keep
output_keep_prob = 0.5 # portion of outgoing connections to keep

# learning_rate and epochs are defined earlier in the script, outside this
# section.
logging.info("LR = " + str(learning_rate) + " Epochs = " + str(epochs))

# Initialize data feed: one loader per CSV split. Both receive the same
# batch and shape settings and differ only in the file path and the `train`
# flag.
train_loader = BatchLoader('data/training_eeg.csv', batch_size, timesteps, num_features, num_classes, train=True)
valid_loader = BatchLoader('data/valid_eeg.csv', batch_size, timesteps, num_features, num_classes, train=False)

# tf Graph input
# Both placeholders use a fixed batch dimension (batch_size, not None), so
# every fed batch must contain exactly batch_size sequences.
# X: float inputs of shape [batch_size, timesteps, num_features].
X = tf.placeholder("float", [batch_size, timesteps, num_features])
# Y: one int64 value per sequence.
# NOTE(review): presumably a class index in [0, num_classes) -- confirm.
Y = tf.placeholder("int64", [batch_size])

# Define weights
# A single 'out' entry each, initialised from a random normal distribution:
# a [num_hidden, num_classes] matrix and a [num_classes] bias vector.
# NOTE(review): presumably the output projection applied to the recurrent
# layer's hidden state -- the code using them is outside this section.
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}

Exemple #7
0
    def __init__(self, config, rng):
        """Create the data loader, graph inputs, model, savers and session.

        Args:
            config: Settings object. Besides the values copied onto the
                instance, this method reads the data-loader options
                (``server_name``, ``mode``, ``target``, ``sample_rate``,
                ``win_size``, ``hist_range``, ``s_month``, ``s_date``,
                ``e_month``, ``e_date``, ``data_rm``, ``batch_size``,
                ``coarsening_level``, ``conv``, ``is_coarsen``) and
                ``normalized``. The whole object is also passed to
                ``Model``.
            rng: Stored as ``self.rng``; not otherwise used in this method.

        Raises:
            ValueError: If ``config.conv`` is neither ``'gcnn'`` nor
                ``'cnn'``. This is raised after the data loader has been
                built.
        """
        # Settings kept on the instance.
        self.config = config
        self.rng = rng
        self.model_dir = config.model_dir
        self.gpu_memory_fraction = config.gpu_memory_fraction
        self.checkpoint_secs = config.checkpoint_secs
        self.log_step = config.log_step
        self.num_epoch = config.num_epochs
        self.stop_win_size = config.stop_win_size
        self.stop_early = config.stop_early

        # Data loader; the arguments are positional, so their order matters.
        self.data_loader = BatchLoader(
            config.server_name, config.mode, config.target,
            config.sample_rate, config.win_size, config.hist_range,
            config.s_month, config.s_date, config.e_month, config.e_date,
            config.data_rm, config.batch_size, config.coarsening_level,
            config.conv, config.is_coarsen)

        # Number of nodes = number of rows of the adjacency matrix.
        actual_node = self.data_loader.adj.shape[0]

        if config.conv == 'gcnn':
            graphs = self.data_loader.graphs
            if config.is_coarsen:
                # One Laplacian per graph supplied by the loader.
                L = [
                    graph.laplacian(adjacency, normalized=config.normalized)
                    for adjacency in graphs
                ]
            else:
                # The Laplacian of the full adjacency matrix is computed
                # once; the same object is repeated once per graph.
                full_laplacian = graph.laplacian(
                    self.data_loader.adj, normalized=config.normalized)
                L = [full_laplacian] * len(graphs)
        elif config.conv == 'cnn':
            # Node counts obtained by repeated halving, starting at
            # actual_node and ending with a trailing 0 (e.g. 4 -> 4, 2, 1, 0).
            L = [actual_node]
            remaining = actual_node
            while remaining > 0:
                remaining = int(remaining / 2)
                L.append(remaining)
        else:
            raise ValueError("Unsupported config.conv {}".format(config.conv))

        # Start from an empty default graph before building the model.
        tf.reset_default_graph()
        self.model = Model(config, L, actual_node)

        # One saver without an explicit variable list, one restricted to
        # the model's own variables, and a single summary writer.
        self.saver = tf.train.Saver()
        self.model_saver = tf.train.Saver(self.model.model_vars)
        self.summary_writer = tf.summary.FileWriter(self.model_dir)

        # Checkpoint files, for reference:
        #   * meta file  - describes the saved graph structure (GraphDef,
        #     SaverDef, ...); tf.train.import_meta_graph('/tmp/model.ckpt.meta')
        #     restores the Saver and the Graph from it.
        #   * index file - an immutable string-to-string table
        #     (tensorflow::table::Table). Each key is a tensor name and each
        #     value a serialized BundleEntryProto describing that tensor's
        #     metadata: which "data" file holds its content, the offset into
        #     that file, a checksum, some auxiliary data, etc.
        #   * data file  - the TensorBundle collection holding the values of
        #     all variables.
        supervisor = tf.train.Supervisor(
            logdir=self.model_dir,
            is_chief=True,
            saver=self.saver,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.model.model_step)

        # The original author noted that allow_growth "seems to be not
        # working" here.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=self.gpu_memory_fraction,
                allow_growth=True))

        self.sess = supervisor.prepare_or_wait_for_session(config=sess_config)