def read_one_with_window(self, window_size, divide, n_labels=7):
    """Read one sequence and cut it into overlapping sliding windows.

    The matrix returned by ``self.get_sequence()`` is split into its first
    ``self.n_in`` columns (inputs, passed through ``preprocess_sequence``)
    and column ``self.n_in`` (labels). Every run of ``window_size``
    consecutive input rows is flattened into one sample; the sample's label
    is the label of the row at offset ``window_size // 2`` inside the window.

    Args:
        window_size: number of consecutive rows per window.
        divide: if falsy, return all windows together with their labels;
            otherwise return the windows grouped by label.
        n_labels: number of label values ``0 .. n_labels - 1`` to group by
            when ``divide`` is truthy. Defaults to 7, the value that was
            previously hard-coded.

    Returns:
        ``(set_x, set_y)`` when ``divide`` is falsy: a Theano shared
        variable holding the windows (``floatX``) and the labels cast to
        ``int32``. Otherwise a list of ``n_labels`` shared variables, where
        entry ``k`` holds the windows whose label equals ``k``.
    """
    # sequence_matrix = array[size of doc][data.x, data.y, data.z, data.gt]
    sequence_matrix = self.get_sequence()
    # NOTE(review): assumes preprocess_sequence returns an array with one
    # row per input row (``.shape[0]`` is used below) -- confirm.
    d_x = preprocess_sequence(sequence_matrix[:, 0:self.n_in])
    d_y = sequence_matrix[:, self.n_in:self.n_in + 1].reshape(-1)

    # A non-positive count (sequence shorter than the window) yields empty
    # ranges and therefore empty outputs, as before.
    n_samples = d_x.shape[0] - window_size + 1
    # Floor division keeps the index an int under true division as well.
    label_offset = window_size // 2

    d_x_window = [d_x[i:i + window_size].flatten() for i in range(n_samples)]
    d_y_window = [d_y[i + label_offset] for i in range(n_samples)]

    if not divide:
        set_x = theano.shared(
            numpy.asarray(d_x_window, dtype=theano.config.floatX),
            borrow=True,
        )
        set_y = T.cast(
            theano.shared(
                numpy.asarray(d_y_window, dtype=theano.config.floatX),
                borrow=True,
            ),
            "int32",
        )
        return (set_x, set_y)

    visible_seqs = []
    for label in range(n_labels):
        # zip() is rebuilt for every label: where it returns a one-shot
        # iterator, a single shared zip would be exhausted after label 0.
        d_x_for_label = [
            window
            for window, window_label in zip(d_x_window, d_y_window)
            if window_label == label
        ]
        set_x = theano.shared(
            numpy.asarray(d_x_for_label, dtype=theano.config.floatX),
            borrow=True,
        )
        visible_seqs.append(set_x)
    return visible_seqs
def read_all(self):
    """Read all sequences and return them as one (inputs, labels) pair.

    ``self.get_sequence()`` is called ``len(self.seqs)`` times (and at least
    once). For each returned matrix the first ``self.n_in`` columns are
    passed through ``preprocess_sequence`` and column ``self.n_in`` is taken
    as the flat label vector. The pieces are then stacked in read order.

    NOTE(review): presumably every ``get_sequence()`` call yields the next
    document from ``self.seqs`` -- confirm against its definition.

    Returns:
        ``(set_x, set_y)``: a Theano shared variable with the stacked inputs
        (``floatX``) and the concatenated labels cast to ``int32``.
    """
    # The previous implementation read one document unconditionally and then
    # len(self.seqs) - 1 more; keep exactly that number of reads.
    n_docs = max(len(self.seqs), 1)

    inputs = []
    labels = []
    for _ in range(n_docs):
        # sequence_matrix = array[size of doc][data.x, data.y, data.z, data.gt]
        sequence_matrix = self.get_sequence()
        inputs.append(preprocess_sequence(sequence_matrix[:, 0:self.n_in]))
        labels.append(
            sequence_matrix[:, self.n_in:self.n_in + 1].reshape(-1)
        )

    if n_docs == 1:
        # A single document was never passed through vstack/concatenate.
        data_x, data_y = inputs[0], labels[0]
    else:
        # Join once at the end instead of re-copying the accumulated arrays
        # for every document.
        data_x = numpy.vstack(inputs)
        data_y = numpy.concatenate(labels)

    # Drop the per-document pieces before allocating the shared copies.
    del inputs, labels
    gc.collect()

    set_x = theano.shared(
        numpy.asarray(data_x, dtype=theano.config.floatX),
        borrow=True,
    )
    set_y = T.cast(
        theano.shared(
            numpy.asarray(data_y, dtype=theano.config.floatX),
            borrow=True,
        ),
        "int32",
    )
    return (set_x, set_y)
def read_next_doc(self):
    """Read a single sequence and wrap it in Theano shared variables.

    The matrix from ``self.get_sequence()`` is split into its first
    ``self.n_in`` columns (inputs, run through ``preprocess_sequence``) and
    column ``self.n_in`` (labels, flattened to one dimension).

    Returns:
        ``(set_x, set_y)``: shared inputs stored as ``floatX`` and the
        shared labels cast to ``int32``.
    """
    # doc = array[size of doc][data.x, data.y, data.z, data.gt]
    doc = self.get_sequence()
    label_col = self.n_in

    features = preprocess_sequence(doc[:, 0:label_col])
    targets = doc[:, label_col:label_col + 1].reshape(-1)

    gc.collect()

    float_type = theano.config.floatX
    shared_inputs = theano.shared(
        numpy.asarray(features, dtype=float_type), borrow=True
    )
    shared_targets = theano.shared(
        numpy.asarray(targets, dtype=theano.config.floatX), borrow=True
    )
    return (shared_inputs, T.cast(shared_targets, "int32"))