def create_label_after_das(da_output_seq, algo, rank, window):
    """Return the first label value computed from ``da_output_seq``.

    ``algo`` selects how the sequence is turned into labels:

    * ``"avg"`` / ``"avg_disp"`` -- ``create_av`` / ``create_av_disp`` on the
      raw sequence.
    * ``"filter+avg"`` / ``"filter+avg_disp"`` -- the same builders, applied
      to ``filter_data(da_output_seq)``.
    * anything else -- the ``numpy.mean`` of
      ``create_int_labels(da_output_seq, rank)``.

    ``rank`` is forwarded to whichever helper is used; ``window`` is passed
    as ``window_size`` to the averaging builders and is ignored by the
    fallback branch.
    """
    filtered_algos = ("filter+avg", "filter+avg_disp")

    if algo in ("avg_disp", "filter+avg_disp"):
        build_labels = create_av_disp
    elif algo in ("avg", "filter+avg"):
        build_labels = create_av
    else:
        # Unknown algorithm: collapse the integer labels to a single mean.
        int_labels = create_int_labels(sequence=da_output_seq, rank=rank)
        return numpy.mean(int_labels)

    # Filtering is only performed for the "filter+..." variants.
    if algo in filtered_algos:
        source_seq = filter_data(da_output_seq)
    else:
        source_seq = da_output_seq

    labels = build_labels(sequence=source_seq, rank=rank, window_size=window)
    return labels[0]
def read_next_doc(self, algo, rank=1, window=1, divide = False):
    """Read the next document and return its data as theano shared variables.

    The document is obtained from ``self.get_sequence()``. Column
    ``self.n_in - 1`` is used as the input signal and column ``self.n_in``
    as the labels.

    Parameters:
        algo: name of the preprocessing applied to the input column. One of
            ``"filter"``, ``"normalize_1_1"``, ``"normalize_0_1"``,
            ``"int_labels"``, ``"avg_disp"``, ``"avg"``, ``"filter+avg"``,
            ``"filter+avg_disp"``; any other value leaves the column as is.
        rank: forwarded to the label/averaging helpers that accept it.
        window: forwarded as ``window_size`` to the averaging helpers and
            used to trim the label column (see below).
        divide: when false, return one ``(set_x, set_y)`` pair; when true,
            return a list with one shared variable per label.

    Returns:
        ``(set_x, set_y)`` if ``divide`` is false, where ``set_y`` is cast to
        ``'int32'``; otherwise a list of 7 shared variables, entry ``i``
        holding the processed inputs whose label equals ``i``.
    """
    # sequence_matrix = array[size of doc][data.z, data.gt]
    sequence_matrix = self.get_sequence()
    # Every branch below reads the same input column, so slice it once.
    z_column = sequence_matrix[:, self.n_in - 1]

    # d_x = array[size of doc][z]
    if algo == "filter":
        d_x = filter_data(sequence=z_column)
    elif algo == "normalize_1_1":
        d_x = normalize_sequence_1_1(sequence=z_column)
    elif algo == "normalize_0_1":
        d_x = normalize_sequence_0_1(sequence=z_column)
    elif algo == "int_labels":
        d_x = create_int_labels(sequence=z_column, rank=rank)
    elif algo == "avg_disp":
        d_x = create_av_disp(sequence=z_column, rank=rank, window_size=window)
    elif algo == "avg":
        d_x = create_av(sequence=z_column, rank=rank, window_size=window)
    elif algo == "filter+avg":
        d_x = create_av(
            sequence=filter_data(z_column), rank=rank, window_size=window
        )
    elif algo == "filter+avg_disp":
        d_x = create_av_disp(
            sequence=filter_data(z_column), rank=rank, window_size=window
        )
    else:
        d_x = z_column

    # d_y = array[size of doc][labels]
    d_y = sequence_matrix[:, self.n_in]
    # Drop window // 2 labels at the front and the remainder of (window - 1)
    # at the back; presumably this aligns labels with the windowed d_x --
    # TODO confirm against create_av / create_av_disp.
    # Explicit floor division keeps the slice bounds integral regardless of
    # the interpreter's division semantics.
    half_window = window // 2
    d_y = d_y[half_window: len(d_y) + half_window - window + 1]
    gc.collect()

    if not divide:
        set_x = theano.shared(numpy.asarray(d_x), borrow=True)
        set_y = T.cast(
            theano.shared(
                numpy.asarray(d_y, dtype=theano.config.floatX), borrow=True
            ),
            'int32'
        )
        return (set_x, set_y)

    # pairs (modified z-coord, label); materialised because the pairs are
    # scanned once per label and a lazy zip would be exhausted after the
    # first scan.
    data = list(zip(d_x, d_y))
    visible_seqs = []
    # NOTE: the number of labels (7) is hard-coded here.
    for label in range(7):
        d_x_for_label = [value for value, target in data if target == label]
        set_x = theano.shared(
            numpy.asarray(d_x_for_label, dtype=theano.config.floatX),
            borrow=True
        )
        visible_seqs.append(set_x)
    return visible_seqs