Beispiel #1
0
def create_label_after_das(da_output_seq, algo, rank, window):
    """Reduce a sequence to a single label value using the chosen algorithm.

    ``algo`` selects the helper applied to ``da_output_seq``:

    * ``"avg"`` / ``"avg_disp"`` call ``create_av`` / ``create_av_disp`` on
      the sequence as given;
    * ``"filter+avg"`` / ``"filter+avg_disp"`` do the same after passing the
      sequence through ``filter_data``;
    * any other value falls back to ``numpy.mean`` of ``create_int_labels``
      (``window`` is not used in that case).

    Returns the first element of the helper's result, or the mean itself for
    the fallback.
    """
    if algo in ("avg", "filter+avg"):
        make_features = create_av
    elif algo in ("avg_disp", "filter+avg_disp"):
        make_features = create_av_disp
    else:
        # Unrecognised algorithm: the label is the mean of the integer labels.
        int_labels = create_int_labels(sequence=da_output_seq, rank=rank)
        return numpy.mean(int_labels)

    # Only the "filter+..." variants pre-process the sequence.
    prefiltered = algo in ("filter+avg", "filter+avg_disp")
    source = filter_data(da_output_seq) if prefiltered else da_output_seq

    features = make_features(sequence=source, rank=rank, window_size=window)
    return features[0]
Beispiel #2
0
    def read_all(self):
        """Read all documents and return their combined inputs and labels.

        One document is always read; then ``len(self.seqs) - 1`` further
        documents are read. For each document, column ``self.n_in - 1`` of the
        matrix returned by ``self.get_sequence()`` is passed through
        ``filter_data`` to form the inputs, and column ``self.n_in`` is taken
        as the labels.

        NOTE(review): this assumes each ``get_sequence()`` call yields the
        next document -- confirm against its definition.

        Returns:
            ``(set_x, set_y)``: a Theano shared variable holding the
            concatenated inputs as ``floatX``, and the concatenated labels as
            a shared ``floatX`` variable cast to ``'int32'``.
        """
        def read_doc():
            # sequence_matrix = array[size of doc][data.z, data.gt]
            sequence_matrix = self.get_sequence()
            # doc_x = array[size of doc][z] (filtered)
            doc_x = filter_data(
                sequence=sequence_matrix[:, self.n_in - 1]
            )
            # doc_y = array[size of doc][labels]
            doc_y = sequence_matrix[:, self.n_in]
            return doc_x, doc_y

        first_x, first_y = read_doc()
        x_parts = [first_x]
        y_parts = [first_y]

        for _ in range(len(self.seqs) - 1):
            doc_x, doc_y = read_doc()
            x_parts.append(doc_x)
            y_parts.append(doc_y)
            gc.collect()

        # Concatenate once at the end: concatenating inside the loop would
        # re-copy the whole accumulated array for every document.
        data_x = numpy.concatenate(x_parts)
        data_y = numpy.concatenate(y_parts)

        set_x = theano.shared(numpy.asarray(data_x,
                                            dtype=theano.config.floatX),
                              borrow=True)
        # Labels are stored as floatX in the shared variable and exposed as
        # int32 through a cast.
        set_y = T.cast(theano.shared(numpy.asarray(data_y,
                                                   dtype=theano.config.floatX),
                                     borrow=True), 'int32')

        return (set_x, set_y)
Beispiel #3
0
 def read_next_doc(self, algo, rank=1, window=1, divide=False, n_labels=7):
     """Read one document and return its transformed inputs and labels.

     The input column (``self.n_in - 1``) of the matrix returned by
     ``self.get_sequence()`` is transformed according to ``algo``:

     * ``"filter"``, ``"normalize_1_1"``, ``"normalize_0_1"``: the
       corresponding helper is applied to the column;
     * ``"int_labels"``: ``create_int_labels`` with ``rank``;
     * ``"avg"``, ``"avg_disp"``: ``create_av`` / ``create_av_disp`` with
       ``rank`` and ``window``;
     * ``"filter+avg"``, ``"filter+avg_disp"``: the same, applied to the
       ``filter_data`` output;
     * anything else: the column is used unchanged.

     The label column (``self.n_in``) is always trimmed to
     ``len - window + 1`` entries starting at ``window // 2``, whatever
     ``algo`` is.
     NOTE(review): presumably this aligns labels with windowed features --
     confirm that callers pass ``window=1`` for the non-windowed algorithms.

     Args:
         algo: name of the input transformation (see above).
         rank: forwarded to the label/average helpers.
         window: window size for the averaging helpers and the label trim.
         divide: if false, return ``(set_x, set_y)`` shared variables; if
             true, return a list with one shared variable per label.
         n_labels: number of label values (``0 .. n_labels - 1``) used when
             ``divide`` is true.

     Returns:
         ``(set_x, set_y)`` when ``divide`` is false, otherwise a list of
         ``n_labels`` shared ``floatX`` variables, the i-th holding the
         inputs whose label equals ``i``.
     """
     # sequence_matrix = array[size of doc][data.z, data.gt]
     sequence_matrix = self.get_sequence()
     z = sequence_matrix[:, self.n_in - 1]

     # d_x = array[size of doc][z]
     if algo == "filter":
         d_x = filter_data(sequence=z)
     elif algo == "normalize_1_1":
         d_x = normalize_sequence_1_1(sequence=z)
     elif algo == "normalize_0_1":
         d_x = normalize_sequence_0_1(sequence=z)
     elif algo == "int_labels":
         d_x = create_int_labels(sequence=z, rank=rank)
     elif algo == "avg_disp":
         d_x = create_av_disp(sequence=z, rank=rank, window_size=window)
     elif algo == "avg":
         d_x = create_av(sequence=z, rank=rank, window_size=window)
     elif algo == "filter+avg":
         d_x = create_av(
             sequence=filter_data(z), rank=rank, window_size=window
         )
     elif algo == "filter+avg_disp":
         d_x = create_av_disp(
             sequence=filter_data(z), rank=rank, window_size=window
         )
     else:
         d_x = z

     # d_y = array[size of doc][labels]
     d_y = sequence_matrix[:, self.n_in]
     # Floor division keeps the slice bounds integral on both Python 2 and 3.
     half_window = window // 2
     d_y = d_y[half_window: len(d_y) + half_window - window + 1]

     gc.collect()
     if not divide:
         set_x = theano.shared(numpy.asarray(d_x),
                               borrow=True)
         set_y = T.cast(theano.shared(numpy.asarray(d_y,
                                                    dtype=theano.config.floatX),
                                      borrow=True), 'int32')
         return (set_x, set_y)

     # Pairs (modified z-coord, label). Materialised as a list because it is
     # scanned once per label; a lazy zip would be exhausted after one pass.
     pairs = list(zip(d_x, d_y))
     visible_seqs = []

     for label in range(n_labels):
         d_x_for_label = [x for x, y in pairs if y == label]
         set_x = theano.shared(numpy.asarray(d_x_for_label,
                                             dtype=theano.config.floatX),
                               borrow=True)
         visible_seqs.append(set_x)

     return visible_seqs