コード例 #1
0
    def read_all_and_divide(self, rank, start_base):
        # sequence_matrix = array[size of 1st doc][data.x, data.y, data.z, data.gt]
        sequence_matrix = self.get_sequence()

        # d_x1 = array[size of 1st doc][x, y, z]
        d_x1 = preprocess_for_HMM(sequence_matrix[:, 0 : self.n_in], rank, start_base)
        # d_x1 = preprocess_sequence(sequence_matrix[:, 0:self.n_in])

        # d_y1 = array[size of 1st doc][labels]
        d_y1 = sequence_matrix[:, self.n_in : self.n_in + 1].reshape(-1)

        # data_x_ar = union for (x, y, z) coordinates in all files
        data_x = d_x1

        # data_y_ar = union for labels in all files
        data_y = d_y1

        for t in range(len(self.seqs) - 1):
            # sequence_matrix = array[size of t-th doc][data.x, data.y, data.z, data.gt]
            sequence_matrix = self.get_sequence()

            # d_x = array[size of t-th doc][x, y, z]
            d_x = preprocess_for_HMM(sequence_matrix[:, 0 : self.n_in], rank, start_base)
            # d_x = preprocess_sequence(sequence_matrix[:, 0:self.n_in])

            # d_y = array[size of t-th doc][labels]
            d_y = sequence_matrix[:, self.n_in : self.n_in + 1].reshape(-1)

            # concatenate data in current file with data in prev files in one array
            data_x = numpy.concatenate((data_x, d_x))
            data_y = numpy.concatenate((data_y, d_y))

            gc.collect()

        all_data = zip(data_x, data_y)
        all_visible_seqs = []

        for label in xrange(7):
            data_x_for_cur_label = []
            for row in all_data:
                if row[1] == label:
                    data_x_for_cur_label.append(row[0])
            # data_for_cur_label = all_data[numpy.where(all_data[:,1] == label)]

            set_x = theano.shared(numpy.asarray(data_x_for_cur_label, dtype=theano.config.floatX), borrow=True)

            all_visible_seqs.append((set_x, label))

        return all_visible_seqs
コード例 #2
0
    def read_all_seqs_on_labels(self, rank, start_base):
        all_visible_seqs = []
        for label in xrange(7):
            # visible_seqs = array[count of labels][size of each label in doc][data.x, data.y, data.z, data.gt]
            visible_seqs = self.get_sequence_on_labels()
            # visible_seqs[label] is ndarray

            if visible_seqs[label] != []:
                # d_x1 = array[size of 1st doc][x, y, z]
                d_x1 = preprocess_for_HMM(visible_seqs[label][:, 0 : self.n_in], rank, start_base)

                # d_y1 = array[size of 1st doc][labels]
                d_y1 = visible_seqs[label][:, self.n_in : self.n_in + 1].reshape(-1)

                # data_x_ar = union for (x, y, z) coordinates in all files
                data_x = d_x1

                # data_y_ar = union for labels in all files
                data_y = d_y1
            else:
                data_x = []
                data_y = []

            for t in range(len(self.seqs) - 1):
                # sequence_matrix = array[size of t-th doc][data.x, data.y, data.z, data.gt]
                visible_seqs = self.get_sequence_on_labels()

                if visible_seqs[label] != []:
                    # d_x = array[size of t-th doc]
                    # consider new labels for data
                    d_x = preprocess_for_HMM(visible_seqs[label][:, 0 : self.n_in], rank, start_base)

                    # d_y = array[size of t-th doc][labels]
                    d_y = visible_seqs[label][:, self.n_in : self.n_in + 1].reshape(-1)

                    # concatenate data in current file with data in prev files in one array
                    data_x = numpy.concatenate((data_x, d_x))
                    data_y = numpy.concatenate((data_y, d_y))

                gc.collect()

            set_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=True)
            set_y = T.cast(theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=True), "int32")
            all_visible_seqs.append((set_x, set_y))

        return all_visible_seqs
コード例 #3
0
    def read_doc_for_second_hmm(self, rank, start_base):
        # sequence_matrix = array[size of 1st doc][data.x, data.y, data.z, data.gt]
        sequence_matrix = self.get_sequence()

        d_x = preprocess_for_HMM(sequence_matrix[:, 0 : self.n_in], rank, start_base)

        d_y = sequence_matrix[:, self.n_in : self.n_in + 1].reshape(-1)

        set_x = theano.shared(d_x)
        set_y = theano.shared(d_y)

        return (set_x, set_y)
コード例 #4
0
    def read_all_for_second_hmm(self, rank, start_base):
        # sequence_matrix = array[size of 1st doc][data.x, data.y, data.z, data.gt]
        sequence_matrix = self.get_sequence()

        # d_x1 = array[size of 1st doc][x, y, z]
        d_x1 = preprocess_for_HMM(sequence_matrix[:, 0 : self.n_in], rank, start_base)

        # d_y1 = array[size of 1st doc][labels]
        d_y1 = sequence_matrix[:, self.n_in : self.n_in + 1].reshape(-1)

        # data_x_ar = union for (x, y, z) coordinates in all files
        data_x = []
        data_x.append(d_x1)

        # data_y_ar = union for labels in all files
        data_y = []
        data_y.append(d_y1)

        for t in range(len(self.seqs) - 1):
            # sequence_matrix = array[size of t-th doc][data.x, data.y, data.z, data.gt]
            sequence_matrix = self.get_sequence()

            # d_x = array[size of t-th doc][x, y, z]
            d_x = preprocess_for_HMM(sequence_matrix[:, 0 : self.n_in], rank, start_base)

            # d_y = array[size of t-th doc][labels]
            d_y = sequence_matrix[:, self.n_in : self.n_in + 1].reshape(-1)

            # concatenate data in current file with data in prev files in one array
            data_x.append(d_x)
            data_y.append(d_y)

            gc.collect()

        set_x = theano.shared(data_x)
        set_y = theano.shared(data_y)

        return (set_x, set_y)