Python Dataset.get_entries 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: libact.base.dataset

클래스/타입: Dataset

메소드/함수: get_entries

hotexamples.com에서의 예제들: 2

Python Dataset.get_entries - 2개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python libact.base.dataset.Dataset.get_entries의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Dataset(20)

update(6)

format_sklearn(4)

get_entries(2)

get_labeled_entries(2)

len_labeled(2)

append(1)

get_unlabeled_entries(1)

예제 #1

파일 보기

def split_train_test(test_size):
    """Build partially-labeled, fully-labeled and test datasets for 'vehicle'.

    The features are standardized, the class labels are re-encoded as
    ``0 .. n_classes - 1`` and the data is split with a stratified
    ``train_test_split``.  The training samples are then reordered so that
    one sample of every class comes first, followed by all remaining
    samples in their original relative order.

    Parameters
    ----------
    test_size
        Forwarded unchanged to ``train_test_split`` as ``test_size``.

    Returns
    -------
    trn_ds : Dataset
        Reordered training samples; only the leading one-per-class samples
        carry a label, every other label is ``None``.
    tst_ds : Dataset
        The held-out test split with all labels.
    fully_labeled_trn_ds : Dataset
        Same samples and order as ``trn_ds`` but with every label present.
    cost_matrix : numpy.ndarray
        ``(n_classes, n_classes)`` matrix of random costs drawn from
        ``[0, 2000)`` with a zero diagonal.

    Raises
    ------
    IndexError
        If some class has no sample in the training split.
    """
    # Choose a dataset with unbalanced class instances ('segment' was the
    # alternative tried by the original author).
    # NOTE(review): fetch_mldata was removed from scikit-learn in 0.22;
    # the loader is kept as-is because swapping it changes the data that
    # is returned -- confirm the pinned scikit-learn version.
    data = sklearn.datasets.fetch_mldata('vehicle')

    X = StandardScaler().fit_transform(data['data'])
    # Map the targets to 0 .. n_classes-1 in a single pass: ``y[k]`` is the
    # position of sample k's label inside the sorted unique ``target``.
    target, y = np.unique(np.ravel(data['target']), return_inverse=True)

    X_trn, X_tst, y_trn, y_tst = train_test_split(
        X, y, test_size=test_size, stratify=y)

    # Make sure each class appears once initially: take the first training
    # sample of every class.
    init_y_ind = np.array(
        [np.flatnonzero(y_trn == label)[0] for label in range(len(target))],
        dtype=int)

    # Every other training index, ascending.  A boolean mask avoids a linear
    # scan of ``init_y_ind`` per sample and always yields an integer array,
    # even when nothing is left over.
    is_rest = np.ones(len(X_trn), dtype=bool)
    is_rest[init_y_ind] = False
    y_ind = np.flatnonzero(is_rest)

    # Seed samples first, then the rest.  Fancy indexing copies, so the two
    # training datasets below do not share a feature array.
    order = np.concatenate((init_y_ind, y_ind))

    trn_ds = Dataset(
        X_trn[order],
        np.concatenate((y_trn[init_y_ind], [None] * len(y_ind))))
    tst_ds = Dataset(X_tst, y_tst)

    fully_labeled_trn_ds = Dataset(X_trn[order], y_trn[order])

    cost_matrix = 2000. * np.random.rand(len(target), len(target))
    np.fill_diagonal(cost_matrix, 0)

    print(trn_ds.get_entries()[0])
    print(np.shape(fully_labeled_trn_ds.get_entries()))
    return trn_ds, tst_ds, fully_labeled_trn_ds, cost_matrix

예제 #2

파일 보기

class UncertaintySampler(object):
    """Pair a ``Dataset`` with an ``UncertaintySampling`` query strategy.

    Callers exchange labels as numbers where a negative value (``-1`` on
    output) means "unlabeled"; inside the dataset unlabeled samples carry
    ``None`` instead.

    The methods below treat ``Dataset.get_entries()`` as an iterable of
    ``(features, label)`` pairs.
    """

    def __init__(self, X, y, labs, n=2):
        """Create the dataset and the query strategy.

        Parameters
        ----------
        X
            Sample features, passed to ``Dataset`` unchanged.
        y
            One label per sample; negative values (or ``None``) mark
            unlabeled samples.
        labs
            Stored as ``self.labs`` and written out by :meth:`save`.
        n : int
            Passed as ``n`` to ``make_query`` by :meth:`get_next`.
        """
        # Negative labels become None.  The explicit None test keeps labels
        # that are already None working on Python 3, where ``None >= 0``
        # raises TypeError.
        y = [yy if yy is not None and yy >= 0 else None for yy in y]

        self.dataset = Dataset(X, y)
        self.labs = labs

        # NOTE(review): 'lc' is presumably libact's least-confidence
        # strategy -- confirm against the libact documentation.
        self.uc = UncertaintySampling(self.dataset,
                                      method='lc',
                                      model=LinearSVC())
        self.n = n

    def get_next(self):
        """Return the result of ``make_query(n=self.n)``, logging to stderr."""
        # sys.stderr.write emits the same text as the Python 2 statement
        # ``print >> sys.stderr, ...`` and also works on Python 3.
        sys.stderr.write('get_next: start\n')
        out = self.uc.make_query(n=self.n)
        sys.stderr.write('get_next: done\n')
        return out

    def set_label(self, idx, label):
        """Forward ``(idx, label)`` to ``Dataset.update`` and return its result."""
        sys.stderr.write('set_label: start\n')
        out = self.dataset.update(idx, label)
        sys.stderr.write('set_label: done\n')
        return out

    def get_data(self):
        """Return ``(X, y)`` arrays with unlabeled samples encoded as ``-1``.

        ``X`` is the row-wise stack of every sample's features and ``y`` the
        matching labels.
        """
        features, labels = zip(*self.dataset.get_entries())
        X = np.vstack(features)
        y = np.array([-1 if label is None else label for label in labels])
        return X, y

    def n_hits(self):
        """Return how many samples currently carry the label ``1``."""
        # Built with a comprehension because ``zip(...)[1]`` only works on
        # Python 2, where zip returns a list.
        labels = np.array([label for _, label in self.dataset.get_entries()])
        return (labels == 1).sum()

    def n_labeled(self):
        """Return ``Dataset.len_labeled()``."""
        return self.dataset.len_labeled()

    def is_labeled(self, idx):
        """Return True when sample ``idx`` has a label other than ``None``.

        Indices outside ``0 .. n_samples - 1`` yield False.
        """
        labels = [label for _, label in self.dataset.get_entries()]
        # Compare against None rather than relying on truthiness: class 0 is
        # a valid label but is falsy.
        return 0 <= idx < len(labels) and labels[idx] is not None

    def save(self, outpath):
        """Write ``X``, ``y`` and ``labs`` to a timestamped HDF5 file.

        The file is named ``<outpath>-uncertainty-<YYYYmmdd_HHMMSS>.h5``.

        TODO: this should be updated to save in the same format as
        simple_las.
        """
        X, y = self.get_data()

        path = '%s-%s-%s.h5' % (
            outpath, 'uncertainty', datetime.now().strftime('%Y%m%d_%H%M%S'))
        # Mode 'a' is spelled out because the h5py default changed to
        # read-only in h5py 3; the with-block closes the file even when a
        # write fails.
        with h5py.File(path, 'a') as f:
            f['X'] = X
            f['y'] = y
            f['labs'] = self.labs