Python FileStream.has_more_samples 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: skmultiflow.data.file_stream

클래스/타입: FileStream

메소드/함수: has_more_samples

hotexamples.com에서의 예제들: 6

Python FileStream.has_more_samples - 6개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, Python skmultiflow.data.file_stream.FileStream.has_more_samples의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FileStream(30)

has_more_samples(6)

get_classes(3)

has_more_instances(2)

get_targets(2)

X(2)

estimated_remaining_instances(1)

get_num_targets(1)

get_target_values(1)

get_plot_name(1)

get_num_values_per_nominal_attribute(1)

get_num_nominal_attributes(1)

get_num_numerical_attributes(1)

get_attributes_header(1)

get_last_instance(1)

get_info(1)

get_data_info(1)

get_classes_header(1)

get_num_attributes(1)

예제 #1

파일 보기

def train_tree(csv_path, tree):
    """Train ``tree`` on the samples read from ``csv_path``, test-then-train.

    Every sample is first predicted with the current model and compared
    against its true label, then passed to ``partial_fit``. The number of
    samples, the running accuracy (in percent) and the elapsed wall-clock
    time are reported with ``print``.

    Parameters
    ----------
    csv_path
        Path handed to ``FileStream``.
    tree
        Incremental learner exposing ``predict`` and ``partial_fit``.
    """
    print("Training the tree")

    stream = FileStream(csv_path)

    n_samples = 0
    correct_cnt = 0

    t0 = time.time()

    while stream.has_more_samples():
        X, y = stream.next_sample()
        y_pred = tree.predict(X)
        if y[0] == y_pred[0]:
            correct_cnt += 1
        # NOTE(review): only the local name is rebound here; the caller sees
        # the training only if partial_fit updates the model in place.
        tree = tree.partial_fit(X, y)
        n_samples += 1

    total = time.time() - t0

    # An empty stream would otherwise raise ZeroDivisionError.
    accuracy = 100.0 * correct_cnt / n_samples if n_samples else 0.0

    print("Training data instances: ", n_samples)
    print("Tree trained on ", n_samples, " instances & has ", accuracy,
          "% accuracy.")
    print("Training tree completed in ", total, " (s)")

예제 #2

파일 보기

def test_file_stream(test_path, package_path):
    """Check ``FileStream`` metadata and sampling on the SEA stream CSV.

    Parameters
    ----------
    test_path
        Directory containing ``sea_stream_file.npz`` with the expected
        ``X`` and ``y`` arrays.
    package_path
        Root directory under which
        ``src/skmultiflow/data/datasets/sea_stream.csv`` is located.
    """
    test_file = os.path.join(package_path,
                             'src/skmultiflow/data/datasets/sea_stream.csv')
    stream = FileStream(test_file)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == 40000

    expected_names = ['attrib1', 'attrib2', 'attrib3']
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['class']

    assert stream.n_features == 3

    assert stream.n_cat_features == 0

    assert stream.n_num_features == 3

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'sea_stream.csv - 1 target(s), 2 classes'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'sea_stream_file.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all is used instead of np.alltrue, which was removed in NumPy 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just read.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart the first 10 samples must match the stored arrays.
    stream.restart()
    X, y = stream.next_sample(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "FileStream(filename='sea_stream.csv', target_idx=-1, n_targets=1, cat_features=None)"
    assert stream.get_info() == expected_info

예제 #3

파일 보기

def demo():
    """Run the MOL learner over a file stream read from music.csv.

    The learner is pre-trained on an initial batch of samples. Every
    remaining sample is then predicted one at a time, and the Hamming
    score of those predictions is logged together with the elapsed
    evaluation time and the number of samples processed.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # File stream over the music dataset
    stream = FileStream("../datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Multi-output learner with a Perceptron as its base estimator,
    # wrapped in a single-step pipeline
    learner = MultiOutputLearner(h=Perceptron())
    pipe = Pipeline([('classifier', learner)])

    # Pre-train on the first batch
    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    X_pre, y_pre = stream.next_sample(pretrain_size)
    pipe.partial_fit(X_pre, y_pre, classes=stream.get_targets())

    n_evaluated = 0
    observed = []
    predicted = []
    started = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        predicted.extend(pipe.predict(X))
        observed.extend(y)
        n_evaluated += 1

    score = hamming_score(observed, predicted)
    logging.info('Evaluation time: %s s', str(timer() - started))
    logging.info('Total samples analyzed: %s', str(n_evaluated))
    logging.info("The classifier's static Hamming score    : %0.3f" % score)

예제 #4

파일 보기

def test_tree(csv_path, tree):
    """Evaluate ``tree`` on the samples read from ``csv_path``.

    Each sample is predicted and compared with its true label; the sample
    is then also passed to ``partial_fit``, so the model keeps learning
    while it is being tested. Accuracy, F-score, geometric mean and the
    elapsed wall-clock time are reported with ``print``.

    Parameters
    ----------
    csv_path
        Path handed to ``FileStream``.
    tree
        Incremental learner exposing ``predict`` and ``partial_fit``.

    Returns
    -------
    tuple
        ``(fscore, gm)``, each rounded to 3 decimals. Both are ``0.0`` when
        the stream yields no samples.
    """
    print("Testing the tree")

    stream = FileStream(csv_path)

    n_samples = 0
    correct_cnt = 0
    y_true_all = []
    y_pred_all = []

    t2 = time.time()

    while stream.has_more_samples():
        X, y = stream.next_sample()
        y_pred = tree.predict(X)
        if y[0] == y_pred[0]:
            correct_cnt += 1
        tree = tree.partial_fit(X, y)
        n_samples += 1

        y_true_all.append(y[0])
        y_pred_all.append(y_pred[0])

    total = time.time() - t2

    if n_samples:
        accuracy = 100.0 * correct_cnt / n_samples
        fscore = f1_score(y_true_all, y_pred_all, average='binary')
        gm = geometric_mean_score(y_true_all, y_pred_all, average='binary')
    else:
        # Empty stream: dividing by n_samples would raise ZeroDivisionError
        # and there are no labels to score, so report zeros instead.
        accuracy = fscore = gm = 0.0

    print("Test data instances: ", n_samples)
    print("Tree tested on ", n_samples, " instances & has ", accuracy, "% accuracy.")
    print("Tree has F-score: %.3f" % fscore)
    print("Tree has GM: %.3f" % gm)
    print("Testing tree completed in ", total, " (s)")

    return round(fscore, 3), round(gm, 3)

예제 #5

파일 보기

파일: Chapter1_codes.py 프로젝트: sayanddude/practical-ml-streaming-data-python

# Retrieve the next 5 samples from the stream; the recorded output below is
# a pair of arrays (feature rows first, then one target value per row).
data_stream.next_sample(5)
# Output:
#(array([[ 36.   ,   0.   ,   7.   ,   3.   ,   1.   , 118.   ,  13.   ,
#          18.   ,  50.   , 239.554,  97.   ,   1.   ,   1.   ,   1.   ,
#           1.   ,   0.   ,   0.   ,  98.   , 178.   ,  31.   ],
#        [  3.   ,  23.   ,   7.   ,   4.   ,   1.   , 179.   ,  51.   ,
#          18.   ,  38.   , 239.554,  97.   ,   0.   ,   1.   ,   0.   ,
#           1.   ,   0.   ,   0.   ,  89.   , 170.   ,  31.   ],
#        [  7.   ,   7.   ,   7.   ,   5.   ,   1.   , 279.   ,   5.   ,
#          14.   ,  39.   , 239.554,  97.   ,   0.   ,   1.   ,   2.   ,
#           1.   ,   1.   ,   0.   ,  68.   , 168.   ,  24.   ],
#        [ 11.   ,  23.   ,   7.   ,   5.   ,   1.   , 289.   ,  36.   ,
#          13.   ,  33.   , 239.554,  97.   ,   0.   ,   1.   ,   2.   ,
#           1.   ,   0.   ,   1.   ,  90.   , 172.   ,  30.   ],
#        [  3.   ,  23.   ,   7.   ,   6.   ,   1.   , 179.   ,  51.   ,
#          18.   ,  38.   , 239.554,  97.   ,   0.   ,   1.   ,   0.   ,
#           1.   ,   0.   ,   0.   ,  89.   , 170.   ,  31.   ]]),
# array([0, 2, 4, 2, 2]))

# Check whether the stream still has samples left to read.
data_stream.has_more_samples()
# Output:
# True

# Query how many samples remain in the stream.
data_stream.n_remaining_samples()
# Output:
# 734

#####################################################################################

예제 #6

파일 보기

# Train the ensemble on the first chunk read from the stream.
X_init, y_init = stream.next_sample(CHUNK_SIZE)
print(X_init)
print(y_init)
goowe.partial_fit(X_init, y_init)

accuracy = 0.0
total = 0.0
true_predictions = 0.0

# Test-then-train on up to CHUNK_SIZE further instances. The stream may hold
# fewer than that, so stop early instead of reading past its end.
for _ in range(CHUNK_SIZE):
    if not stream.has_more_samples():
        break
    total += 1
    X, y = stream.next_sample()
    preds = goowe.predict(X)
    true_predictions += np.sum(preds == y)
    accuracy = true_predictions / total
    print('\tData instance: {} - Accuracy: {}'.format(total, accuracy))
    goowe.partial_fit(X, y)

# Now, for the remaining instances, do ITTT (Interleaved Test Then Train).
while stream.has_more_samples():
    total += 1
    X, y = stream.next_sample()
    preds = goowe.predict(X)            # Test
    true_predictions += np.sum(preds == y)
    accuracy = true_predictions / total
    print('\tData instance: {} - Accuracy: {}'.format(int(total), round(accuracy*100.0, 3)))
    goowe.partial_fit(X, y)             # Then train