Example #1
import time

from skmultiflow.data import FileStream


def train_tree(csv_path, tree):
    """Incrementally train the given tree on a CSV stream (test-then-train on each instance)."""

    print("Training the tree")

    stream = FileStream(csv_path)
    # Note: older scikit-multiflow versions also require stream.prepare_for_use() here.

    n_samples = 0
    correct_cnt = 0

    t0 = time.time()

    while stream.has_more_samples():
        X, y = stream.next_sample()
        # Predict on the incoming instance before learning from it.
        y_pred = tree.predict(X)
        if y[0] == y_pred[0]:
            correct_cnt += 1
        tree = tree.partial_fit(X, y)
        n_samples += 1

    t1 = time.time()
    total = t1 - t0

    accuracy = 100.0 * correct_cnt / n_samples

    print("Training data instances: ", n_samples)
    print("Tree trained on ", n_samples, " instances & has ", accuracy,
          "% accuracy.")
    print("Training tree completed in ", total, " (s)")
Example #2
import os

import numpy as np

from skmultiflow.data import FileStream


def test_file_stream(test_path, package_path):
    """Check FileStream metadata and sampling against the bundled sea_stream.csv file."""
    test_file = os.path.join(package_path,
                             'src/skmultiflow/data/datasets/sea_stream.csv')
    stream = FileStream(test_file)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == 40000

    expected_names = ['attrib1', 'attrib2', 'attrib3']
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['class']

    assert stream.n_features == 3

    assert stream.n_cat_features == 0

    assert stream.n_num_features == 3

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'sea_stream.csv - 1 target(s), 2 classes'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'sea_stream_file.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    X, y = stream.next_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    X, y = stream.last_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(10)
    assert np.alltrue(X == X_expected)
    assert np.alltrue(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "FileStream(filename='sea_stream.csv', target_idx=-1, n_targets=1, cat_features=None)"
    assert stream.get_info() == expected_info
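The get_info() string checked above simply echoes the constructor arguments; a minimal sketch of overriding them, e.g. for a CSV whose class label sits in the first column (the file name is illustrative):

from skmultiflow.data import FileStream

# Illustrative file; target_idx=0 reads the label from the first column, and
# cat_features would take a list of column indices to treat as categorical.
stream = FileStream("my_data.csv", target_idx=0, n_targets=1, cat_features=None)
X, y = stream.next_sample(10)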
Example #3
# Imports assumed for this demo; exact module paths may differ across
# scikit-multiflow releases (the h= keyword indicates an older API).
import logging
from timeit import default_timer as timer

from sklearn.linear_model import Perceptron

from skmultiflow.core.pipeline import Pipeline
from skmultiflow.data import FileStream
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.metrics import hamming_score


def demo():
    """ _test_mol

    This demo tests the MOL learner on a file stream that reads from
    the music.csv file.

    The test computes the performance of the MOL learner as well as 
    the time to create the structure and classify all the samples in 
    the file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    stream = FileStream("../datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Set up the classifier; by default it uses Logistic Regression
    # classifier = MultiOutputLearner()
    # classifier = MultiOutputLearner(h=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(h=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre-training on %s samples', str(pretrain_size))
    X, y = stream.next_sample(pretrain_size)
    # classifier.fit(X, y)
    pipe.partial_fit(X, y, classes=stream.get_targets())
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        # p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score    : %0.3f' % perf)
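For reference, one common definition of the Hamming score reported above is label-wise accuracy (1 minus the Hamming loss); a minimal NumPy sketch of that idea, which may differ in detail from the library's own implementation:

import numpy as np

# Fraction of individual labels predicted correctly across all samples.
y_true = np.array([[1, 0, 1], [0, 1, 1]])
y_pred = np.array([[1, 0, 0], [0, 1, 1]])
score = np.mean(y_true == y_pred)  # 5 of 6 labels correct -> ~0.833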
Example #4
import time

from sklearn.metrics import f1_score
from imblearn.metrics import geometric_mean_score

from skmultiflow.data import FileStream


def test_tree(csv_path, tree):
    """Evaluate the tree prequentially on a CSV stream (test, then keep training)."""

    print("Testing the tree")

    stream = FileStream(csv_path)

    n_samples = 0
    correct_cnt = 0

    t2 = time.time()

    y_true_all = list()
    y_pred_all = list()
    while stream.has_more_samples():
        X, y = stream.next_sample()
        y_pred = tree.predict(X)
        if y[0] == y_pred[0]:
            correct_cnt += 1
        tree = tree.partial_fit(X, y)
        n_samples += 1

        y_true_all.append(y[0])
        y_pred_all.append(y_pred[0])

    t3 = time.time()
    total = t3 - t2

    accuracy = 100.0 * correct_cnt / n_samples
    fscore = f1_score(y_true_all, y_pred_all, average='binary')
    gm = geometric_mean_score(y_true_all, y_pred_all, average='binary')

    print("Test data instances: ", n_samples)
    print("Tree tested on ", n_samples, " instances & has ", accuracy, "% accuracy.")
    print("Tree has F-score: %.3f" % fscore)
    print("Tree has GM: %.3f" % gm)
    print("Testing tree completed in ", total, " (s)")

    return round(fscore, 3), round(gm, 3)
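A minimal sketch chaining the two helpers from Examples #1 and #4, assuming a Hoeffding tree and illustrative train/test CSV paths (none of these names appear in the original snippets):

from skmultiflow.trees import HoeffdingTreeClassifier

tree = HoeffdingTreeClassifier()
train_tree("train_split.csv", tree)              # incremental training pass
fscore, gm = test_tree("test_split.csv", tree)   # prequential evaluation pass
print(fscore, gm)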
Example #5
# Retrieving 5 samples
data_stream.next_sample(5)
# Output-
#(array([[ 36.   ,   0.   ,   7.   ,   3.   ,   1.   , 118.   ,  13.   ,
#          18.   ,  50.   , 239.554,  97.   ,   1.   ,   1.   ,   1.   ,
#           1.   ,   0.   ,   0.   ,  98.   , 178.   ,  31.   ],
#        [  3.   ,  23.   ,   7.   ,   4.   ,   1.   , 179.   ,  51.   ,
#          18.   ,  38.   , 239.554,  97.   ,   0.   ,   1.   ,   0.   ,
#           1.   ,   0.   ,   0.   ,  89.   , 170.   ,  31.   ],
#        [  7.   ,   7.   ,   7.   ,   5.   ,   1.   , 279.   ,   5.   ,
#          14.   ,  39.   , 239.554,  97.   ,   0.   ,   1.   ,   2.   ,
#           1.   ,   1.   ,   0.   ,  68.   , 168.   ,  24.   ],
#        [ 11.   ,  23.   ,   7.   ,   5.   ,   1.   , 289.   ,  36.   ,
#          13.   ,  33.   , 239.554,  97.   ,   0.   ,   1.   ,   2.   ,
#           1.   ,   0.   ,   1.   ,  90.   , 172.   ,  30.   ],
#        [  3.   ,  23.   ,   7.   ,   6.   ,   1.   , 179.   ,  51.   ,
#          18.   ,  38.   , 239.554,  97.   ,   0.   ,   1.   ,   0.   ,
#           1.   ,   0.   ,   0.   ,  89.   , 170.   ,  31.   ]]),
# array([0, 2, 4, 2, 2]))

data_stream.has_more_samples()
# Output-
# True

data_stream.n_remaining_samples()
# Output-
# 734
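The data_stream object used above is not constructed in this snippet; a minimal sketch of how such a stream could be set up, assuming a CSV file with the class label in the last column (the file name is illustrative):

from skmultiflow.data import FileStream

# Illustrative setup; the actual CSV behind data_stream is not shown above.
data_stream = FileStream("activity_data.csv")  # label read from the last column by default
X, y = data_stream.next_sample(5)              # next five instances
data_stream.n_remaining_samples()              # instances left in the file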

#####################################################################################
Example #6
import numpy as np

# `stream`, `goowe` and CHUNK_SIZE are assumed to be defined earlier in the script.
X_init, y_init = stream.next_sample(CHUNK_SIZE)
print(X_init)
print(y_init)
goowe.partial_fit(X_init, y_init)

accuracy = 0.0
total = 0.0
true_predictions = 0.0

# Warm-up phase: test-then-train on one more chunk, instance by instance.
for i in range(CHUNK_SIZE):
    total += 1
    cur = stream.next_sample()
    X, y = cur[0], cur[1]
    preds = goowe.predict(X)
    true_predictions += np.sum(preds == y)
    accuracy = true_predictions / total
    print('\tData instance: {} - Accuracy: {}'.format(total, accuracy))
    goowe.partial_fit(X, y)

# Now, for the remaining instances, do ITTT (Interleaved Test Then Train).
while stream.has_more_samples():
    total += 1
    cur = stream.next_sample()
    X, y = cur[0], cur[1]
    preds = goowe.predict(X)            # Test
    true_predictions += np.sum(preds == y)
    accuracy = true_predictions / total
    print('\tData instance: {} - Accuracy: {}'.format(int(total), round(accuracy * 100.0, 3)))
    goowe.partial_fit(X, y)             # Then train
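scikit-multiflow also ships a prequential evaluator that automates this interleaved test-then-train loop; a minimal sketch, assuming an illustrative stream file and a Hoeffding tree in place of the GOOWE ensemble (which is not part of the library):

from skmultiflow.data import FileStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTreeClassifier

stream = FileStream("my_stream.csv")   # illustrative file
model = HoeffdingTreeClassifier()

evaluator = EvaluatePrequential(pretrain_size=500,
                                max_samples=20000,
                                metrics=['accuracy'])
evaluator.evaluate(stream=stream, model=model)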