Example #1
def update_classifier(topic_id):
    from classifier import BinaryClassifier, doc2text
    db = get_db()
    cur = db.cursor(MySQLdb.cursors.DictCursor)
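    # Fetch the 100 most recent training documents (is_training = 1) for this
    # topic, together with their labels (strength).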
    query = '''
         SELECT D.*, M.strength
         FROM docs D, docs2topics M
         WHERE M.doc_id = D.doc_id AND M.topic_id = {0} AND M.is_training = 1
         ORDER BY D.found_date DESC
         LIMIT 100
    '''
    query = query.format(topic_id)
    app.logger.debug(query)
    cur.execute(query)
    rows = cur.fetchall()
    docs = [doc2text(row) for row in rows]
    classes = [row['strength'] for row in rows]
    msg = ''
    if (0 in classes and 1 in classes):
        with Capturing() as output:
            clf = BinaryClassifier(topic_id)        
            clf.train(docs, classes)
            clf.save()
        msg += '\n'.join(output)
        # We could reclassify all documents now, but we postpone this step
        # until the documents are actually displayed (which may be never
        # for sufficiently old ones). So we simply undefine the topic
        # strengths to mark that no classification has yet been made.
        query = "UPDATE docs2topics SET strength = NULL WHERE topic_id = {0} AND is_training < 1".format(topic_id)
        app.logger.debug(query)
        cur.execute(query)
        db.commit()
    else:
        msg = "classifier not trained: the training set contains only positive or only negative samples"
    return msg
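
The Capturing context manager used above is imported from elsewhere in the project and is not shown in this excerpt. A minimal sketch of such a helper, assuming it simply collects everything printed to stdout inside the with-block as a list of lines:

import sys
from io import StringIO  # on Python 2, use StringIO.StringIO

class Capturing(list):
    """Collect lines printed to stdout while the with-block runs."""
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self

    def __exit__(self, *exc):
        self.extend(self._stringio.getvalue().splitlines())
        del self._stringio
        sys.stdout = self._stdout
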
Example #2
def classify(rows, topic, topic_id):
    from classifier import BinaryClassifier, doc2text
    docs = [doc2text(row) for row in rows]
    with Capturing() as output:
        clf = BinaryClassifier(topic_id)
        clf.load()
        probs = clf.classify(docs)
    app.logger.debug('\n'.join(output))
    db = get_db()
    cur = db.cursor()
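    # clf.classify yields one probability pair per document; the second value
    # (p_ham) is what gets stored as the document's strength for this topic.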
    for i, (p_spam, p_ham) in enumerate(probs):
        app.logger.debug("doc {} classified for topic {}: {}".format(
            rows[i]['doc_id'], topic_id, p_ham))
        query = '''
            INSERT INTO docs2topics (doc_id, topic_id, strength)
            VALUES ({0},{1},{2})
            ON DUPLICATE KEY UPDATE strength={2}
        '''
        query = query.format(rows[i]['doc_id'], topic_id, p_ham)
        app.logger.debug(query)
        cur.execute(query)
        db.commit()
    return [p[1] for p in probs]
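
Both functions above interpolate values into the SQL with str.format. MySQLdb's cursor.execute also accepts a parameter tuple, so the upsert in classify() could equally be written with %s placeholders, which sidesteps quoting and injection issues. A sketch, using a hypothetical helper name:

def save_strength(cur, doc_id, topic_id, strength):
    # Parameterized version of the ON DUPLICATE KEY upsert used in classify().
    query = '''
        INSERT INTO docs2topics (doc_id, topic_id, strength)
        VALUES (%s, %s, %s)
        ON DUPLICATE KEY UPDATE strength = %s
    '''
    cur.execute(query, (doc_id, topic_id, strength, strength))
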
Example #3
            app.logger.error('cannot parse {}: {}'.format(post['url'], e))
            continue
        if default_author:
            # overwrite whatever blogpostparser identified as the
            # author -- should probably make an exception for guest
            # posts:
            post['authors'] = default_author
        posts.append(post)
        
    if not posts:
        app.logger.warn('no posts to save')
        return 'OK'

    from classifier import BinaryClassifier, doc2text
    docs = [doc2text(post) for post in posts]
    clf = BinaryClassifier(0) # classifier 0 is for blogspam; note that 1=>blogspam, 0=>blogham
    clf.load()
    probs = clf.classify(docs)
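    # Each entry in probs is a (p_no, p_yes) pair; p_yes is the estimated
    # probability that the post is blog spam (classifier 0, see above).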
    for i, (p_no, p_yes) in enumerate(probs):
        post = posts[i]
        app.logger.debug(u"post {} has blogspam probability {}".format(post['title'], p_yes))
        if p_yes > app.config['MAX_SPAM'] * 3/2:
            app.logger.debug("> max {}".format(app.config['MAX_SPAM'] * 3/2))
            continue
        post['status'] = 1 if p_yes < app.config['MAX_SPAM'] * 3/4 else 0
        post['spamminess'] = p_yes
        post['meta_confidence'] = 0.75
        query = "INSERT INTO docs ({}, found_date) VALUES ({} NOW())".format(
            ', '.join(post.keys()), '%s, '*len(post.keys()))
        app.logger.debug(query + ', '.join(map(unicode, post.values())))
        try:
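
The INSERT built just above relies on '%s, ' being repeated once per key, so the final comma lands in front of NOW(). For a post dict with the keys title, url and status (illustrative only), the formatted query would read:

INSERT INTO docs (title, url, status, found_date) VALUES (%s, %s, %s, NOW())
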
Example #4
def main():
    x = tf.placeholder(tf.float32, [None, AUDIO_FEATURE_SIZE])
    y = tf.placeholder(tf.float32, [None, 2])

    n_units = [100, 100, 50]
    n_batches = 10000
    batch_size = 10
    sound_event = 137  # Testing with sound event class of music.

    classifier = BinaryClassifier(x, y, n_units)

    path = "./trainingFeatures/bal_train/"
    filenames = [path + f for f in listdir(path)]
    """filenames = [path + "ZZ.tfrecord",
                 path + "Zy.tfrecord",
                 path + "ZY.tfrecord",
                 path + "zz.tfrecord",
                 path + "zZ.tfrecord",
                 path + "uT.tfrecord",
                 path + "Ut.tfrecord",
                 path + "UT.tfrecord",
                 path + "uu.tfrecord",
                 path + "uU.tfrecord"
                 ]"""
    eval_path = "./trainingFeatures/eval/"
    eval_filenames = [eval_path + f for f in listdir(eval_path)]

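    # TF1 queue-based input pipeline: extract_example (defined elsewhere) yields
    # label and audio-feature tensors read from the tfrecord files, and
    # tf.train.batch groups them into padded minibatches (dynamic_pad=True).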
    batch = tf.train.batch(extract_example(filenames),
                           batch_size,
                           dynamic_pad=True)
    eval_batch = tf.train.batch(extract_example(eval_filenames),
                                EVAL_SET_SIZE,
                                dynamic_pad=True)

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coordinator)

        # Train the model.
        for i in range(n_batches):
            labels, audio_features = sess.run(batch)
            targets = []

            for j in range(batch_size):
                if sound_event in labels[j]:
                    targets.append(POSITIVE)
                else:
                    targets.append(NEGATIVE)

            sess.run(classifier.train,
                     feed_dict={
                         x: audio_features,
                         y: targets
                     })

        # Evaluate the model.
        labels, audio_features = sess.run(eval_batch)
        targets = []

        for i in range(EVAL_SET_SIZE):
            if sound_event in labels[i]:
                targets.append(POSITIVE)
            else:
                targets.append(NEGATIVE)

        print(
            sess.run(classifier.f1_score,
                     feed_dict={
                         x: audio_features,
                         y: targets
                     }))

        coordinator.request_stop()
        coordinator.join(threads)
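
POSITIVE, NEGATIVE, AUDIO_FEATURE_SIZE and EVAL_SET_SIZE are module-level constants that this excerpt does not show. Since y is a [None, 2] placeholder, POSITIVE and NEGATIVE are presumably one-hot rows; plausible definitions (the values and ordering are assumptions, not taken from the source):

AUDIO_FEATURE_SIZE = 128   # assumed: 128-dimensional audio feature vectors
EVAL_SET_SIZE = 1000       # assumed size of the evaluation batch
POSITIVE = [1.0, 0.0]      # assumed one-hot target: sound event present
NEGATIVE = [0.0, 1.0]      # assumed one-hot target: sound event absent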