Example #1
import docopt
import h5py

import utils


def main():
    args = docopt.docopt(__doc__)

    paths = list(utils.iter_paths(args['<dir>']))

    if not paths:
        return

    extractor = utils.load_classifier('feature_extractor.pkl')

    print("# Read images")
    images = list(map(utils.load_image, paths))  # map() is lazy in Python 3; materialize before reuse
    print("# Extract features")
    features = extractor.fit_transform(images)

    # Determine number of samples and features
    n_sample = len(paths)
    n_features = features.shape[1]

    # Create dataset
    with h5py.File('test_set.hdf5', 'w', driver='core') as f:
        f.create_dataset(
            'X',
            (n_sample, n_features),
            dtype='f',
            data=features)
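Most of these examples assume a pickle-based utils.load_classifier helper. A minimal sketch of such a loader, assuming plain pickle serialization (this is an illustration, not the project's actual implementation):

import pickle

def load_classifier(path):
    # Hypothetical helper: unpickle a previously saved model or transformer.
    with open(path, 'rb') as f:
        return pickle.load(f)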
Example #2
def get_response(last_message, emb_type, clas_type, print_info=False):
    """
    Loads all data needed to create a response and creates it. User can choose
    which embedding type and classifier will be used
    """
    # Load data for responses
    qa_data = read_qa(qa_path='data/qa_data/assignment_data.json')

    # Load embedding model
    embedding_model = load_semantic_model(model_type=emb_type)

    # Load classifier
    classifier = load_classifier(classifier_type=clas_type, model_type=emb_type)

    # Generate response
    response, emotion, topic, word_intersection = create_response(
        last_message, qa_data, classifier, embedding_model, emb_type, clas_type)
    # Print info if requested
    if print_info:
        print()
        print("Received: {message}".format(message=last_message))
        print("Responded: {response}".format(response=response))
        print("Topic detected: {topic}".format(topic=topic))
        print("Emotion detected: {emotion}".format(emotion=emotion))
        print("Keywords detected [(keyword): (message_token)]: \n\t{intersection}".format(intersection=word_intersection))

    return response, emotion, topic, word_intersection
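A hedged usage example; the message text and the "w2v" / "svm" type strings below are illustrative placeholders, not values taken from the source:

response, emotion, topic, keywords = get_response(
    "How do I submit the assignment?",  # sample message (illustrative)
    emb_type="w2v",    # hypothetical embedding type identifier
    clas_type="svm",   # hypothetical classifier type identifier
    print_info=True)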
Example #3
def classify_text(text):
    classifier = utils.load_classifier(CLASSIFIER_FILE_NAME)
    prob_dist = classifier.prob_classify(text)
    max_val = prob_dist.max()
    print(max_val)
    print(round(prob_dist.prob("general"), 2))
    print(round(prob_dist.prob("app"), 2))
    print(round(prob_dist.prob("pricing"), 2))
    # show_informative_features() prints directly and returns None,
    # so it should not be wrapped in print()
    classifier.show_informative_features(5)
Example #4
def classify_messages(classifier_state_file: str, messages: Dataset):
    model = SpamClassifier(VOCABULARY_SIZE)
    model = model.cuda()
    vocabulary = load_classifier(classifier_state_file, model)

    print('Loaded classifier state from:', classifier_state_file, file=sys.stderr)

    samples = WordIndexDataset(messages, vocabulary, MAX_MESSAGE_LENGTH_WORDS)
    samples_loader = DataLoader(samples, batch_size=100, collate_fn=IndexVectorCollatorEval(), num_workers=8)

    print('Processing messages in:', messages, file=sys.stderr)

    spam, ham = 0, 0

    for x, x_len, path in samples_loader:
        y_pred = model(x.cuda(), x_len).cpu()
        spam_score = nn.functional.softmax(y_pred, dim=-1)[:, 1]
        is_spam = spam_score > 0.5
        spam += is_spam.sum().item()
        ham += (~is_spam).sum().item()
        for s_s, i_s, p in zip(spam_score, is_spam, path):
            print(f'{p}: {"SPAM" if i_s.item() else "HAM"}, score: {s_s.item()}')
    print(f'Statistics: spam: {spam}, ham: {ham}, total: {spam + ham}', file=sys.stderr)
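The loop above scores messages with autograd enabled. A minimal sketch of the same loop wrapped for inference, assuming the model and samples_loader names from the example above:

import torch

def score_messages(model, samples_loader):
    # Sketch: run the classification loop above in inference mode.
    model.eval()  # disable dropout / use running batch-norm statistics
    with torch.no_grad():  # skip gradient tracking while scoring
        for x, x_len, path in samples_loader:
            y_pred = model(x.cuda(), x_len).cpu()
            spam_score = torch.nn.functional.softmax(y_pred, dim=-1)[:, 1]
            yield path, spam_score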
Example #5
__author__ = 'anurag'
import sqlite3
import json
import utils
from flask import Flask
app = Flask(__name__)


classifier = utils.load_classifier('classifier.pickle')

HTML = '''<html>
  <head>
    <script type="text/javascript" src="https://www.google.com/jsapi"></script>
    <script type="text/javascript">
      google.load("visualization", "1", {packages:["corechart"]});
      google.setOnLoadCallback(drawChart);
      function drawChart() {

        var data = google.visualization.arrayToDataTable(%s);

        var options = {
          title: 'My Daily Activities'
        };

        var chart = new google.visualization.PieChart(document.getElementById('piechart'));

        chart.draw(data, options);
      }
    </script>
  </head>
  <body>
    <div id="piechart" style="width: 900px; height: 500px;"></div>
  </body>
</html>'''
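The scraped snippet ends before any Flask route is defined. A minimal sketch of a route that could feed the pie chart above; the activities.db file, its schema, and the classify() call are all assumptions, not code from the source:

@app.route('/')
def index():
    # Hypothetical: count entries per predicted label from a local sqlite3 DB.
    conn = sqlite3.connect('activities.db')  # assumed database file
    rows = conn.execute('SELECT text FROM activities').fetchall()  # assumed schema
    conn.close()

    counts = {}
    for (text,) in rows:
        label = classifier.classify(text)  # assumes an NLTK-style classifier
        counts[label] = counts.get(label, 0) + 1

    table = [['Activity', 'Count']] + [[k, v] for k, v in counts.items()]
    return HTML % json.dumps(table)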
Example #6
    df = utils.load_dataframe(args.dataset)
    df_samples = df.index
    df_genes = df.columns

    labels, classes = utils.load_labels(args.labels)

    print('loaded input dataset (%s genes, %s samples)' %
          (df.shape[1], df.shape[0]))

    # impute missing values
    df.fillna(value=df.min().min(), inplace=True)

    # initialize classifier
    print('initializing classifier...')

    clf = utils.load_classifier(args.model_config, args.model)

    print('initialized %s classifier' % args.model)

    # load gene sets file if it was provided
    if args.gene_sets is not None:
        print('loading gene sets...')

        gene_sets = utils.load_gene_sets(args.gene_sets)
        gene_sets = utils.filter_gene_sets(gene_sets, df_genes)

        print('loaded %d gene sets' % (len(gene_sets)))
    else:
        gene_sets = []

    # generate random gene sets if specified
Example #7
from time import time
import numpy as np
import flask
from flask import Flask
from flask import request

from utils import stem_sentence, vectorize_sentence, load_classifier

app = Flask(__name__)

model = load_classifier(fname="classifier_model.pkl")


@app.route("/predict", methods=["GET"])
def predict():
    results = {}
    raw_sentence = request.args.get("sentence", "EMPTY")
    t0 = time()
    sentence = stem_sentence(raw_sentence)
    t1 = time()
    sentence = vectorize_sentence(sentence)
    t2 = time()
    predicted_scores = model.predict_proba(sentence)
    t3 = time()
    negative_score = predicted_scores[0][0]
    positive_score = predicted_scores[0][1]

    stemming_time = f"{np.round(t1-t0, 2)} seconds."
    vectorizing_time = f"{np.round(t2-t1, 2)} seconds."
    prediction_time = f"{np.round(t3-t2, 2)} seconds."
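    # The scraped example breaks off above; a plausible completion, assuming
    # the endpoint returns the collected scores and timings as JSON (the
    # field names below are illustrative, not from the source):
    results["positive_score"] = float(positive_score)
    results["negative_score"] = float(negative_score)
    results["stemming_time"] = stemming_time
    results["vectorizing_time"] = vectorizing_time
    results["prediction_time"] = prediction_time
    return flask.jsonify(results)

With the app running on Flask's default port, the endpoint can then be exercised with, e.g., curl 'http://localhost:5000/predict?sentence=great+movie'.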
Example #8
from moviepy.editor import VideoFileClip

# Project-local modules referenced below
import classifier
import config
import utils

color_space = config.Classifier.COLOR_SPACE
orient = config.Classifier.ORIENT
pix_per_cell = config.Classifier.PIX_PER_CELL
cell_per_block = config.Classifier.CELL_PER_BLOCK
hog_channel = config.Classifier.HOG_CHANNEL
spatial_size = config.Classifier.SPATIAL_SIZE
hist_bins = config.Classifier.HIST_BINS
spatial_f = config.Classifier.SPATIAL_F
hist_f = config.Classifier.HIST_F
hog_f = config.Classifier.HOG_F

scales = config.Pipeline.SCALES

X_scaler = utils.load_classifier(config.Classifier.SCALER_FILE)
svc = utils.load_classifier(config.Classifier.CLS_FILE)


def process_heat_image(img, heatmap_history, render_heatmap=True, save_matches=False):
    windows = []

    for s in scales:
        scale, ystart, ystop, xstart, xstop = s
        windows_s = classifier.find_cars(img,
                                         ystart, ystop, xstart, xstop, scale,
                                         svc, X_scaler,
                                         orient, pix_per_cell, cell_per_block,
                                         spatial_size, hist_bins,
                                         min_confidence=config.Pipeline.MIN_CONFIDENCE)
        windows.extend(windows_s)
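The example ends before the heatmap step that process_heat_image's name and its heatmap_history parameter imply. A sketch of that step, assuming windows are ((x1, y1), (x2, y2)) pixel boxes (the project's actual window format is not shown in the source):

import numpy as np
from scipy.ndimage import label

def heat_and_label(img, windows, threshold=2):
    # Hypothetical continuation: accumulate detection windows into a heatmap,
    # zero out weak regions, and label connected components as detections.
    heat = np.zeros(img.shape[:2], dtype=np.float32)
    for (x1, y1), (x2, y2) in windows:  # assumed window tuple layout
        heat[y1:y2, x1:x2] += 1.0
    heat[heat < threshold] = 0.0
    return label(heat)  # (labeled regions, number of detections)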
Example #9
                'x_train': flatten(x_train_predictions.tolist()),
                'x_val': flatten(x_val_predictions.tolist()),
                'x_test': flatten(x_test_predictions.tolist()),
            }
        else:
            vectorizer = Sklearn.VECTORIZERS[data[2]](
                binary=True,
                ngram_range=(int(data[3]), int(data[4])))

            x_train = vectorizer.fit_transform(train_inputs).todense()
            x_val = vectorizer.transform(val_inputs).todense()
            x_test = vectorizer.transform(test_data_preprocessed).todense()

            cls = load_classifier(model_path)

            print(key)
            # print(f'a - {cls.score(x_train, y_train)}')
            # print(f'va - {cls.score(x_val, y_val)}')
            # print(f'ta - {cls.score(x_test, y_test)}')
            # print('----------------------------')

            PREDICTIONS[folder][f'{key}-{preprocessing_algorithm_id}'] = {
                'x_train': cls.predict(x_train).tolist(),
                'x_val': cls.predict(x_val).tolist(),
                'x_test': cls.predict(x_test).tolist(),
            }

save_to_file('./predictions_v_6.json', {**PREDICTIONS, **STATICS})