def main():
    """Extract features for the images under ``<dir>`` and save them to HDF5.

    Reads the ``<dir>`` command-line argument via docopt, loads every image
    path yielded by ``utils.iter_paths``, runs the extractor loaded from
    ``feature_extractor.pkl`` over the images, and writes the resulting
    feature matrix as dataset ``X`` in ``test_set.hdf5``.

    Returns early, without creating the output file, when no paths are found.
    """
    args = docopt.docopt(__doc__)
    paths = list(utils.iter_paths(args['<dir>']))
    if not paths:
        # Nothing to process: bail out before the output file is created.
        return

    extractor = utils.load_classifier('feature_extractor.pkl')

    print("# Read images")
    # Build a real list: under Python 3 ``map`` returns a one-shot iterator,
    # which cannot be measured or traversed more than once by the extractor.
    images = [utils.load_image(path) for path in paths]

    print("# Extract features")
    # NOTE(review): fit_transform re-fits the loaded extractor on this data;
    # confirm that a plain transform() is not what is intended here.
    features = extractor.fit_transform(images)

    # Determine number of samples and features
    n_sample = len(paths)
    n_features = features.shape[1]

    # Create dataset
    with h5py.File('test_set.hdf5', 'w', driver='core') as f:
        f.create_dataset(
            'X', (n_sample, n_features), dtype='f', data=features)
def get_response(last_message, emb_type, clas_type, print_info=False):
    """
    Build a reply to ``last_message``.

    Loads the question/answer data, the embedding model selected by
    ``emb_type`` and the classifier selected by ``clas_type`` (for that
    embedding type), then delegates to ``create_response``.

    When ``print_info`` is true, the message, the reply and the detected
    topic, emotion and keyword intersection are printed to stdout.

    Returns the tuple ``(response, emotion, topic, word_intersection)``.
    """
    # Resources needed by create_response, loaded in a fixed order.
    qa_pairs = read_qa(qa_path='data/qa_data/assignment_data.json')
    embedder = load_semantic_model(model_type=emb_type)
    clf = load_classifier(classifier_type=clas_type, model_type=emb_type)

    response, emotion, topic, word_intersection = create_response(
        last_message, qa_pairs, clf, embedder, emb_type, clas_type)
    outcome = (response, emotion, topic, word_intersection)

    if not print_info:
        return outcome

    # Diagnostic report: one (template, fields) pair per printed line.
    report = (
        ("Received: {message}", {"message": last_message}),
        ("Responded: {response}", {"response": response}),
        ("Topic detected: {topic}", {"topic": topic}),
        ("Emotion detected: {emotion}", {"emotion": emotion}),
        ("Keywords detected [(keyword): (message_token)]: \n\t{intersection}",
         {"intersection": word_intersection}),
    )
    print()
    for template, fields in report:
        print(template.format(**fields))

    return outcome
def classify_text(text):
    """Print classification diagnostics for ``text``.

    Loads the classifier stored at ``CLASSIFIER_FILE_NAME``, computes the
    probability distribution for ``text`` and prints, in order:

    * the value of ``prob_dist.max()``;
    * the probabilities of the "general", "app" and "pricing" labels,
      each rounded to two decimals;
    * the result of ``classifier.show_informative_features(5)``.

    Nothing is returned; all output goes to stdout.
    """
    classifier = utils.load_classifier(CLASSIFIER_FILE_NAME)
    prob_dist = classifier.prob_classify(text)

    # Single-argument print(...) calls behave identically as a Python 2
    # statement and a Python 3 function call, so this parses on both.
    print(prob_dist.max())
    for label in ("general", "app", "pricing"):
        print(round(prob_dist.prob(label), 2))

    # NOTE(review): if show_informative_features prints its own report and
    # returns None, this emits an extra "None" line - confirm and drop the
    # outer print if so.
    print(classifier.show_informative_features(5))
def classify_messages(classifier_state_file: str, messages: Dataset):
    """Classify every message in ``messages`` as spam or ham and report it.

    Args:
        classifier_state_file: Path handed to ``load_classifier`` to restore
            the model state; the call also returns the vocabulary used to
            index the messages.
        messages: Dataset of messages to classify.

    For each sample one line ``<path>: SPAM|HAM, score: <p>`` is printed to
    stdout, where ``<p>`` is the softmax probability of class index 1 and a
    sample counts as spam when that probability exceeds 0.5. Progress and
    the final statistics are written to stderr.
    """
    # Local import: only ``nn`` is known to be in scope at module level.
    import torch

    model = SpamClassifier(VOCABULARY_SIZE)
    model = model.cuda()
    vocabulary = load_classifier(classifier_state_file, model)
    print('Loaded classifier state from:', classifier_state_file, file=sys.stderr)

    # Inference only: disable dropout / batch-norm updates. This is a no-op
    # if load_classifier already put the model into evaluation mode.
    model.eval()

    samples = WordIndexDataset(messages, vocabulary, MAX_MESSAGE_LENGTH_WORDS)
    samples_loader = DataLoader(samples, batch_size=100,
                                collate_fn=IndexVectorCollatorEval(),
                                num_workers=8)
    print('Processing messages in:', messages, file=sys.stderr)

    spam, ham = 0, 0
    # No gradients are needed for prediction; skip building the autograd graph.
    with torch.no_grad():
        for x, x_len, path in samples_loader:
            y_pred = model(x.cuda(), x_len).cpu()
            spam_score = nn.functional.softmax(y_pred, dim=-1)[:, 1]
            is_spam = spam_score > 0.5

            batch_spam = is_spam.sum().item()
            spam += batch_spam
            # Every sample that is not spam is ham.
            ham += is_spam.numel() - batch_spam

            for score, flagged, p in zip(spam_score.tolist(), is_spam.tolist(), path):
                label = "SPAM" if flagged else "HAM"
                print(f'{p}: {label}, score: {score}')

    print(f'Statistics: spam: {spam}, ham: {ham}, total: {spam + ham}', file=sys.stderr)
__author__ = 'anurag' import sqlite3 import json import utils from flask import Flask app = Flask(__name__) classifier = utils.load_classifier('classifier.pickle') HTML = '''<html> <head> <script type="text/javascript" src="https://www.google.com/jsapi"></script> <script type="text/javascript"> google.load("visualization", "1", {packages:["corechart"]}); google.setOnLoadCallback(drawChart); function drawChart() { var data = google.visualization.arrayToDataTable(%s); var options = { title: 'My Daily Activities' }; var chart = new google.visualization.PieChart(document.getElementById('piechart')); chart.draw(data, options); } </script> </head> <body>
# Load the expression dataframe (rows are samples, columns are genes, as
# reported by the summary print below) together with its labels.
df = utils.load_dataframe(args.dataset)
df_samples = df.index
df_genes = df.columns

labels, classes = utils.load_labels(args.labels)

print('loaded input dataset (%s genes, %s samples)' % (df.shape[1], df.shape[0]))

# impute missing values with the smallest value found anywhere in the dataframe
df.fillna(value=df.min().min(), inplace=True)

# initialize classifier
print('initializing classifier...')
clf = utils.load_classifier(args.model_config, args.model)
print('initialized %s classifier' % args.model)

# load gene sets file if it was provided, keeping only what
# utils.filter_gene_sets retains for the dataset's gene columns
if args.gene_sets is not None:
    print('loading gene sets...')

    gene_sets = utils.load_gene_sets(args.gene_sets)
    gene_sets = utils.filter_gene_sets(gene_sets, df_genes)

    print('loaded %d gene sets' % len(gene_sets))
else:
    gene_sets = []

# generate random gene sets if specified
from time import time import numpy as np import flask from flask import Flask from flask import request from utils import stem_sentence, vectorize_sentence, load_classifier app = Flask(__name__) model = load_classifier(fname="classifier_model.pkl") @app.route("/predict", methods=["GET"]) def predict(): results = {} raw_sentence = request.args.get("sentence", "EMPTY") t0 = time() sentence = stem_sentence(raw_sentence) t1 = time() sentence = vectorize_sentence(sentence) t2 = time() predicted_scores = model.predict_proba(sentence) t3 = time() negative_score = predicted_scores[0][0] positive_score = predicted_scores[0][1] stemming_time = f"{np.round(t1-t0, 2)} seconds." vectorizing_time = f"{np.round(t2-t1, 2)} seconds." prediction_time = f"{np.round(t3-t2, 2)} seconds."
from moviepy.editor import VideoFileClip color_space = config.Classifier.COLOR_SPACE orient = config.Classifier.ORIENT pix_per_cell = config.Classifier.PIX_PER_CELL cell_per_block = config.Classifier.CELL_PER_BLOCK hog_channel = config.Classifier.HOG_CHANNEL spatial_size = config.Classifier.SPATIAL_SIZE hist_bins = config.Classifier.HIST_BINS spatial_f = config.Classifier.SPATIAL_F hist_f = config.Classifier.HIST_F hog_f = config.Classifier.HOG_F scales = config.Pipeline.SCALES X_scaler = utils.load_classifier(config.Classifier.SCALER_FILE) svc = utils.load_classifier(config.Classifier.CLS_FILE) def process_heat_image(img, heatmap_history, render_heatmap=True, save_matches=False): windows = [] for s in scales: scale, ystart, ystop, xstart, xstop = s windows_s = classifier.find_cars(img, ystart, ystop, xstart, xstop, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block, spatial_size, hist_bins, min_confidence=config.Pipeline.MIN_CONFIDENCE) windows.extend(windows_s)
'x_train': flatten(x_train_predictions.tolist()), 'x_val': flatten(x_val_predictions.tolist()), 'x_test': flatten(x_test_predictions.tolist()), } else: vectorizer = Sklearn.VECTORIZERS[data[2]]( **{ 'binary': True, 'ngram_range': (int(data[3]), int(data[4])) }) x_train = vectorizer.fit_transform(train_inputs).todense() x_val = vectorizer.transform(val_inputs).todense() x_test = vectorizer.transform(test_data_preprocessed).todense() cls = load_classifier(model_path) print(key) # print(f'a - {cls.score(x_train, y_train)}') # print(f'va - {cls.score(x_val, y_val)}') # print(f'ta - {cls.score(x_test, y_test)}') # print('----------------------------') PREDICTIONS[folder][f'{key}-{preprocessing_algorithm_id}'] = { 'x_train': cls.predict(x_train).tolist(), 'x_val': cls.predict(x_val).tolist(), 'x_test': cls.predict(x_test).tolist(), } save_to_file('./predictions_v_6.json', {**PREDICTIONS, **STATICS})