def run(self):
    soup = super(ForbesScraper, self).get_soup_object()
    # Get the breaking article from the Forbes investing page
    headline = soup.find_all(
        "a",
        {"class": "headlink h1--dense card__color--benjamins-green"})[0]
    headline_text = headline.text
    headline_link = headline.get('href', '')
    print("----")
    print("Breaking article: %s" % headline_text)
    print("Breaking article link: %s" % headline_link)
    txt_classifier = Classifier(headline_text)
    print(txt_classifier.sentiment())
    # Get the editors' picks on the left side
    latest_picks = {}
    for latest_picks_article in soup.find_all(
            "a", {"class": "section-pick__title"}):
        link = self.article_link = latest_picks_article.get('href', '')
        title = self.article_title = latest_picks_article.text
        latest_picks[link] = title
        print("----")
        print("Latest pick link title: %s" % title)
        print("Latest pick link: %s" % link)
        self.classify_headline(title)
Example #2
class POSTagger():
    def __init__(self):
        self.model = Model()
        self.model.model_load()
        self.r = Reader()
        self.r.read_corpus()
        self.tagger = Classifier(self.r.train_sents, self.model)

    def evaluate(self, featureset):
        """
        Evaluate the accuracy of the classifier-based POS tagger
        featureset: list of (features extracted for a word, gold-standard tag) pairs
        stdout: accuracy_score
        """
        #sequence, tag = featureset
        gs, labels = [], []
        for s, t in featureset:
            gs.append(t)
            label = self.tagger.choose_tag(s)
            labels.append(label)
            print(t, label)

        assert (len(gs) == len(labels))
        self.write_to_file(labels)
        words = self.tagger.test(self.r.test_sents, word=True)
        print(accuracy_score(gs, labels))

    def write_to_file(self, labels):
        with open('labels.txt', 'w') as file_handler:
            for label in labels:
                file_handler.write("{}\n".format(label))
Example #3
def classify_pages(in_path, out_path):
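    # Stream (site, html) pairs from in_path and append the pages the classifier accepts to a pickle file.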
    classifier = Classifier()

    with open(out_path, 'wb') as f:
        for site, html in utils.read_file_multiple(in_path):
            if classifier.classify(html):
                pickle.dump((site, html), f)
Example #4
def classify_headline(self, headline):
    # Set self.sentiment
    txt_classifier = Classifier(headline)
    sentiment = txt_classifier.sentiment()
    print(sentiment)
    self.sentiment = sentiment
    self.update_avgs()
Example #5
    def __init__(self, db):
        self.db = db

        self.city = self.db["area"].find_one({
            "name": configuration.AREA
        })

        self.classifier = Classifier(self.db)
Example #6
def getIntent():
    print(request.json)
    print(request.json['sentence'])
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()

        classifier = cache["classifier"]

        result = classifier.classifyIntent(sentence)
        classification = dict()
        print(result)
        if len(result) > 0:

            print(result)
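            # result[1] is the confidence score; low-confidence sentences are stored so they can be reviewed or retrained later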
            if result[1] < classifier.ERROR_THRESHOLD:
                get_database_context().add_not_found_sentence(sentence)

            classification['intent'] = result[0]
        else:
            classification['intent'] = ""
            get_database_context().add_not_found_sentence(sentence)
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #7
def getIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'intents' not in cache.keys():
            cache["intents"] = Classifier("intents", client)

        classifier = cache["intents"]

        results = classifier.classify(sentence)

        classification = dict()
        if len(results) > 0:
            classification['intent'] = results[0][0]
        else:
            classification['intent'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #8
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']
    prior_intents = request.json['context']["priorIntent"]["intent"]
    if client is not None:
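        # One entity classifier is trained per intent; the cache key follows the "entities@<intent>" convention (e.g. "entities@meal").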
        classifier_name = "entities@" + prior_intents

        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)

        classifier = cache[classifier_name]

        results = classifier.classify(sentence)

        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['entity'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #9
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']

    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()

        classifier = cache["classifier"]
        results = classifier.classifyEntity(sentence)
        # keep only the name of the top entity
        classification = dict()
        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['entity'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #10
def main(config_):
    config = Config(config_)
    print("Model Framework: ", config.get("framework"), " Model Labels: ", config.get("labels"))
    broker = Broker(config)
    broker.listen()
    classifier = Classifier(config)
    thread = threading.Thread(target=process_request, kwargs={"broker": broker, "classifier": classifier})
    thread.start()
Example #11
def train_Engine():
    result = get_trainer().start_training()
    if result:
        cache["classifier"] = Classifier()
        cache["classifier"].load(DatabaseContext(client), get_cos_context())
        return jsonify("Success! Engine was trained"), 200
    else:
        return jsonify("Error! Engine wasn't trained.."), 404
Example #12
def trainIntents():
    if client is not None:
        intents = Trainer("intents", client)
        intents.start_training()
        if 'intents' not in cache.keys():
            cache['intents'] = Classifier('intents', client)
        else:
            cache['intents'].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #13
def load_sites_feeds():
    from tech_rss.models import Site
    fix_multiprocessing()

    clf = Classifier()
    for site in Site.objects.all():
        print('Starting {}'.format(site.domain))
        news = site.get_new_news()

        if not news:
            continue

        categories = clf.predict(news)
        for category, page in zip(categories, news):
            print(CATEGORIES_SHORT[category])
            print(page['title'], '\n')

            url, title = save_post(category, page, site)

            users = site.users.filter(categories__contains=[category])
            users_id = [getattr(user, 'id') for user in users]

            send_post_to_subscribers(TelegramBot, users_id, url, title)
Example #14
    def __call__(self):
        # test
        # self.train = self.train.head(200)
        # self.test = self.test.head(100)

        self.clf = Classifier(output_folder=self.output_folder,
                              RS=15,
                              train=self.train,
                              test=self.test,
                              fold_splits=self.splits,
                              clf_name=self.clf_name,
                              mapping_dict=config.mapping_dict)
        self.clf()
        print('Saved to %s' % self.output_folder)
Example #15
def trainEntity():
    intent = request.json['intent']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.start_training()
        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)
        else:
            cache[classifier_name].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #16
def load_sites_feeds():
    from tech_rss.models import Site
    fix_multiprocessing()

    clf = Classifier()
    for site in Site.objects.all():
        print('Starting {}'.format(site.domain))
        news = site.get_new_news()

        if not news:
            continue

        categories = clf.predict(news)
        for category, page in zip(categories, news):
            print(CATEGORIES_SHORT[category])
            print(page['title'], '\n')

            url, title = save_post(category, page, site)

            users = site.users.filter(categories__contains=[category])
            users_id = [getattr(user, 'id') for user in users]

            send_post_to_subscribers(TelegramBot, users_id, url, title)
Example #17
def main():
    classifier = Classifier(model_name="random_forest")

    logger.debug(
        "top 20 feature importances: {}".format(
            get_feature_importance(classifier)
        )
    )

    test_features, test_labels = get_test_data("test")

    logger.debug(
        "classification report: {}".format(
            get_classification_report(
                test_labels["is_returning_customer"].values,
                classifier.classify(test_features))
        )
    )
Example #18
def compare_crawler():
    heuristic_file = os.path.join(consts.DATA_DIR,
                                  'using-heuristic-pages.pickle')
    bsf_file = os.path.join(consts.DATA_DIR, 'bfs-pages.pickle')
    hr_bfs = os.path.join(consts.RESULTS_DIR, 'bfs_harvest_ratio_results.csv')
    hr_heuristic = os.path.join(consts.RESULTS_DIR,
                                'heuristic_harvest_ratio_results.csv')

    if not os.path.exists(heuristic_file):
        crawl(True, heuristic_file)

    if not os.path.exists(bsf_file):
        crawl(True, bsf_file)

    classifier = Classifier()

    harvest_ratio(heuristic_file, hr_heuristic, classifier)
    harvest_ratio(bsf_file, hr_bfs, classifier)
Example #19
def testIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if sentence == 'populate':
            # populate the database with base data and train all neural networks
            populate_intents(client)
            populate_entities_for_meal(client)
            populate_entities_for_timetables(client)
            populate_entities_for_navigation(client)
            cache["intents"].load()
            cache["entities@timetables"].load()
            cache["entities@meal"].load()

            classification = dict()
            classification['intent'] = "Populated"
        else:
            if 'intents' not in cache.keys():
                cache["intents"] = Classifier("intents", client)

            classifier = cache["intents"]

            results = classifier.classify(sentence)

            classification = dict()
            if len(results) > 0:
                classification['intent'] = results[0][0]
            else:
                classification['intent'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return 'Results: %s' % classification['intent']
Example #20
def main():
    global reporting, print_classification, classifier
    args = get_args()
    # load either web or pop-up reporting based on args
    reporting_module = 'reporting.' + ('web' if args.web else 'popup')
    print("Loading " + reporting_module)
    reporting = importlib.import_module(reporting_module)

    classifier = Classifier(args.age_gender)

    # if process is killed with ctrl+c display stats
    signal.signal(signal.SIGINT, sigint_handler)

    if args.video is not None:
        cap = cv2.VideoCapture(args.video)
        frame_nr = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, None, fx=0.25, fy=0.25)
            if frame_nr % 4 == 0:
                every_frame(frame, time.time())
            frame_nr += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                raise SystemExit
        return

    if args.file is not None:
        frame = cv2.imread(args.file)
        every_frame(frame, time.time())
        if cv2.waitKey() & 0xFF == ord('q'):
            raise SystemExit
        return

    if args.print_classification:
        print_classification = True

    # on every frame from the stream run stuff
    stream_video(every_frame)
Example #21
import telebot
from flask import Flask, request

import settings

from classifier.classifier import Classifier
from classifier.data.image_processing import image_from_file

bot = telebot.TeleBot(settings.TOKEN)
server = Flask(__name__)

# Init image classifier
classifier = Classifier(
    base_net_path=settings.BASIC_NET_PATH,
    refferi_net_path=settings.REFFERI_NET_PATH,
    white_net_path=settings.WHITE_NET_PATH,
    blue_net_path=settings.BLUE_NET_PATH,
    device=settings.DEVICE
)


@bot.message_handler(content_types=['photo'])
def get_photo_message(message):
    """
    Predict label of request photos.
    :param message: message that contains the photo
    :return: label of photo, string
    """
    # Download photo and save as file object
    telegram_file_id = message.photo[-1].file_id
    telegram_file = bot.get_file(telegram_file_id)
Example #22
class Annotator:

    def __init__(self, db):
        self.db = db

        self.city = self.db["area"].find_one({
            "name": configuration.AREA
        })

        self.classifier = Classifier(self.db)


    def tokenize(self,tweet):
        stop_words_list = get_stop_words("en")

        tweet_text = tweet["text"]

        if tweet["truncated"]:
            tweet_text = tweet["extended_tweet"]["full_text"]

        tweet_text = re.sub(r"(?:\@|https?\://)\S+", "", tweet_text)

        tokens = [token for token in utils.simple_preprocess(
            tweet_text, deacc=False, min_len=3) if token not in stop_words_list]

        tweet["tokens"] = tokens

        return tweet

    def add_date(self,tweet):
        tweet["date"] = datetime.datetime.fromtimestamp(int(tweet["timestamp_ms"]) // 1000)
        return tweet

    def annotate_tweet_location(self, tweet):

        if tweet["geo"] is None and tweet["place"] is None:
            return tweet

        point = None
        if tweet["geo"] is not None:
            point = Point(tweet["geo"]["coordinates"][1], tweet["geo"]["coordinates"][0])

        for a in self.city["geojson"]["features"]:
            area = shape(a["geometry"])
            if (point is not None and area.contains(point)) or a["properties"]["name"] == tweet["place"]["name"]:
                tweet["area_name"] = a["properties"]["name"]
                #tweet["area_id"] = a["id"]
                print("Found a tweet in",tweet["area_name"])
                break

        return tweet

    def classify_tweet(self, tweet):
        return self.classifier.classify(tweet)


    def classify_offline(self):
        tweets = list(self.db["tweet"].find())

        print("Classifying tweets")
        for t in tweets:
            print(t["id"])
            c_tweet = self.classifier.classify(t)
            self.db["tweet"].update({"id": t["id"]}, {"$set": {"categories": c_tweet["categories"]}})

        print("Done")

    def tokenize_offline(self):
        tweets = list(self.db["tweet"].find())

        print("Updating tweets")
        for t in tweets:
            stop_words_list = get_stop_words("en")

            tweet_text = t["text"]

            if t["truncated"]:
                tweet_text = t["extended_tweet"]["full_text"]

            tweet_text = re.sub(r"(?:\@|https?\://)\S+", "", tweet_text)

            tokens = [token for token in utils.simple_preprocess(
                tweet_text, deacc=False, min_len=3) if token not in stop_words_list]

            query = {
                "_id": t["_id"]
            }

            update = {
                "$set": {
                    "tokens": tokens
                }
            }
            self.db["tweet"].update(query, update)

        print("Done")
Example #23
if __name__ == "__main__":
    from torch.utils import data
    from sklearn.model_selection import train_test_split

    from generator.generator import Generator
    from discriminator.discriminator_semi import SemiSupervisedDiscriminator
    from classifier.classifier import Classifier
    from data.data_loader import ImageDataset, ImageTransform, make_datapath_list

    z_dim = 20
    image_size_g = 64
    image_size_d = 12
    num_classes = 10
    G = Generator(image_size_g, z_dim)
    D = SemiSupervisedDiscriminator(image_size_d, num_classes)
    C = Classifier(image_size_d, num_classes)

    G.apply(weights_init)
    D.apply(weights_init)

    print("Finish initialization of the network")

    label_list = list(range(num_classes))
    img_list, label_list = make_datapath_list(label_list)
    train_img_list, test_img_list, train_label_list, test_label_list = train_test_split(
        img_list, label_list, test_size=0.2)

    mean = (0.5, )
    std = (0.5, )
    train_dataset = ImageDataset(data_list=train_img_list,
                                 transform=ImageTransform(mean, std),
Example #24
__author__ = 'dungdt'

import time
from classifier.classifier import Classifier
from classifier.data.dictionary import Dictionary
from classifier.data_reader import DataReader

if __name__ == '__main__':
    dictionary = Dictionary()
    dataReader = DataReader(dictionary)
    classifier = Classifier(dataReader, trainingDataPath='data/training',
                            testDataPath='data/test')

    print('Training...')
    t = time.time()
    classifier.train()
    print('Training time: %d' % (time.time() - t))

    t = time.time()
    print('Testing...')
    print('Accuracy: %s%%' % ('{:4.2f}'.format(classifier.test() * 100)))
    print('Testing time: %d' % (time.time() - t))

    testData = classifier.dataReader.readTestData(classifier.testDataPath)
    print(classifier.classify(testData[0][0]))
Example #25
elif os.path.isfile('vcap-local.json'):
    with open('vcap-local.json') as f:
        vcap = json.load(f)
        print('Found local VCAP_SERVICES')
        creds = vcap['services']['cloudantNoSQLDB'][0]['credentials']
        user = creds['username']
        password = creds['password']
        url = 'https://' + creds['host']
        client = Cloudant(user, password, url=url, connect=True)
        client.create_database('trainer', throw_on_exists=False)
        client.create_database('synapse', throw_on_exists=False)

cache = dict()
if client is not None:
    # create Classifier cache on startup
    cache["intents"] = Classifier("intents", client)
    cache["intents"].load()
    cache["entities@timetables"] = Classifier("entities@timetables", client)
    cache["entities@timetables"].load()
    cache["entities@meal"] = Classifier("entities@meal", client)
    cache["entities@meal"].load()

# On Bluemix, get the port number from the environment variable PORT
# When running this app on the local machine, default the port to 8000
port = int(os.getenv('PORT', 8000))


def removekey(d, key):
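    # Return a copy of d with `key` removed; used to strip "sentence" from the request payload before echoing it back.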
    r = dict(d)
    del r[key]
    return r
Example #26
def run(opt):

    # output dir
    if os.path.exists(opt.save_dir):
        shutil.rmtree(opt.save_dir)
    os.makedirs(opt.save_dir)

    # load dataset
    dataset = Dataloader(source=opt.source, imgsz=opt.img_size).dataset

    # load object detection model, and weights
    detector = Detector(detector_type=opt.detector_type,
                        cfg_file=opt.detector_cfg_file)
    detector.run_through_once(opt.img_size)  # dry-run once to warm up the detector

    # load object tracking model
    tracker = Tracker(tracker_type=opt.tracker_type,
                      cfg_file=opt.tracker_cfg_file)

    # load pose detection model
    poser = Poser(poser_type=opt.poser_type, cfg_file=opt.poser_cfg_file)

    # load classifier model
    clssifier = Classifier(classifier_type=opt.classifier_type,
                           cfg_file=opt.classifier_cfg_file)

    print(detector.device, detector.cfg)
    filt_with_txt = False  # first inspect the status annotation .txt files and only run detection on images that have one, which speeds things up
    if filt_with_txt:
        from classifier.data_analyse import anaylise_label
        label_ret = anaylise_label()
        label_stems = [x[0] for x in label_ret]

    for img_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # print(type(img), type(im0s))
        # print(type(im0s), im0s.shape)
        if dataset.is_camera:
            im0s = im0s[0]
            path = f'{path[0]}/{img_idx:0<6}.jpg'
        if filt_with_txt:
            fold_stem = path.split('/')[-2]
            idx = label_stems.index(fold_stem)
            # print(fold_stem, label_stems, idx)
            img_stem = Path(path).stem
            valid_stems = [Path(x).stem for x in label_ret[idx][-1]]
            in_it = f'track_{img_stem}' in valid_stems
            # print(path, in_it, label_ret[idx][-1][0])
            if not in_it:
                continue
        # img: [3, w, h], preprocess, inference, NMS,
        det_ret = detector.detect(
            path, img,
            im0s)  # detect result: nparray, [num_obj, 6] 6: xyxy,conf,cls
        # detector.imshow(im0s, det_ret)
        # track
        tra_ret = tracker.track(
            det_ret,
            im0s)  # track result: list, [num_obj, 7], 7: xyxy, cls, tid, trace
        # print(tra_ret[:, 5])
        # tracker.imshow(im0s, tra_ret, path)
        # pose detect
        pose_ret = poser.detect_pose(tra_ret, im0s, path, return_type='zzd')
        # zzd format: np.array(object): [num_obj, 10],10: xyxy cls tid trace keypoints kp_score proposal_score
        # print(pose_ret)
        poser.imshow(im0s, pose_ret, path, resize=(1280, 720))
        # classifier
        if opt.feature_save_dir is not None:  # feature-saving mode
            clssifier.build_and_save_feature(pose_ret,
                                             path,
                                             save_dir=opt.feature_save_dir)
            print(f'\rsaving features: [{img_idx + 1:>3}/{len(dataset)}] ',
                  end='')
            continue

        # status_ret = clssifier.detect_status(pose_ret, path, is_camera=dataset.is_camera)
        # zzd format: np.array(object): [num_obj, 12], 12: the 10 fields above plus status_idx and status
        # clssifier.imshow(im0s, status_ret, show_name='x', resize=(1280, 720))
        # print(status_ret)

        if img_idx == 10:
            if cv2.waitKeyEx(0) == ord('q'):
                raise StopIteration
Example #27
def classify_pattern(cls, pattern):
    prediction = Classifier.classify(pattern)
    return cls.PATTERN_MAPPING[int(np.argmax(prediction))]
Example #28
from classifier.classifier import Classifier

classifier = Classifier()
classifier.train_model()
print(classifier.is_question('do you hold a credit card'))
Example #29
def __init__(self):
    self.model = Model()
    self.model.model_load()
    self.r = Reader()
    self.r.read_corpus()
    self.tagger = Classifier(self.r.train_sents, self.model)
Example #30
def main():
    classifier = Classifier()
    classifier.build_model()
    classifier.add_smoothing()
    classifier.spam_vocabulary_probs, classifier.ham_vocabulary_probs = classifier.write_model_data(
        'model.txt', classifier.vocabulary)
    classifier.test_model('baseline-result.txt',
                          classifier.spam_vocabulary_probs,
                          classifier.ham_vocabulary_probs)
    print("------Experiment 2, Stop Words Filtering------")
    classifier.experiment2_stop_words()
    print("------Experiment 3, Word Length Filtering------")
    classifier.experiment3_length_filtering()
    print("------Experiment 4, Frequency 1 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered0',
                                               lower_cutoff_frequency=1,
                                               higher_cutoff_frequency=1)
    print("------Experiment 4, Frequency <=5 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered1',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=5)
    print("------Experiment 4, Frequency <=10 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered2',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=10)
    print("------Experiment 4, Frequency <=15 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered3',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=15)
    print("------Experiment 4, Frequency <=20 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered4',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=20)
    print("------Experiment 4, Top 10 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered0',
                                                   10)
    print("------Experiment 4, Top 15 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered1',
                                                   15)
    print("------Experiment 4, Top 20 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered2',
                                                   20)
    print("------Experiment 4, Top 25 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered3',
                                                   25)

    experiment5_file_name = 'smoothing'

    for n in range(0, 11):
        smoothing_value = round((n * 0.1), 1)
        file_name = experiment5_file_name + str(smoothing_value)
        print("------Experiment 5, smoothing value %s------" % smoothing_value)
        classifier_5 = Classifier()
        classifier_5.build_model()
        classifier_5.add_smoothing(smoothing_value)
        classifier_5.spam_vocabulary_probs, classifier_5.ham_vocabulary_probs = classifier_5.write_model_data(
            file_name + 'model.txt',
            classifier_5.vocabulary,
            smoothing_value=smoothing_value)
        classifier_5.test_model(file_name + 'baseline-result.txt',
                                classifier_5.spam_vocabulary_probs,
                                classifier_5.ham_vocabulary_probs)
Example #31
import sys
import os
from PIL import Image
from classifier.classifier import Classifier

cnn = Classifier(json_file='model.json', weights_file='model.h5')
exit_program = False
count_true = 0
count_false = 0
while not exit_program:
    type_input = input("Folder(F) or Single File(S)?: ")
    if type_input == "F" or type_input == "f":
        if not os.path.exists('animals_and_humans'):
            os.mkdir('animals_and_humans')
        if not os.path.exists('nothing'):
            os.mkdir('nothing')
        folder_name = input("Folder Name: ")
        if os.path.exists(folder_name):
            test_images = os.listdir(folder_name)
            if len(test_images) > 0:
                for image in test_images:
                    print(image)
                    if image.startswith('.'):
                        print(image + " not read")
                    else:
                        path_image = "./" + folder_name + "/" + image
                        animal, accuracy = cnn.predict_animal(path_image)
                        if (animal):
                            os.rename(path_image,
                                      "./animals_and_humans/" + image)
                            count_true += 1
Example #32
def main(*args):
    """Predict the top K classes of an image.

    Args:
        *args: args to be parsed by the ArgumentParser

    Returns:
        None
    """
    # Instantiating with formatter_class argument will make default values print
    # in the help message.
    parser = argparse.ArgumentParser(
        description='Process an image & report results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'image_path',
        type=str,
        help=('path to the image to process or to a dataset ' +
              'directory with images to choose randomly from ' +
              'Ex: flowers/test/1/image_06743.jpg or ' + 'flowers/test'))
    parser.add_argument('checkpoint',
                        type=str,
                        help='path to the model checkpoint to load')
    parser.add_argument('--top_k',
                        type=int,
                        default=1,
                        help='Return top K most likely classes')
    parser.add_argument('--category_names',
                        type=str,
                        help='use a mapping of categories to real names')
    parser.add_argument('--gpu',
                        action='store_true',
                        help=('if available, use gpu to process the image ' +
                              'instead of the cpu'))
    args = parser.parse_args(args)

    if os.path.isdir(args.image_path):
        print(f'{args.image_path} is a directory.',
              'Choosing a random image to process.')
        image_path = get_random_image_from_dir(args.image_path)
        print(f'Using image: {image_path}')
    else:
        image_path = args.image_path

    if not os.path.isfile(args.checkpoint):
        print(f'ERROR: {args.checkpoint} is not a file.', file=sys.stderr)
        sys.exit(-1)

    if args.category_names:
        cat_to_name = load_json(args.category_names)
    else:
        cat_to_name = None

    if args.gpu:
        device = 'cuda'
        if not torch.cuda.is_available():
            print('ERROR: cuda is not available on this machine.',
                  'Use cpu for prediction instead.',
                  file=sys.stderr)
            sys.exit(-1)
    else:
        device = 'cpu'

    classifier = Classifier(checkpoint=args.checkpoint)
    probs, classes = classifier.predict(image_path,
                                        topk=args.top_k,
                                        device=device)

    if cat_to_name is not None:
        classes = [cat_to_name[c] for c in classes]
        class_len = len(max(cat_to_name.values(), key=len))
    else:
        class_len = 10  # padding needed to space column 1 title 'Class' below

    print(f'{"Class":{class_len}}{"Probability"}')
    for prob, class_ in zip(probs, classes):
        print(f'{class_:{class_len}}{prob:4.2f}')