def run(self):
    soup = super(ForbesScraper, self).get_soup_object()

    # Gets the breaking article from the Forbes investing page
    headline = soup.find_all(
        "a", {"class": "headlink h1--dense card__color--benjamins-green"})[0]
    headline_text = headline.text
    headline_link = headline.get('href', '')
    print("----")
    print("Breaking article: %s" % headline_text)
    print("Breaking article link: %s" % headline_link)
    txt_classifier = Classifier(headline_text)
    print(txt_classifier.sentiment())

    # Gets the editors' picks on the left side
    latest_picks = {}
    for latest_picks_article in soup.find_all(
            "a", {"class": "section-pick__title"}):
        # store the link and title both on the instance and locally
        self.article_link = link = latest_picks_article.get('href', '')
        self.article_title = title = latest_picks_article.text
        latest_picks[link] = title
        print("----")
        print("Latest pick link title: %s" % title)
        print("Latest pick link: %s" % link)
        self.classify_headline(title)
def classify_headline(self, headline):
    # Set self.sentiment
    txt_classifier = Classifier(headline)
    sentiment = txt_classifier.sentiment()
    print(sentiment)
    self.sentiment = sentiment
    self.update_avgs()
def getIntent():
    print(request.json)
    print(request.json['sentence'])
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()
        classifier = cache["classifier"]
        result = classifier.classifyIntent(sentence)
        classification = dict()
        print(result)
        if len(result) > 0:
            print(result)
            if result[1] < classifier.ERROR_THRESHOLD:
                get_database_context().add_not_found_sentence(sentence)
            classification['intent'] = result[0]
        else:
            classification['intent'] = ""
            get_database_context().add_not_found_sentence(sentence)
    else:
        print("NO DATABASE")
        classification = dict()
        classification['intent'] = "NO DATABASE"
    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification
    return jsonify(response_object)
def getIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'intents' not in cache.keys():
            cache["intents"] = Classifier("intents", client)
        classifier = cache["intents"]
        results = classifier.classify(sentence)
        classification = dict()
        if len(results) > 0:
            classification['intent'] = results[0][0]
        else:
            classification['intent'] = ""
    else:
        print("NO DATABASE")
        classification = dict()
        classification['intent'] = "NO DATABASE"
    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification
    return jsonify(response_object)
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']
    prior_intents = request.json['context']["priorIntent"]["intent"]
    if client is not None:
        classifier_name = "entities@" + prior_intents
        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)
        classifier = cache[classifier_name]
        results = classifier.classify(sentence)
        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")
        classification = dict()
        classification['entity'] = "NO DATABASE"
    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification
    return jsonify(response_object)
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()
        classifier = cache["classifier"]  # keep
        results = classifier.classifyEntity(sentence)  # strip: keep only the name of the entity
        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")
        classification = dict()
        classification['entity'] = "NO DATABASE"
    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification
    return jsonify(response_object)
def classify_pages(in_path, out_path):
    classifier = Classifier()
    with open(out_path, 'wb') as f:
        for site, html in utils.read_file_multiple(in_path):
            if classifier.classify(html):
                pickle.dump((site, html), f)
def __init__(self, db):
    self.db = db
    self.city = self.db["area"].find_one({
        "name": configuration.AREA
    })
    self.classifier = Classifier(self.db)
def train_Engine():
    result = get_trainer().start_training()
    if result:
        cache["classifier"] = Classifier()
        cache["classifier"].load(DatabaseContext(client), get_cos_context())
        return jsonify("Success! Engine was trained"), 200
    else:
        return jsonify("Error! Engine wasn't trained.."), 404
def main(config_):
    config = Config(config_)
    print("Model Framework: ", config.get("framework"),
          " Model Labels: ", config.get("labels"))
    broker = Broker(config)
    broker.listen()
    classifier = Classifier(config)
    thread = threading.Thread(target=process_request,
                              kwargs={"broker": broker,
                                      "classifier": classifier})
    thread.start()
def trainIntents():
    if client is not None:
        intents = Trainer("intents", client)
        intents.start_training()
        if 'intents' not in cache.keys():
            cache['intents'] = Classifier('intents', client)
        else:
            cache['intents'].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
def __call__(self):
    # test
    # self.train = self.train.head(200)
    # self.test = self.test.head(100)
    self.clf = Classifier(output_folder=self.output_folder,
                          RS=15,
                          train=self.train,
                          test=self.test,
                          fold_splits=self.splits,
                          clf_name=self.clf_name,
                          mapping_dict=config.mapping_dict)
    self.clf()
    print('Saved to %s' % self.output_folder)
def trainEntity():
    intent = request.json['intent']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.start_training()
        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)
        else:
            cache[classifier_name].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
def main():
    classifier = Classifier(model_name="random_forest")
    logger.debug(
        "top 20 feature importances: {}".format(
            get_feature_importance(classifier)
        )
    )
    test_features, test_labels = get_test_data("test")
    logger.debug(
        "classification report: {}".format(
            get_classification_report(
                test_labels["is_returning_customer"].values,
                classifier.classify(test_features))
        )
    )
def compare_crawler():
    heuristic_file = os.path.join(consts.DATA_DIR, 'using-heuristic-pages.pickle')
    bfs_file = os.path.join(consts.DATA_DIR, 'bfs-pages.pickle')
    hr_bfs = os.path.join(consts.RESULTS_DIR, 'bfs_harvest_ratio_results.csv')
    hr_heuristic = os.path.join(consts.RESULTS_DIR, 'heuristic_harvest_ratio_results.csv')
    if not os.path.exists(heuristic_file):
        crawl(True, heuristic_file)
    if not os.path.exists(bfs_file):
        crawl(True, bfs_file)
    classifier = Classifier()
    harvest_ratio(heuristic_file, hr_heuristic, classifier)
    harvest_ratio(bfs_file, hr_bfs, classifier)
def testIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if sentence == 'populate':
            # populate database with base data and train all neural networks
            populate_intents(client)
            populate_entities_for_meal(client)
            populate_entities_for_timetables(client)
            populate_entities_for_navigation(client)
            cache["intents"].load()
            cache["entities@timetables"].load()
            cache["entities@meal"].load()
            classification = dict()
            classification['intent'] = "Populated"
        else:
            if 'intents' not in cache.keys():
                cache["intents"] = Classifier("intents", client)
            classifier = cache["intents"]
            results = classifier.classify(sentence)
            classification = dict()
            if len(results) > 0:
                classification['intent'] = results[0][0]
            else:
                classification['intent'] = ""
    else:
        print("NO DATABASE")
        classification = dict()
        classification['intent'] = "NO DATABASE"
    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification
    return 'Results: %s' % classification['intent']
def main():
    global reporting, print_classification, classifier
    args = get_args()

    # load either web or pop-up reporting based on args
    reporting_module = 'reporting.' + ('web' if args.web else 'popup')
    print("Loading " + reporting_module)
    reporting = importlib.import_module(reporting_module)

    classifier = Classifier(args.age_gender)

    # if the process is killed with ctrl+c, display stats
    signal.signal(signal.SIGINT, sigint_handler)

    if args.video is not None:
        cap = cv2.VideoCapture(args.video)
        frame_nr = 0
        while cap.isOpened():
            ret, frame = cap.read()
            frame = cv2.resize(frame, None, fx=0.25, fy=0.25)
            if frame_nr % 4 == 0:
                every_frame(frame, time.time())
            frame_nr += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                raise SystemExit
        return

    if args.file is not None:
        frame = cv2.imread(args.file)
        every_frame(frame, time.time())
        if cv2.waitKey() & 0xFF == ord('q'):
            raise SystemExit
        return

    if args.print_classification:
        print_classification = True

    # on every frame from the stream run stuff
    stream_video(every_frame)
def load_sites_feeds():
    from tech_rss.models import Site

    fix_multiprocessing()
    clf = Classifier()

    for site in Site.objects.all():
        print('Starting {}'.format(site.domain))

        news = site.get_new_news()
        if not news:
            continue

        categories = clf.predict(news)

        for category, page in zip(categories, news):
            print(CATEGORIES_SHORT[category])
            print(page['title'], '\n')

            url, title = save_post(category, page, site)

            users = site.users.filter(categories__contains=[category])
            users_id = [getattr(user, 'id') for user in users]

            send_post_to_subscribers(TelegramBot, users_id, url, title)
def main():
    classifier = Classifier()
    classifier.build_model()
    classifier.add_smoothing()
    classifier.spam_vocabulary_probs, classifier.ham_vocabulary_probs = classifier.write_model_data(
        'model.txt', classifier.vocabulary)
    classifier.test_model('baseline-result.txt',
                          classifier.spam_vocabulary_probs,
                          classifier.ham_vocabulary_probs)

    print("------Experiment 2, Stop Words Filtering------")
    classifier.experiment2_stop_words()

    print("------Experiment 3, Word Length Filtering------")
    classifier.experiment3_length_filtering()

    print("------Experiment 4, Frequency 1 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered0',
                                               lower_cutoff_frequency=1,
                                               higher_cutoff_frequency=1)
    print("------Experiment 4, Frequency <=5 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered1',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=5)
    print("------Experiment 4, Frequency <=10 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered2',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=10)
    print("------Experiment 4, Frequency <=15 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered3',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=15)
    print("------Experiment 4, Frequency <=20 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered4',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=20)

    print("------Experiment 4, Top 10 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered0', 10)
    print("------Experiment 4, Top 15 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered1', 15)
    print("------Experiment 4, Top 20 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered2', 20)
    print("------Experiment 4, Top 25 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered3', 25)

    experiment5_file_name = 'smoothing'
    for n in range(0, 11):
        smoothing_value = round(n * 0.1, 1)
        file_name = experiment5_file_name + str(smoothing_value)
        print("------Experiment 5, smoothing value %s------" % smoothing_value)
        classifier_5 = Classifier()
        classifier_5.build_model()
        classifier_5.add_smoothing(smoothing_value)
        classifier_5.spam_vocabulary_probs, classifier_5.ham_vocabulary_probs = classifier_5.write_model_data(
            file_name + 'model.txt', classifier_5.vocabulary,
            smoothing_value=smoothing_value)
        classifier_5.test_model(file_name + 'baseline-result.txt',
                                classifier_5.spam_vocabulary_probs,
                                classifier_5.ham_vocabulary_probs)
from classifier.classifier import Classifier

classifier = Classifier()
classifier.train_model()
print(classifier.is_question('do you hold a credit card'))
def main(*args):
    """Train the model.

    Args:
        *args: args to be parsed by the ArgumentParser

    Returns:
        None
    """
    # Instantiating with the formatter_class argument will make default
    # values print in the help message.
    parser = argparse.ArgumentParser(
        description=('Train a new network on a dataset and save the model as ' +
                     'a checkpoint'),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('data_directory', type=str,
                        help=('path to the directory containing the ' +
                              'training, validation and testing sets.'))
    parser.add_argument('--save_dir', type=str, default='.',
                        help='set the directory to save checkpoints in')
    parser.add_argument('--checkpoint', type=str,
                        help='load a checkpoint to continue training')
    parser.add_argument('--arch', type=str.lower, default='alexnet',
                        choices=[k.lower() for k in Classifier.IMAGENET_MODELS.keys()],
                        help='choose the model architecture')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='learning rate to use while training')
    parser.add_argument('--hidden_units', type=int, nargs='+',
                        default=[4096, 1000],
                        help="sizes of the classifier's hidden layers")
    parser.add_argument('--epochs', type=int, default=3,
                        help='number of epochs to go through during training')
    parser.add_argument('--no_validate', action='store_true',
                        help=("don't validate using validation set during " +
                              "training"))
    parser.add_argument('--test_model', action='store_true',
                        help=('use test dataset to test model accuracy after ' +
                              'training'))
    parser.add_argument('--gpu', action='store_true',
                        help=('use the gpu to train the network if one is ' +
                              'available'))
    parser.add_argument('--no_active_session', action='store_true',
                        help="don't keep session alive (if on a local machine)")
    parser.add_argument('--no_save_checkpoint', action='store_true',
                        help=("don't save a checkpoint after training to " +
                              "save disk space"))
    parser.add_argument('--write_log_file', action='store_true',
                        help=('write training loss and accuracy data to a ' +
                              'log file at {save_dir}/{model_arch}.out'))
    args = parser.parse_args(args)

    keep_active = not args.no_active_session  # whether we need active_session
    validate = not args.no_validate  # whether to use the validation set
    save_checkpoint = not args.no_save_checkpoint

    data_dir = args.data_directory.rstrip('/')
    try:
        num_categories = len([
            d for d in os.listdir(data_dir + '/test') if d.isnumeric()
        ])
    except FileNotFoundError:
        print(f'ERROR: {data_dir} not found.', file=sys.stderr)
        sys.exit(-1)
    except NotADirectoryError:
        print(f'ERROR: {data_dir} is not a directory.', file=sys.stderr)
        sys.exit(-1)

    if args.gpu:
        device = 'cuda'
        if not torch.cuda.is_available():
            print('ERROR: cuda is not available on this machine.',
                  'Use cpu for training instead.', file=sys.stderr)
            sys.exit(-1)
    else:
        device = 'cpu'

    if args.checkpoint:
        trainer = ModelTrainer(
            data_dir,
            classifier=Classifier(checkpoint=args.checkpoint)
        )
    else:
        trainer = ModelTrainer(
            data_dir,
            model_architecture=args.arch,
            output_size=num_categories,
            hidden_layers=args.hidden_units,
            learn_rate=args.learning_rate
        )

    save_dir = args.save_dir.rstrip('/')
    try:
        os.listdir(save_dir)
    except FileNotFoundError:
        os.mkdir(save_dir)
    except NotADirectoryError:
        print(f'WARNING: {save_dir} is not a directory. ' +
              'Saving checkpoint and writing any training logs to current ' +
              'directory instead.', file=sys.stderr)
        save_dir = '.'

    with open(f'{save_dir}/{args.arch}.txt', 'w') \
            if args.write_log_file else dont_open() as log_file:
        try:
            with active_session() if keep_active else no_context():
                trainer.train_classifier(validate=validate,
                                         num_epochs=args.epochs,
                                         device=device,
                                         output_file=log_file,
                                         print_status=True)
        except (NewConnectionError, ConnectionError) as e:
            print('Exception raised in active_session context manager.',
                  file=sys.stderr)
            print('If running on a local machine, use',
                  '--no_active_session flag.', file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(-1)

    if save_checkpoint:
        trainer.classifier.save_checkpoint(save_dir + '/checkpoint.pth')

    if args.test_model:
        try:
            with active_session() if keep_active else no_context():
                accuracy = trainer.test_accuracy(device=device,
                                                 print_status=True)
        except (NewConnectionError, ConnectionError) as e:
            print('Exception raised in active_session context manager.',
                  file=sys.stderr)
            print('If running on a local machine, use',
                  '--no_active_session flag.', file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(-1)

        msg = f'Test Accuracy: {accuracy*100:.4f}%'
        print(msg)
        if args.write_log_file:
            with open(f'{save_dir}/{args.arch}.txt', 'a') as log_file:
                print(msg, file=log_file)
elif os.path.isfile('vcap-local.json'):
    with open('vcap-local.json') as f:
        vcap = json.load(f)
        print('Found local VCAP_SERVICES')
        creds = vcap['services']['cloudantNoSQLDB'][0]['credentials']
        user = creds['username']
        password = creds['password']
        url = 'https://' + creds['host']
        client = Cloudant(user, password, url=url, connect=True)
        client.create_database('trainer', throw_on_exists=False)
        client.create_database('synapse', throw_on_exists=False)

cache = dict()

if client is not None:
    # create the Classifier cache on startup
    cache["intents"] = Classifier("intents", client)
    cache["intents"].load()
    cache["entities@timetables"] = Classifier("entities@timetables", client)
    cache["entities@timetables"].load()
    cache["entities@meal"] = Classifier("entities@meal", client)
    cache["entities@meal"].load()

# On Bluemix, get the port number from the environment variable PORT.
# When running this app on the local machine, default the port to 8000.
port = int(os.getenv('PORT', 8000))


def removekey(d, key):
    r = dict(d)
    del r[key]
    return r
import telebot
from flask import Flask, request

import settings
from classifier.classifier import Classifier
from classifier.data.image_processing import image_from_file

bot = telebot.TeleBot(settings.TOKEN)
server = Flask(__name__)

# Init image classifier
classifier = Classifier(
    base_net_path=settings.BASIC_NET_PATH,
    refferi_net_path=settings.REFFERI_NET_PATH,
    white_net_path=settings.WHITE_NET_PATH,
    blue_net_path=settings.BLUE_NET_PATH,
    device=settings.DEVICE
)


@bot.message_handler(content_types=['photo'])
def get_photo_message(message):
    """
    Predict the label of the photo in a request.

    :param message: message that contains the photo
    :return: label of the photo, string
    """
    # Download the photo and save it as a file object
    telegram_file_id = message.photo[-1].file_id
    telegram_file = bot.get_file(telegram_file_id)
def main(*args):
    """Predict the top K classes of an image.

    Args:
        *args: args to be parsed by the ArgumentParser

    Returns:
        None
    """
    # Instantiating with the formatter_class argument will make default
    # values print in the help message.
    parser = argparse.ArgumentParser(
        description='Process an image & report results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('image_path', type=str,
                        help=('path to the image to process or to a dataset ' +
                              'directory with images to choose randomly from ' +
                              'Ex: flowers/test/1/image_06743.jpg or ' +
                              'flowers/test'))
    parser.add_argument('checkpoint', type=str,
                        help='path to the model checkpoint to load')
    parser.add_argument('--top_k', type=int, default=1,
                        help='Return top K most likely classes')
    parser.add_argument('--category_names', type=str,
                        help='use a mapping of categories to real names')
    parser.add_argument('--gpu', action='store_true',
                        help=('if available, use gpu to process the image ' +
                              'instead of the cpu'))
    args = parser.parse_args(args)

    if os.path.isdir(args.image_path):
        print(f'{args.image_path} is a directory.',
              'Choosing a random image to process.')
        image_path = get_random_image_from_dir(args.image_path)
        print(f'Using image: {image_path}')
    else:
        image_path = args.image_path

    if not os.path.isfile(args.checkpoint):
        print(f'ERROR: {args.checkpoint} is not a file.', file=sys.stderr)
        sys.exit(-1)

    if args.category_names:
        cat_to_name = load_json(args.category_names)
    else:
        cat_to_name = None

    if args.gpu:
        device = 'cuda'
        if not torch.cuda.is_available():
            print('ERROR: cuda is not available on this machine.',
                  'Use cpu for prediction instead.', file=sys.stderr)
            sys.exit(-1)
    else:
        device = 'cpu'

    classifier = Classifier(checkpoint=args.checkpoint)
    probs, classes = classifier.predict(image_path, topk=args.top_k,
                                        device=device)

    if cat_to_name is not None:
        classes = [cat_to_name[c] for c in classes]
        class_len = len(max(cat_to_name.values(), key=len))
    else:
        class_len = 10  # padding needed to space column 1 title 'Class' below

    print(f'{"Class":{class_len}}{"Probability"}')
    for prob, class_ in zip(probs, classes):
        print(f'{class_:{class_len}}{prob:4.2f}')
def __init__(self, dataset_root='flowers', classifier=None,
             **classifier_kwargs):
    """You can create a ModelTrainer with a Classifier object if you have a
    model checkpoint to load and want to continue training, or you can pass
    a valid set of keyword arguments to create a new Classifier object
    before training.

    Args:
        dataset_root (str): the directory where the train, valid, and test
            datasets are located.
        classifier (classifier.Classifier): the Classifier containing the
            PyTorch model to train. If no Classifier is given as an
            argument, a new Classifier can be created using the
            classifier_kwargs passed instead.
        **classifier_kwargs: if no classifier is given, these will be used
            to create a new Classifier to train. See Classifier.__init__()
            in classifier.py for valid kwargs.

    Examples:
        trainer = ModelTrainer(
            'flowers',
            classifier=Classifier(checkpoint='checkpoint.pth')
        )

        trainer = ModelTrainer(
            'flowers',
            model_architecture='alexnet',
            output_size=102,
            hidden_layers=[4096, 1000],
            learn_rate=0.005
        )
    """
    self._data_dir = dataset_root
    self._train_dir = self._data_dir + '/train'
    self._valid_dir = self._data_dir + '/valid'
    self._test_dir = self._data_dir + '/test'

    # Define transforms for the training, validation and testing sets.
    # The validation and test transforms are the same.
    self._data_transforms = {
        'train': transforms.Compose([
            transforms.RandomRotation(30),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'valid': transforms.Compose([
            transforms.Resize(255),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'test': transforms.Compose([
            transforms.Resize(255),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    }

    # Load the datasets with ImageFolder
    self._image_datasets = {
        'train': datasets.ImageFolder(self._train_dir,
                                      transform=self._data_transforms['train']),
        'valid': datasets.ImageFolder(self._valid_dir,
                                      transform=self._data_transforms['valid']),
        'test': datasets.ImageFolder(self._test_dir,
                                     transform=self._data_transforms['test'])
    }

    # Using the image datasets and the transforms, define the dataloaders
    self._dataloaders = {
        'train': torch.utils.data.DataLoader(self._image_datasets['train'],
                                             batch_size=64, shuffle=True),
        'valid': torch.utils.data.DataLoader(self._image_datasets['valid'],
                                             batch_size=64),
        'test': torch.utils.data.DataLoader(self._image_datasets['test'],
                                            batch_size=64)
    }

    if classifier:
        self.classifier = classifier
    else:
        self.classifier = Classifier(
            class_to_idx=self._image_datasets['train'].class_to_idx,
            **classifier_kwargs)
def train(args):
    # Read dataset source.json
    if not os.path.exists(os.path.join(DATASETS, args.dataset_name)):
        print(f"Dataset {args.dataset_name} does not exist", file=sys.stderr)
        sys.exit(1)
    dataset_source_fname = os.path.join(DATASETS, args.dataset_name, "source.json")
    with open(dataset_source_fname, "r") as f:
        dataset_source = json.load(f)
    if "weights" not in dataset_source:
        print(f"Dataset {args.dataset_name} was not created using noise classes.",
              file=sys.stderr)
        print("An attempt will be made to match the labels to existing classes.",
              file=sys.stderr, flush=True)
        dataset_source["weights"] = dict(
            (label, 1 / len(dataset_source["labels"]))
            for label in dataset_source["labels"])

    if args.update is not None:
        noise_classes_old = load_noise_classes(args, False)
        if noise_classes_old is None:
            args.update = None

    # Iterate over noise classes
    print("Initializing classifiers")
    noise_classes = NoiseClass.from_file(args.noise_classes)
    default_settings = None
    for label in dataset_source["weights"]:
        if label not in noise_classes:
            print(f"Label {label} of dataset {args.dataset_name} is not among "
                  f"the defined noise classes in {args.noise_classes}",
                  file=sys.stderr, flush=True)
            continue
        nc = noise_classes[label]
        if nc.classifiers is None:
            # Pseudo-class
            continue

        train_all = True
        if args.update is not None and nc.id in noise_classes_old:
            nc_old = noise_classes_old[nc.id]
            train_all = False
            if json.dumps(nc.degradations) != json.dumps(nc_old.degradations):
                print(f"Warning: the degradation definition of {nc.id} has changed.",
                      file=sys.stderr)
                print("If you have not already, please generate a new dataset "
                      "using degradations/create_dataset.py",
                      file=sys.stderr, flush=True)
                print(f"All classifiers in {nc.id} will be retrained from scratch")
                train_all = True

        print(label + ":")
        for classifier_ind, classifier_spec in enumerate(nc.classifiers):
            print(f"\t {classifier_spec['type']} (feature: {classifier_spec['feature']}",
                  end="")
            if len(nc.classifiers) > 1:
                print(", weight:", classifier_spec.get('weight', 1), end="")
            if "bootstrap" in classifier_spec:
                print(", bootstrap:", classifier_spec["bootstrap"], end="")
            if classifier_spec.get("vad", None):
                print(", VAD:",
                      "unvoiced" if classifier_spec["vad"].get("inverse", False)
                      else "voiced",
                      end="")
            print(")")

            # Setup classifier specifications
            classifier_complete_defaults(classifier_spec,
                                         args.classifier_defaults,
                                         default_settings)

            # Initialize or copy old classifier
            _type = next((m for m in available_models
                          if m.__name__ == classifier_spec["type"]), None)
            if _type is None:
                print(f"Unrecognized classifier type {classifier_spec['type']}",
                      file=sys.stderr)
                if classifier_spec['type'] == 'GenHMM':
                    raise genhmm_err
            config = {
                _type.__name__: dict(
                    (cat, classifier_spec[cat])
                    for cat in setting_categories
                    if classifier_spec[cat] is not None)
            }
            if args.update is not None and nc.id in noise_classes_old \
                    and classifier_ind < len(nc_old.classifiers):
                classifier_spec_old = nc_old.classifiers[classifier_ind]
                if not train_all and nc.id not in args.update \
                        and classifier_specs_equal(classifier_spec, classifier_spec_old):
                    classifier_spec["instance"] = classifier_spec_old["instance"]
                    classifier_spec["notrain"] = True
                    continue
            classifier_spec["instance"] = Classifier(
                [nc.id, nc.id + " (negative)"], _type, config, silent=False)

    # Prune noise_classes
    for nc_id in tuple(noise_classes.keys()):
        classifiers = noise_classes[nc_id].classifiers
        if classifiers is None or sum(
                1 for spec in classifiers
                if spec.get("instance") is not None) == 0:
            del noise_classes[nc_id]

    # Check if skipping training is allowed
    if args.update is not None and set(noise_classes.keys()) != set(noise_classes_old.keys()):
        print("The set of noise classes have changed", file=sys.stderr)
        print("All classifiers for all noise classes must be retrained from scratch",
              file=sys.stderr, flush=True)
        for nc in noise_classes.values():
            for spec in nc.classifiers:
                if "notrain" in spec:
                    del spec["notrain"]

    # Train classifiers grouped by feature
    rng = None
    spec_inds_sorted = _sort_spec_inds(noise_classes)
    filenames, classes, labels = read_dataset(args.dataset_name, "train")
    for i, (spec, nc, feats, idxs) in enumerate(
            _iterate_classifiers(spec_inds_sorted, filenames, args.recompute)):
        print(f"Training ({i + 1}/{len(spec_inds_sorted)})")
        if args.update is not None and spec.get("notrain", False):
            print("Keeping old classifier")
            del spec["notrain"]
            continue

        label_ind, = np.where(classes == spec["instance"].noise_types[0])[0]
        labels_binary = labels[:, label_ind]
        labels_binary = np.column_stack((labels_binary, ~labels_binary))

        # Bootstrapping
        if spec.get("bootstrap", False):
            if len(nc.classifiers) == 1:
                print("Warning: Bootstrapping a single classifier - please use "
                      "model averaging or the entire training set.",
                      file=sys.stderr, flush=True)
            if rng is None:
                rng = np.random.default_rng()
            sample_inds = rng.choice(np.arange(len(filenames)), len(filenames))
            feats_used = [feats[feat_ind]
                          for sample_ind in sample_inds
                          for feat_ind in np.where(idxs == sample_ind)[0]]
            idxs_used = [idx
                         for idx, sample_ind in enumerate(sample_inds)
                         for _ in range(np.sum(idxs == sample_ind))]
            labels_used = labels_binary[sample_inds, :]
            # for li in range(len(sample_inds)):
            #     for fi in np.where(idxs_used == idxs_used[li])[0]:
            #         assert labels_used[fi] == labels_binary[sample_inds[li]]
        else:
            feats_used = feats
            idxs_used = idxs
            labels_used = labels_binary

        spec["instance"].train(feats_used, idxs_used, labels_used, args.models)

    print("Training complete")

    # Save
    fname = os.path.join(args.models, args.dataset_name + ".noiseclasses")
    if os.path.exists(fname):
        print("Overwriting", fname)
    with open(fname, "wb") as f:
        pickle.dump(noise_classes, f)
    print("Saved to", fname)
def __init__(self):
    self.model = Model()
    self.model.model_load()
    self.r = Reader()
    self.r.read_corpus()
    self.tagger = Classifier(self.r.train_sents, self.model)
if __name__ == "__main__":
    from torch.utils import data
    from sklearn.model_selection import train_test_split
    from generator.generator import Generator
    from discriminator.discriminator_semi import SemiSupervisedDiscriminator
    from classifier.classifier import Classifier
    from data.data_loader import ImageDataset, ImageTransform, make_datapath_list

    z_dim = 20
    image_size_g = 64
    image_size_d = 12
    num_classes = 10

    G = Generator(image_size_g, z_dim)
    D = SemiSupervisedDiscriminator(image_size_d, num_classes)
    C = Classifier(image_size_d, num_classes)

    G.apply(weights_init)
    D.apply(weights_init)

    print("Finish initialization of the network")

    label_list = list(range(num_classes))
    img_list, label_list = make_datapath_list(label_list)

    train_img_list, test_img_list, train_label_list, test_label_list = train_test_split(
        img_list, label_list, test_size=0.2)

    mean = (0.5, )
    std = (0.5, )
    train_dataset = ImageDataset(data_list=train_img_list,
                                 transform=ImageTransform(mean, std),
import sys
import os
from PIL import Image
from classifier.classifier import Classifier

cnn = Classifier(json_file='model.json', weights_file='model.h5')

exit_program = False
count_true = 0
count_false = 0

while not exit_program:
    type_input = input("Folder(F) or Single File(S)?: ")
    if type_input == "F" or type_input == "f":
        if not os.path.exists('animals_and_humans'):
            os.mkdir('animals_and_humans')
        if not os.path.exists('nothing'):
            os.mkdir('nothing')
        folder_name = input("Folder Name: ")
        if os.path.exists(folder_name):
            test_images = os.listdir(folder_name)
            if len(test_images) > 0:
                for image in test_images:
                    print(image)
                    if image.startswith('.'):
                        print(image + " not read")
                    else:
                        path_image = "./" + folder_name + "/" + image
                        animal, accuracy = cnn.predict_animal(path_image)
                        if animal:
                            os.rename(path_image, "./animals_and_humans/" + image)
                            count_true += 1
def run(opt):
    # output dir
    if os.path.exists(opt.save_dir):
        shutil.rmtree(opt.save_dir)
    os.makedirs(opt.save_dir)

    # load dataset
    dataset = Dataloader(source=opt.source, imgsz=opt.img_size).dataset
    # load object detection model and weights
    detector = Detector(detector_type=opt.detector_type, cfg_file=opt.detector_cfg_file)
    detector.run_through_once(opt.img_size)  # dry run once
    # load object tracking model
    tracker = Tracker(tracker_type=opt.tracker_type, cfg_file=opt.tracker_cfg_file)
    # load pose detection model
    poser = Poser(poser_type=opt.poser_type, cfg_file=opt.poser_cfg_file)
    # load classifier model
    clssifier = Classifier(classifier_type=opt.classifier_type, cfg_file=opt.classifier_cfg_file)

    print(detector.device, detector.cfg)
    # Analyse the status annotation files (.txt) first and only detect frames that have one,
    # which speeds things up.
    filt_with_txt = False
    if filt_with_txt:
        from classifier.data_analyse import anaylise_label
        label_ret = anaylise_label()
        label_stems = [x[0] for x in label_ret]

    for img_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # print(type(img), type(im0s))
        # print(type(im0s), im0s.shape)
        if dataset.is_camera:
            im0s = im0s[0]
            path = f'{path[0]}/{img_idx:0<6}.jpg'
        if filt_with_txt:
            fold_stem = path.split('/')[-2]
            idx = label_stems.index(fold_stem)
            # print(fold_stem, label_stems, idx)
            img_stem = Path(path).stem
            valid_stems = [Path(x).stem for x in label_ret[idx][-1]]
            in_it = f'track_{img_stem}' in valid_stems
            # print(path, in_it, label_ret[idx][-1][0])
            if not in_it:
                continue

        # img: [3, w, h]; preprocess, inference, NMS
        det_ret = detector.detect(path, img, im0s)  # detect result: nparray, [num_obj, 6], 6: xyxy, conf, cls
        # detector.imshow(im0s, det_ret)

        # track
        tra_ret = tracker.track(det_ret, im0s)  # track result: list, [num_obj, 7], 7: xyxy, cls, tid, trace
        # print(tra_ret[:, 5])
        # tracker.imshow(im0s, tra_ret, path)

        # pose detect
        pose_ret = poser.detect_pose(tra_ret, im0s, path, return_type='zzd')
        # zzd format: np.array(object): [num_obj, 10], 10: xyxy cls tid trace keypoints kp_score proposal_score
        # print(pose_ret)
        poser.imshow(im0s, pose_ret, path, resize=(1280, 720))

        # classifier
        if opt.feature_save_dir is not None:  # save features
            clssifier.build_and_save_feature(pose_ret, path, save_dir=opt.feature_save_dir)
            print(f'\rsaving features: [{img_idx + 1:>3}/{len(dataset)}] ', end='')
            continue

        # status_ret = clssifier.detect_status(pose_ret, path, is_camera=dataset.is_camera)
        # zzd format: np.array(object): [num_obj, 12], 12: the 10 fields above plus status_idx and status
        # clssifier.imshow(im0s, status_ret, show_name='x', resize=(1280, 720))
        # print(status_ret)

        if img_idx == 10:
            if cv2.waitKeyEx(0) == ord('q'):
                raise StopIteration