def run():
    start_time = time()
    data_cand, data_part, full_data = load_data()
    # numeric_parties = full_data.party.map(party_map)

    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)

    candidatos_clf = Classifier(train_c.drop('candidatoId', axis=1),
                                train_c.candidatoId)
    partidos_clf = Classifier(train_p.drop('idPartido', axis=1),
                              train_p.idPartido)

    cand_solver = candidatos_clf._predict()
    n_cand, pca_cand_solver = candidatos_clf._pca()
    part_solver = partidos_clf._predict()
    n_part, pca_part_solver = partidos_clf._pca()

    cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1),
                                        test_c.candidatoId, cand_solver)
    pca_cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1),
                                            test_c.candidatoId,
                                            pca_cand_solver, n_cand)
    part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1),
                                      test_p.idPartido, part_solver)
    pca_part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1),
                                          test_p.idPartido, pca_part_solver,
                                          n_part)

    output_results(f'CANDIDATOS | {cand_solver}', test_c.candidatoId,
                   cand_pred)
    output_results(f'CANDIDATOS_PCA | {pca_cand_solver}, {n_cand}',
                   test_c.candidatoId, pca_cand_pred)
    output_results(f'PARTIDOS | {part_solver}', test_p.idPartido, part_pred)
    output_results(f'PARTIDOS_PCA | {pca_part_solver}, {n_part}',
                   test_p.idPartido, pca_part_pred)

    cand_part_target, cand_part_pred = candidato_mapper(test_c.candidatoId,
                                                        cand_pred)
    output_results(f'PARTIDOS CON CANDIDATO | {cand_solver}',
                   cand_part_target, cand_part_pred)

    cm_cand = ConfusionMatrix(test_c.candidatoId, cand_pred)
    cm_pca_cand = ConfusionMatrix(test_c.candidatoId, pca_cand_pred)
    cm_part = ConfusionMatrix(test_p.idPartido, part_pred)
    cm_pca_part = ConfusionMatrix(test_p.idPartido, pca_part_pred)
    cm_cand_part = ConfusionMatrix(cand_part_target, cand_part_pred)

    elapsed_time = time() - start_time
    print('----------------------------------------')
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed_time)}')

    result = {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': {
            'candidatos': (test_c.candidatoId, cand_pred),
            'candidatos_pca': (test_c.candidatoId, pca_cand_pred),
            'partidos': (test_p.idPartido, part_pred),
            'partidos_pca': (test_p.idPartido, pca_part_pred),
            'partidos_candidatos': (cand_part_target, cand_part_pred)
        },
        'matrices': {
            'candidatos': cm_cand,
            'candidatos_pca': cm_pca_cand,
            'partidos': cm_part,
            'partidos_pca': cm_pca_part,
            'partidos_candidatos': cm_cand_part
        }
    }
    return result
def train(cfg):
    cnn = Classifier(input_shape=cfg["shape"], batch_size=cfg["batch"])
    if cfg["model"] == "AlexNet":
        cnn.set_default_AlexNet_Model()
    else:
        arch = cfg["arch"]
        cnn.set_custom_model(conv_layers=arch["conv"],
                             dense_layers=arch["dense"])
    if cfg["random"]:
        cnn.random_boost = True
    if cfg["pre_trained"] is not None:
        dir_model = glob(
            os.path.join(st.DIR_LOG, "RUNNING",
                         "*" + cfg["pre_trained"] + "*"))
        assert len(dir_model) == 1
        # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt")
        # cnn.model.load_weights(model_file)
        cnn.model.load_weights(os.path.join(dir_model[0], "cp-model.ckpt"))

    val_data = cnn.get_data_generator(st.DATA_VALIDATION_DIR,
                                      dip_filter=cfg["dip"])
    train_data = cnn.get_data_generator(st.DATA_TRAIN_DIR,
                                        dip_filter=cfg["dip"])

    cnn.set_log_name(cfg)
    cnn.model_select = cfg["msk"]
    cnn.lr["lr"] = cfg["lr"]
    cnn.lr["decay_steps"] = cfg["dc_st"]
    cnn.lr["decay_rate"] = cfg["dc"]
    cnn.lr["momentum"] = cfg["mt"]
    cnn.callbacks.append(
        TensorBoard(log_dir=os.path.join(st.DIR_LOG, "RUNNING",
                                         cnn.log_name)))
    cnn.train(gen_train=train_data, gen_val=val_data, epochs=cfg["epochs"])
def run_models(self):
    """Run all the models defined in the config file.

    Saves each best model in pkl format and all scores in a CSV file.

    Variables:
        CONFIG (dict): task configuration,
        SEARCH_SPACE (dict): hyperparameter search space of the models,
        df (pandas DataFrame): data frame read from the raw data,
        model: Classifier instance,
        all_metrics (list): collects the training metrics of every model.
    """
    CONFIG = self.load_config()
    DATA_PATH = self.DATA_PATH
    SEARCH_SPACE = self.SEARCH_SPACE
    df = self.load_data()
    model = Classifier(CONFIG, SEARCH_SPACE, df)
    all_metrics = []
    # Use the freshly loaded CONFIG consistently (the original mixed it
    # with self.CONFIG).
    for model_name in CONFIG['RUN_MODELS']:
        model_name, grid_cv, best_model, X_test, y_test = model.train_model(
            model_name)
        metrics = model.gen_metrics(model_name, grid_cv, best_model, X_test,
                                    y_test)
        all_metrics.append(metrics)
        with open(f'{CONFIG["MODEL_DIR"]}/{model_name}_best_model.pkl',
                  'wb') as f:
            pickle.dump(best_model, f)
    df_all_metrics = pd.concat(all_metrics)
    df_all_metrics.to_csv(f'{CONFIG["REPORT_DIR"]}/models_metrics.csv',
                          index=False)
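# A minimal sketch of the config dict run_models() appears to consume; only
# the keys referenced above are shown, and every value is a placeholder.
EXAMPLE_CONFIG = {
    'RUN_MODELS': ['random_forest', 'logistic_regression'],
    'MODEL_DIR': 'models',
    'REPORT_DIR': 'reports',
}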
def eval_model(cfg): cnn = Classifier(input_shape=cfg["shape"]) cnn.set_default_AlexNet_Model() # if cfg["pre_trained"] is not None: # dir_model = glob(os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*")) # assert len(dir_model) == 1, "the len of chekpoint list is {}".format(len(dir_model)) # cnn.model.load_weights(os.path.join(dir_model[0], "cp-model.ckpt")) assert cfg["pre_trained"] is not None dir_model = glob( os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*")) assert len(dir_model) == 1, "the len of chekpoint list is {}".format( len(dir_model)) model_file = os.path.join(dir_model[0], "cp-model.ckpt") # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt") cnn.model.load_weights(model_file) dt = Data() sum_pred = np.zeros(6, ) lenght_data = np.zeros(6, ) for idx in range(dt.length['train']): image_file, label_gt = dt.get_image_file('train', idx, encode_label=True) label_est = cnn.model_predict(image_file, cfg["dip"]) lenght_data[label_gt] += 1 if label_est == label_gt: sum_pred[label_gt] += 1 acc = sum_pred / lenght_data print("{} - Acc: {}".format((label_gt, label_est), acc))
def eval_model(cfg): cnn = Classifier(input_shape=cfg["shape"]) cnn.set_default_AlexNet_Model() assert cfg["pre_trained"] is not None dir_model = glob( os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*")) assert len(dir_model) == 1, "the len of chekpoint list is {}".format( len(dir_model)) model_file = os.path.join(dir_model[0], "cp-model.ckpt") # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt") cnn.model.load_weights(model_file) dt = red_csv_file() filename = os.path.join( st.DATA_DIR, "eval_test_data_{}_{}.csv".format(cfg["pre_trained"], cfg["extra"])) print(filename) with open(filename, '+w') as csvfile: writer = csv.writer(csvfile) writer.writerow(("ID", "Label")) for idx in range(len(dt)): image_file = os.path.join(st.DATA_TEST_DIR, dt[idx]) assert os.path.isfile(image_file) label_est = cnn.model_predict(image_file, cfg["dip"]) with open(filename, 'a') as csvfile: writer = csv.writer(csvfile) writer.writerow((dt[idx], label_est)) print("{0:} - progress: {1:.1f}".format(idx, 100 * idx / len(dt)))
def parte_a(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    classifier = Classifier(ID3(train, numeric_attributes))
    classifier.train()
    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        # elem is a Series, so drop the class label itself; Series.drop
        # silently ignores the columns= keyword used before.
        predicted.append(classifier.classify(elem.drop('clazz')))
    output_results(title='PARTE A', actual=actual, predicted=predicted)
def run_train():
    parser = ArgumentParser()
    parser.add_argument('--settings',
                        type=str,
                        required=True,
                        help='Path to the training settings ini file')
    settings = configparser.ConfigParser()
    settings.read(parser.parse_args().settings)

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)

    # use the GPU selected by id
    gpu = settings.getint('hardware', 'gpu')
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    label_handler, train_dataset, val_dataset = _create_datasets(
        settings['input_data'])
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, settings.getint('trainer', 'batchsize'))
    val_iter = chainer.iterators.SerialIterator(
        val_dataset, settings.getint('trainer', 'batchsize'), repeat=False)

    output_dir = '{}/training_{}_{}'.format(
        settings.get('output_data', 'path'),
        settings.get('trainer', 'epochs'),
        settings.get('optimizer', 'optimizer'))

    # optimizer
    optimizer = _create_optimizer(settings['optimizer'])
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=gpu)
    trainer = chainer.training.Trainer(
        updater, (settings.getint('trainer', 'epochs'), 'epoch'), output_dir)
    trainer.extend(extensions.LogReport())
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    evaluator = Evaluator(val_iter, model, device=gpu)
    trainer.extend(evaluator)
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))
    trainer.run()

    # save model
    output_file_path = '{0}/resnet.model'.format(output_dir)
    chainer.serializers.save_npz(output_file_path, predictor)

    meta_output = {
        'trainer': settings._sections['trainer'],
        'optimizer': settings._sections['optimizer'],
        'train_data': train_dataset.get_meta_info(label_handler),
        'validation_data': val_dataset.get_meta_info(label_handler),
    }
    with open('{0}/meta.json'.format(output_dir), 'w') as f:
        json.dump(meta_output, f, indent=4)
def main():
    directory = ''
    match = False
    inject = False
    db_name = 'certs.db'
    output = False
    outputfile = ''
    try:
        # 'o' takes an argument (the output file), and long option names
        # must not include leading dashes.
        opts, args = getopt.getopt(
            sys.argv[1:], 'hd:mib:o:',
            ['help', 'directory=', 'match', 'inject', 'database=', 'output='])
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print('printing help')
            usage()
        elif opt in ('-d', '--directory'):
            directory = arg
            if not directory.endswith('/'):
                directory += '/'
        elif opt in ('-m', '--match'):
            match = True
        elif opt in ('-i', '--inject'):
            inject = True
        elif opt in ('-b', '--database'):
            db_name = arg
        elif opt in ('-o', '--output'):
            output = True
            outputfile = arg
            if outputfile == '':
                outputfile = 'matches.txt'

    if directory:
        print("Grabbing data...")
        grabber = Grabber(directory)
        classifier = Classifier(grabber)
        print("Classifying...")
        classifier.classify()
        certs, keys = classifier.get_data()
        db = Database(db_name)
        if inject:
            try:
                print("Creating the database")
                db.create_db()
            except Exception:
                print("Database already exists.")
            print("Injecting data into the database...")
            db.insert_keys(keys)
            db.insert_certs(certs)
        if match:
            print("Matching data...")
            db.match_cert_key()
        if output:
            db.export_matches(outputfile)
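# Hypothetical invocation of main() above (the script name and paths are
# placeholders): grab certs from a directory, inject them into the database,
# match keys to certificates, and export the matches.
#
#   python cert_tool.py -d ./certs -i -m -o matches.txt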
def parte_b(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    # Train one binary (one-vs-rest) classifier per class.
    classifiers = []
    classes = train.clazz.unique()
    for clazz in classes:
        classifier = Classifier(
            ID3(train, numeric_attributes, specific_class=clazz))
        classifier.train()
        classifiers.append(classifier)

    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        # elem is a Series, so drop the class label itself (see parte_a).
        predicted.append(vote_classify(classifiers, elem.drop('clazz')))
    output_results(title='PARTE B', actual=actual, predicted=predicted)
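# vote_classify is called above but not defined in this file. A minimal
# sketch of what it plausibly does, assuming each classifier's classify()
# returns a class label: tally the votes and return the most common one.
from collections import Counter


def vote_classify(classifiers, elem):
    votes = Counter(clf.classify(elem) for clf in classifiers)
    return votes.most_common(1)[0][0]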
def __init__(self):
    # Classifier
    self.c = Classifier()
    self.c.load_classifier(TRAINING_SET)
    self.code = ''
    # Correios tracker
    self.t = Tracker()
    # Answers (the duplicated COMPLAIN_SET read was removed)
    with open(GREETINGS_SET) as f:
        self.greetings_responses = f.readlines()
    with open(COMPLAIN_SET) as f:
        self.complain_responses = f.readlines()
    with open(HELP_SET) as f:
        self.help_responses = f.readlines()
    with open(UNKNOWN_SET) as f:
        self.unknown_responses = f.readlines()
    with open(WRONG_CODE_SET) as f:
        self.wrong_code_responses = f.readlines()
    with open(TRACKING_SET) as f:
        self.tracking_responses = f.readlines()
    with open(QUIT_SET) as f:
        self.quit_responses = f.readlines()
    with open(GOODBYE_SET) as f:
        self.goodbye_responses = f.readlines()
    with open(RESULTS_SET) as f:
        results_responses = f.readlines()
    self.offline_response = results_responses[0]
    self.fail_response = results_responses[1]
    self.sent_response = results_responses[2]
    self.forwarded_response = results_responses[3]
    self.delivering_response = results_responses[4]
    self.arrived_response = results_responses[5]
    # If it is expecting something
    self.waiting_for = ''
    self.attempts = 0
def run(save_loc="cnn/ONN", loss=Loss.MSE, OD=10,
        gradient=Gradient.APPROXIMATE):
    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################
    dataset = Dataset.MNIST
    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])
    batch_size_train = 64
    batch_size_test = 1000

    ####################################################
    # Configure Networks.
    ####################################################
    sat_abs_nl_args = {
        'I_sat': 1,
        'OD': OD,
        'encoding': Encoding.AMPLITUDE,
        'gradient': gradient
    }
    SANL = lambda: SatAbsNL(**sat_abs_nl_args)

    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    net_args = {
        'n_ch_conv': [32, 64],
        'kernel_size_conv': [5, 5],
        'n_in_fc': 1024,
        'n_hid_fc': [128],
        'activation_conv': [SANL, SANL],
        'activation_fc': SANL,
        'dropout': lambda: nn.Dropout(0.4),
        'conv_args': {
            'stride': 1,
            'padding': 0,
            'bias': False
        },
        'pool_conv': lambda: nn.AvgPool2d(kernel_size=2, stride=2),
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'bias_fc': False,
        'output': output
    }

    ####################################################
    # Train classifiers
    ####################################################
    n_seeds = 5
    losses = {}
    corrects = {}
    valid_scores = {}
    for i in range(n_seeds):
        lab = 'seed{}'.format(i)
        network = ConvNet(**net_args)
        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=False,
            max_value=15 if OD > 10 else 5,
            get_validation=True)
        classifier = Classifier(
            network,
            train_loader,
            test_loader,
            n_epochs=30 if dataset == Dataset.MNIST else 40,
            learning_rate=5e-4,
            init_weight_mean=0.,
            init_weight_std=0.01,
            init_conv_weight_std=0.1,
            loss=loss_str,
            weight_range=None,
            weight_normalisation=weight_norm.NONE,
            log_interval=25,
            n_test_per_epoch=0,
            save_path=os.path.join(save_loc, lab))
        train_losses, test_correct = classifier.train()
        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################
        classifier.load(classifier.network_save_path)
        valid_loss, valid_correct = classifier.validate(validation_loader)
        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)
        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        # Use a name other than `output`, which already holds the output
        # activation above.
        with open(validation_save_path, 'wb+') as f:
            pickle.dump(np.array([valid_loss, valid_correct]), f,
                        pickle.HIGHEST_PROTOCOL)
        print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################
    fig_fname = os.path.join(save_loc, "training_performance")
    with plt.style.context('seaborn-paper', after_reset=True):
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})
        window = 25
        avg_mask = np.ones(window) / window
        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")
        for lab, data in corrects.items():
            ax2.plot(data[:, 0], data[:, 1] / len(test_loader.dataset),
                     label=lab)
            # Report only the correct-count column (data[:, 0] holds epochs)
            # against the dataset size, not the number of batches.
            print("{}: Best score {}/{}".format(lab, np.max(data[:, 1]),
                                                len(test_loader.dataset)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")
        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
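# A minimal usage sketch, assuming the run() signature above; the save
# locations and OD values here are illustrative placeholders.
if __name__ == '__main__':
    for od in (10, 40):
        run(save_loc="cnn/ONN_OD{}".format(od), loss=Loss.MSE, OD=od)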
def __init__(self, file_name: str):
    self.classifier = Classifier(file_name)
    self.plot_pair()
def __init__(self, file_name: str, features: list):
    self.classifier = Classifier(file_name, filter=False)
    self.plotter = Plotter(file_name, features, self.house_array_function)
    self.plotter.plot_scatter()
dataset = [x for x in dataset if len(x[0].split()) > 0]
dataset = list(set(dataset))

classifiers = [(SVC(kernel='rbf', C=2.9, gamma=1), 'svm_rbf'),
               (SVC(kernel='linear'), 'svm_linear')]
# (KNeighborsClassifier(), 'knn'),
# (MultinomialNB(), 'naive_bayes'),
# (Perceptron(), 'perceptron')]

vectorizers = [(TfidfVectorizer(min_df=0.0,
                                max_df=1.0,
                                sublinear_tf=True,
                                use_idf=True), 'tfidf')]
# (CountVectorizer(min_df=0.0, max_df=1.0), 'count'),
# (HashingVectorizer(), 'hash')]

c = Classifier(classifier=classifiers[1][0], vectorizer=vectorizers[0][0])

x = [a[0] for a in dataset]
y = [a[1] for a in dataset]

c.train(x_train=x, y_train=y)

from sklearn.pipeline import make_pipeline
import eli5

pipe = make_pipeline(vectorizers[0][0], classifiers[1][0])
pipe.fit(x, y)

with open('C:/Users/Gustavo/Desktop/batata.html', 'wb') as file:
    file.write(
        eli5.show_weights(classifiers[1][0], vec=vectorizers[0][0],
                          top=1000).data.encode('utf8'))
    # cv2.waitKey(0)
    return image


video_capture = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX

feelings_faces = []
for index, emotion in enumerate(EMOTIONS):
    feelings_faces.append(cv2.imread('../emojis/' + emotion + '.png', -1))

while True:
    # Capture frame-by-frame
    ret, frame = video_capture.read()

    # Predict result with network
    result = Classifier(format_image(frame)).data[0]

    # Draw face in frame
    # for (x,y,w,h) in faces:
    #     cv2.rectangle(frame, (x,y), (x+w,y+h), (255,0,0), 2)

    # Write results in frame
    if result is not None:
        for index, emotion in enumerate(EMOTIONS):
            cv2.putText(frame, emotion, (10, index * 20 + 20),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (130, index * 20 + 10),
                          (130 + int(result[index] * 100),
                           (index + 1) * 20 + 4), (255, 0, 0), -1)

        # Pick the emoji of the most probable emotion (argmax, not argmin).
        face_image = feelings_faces[np.argmax(result)]
def run(save_loc="mlp/ANN", n_hid=2, loss=Loss.CCE, activation=nn.ReLU):
    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################
    dataset = Dataset.MNIST
    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])
    batch_size_train = 64
    batch_size_test = 1000
    input_scaling = 1

    ####################################################
    # Configure Networks.
    ####################################################
    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    default_net_args = {
        'n_hid': [128] * n_hid,
        'n_in': 784,
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'activation': activation,
        'output': output,
    }

    ####################################################
    # Train classifiers
    ####################################################
    n_seeds = 5
    losses = {}
    corrects = {}
    valid_scores = {}
    for i in range(n_seeds):
        lab = 'seed{}'.format(i)
        network = LinNet(**default_net_args)
        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=True,
            max_value=input_scaling,
            get_validation=True)
        classifier = Classifier(network,
                                train_loader,
                                test_loader,
                                n_epochs=50,
                                learning_rate=5e-4,
                                init_weight_mean=0,
                                init_weight_std=0.1,
                                loss=loss_str,
                                weight_range=None,
                                weight_normalisation=weight_norm.NONE,
                                log_interval=25,
                                n_test_per_epoch=0,
                                save_path=os.path.join(save_loc, lab))
        train_losses, test_correct = classifier.train()
        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################
        classifier.load(classifier.network_save_path)
        valid_loss, valid_correct = classifier.validate(validation_loader)
        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)
        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        # Use a name other than `output`, which already holds the output
        # activation above.
        with open(validation_save_path, 'wb+') as f:
            pickle.dump(np.array([valid_loss, valid_correct]), f,
                        pickle.HIGHEST_PROTOCOL)
        print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################
    fig_fname = os.path.join(save_loc, "training_performance")
    with plt.style.context('seaborn-paper', after_reset=True):
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})
        window = 25
        avg_mask = np.ones(window) / window
        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")
        for lab, data in corrects.items():
            ax2.plot(data[:, 0], data[:, 1] / len(test_loader.dataset),
                     label=lab)
            # Report only the correct-count column (data[:, 0] holds epochs)
            # against the dataset size, not the number of batches.
            print("{}: Best score {}/{}".format(lab, np.max(data[:, 1]),
                                                len(test_loader.dataset)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")
        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
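# A minimal usage sketch, assuming the run() signature above; the save
# locations and depths here are illustrative placeholders.
if __name__ == '__main__':
    for depth in (1, 2, 3):
        run(save_loc="mlp/ANN_{}hid".format(depth), n_hid=depth)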
def run_train():
    parser = ArgumentParser()
    parser.add_argument(
        '--paths',
        type=str,
        nargs='+',
        required=True,
        help='Root paths of folders that contain images and pascal voc files')
    parser.add_argument('--label_names',
                        type=str,
                        required=True,
                        help='Path to label names file')
    parser.add_argument('--training_splitsize',
                        type=float,
                        default=0.9,
                        help='Splitsize of training data')
    parser.add_argument('--batchsize',
                        type=int,
                        default=20,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu',
                        type=int,
                        default=-1,
                        help='GPU ID, negative value indicates CPU')
    parser.add_argument('--out',
                        default='trainer_output',
                        help='Output directory of trainer')
    parser.add_argument('--val_batchsize',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    args = parser.parse_args()

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)
    # TODO: initmodel

    # use the GPU selected by id
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # build datasets from paths
    label_handler = LabelHandler(args.label_names)
    # builder = LabeledImageDatasetBuilder(args.paths, label_handler)
    # train_dataset, val_dataset = builder.get_labeled_image_dataset_split(args.training_splitsize)
    builder = SortedImageDatasetBuilder(args.paths, label_handler)
    train_dataset, val_dataset = builder.get_sorted_image_dataset_split(
        args.training_splitsize)

    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val_dataset,
                                                args.val_batchsize,
                                                repeat=False)

    # optimizer
    learning_rate = 0.01
    momentum = 0.9
    optimizer = chainer.optimizers.MomentumSGD(learning_rate, momentum)
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       args.out)
    trainer.extend(extensions.LogReport())
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=10))
    trainer.run()

    # save model
    output_file_path = '{0}/resnet_{1}_{2}.model'.format(
        args.out, args.batchsize, args.epoch)
    chainer.serializers.save_npz(output_file_path, predictor)
"""Generate keypoints and descriptions for template images and dump them.""" import cv2 as cv import os from os.path import join from tqdm import tqdm from src.classifier import Classifier from src.preprocessing import prepare_image from util.utils import CLASS_NAMES, FEATURE_DETECTORS, FEATURE_DESCRIPTORS, corner_case from util.constants import DATA_PATH if __name__ == '__main__': template_images = [(name, prepare_image(cv.imread(join(DATA_PATH, f"{name}.jpg")))) for name in CLASS_NAMES] for detector in FEATURE_DETECTORS: for descriptor in FEATURE_DESCRIPTORS: if corner_case(detector, descriptor): continue clf = Classifier(detector, descriptor) for cls_name, image in tqdm( template_images, desc=f"Dumping features for {detector}_{descriptor}"): clf.dump_features(cls_name, image)
import sys

from src.stockProcessor import StockProcessor
from src.models import Models
from src.newsProcessor import NewsProcessor
from src.classifier import Classifier

if __name__ == '__main__':
    print('load Stock chart data')
    base_file_path = sys.argv[1]
    stockCharts = StockProcessor(base_file_path)

    print('Input stock price interval in minutes - 5, 15, 30, 60, 240, 1440')
    time_interval = input()
    amazon_stock_prices, apple_stock_prices = stockCharts.loadDataForInterval(
        time_interval)

    # For training only
    document_vectors_amazon, document_vectors_apple = NewsProcessor(
        base_file_path).loadNewsArticles()

    # AMAZON
    classify = Classifier(base_file_path, 'amazon', time_interval,
                          amazon_stock_prices, document_vectors_amazon)
    classify.label_documents()

    # APPLE
    classify = Classifier(base_file_path, 'apple', time_interval,
                          apple_stock_prices, document_vectors_apple)
    classify.label_documents()

    amazon_model = Models(base_file_path, 'amazon', amazon_stock_prices,
                          time_interval)
    amazon_model.naive_bayes_classifier()
    amazon_model.SVM_classifier()
    amazon_model.DT_classifier()
    amazon_model.SVM_poly_classifier()
    amazon_model.Logistic_Regression11_classifier()
    amazon_model.Logistic_Regression12_classifier()
    amazon_model.KNN_classifier()
    amazon_model.SGDC_classifier()
    amazon_model.accounting_factor()
import os
import cv2 as cv
from os.path import join

from src.classifier import Classifier
from src.inference import draw_bboxes_with_classes
from src.preprocessing import prepare_image, extract_detection, IMG_WIDTH_SIZE
from util.constants import TEST_IMG_PATH, ORB, YOLO_CONFIDENCE, NMS_THRESHOLD
from src.object_detector import Yolo

TEMPLATE_IMGS = ["1uah.jpg", "2uah.jpg", "5uah.jpg", "10uah.jpg"]

if __name__ == '__main__':
    # Load the detector and classifier once instead of once per image.
    yolo = Yolo(confidence=YOLO_CONFIDENCE, nms_threshold=NMS_THRESHOLD)
    yolo.load_model()
    clf = Classifier(ORB)

    for test_image in os.listdir(join(TEST_IMG_PATH, "5uah_heads")):
        image = cv.imread(join(TEST_IMG_PATH, "5uah_heads", test_image))
        prepared_image = prepare_image(image.copy())
        img_for_viz = image.copy()
        # prepared_image = image

        yolo.load_data(prepared_image)
        layer_outputs = yolo.detect()
        bboxes = yolo.process_outputs(layer_outputs)

        draw_bboxes_with_classes(img_for_viz, bboxes, clf)
        cv.imshow("Detections", img_for_viz)
        cv.waitKey(0)
    cv.destroyAllWindows()
import os
import sys

from src.classifier import Classifier

import tensorflow as tf

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def print_usage_and_exit():
    print("Usage: " + sys.argv[0] +
          " <fileClass1> <fileClass2> <corpusDir> <modelFileName>")
    sys.exit(0)


if __name__ == '__main__':
    if len(sys.argv) != 5:
        print_usage_and_exit()
    classifier = Classifier(class1_file=sys.argv[1], class2_file=sys.argv[2])
    classifier.train(sys.argv[3], sys.argv[4])
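# Hypothetical invocation matching the usage string printed above; the
# script and file names are placeholders.
#
#   python train_classifier.py class1.txt class2.txt ./corpus model.out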
from os.path import join

import cv2 as cv

from src.classifier import Classifier
from src.detector_descriptor import DetectorDescriptor
from util.constants import DATA_PATH, TEST_IMG_PATH, ORB

if __name__ == '__main__':
    template_img = cv.imread(join(DATA_PATH, "1uah_heads.jpg"))
    test_img = cv.imread(join(TEST_IMG_PATH, "1uah_heads", "top.jpeg"))

    detector_descriptor = DetectorDescriptor(ORB, ORB)
    template_keypoints, template_descriptions = \
        detector_descriptor.detect_describe(template_img)
    test_keypoints, test_descriptions = detector_descriptor.detect_describe(
        test_img)

    bf_matcher = cv.BFMatcher_create(cv.NORM_HAMMING, True)
    matches = bf_matcher.match(template_descriptions, test_descriptions)

    clf = Classifier()
    print(len(matches))
    best_matches = clf.ransac_outlier_rejection(template_keypoints,
                                                test_keypoints, matches)
    print(len(best_matches))
    return image


if __name__ == '__main__':
    parser = _parse_args()
    args = parser.parse_args()

    if args.image is None and args.video is None:
        raise ValueError(
            "Neither image nor video arguments are provided! Please,"
            " specify either the path to an image or video or both.")

    # TODO: supply nms threshold and confidence for YOLO as script arguments
    object_detector = Yolo(args.object_detector)
    classifier = Classifier(args.feature_detector, args.feature_descriptor,
                            args.feature_matcher,
                            args.outlier_rejection_method)

    if args.image:
        if not isfile(args.image):
            raise ValueError(
                "The provided path to an image does not exist! "
                "Please, provide a path relative to the project:) "
                "The directory with test images is test_images/")
        image = cv.imread(args.image)
        # Image resizing is not necessary,
        # OpenCV's YOLO interface takes care of that.
        prepared_image = prepare_image(image.copy(), resize=False)
        img_for_viz = image.copy()
        object_detector.load_data(prepared_image)
# Date: 11/04/2018
# Description: Main file

import os

from src.classifier import Classifier
from werkzeug.utils import secure_filename
from flask import Flask, render_template, request, session, jsonify, redirect, url_for, flash

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['SECRET_KEY'] = os.urandom(0x200)

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
PATH_TO_MODEL = 'ai/trained_models/cat_dog_3.h5'

classifier = Classifier(PATH_TO_MODEL)


@app.route('/')
def index():
    return render_template('index.html')


def allowed_file(filename):
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['POST'])
def upload():
    if 'file' not in request.files:
        flash('Upload an image')
set_reproducible()

n_runs = 5
if len(sys.argv) > 1:
    n_runs = int(sys.argv[1])

datadir = "../data/"
trainfile = datadir + "traindata.csv"
devfile = datadir + "devdata.csv"
testfile = None
# testfile = datadir + "testdata.csv"

# Runs
start_time = time.perf_counter()
devaccs = []
testaccs = []
for i in range(1, n_runs + 1):
    classifier = Classifier()
    devacc, testacc = train_and_eval(classifier, trainfile, devfile,
                                     testfile, i)
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))

print('\nCompleted %d runs.' % n_runs)
total_exec_time = (time.perf_counter() - start_time)

print("Dev accs:", devaccs)
print("Test accs:", testaccs)
print()
print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
print("\nExec time: %.2f s. ( %d per run )" %
      (total_exec_time, total_exec_time / n_runs))
def clasify(self):
    # Method name kept as-is since callers may reference it.
    print("step 6. classifier")
    from src.classifier import Classifier
    classifier = Classifier()
    classifier.parse_arguments(None)
    classifier.main()
client = MongoClient('localhost', 27017)
db = client['tcc']
collection = db['tweets']
events = collection.find({"classified": True})

# Forward slash keeps the pickle path portable (the original used a
# Windows-only backslash).
dataset = pickle.load(
    open(conf.project_path + 'data/dataset_preprocessed.pickle', 'rb'))
dataset = [x for x in dataset if len(x[0].split()) > 0]

# positives = [x for x in dataset if x[1] == 'positive']
# negatives = [x for x in dataset if x[1] == 'negative']
#
# dataset = negatives + positives[0:len(negatives)]

vectorizer = TfidfVectorizer(min_df=0.0,
                             max_df=1.0,
                             sublinear_tf=True,
                             use_idf=True)
classifier = SVC(kernel='rbf', C=2.9, gamma=1)

p = Preprocessor()
clf = Classifier(vectorizer=vectorizer, classifier=classifier)

x_train = [x[0] for x in dataset]
y_train = [x[1] for x in dataset]
clf.train(x_train=x_train, y_train=y_train)

# The empty triple-quoted string is a placeholder for the text to classify.
print(clf.predict(p.preprocess('''''')))

# for ev in events:
#     print(ev['text'].replace('\n', '') + ' --> ' + clf.predict(p.preprocess(ev['text'])))