Code Example #1
def run():
    start_time = time()
    data_cand, data_part, full_data = load_data()
    # numeric_parties  = full_data.party.map(party_map)
    train_c, test_c = train_test_split(data_cand, test_size=0.2)
    train_p, test_p = train_test_split(data_part, test_size=0.2)
    candidatos_clf = Classifier(train_c.drop('candidatoId', axis=1), train_c.candidatoId)
    partidos_clf = Classifier(train_p.drop('idPartido', axis=1), train_p.idPartido)

    cand_solver = candidatos_clf._predict()
    n_cand, pca_cand_solver = candidatos_clf._pca()
    part_solver = partidos_clf._predict()
    n_part, pca_part_solver = partidos_clf._pca()

    cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, cand_solver)
    pca_cand_pred = candidatos_clf.classify(test_c.drop('candidatoId', axis=1), test_c.candidatoId, pca_cand_solver, n_cand)
    part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, part_solver)
    pca_part_pred = partidos_clf.classify(test_p.drop('idPartido', axis=1), test_p.idPartido, pca_part_solver, n_part)

    output_results(f'CANDIDATOS | {cand_solver}', test_c.candidatoId, cand_pred)
    output_results(f'CANDIDATOS_PCA | {pca_cand_solver}, {n_cand}', test_c.candidatoId, pca_cand_pred)
    output_results(f'PARTIDOS | {part_solver}', test_p.idPartido, part_pred)
    output_results(f'PARTIDOS_PCA | {pca_part_solver}, {n_part}', test_p.idPartido, pca_part_pred)
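    # Map candidate-level targets and predictions to their parties so party
    # accuracy can also be scored from the candidate classifier's output.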
    cand_part_target, cand_part_pred = candidato_mapper(test_c.candidatoId, cand_pred)
    output_results(f'PARTIDOS CON CANDIDATO | {cand_solver}', cand_part_target, cand_part_pred)

    cm_cand = ConfusionMatrix(test_c.candidatoId, cand_pred)
    cm_pca_cand = ConfusionMatrix(test_c.candidatoId, pca_cand_pred)
    cm_part = ConfusionMatrix(test_p.idPartido, part_pred)
    cm_pca_part = ConfusionMatrix(test_p.idPartido, pca_part_pred)
    cm_cand_part = ConfusionMatrix(cand_part_target, cand_part_pred)

    elapsed_time = time() - start_time
    print(f'----------------------------------------')
    print(f'TOTAL TIME: {datetime.timedelta(seconds=elapsed_time)}')

    result = {
        'data': {
            'candidatos': (train_c, test_c),
            'partidos': (train_p, test_p),
        },
        'results': {
            'candidatos': (test_c.candidatoId, cand_pred),
            'candidatos_pca': (test_c.candidatoId, pca_cand_pred),
            'partidos': (test_p.idPartido, part_pred),
            'partidos_pca': (test_p.idPartido, pca_part_pred),
            'partidos_candidatos': (cand_part_target, cand_part_pred)
        },
        'matrices': {
            'candidatos': cm_cand,
            'candidatos_pca': cm_pca_cand,
            'partidos': cm_part,
            'partidos_pca': cm_pca_part,
            'partidos_candidatos': cm_cand_part
        }
    }
    return result
Code Example #2
def train(cfg):
    cnn = Classifier(input_shape=cfg["shape"], batch_size=cfg["batch"])
    if cfg["model"] == "AlexNet":
        cnn.set_default_AlexNet_Model()
    else:
        arch = cfg["arch"]
        cnn.set_custom_model(conv_layers=arch["conv"],
                             dense_layers=arch["dense"])
    if cfg["random"]:
        cnn.random_boost = True

    if cfg["pre_trained"] is not None:
        dir_model = glob(
            os.path.join(st.DIR_LOG, "RUNNING",
                         "*" + cfg["pre_trained"] + "*"))
        assert len(dir_model) == 1
        # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt")
        # cnn.model.load_weights(model_file)
        cnn.model.load_weights(os.path.join(dir_model[0], "cp-model.ckpt"))

    val_data = cnn.get_data_generator(st.DATA_VALIDATION_DIR,
                                      dip_filter=cfg["dip"])
    train_data = cnn.get_data_generator(st.DATA_TRAIN_DIR,
                                        dip_filter=cfg["dip"])
    cnn.set_log_name(cfg)
    cnn.model_select = cfg["msk"]
    cnn.lr["lr"] = cfg["lr"]
    cnn.lr["decay_steps"] = cfg["dc_st"]
    cnn.lr["decay_rate"] = cfg["dc"]
    cnn.lr["momentum"] = cfg["mt"]

    cnn.callbacks.append(
        TensorBoard(log_dir=os.path.join(st.DIR_LOG, "RUNNING", cnn.log_name)))
    cnn.train(gen_train=train_data, gen_val=val_data, epochs=cfg["epochs"])
Code Example #3
    def run_models(self):
        """run all the models defined in confi file
            Save model in pkl format,
            Save scores in csv files.
        
        Params:
            CONFIG (dict): configurations of task,
            SEARCH_SPACE (dict): hyperparameters of models,
            df (pandas dataframe): data frame read from raw data,
            model: Classifier instance,
            all_metrics (list): list to save all model training results.
        """

        CONFIG = self.load_config()
        DATA_PATH = self.DATA_PATH
        SEARCH_SPACE = self.SEARCH_SPACE
        df = self.load_data()
        model = Classifier(CONFIG, SEARCH_SPACE, df)
        all_metrics = []

        for model_name in self.CONFIG['RUN_MODELS']:
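            # Train each configured model, collect its metrics and pickle the best estimator.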
            model_name, grid_cv, best_model, X_test, y_test = model.train_model(
                model_name)
            metrics = model.gen_metrics(model_name, grid_cv, best_model,
                                        X_test, y_test)
            all_metrics.append(metrics)

            with open(
                    f'{self.CONFIG["MODEL_DIR"]}/{model_name}_best_model.pkl',
                    'wb') as f:
                pickle.dump(best_model, f)

        df_all_metrics = pd.concat(all_metrics)
        df_all_metrics.to_csv(
            f'{self.CONFIG["REPORT_DIR"]}/models_metrics.csv', index=False)
Code Example #4
def eval_model(cfg):
    cnn = Classifier(input_shape=cfg["shape"])
    cnn.set_default_AlexNet_Model()

    # if cfg["pre_trained"] is not None:
    #     dir_model = glob(os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*"))
    #     assert len(dir_model) == 1, "the len of chekpoint list is {}".format(len(dir_model))
    #     cnn.model.load_weights(os.path.join(dir_model[0], "cp-model.ckpt"))

    assert cfg["pre_trained"] is not None
    dir_model = glob(
        os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*"))
    assert len(dir_model) == 1, "the length of the checkpoint list is {}".format(
        len(dir_model))
    model_file = os.path.join(dir_model[0], "cp-model.ckpt")
    # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt")
    cnn.model.load_weights(model_file)

    dt = Data()
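    # Per-class counters: correct predictions and total samples for each of the 6 classes.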
    sum_pred = np.zeros(6)
    length_data = np.zeros(6)
    for idx in range(dt.length['train']):
        image_file, label_gt = dt.get_image_file('train',
                                                 idx,
                                                 encode_label=True)
        label_est = cnn.model_predict(image_file, cfg["dip"])
        length_data[label_gt] += 1
        if label_est == label_gt:
            sum_pred[label_gt] += 1

        acc = sum_pred / length_data
        print("{} - Acc:  {}".format((label_gt, label_est), acc))
Code Example #5
def eval_model(cfg):
    cnn = Classifier(input_shape=cfg["shape"])
    cnn.set_default_AlexNet_Model()

    assert cfg["pre_trained"] is not None
    dir_model = glob(
        os.path.join(st.DIR_LOG, cfg["log"], "*" + cfg["pre_trained"] + "*"))
    assert len(dir_model) == 1, "the length of the checkpoint list is {}".format(
        len(dir_model))
    model_file = os.path.join(dir_model[0], "cp-model.ckpt")
    # model_file = os.path.join(dir_model[0], "'model-086-1.000000.ckpt")
    cnn.model.load_weights(model_file)

    dt = red_csv_file()
    filename = os.path.join(
        st.DATA_DIR, "eval_test_data_{}_{}.csv".format(cfg["pre_trained"],
                                                       cfg["extra"]))
    print(filename)
    with open(filename, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(("ID", "Label"))
    for idx in range(len(dt)):
        image_file = os.path.join(st.DATA_TEST_DIR, dt[idx])
        assert os.path.isfile(image_file)
        label_est = cnn.model_predict(image_file, cfg["dip"])

        with open(filename, 'a') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow((dt[idx], label_est))

        print("{0:} - progress:  {1:.1f}".format(idx, 100 * idx / len(dt)))
Code Example #6
File: run.py Project: guillermoap/aa-pract3
def parte_a(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    classifier = Classifier(ID3(train, numeric_attributes))
    classifier.train()
    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        predicted.append(
            classifier.classify(elem.drop(labels=['clazz'])))
    output_results(title='PARTE A', actual=actual, predicted=predicted)
Code Example #7
def run_train():
    parser = ArgumentParser()
    parser.add_argument('--settings', type=str, required=True,
                        help='Path to the training settings ini file')

    settings = configparser.ConfigParser()
    settings.read(parser.parse_args().settings)

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)

    # use selected gpu by id
    gpu = settings.getint('hardware', 'gpu')
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    label_handler, train_dataset, val_dataset = _create_datasets(settings['input_data'])

    train_iter = chainer.iterators.SerialIterator(train_dataset, settings.getint('trainer', 'batchsize'))
    val_iter = chainer.iterators.SerialIterator(val_dataset, settings.getint('trainer', 'batchsize'), repeat=False)

    output_dir = '{}/training_{}_{}'.format(settings.get('output_data', 'path'), settings.get('trainer', 'epochs'), settings.get('optimizer', 'optimizer'))

    # optimizer
    optimizer = _create_optimizer(settings['optimizer'])
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = chainer.training.Trainer(updater, (settings.getint('trainer', 'epochs'), 'epoch'), output_dir)

    trainer.extend(extensions.LogReport())
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    evaluator = Evaluator(val_iter, model, device=gpu)
    trainer.extend(evaluator)
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))

    trainer.run()

    # save model
    output_file_path = '{0}/resnet.model'.format(output_dir)
    chainer.serializers.save_npz(output_file_path, predictor)

    meta_output = {
        'trainer': settings._sections['trainer'],
        'optimizer': settings._sections['optimizer'],
        'train_data': train_dataset.get_meta_info(label_handler),
        'validation_data': val_dataset.get_meta_info(label_handler),
    }

    with open('{0}/meta.json'.format(output_dir), 'w') as f:
        json.dump(meta_output, f, indent=4)
Code Example #8
File: main.py Project: AlyAbdellatif/Keynder
def main():
    directory = ''
    match = False
    inject = False
    db_name = 'certs.db'
    output = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:mib:o:',
                                   ['help', 'directory=', 'match', 'inject',
                                    'database=', 'output='])
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print('printing help')
            usage()
        elif opt in ('-d', '--directory'):
            directory = arg
            if(not directory.endswith('/')):
                directory += '/'
        elif opt in ('-m', '--match'):
            match = True
        elif opt in ('-i', '--inject'):
            inject = True
        elif opt in ('-b', '--database'):
            db_name = arg
        elif opt in ('-o', '--output'):
            output = True
            outputfile = arg
            if(outputfile == ''):
                outputfile = 'matches.txt'
    if directory:
        print("Grabbing data...")
        grabber = Grabber(directory)
        classifier = Classifier(grabber)
        print("Classifying...")
        classifier.classify()
        certs, keys = classifier.get_data()
    db = Database(db_name)
    if inject:
        try:
            print("Creating the database")
            db.create_db()
        except:
            print("Database already exists.")
        print("Injecting data into the database...")
        db.insert_keys(keys)
        db.insert_certs(certs)
    if match:
        print("Matching data...")
        db.match_cert_key()
    if output:
        db.export_matches(outputfile)
Code Example #9
File: run.py Project: guillermoap/aa-pract3
def parte_b(train, test, numeric_attributes=IRIS_NUMERIC_ATTRIBUTES):
    classifiers = []
    classes = train.clazz.unique()
    idx = 1
    for clazz in classes:
        classifier = Classifier(
            ID3(train, numeric_attributes, specific_class=clazz))
        classifier.train()
        classifiers.append(classifier)
        idx += 1

    actual = []
    predicted = []
    for _, elem in test.iterrows():
        actual.append(elem.clazz)
        predicted.append(
            vote_classify(classifiers, elem.drop(labels=['clazz'])))
    output_results(title='PARTE B', actual=actual, predicted=predicted)
Code Example #10
    def __init__(self):
        # Classifier
        self.c = Classifier()
        self.c.load_classifier(TRAINING_SET)
        self.code = ''

        # Correios tracker
        self.t = Tracker()

        # Answers
        with open(GREETINGS_SET) as f:
            self.greetings_responses = f.readlines()
        with open(COMPLAIN_SET) as f:
            self.complain_responses = f.readlines()
        with open(HELP_SET) as f:
            self.help_responses = f.readlines()
        with open(UNKNOWN_SET) as f:
            self.unknown_responses = f.readlines()
        with open(WRONG_CODE_SET) as f:
            self.wrong_code_responses = f.readlines()
        with open(TRACKING_SET) as f:
            self.tracking_responses = f.readlines()
        with open(QUIT_SET) as f:
            self.quit_responses = f.readlines()
        with open(GOODBYE_SET) as f:
            self.goodbye_responses = f.readlines()
        with open(RESULTS_SET) as f:
            results_responses = f.readlines()
            self.offline_response = results_responses[0]
            self.fail_response = results_responses[1]
            self.sent_response = results_responses[2]
            self.forwarded_response = results_responses[3]
            self.delivering_response = results_responses[4]
            self.arrived_response = results_responses[5]

        # If it is expecting something
        self.waiting_for = ''
        self.attempts = 0
Code Example #11
def run(save_loc="cnn/ONN",
        loss=Loss.MSE,
        OD=10,
        gradient=Gradient.APPROXIMATE):

    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################

    dataset = Dataset.MNIST

    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])

    batch_size_train = 64
    batch_size_test = 1000

    ####################################################
    # Configure Networks.
    ####################################################

    sat_abs_nl_args = {
        'I_sat': 1,
        'OD': OD,
        'encoding': Encoding.AMPLITUDE,
        'gradient': gradient
    }

    SANL = lambda: SatAbsNL(**sat_abs_nl_args)
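    # SANL is a factory: each call creates a fresh SatAbsNL activation instance.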

    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    net_args = {
        'n_ch_conv': [32, 64],
        'kernel_size_conv': [5, 5],
        'n_in_fc': 1024,
        'n_hid_fc': [128],
        'activation_conv': [SANL, SANL],
        'activation_fc': SANL,
        'dropout': lambda: nn.Dropout(0.4),
        'conv_args': {
            'stride': 1,
            'padding': 0,
            'bias': False
        },
        'pool_conv': lambda: nn.AvgPool2d(kernel_size=2, stride=2),
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'bias_fc': False,
        'output': output
    }

    ####################################################
    # Train classifiers
    ####################################################

    n_seeds = 5

    losses = {}
    corrects = {}
    valid_scores = {}

    for i in range(n_seeds):
        lab = 'seed{}'.format(i)

        network = ConvNet(**net_args)

        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=False,
            max_value=15 if OD > 10 else 5,
            get_validation=True)

        classifier = Classifier(
            network,
            train_loader,
            test_loader,
            n_epochs=30 if dataset == Dataset.MNIST else 40,
            learning_rate=5e-4,
            init_weight_mean=0.,
            init_weight_std=0.01,
            init_conv_weight_std=0.1,
            loss=loss_str,
            weight_range=None,
            weight_normalisation=weight_norm.NONE,
            log_interval=25,
            n_test_per_epoch=0,
            save_path=os.path.join(save_loc, lab))

        train_losses, test_correct = classifier.train()

        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################

        classifier.load(classifier.network_save_path)

        valid_loss, valid_correct = classifier.validate(validation_loader)

        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)

        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        with open(validation_save_path, 'wb+') as output:
            pickle.dump(np.array([valid_loss, valid_correct]), output,
                        pickle.HIGHEST_PROTOCOL)
            print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################

    fig_fname = os.path.join(save_loc, "training_performance")

    with plt.style.context('seaborn-paper', after_reset=True):

        fig, (ax1, ax2) = plt.subplots(1,
                                       2,
                                       figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})

        window = 25
        avg_mask = np.ones(window) / window

        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")

        for lab, data in corrects.items():
            ax2.plot(data[:, 0],
                     data[:, 1] / len(test_loader.dataset),
                     label=lab)
            print("{}: Best score {}/{}".format(lab, np.max(data),
                                                len(test_loader)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")

        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
Code Example #12
File: pair_plot.py Project: 42Projects/DSLR
    def __init__(self, file_name: str):
        self.classifier = Classifier(file_name)
        self.plot_pair()
Code Example #13
    def __init__(self, file_name: str, features: list):
        self.classifier = Classifier(file_name, filter=False)
        self.plotter = Plotter(file_name, features, self.house_array_function)
        self.plotter.plot_scatter()
Code Example #14
dataset = [x for x in dataset if len(x[0].split()) > 0]

dataset = list(set(dataset))

classifiers = [(SVC(kernel='rbf', C=2.9, gamma=1), 'svm_rbf'),
               (SVC(kernel='linear'), 'svm_linear')]
               # (KNeighborsClassifier(), 'knn'),
               # (MultinomialNB(), 'naive_bayes'),
               # (Perceptron(), 'perceptron')]

vectorizers = [(TfidfVectorizer(min_df=0.0, max_df=1.0, sublinear_tf=True, use_idf=True), 'tfidf')]
               # (CountVectorizer(min_df=0.0, max_df=1.0), 'count'),
               # (HashingVectorizer(), 'hash')]


c = Classifier(classifier=classifiers[1][0], vectorizer=vectorizers[0][0])
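# classifiers[1][0] is the linear SVC; vectorizers[0][0] is the TF-IDF vectorizer.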
x = list(map(lambda a: a[0], dataset))
y = list(map(lambda a: a[1], dataset))

c.train(x_train=x, y_train=y)

from sklearn.pipeline import make_pipeline
import eli5

pipe = make_pipeline(vectorizers[0][0], classifiers[1][0])
pipe.fit(x, y)


with open('C:/Users/Gustavo/Desktop/batata.html', 'wb') as file:
    file.write(eli5.show_weights(classifiers[1][0], vec=vectorizers[0][0], top=1000).data.encode('utf8'))
Code Example #15
    # cv2.waitKey(0)
    return image


video_capture = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX

feelings_faces = []
for index, emotion in enumerate(EMOTIONS):
    feelings_faces.append(cv2.imread('../emojis/' + emotion + '.png', -1))

while True:
    # Capture frame-by-frame
    ret, frame = video_capture.read()
    # Predict result with network
    result = Classifier(format_image(frame)).data[0]

    # Draw face in frame
    # for (x,y,w,h) in faces:
    #   cv2.rectangle(frame, (x,y), (x+w,y+h), (255,0,0), 2)

    # Write results in frame
    if result is not None:
        for index, emotion in enumerate(EMOTIONS):
            cv2.putText(frame, emotion, (10, index * 20 + 20),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (130, index * 20 + 10),
                          (130 + int(result[index] * 100),
                           (index + 1) * 20 + 4), (255, 0, 0), -1)

        # Overlay the emoji for the most probable emotion
        face_image = feelings_faces[np.argmax(result)]
Code Example #16
def run(save_loc="mlp/ANN", n_hid=2, loss=Loss.CCE, activation=nn.ReLU):

    print("\n----- Running {} -----".format(os.path.basename(__file__)))

    ####################################################
    # Configure datasets.
    ####################################################

    dataset = Dataset.MNIST

    if dataset != Dataset.MNIST:
        save_loc += "_{}".format(str(dataset).split(".")[-1])

    batch_size_train = 64
    batch_size_test = 1000

    input_scaling = 1

    ####################################################
    # Configure Networks.
    ####################################################
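    # Choose the output layer and loss string for the requested loss
    # (LogSoftmax pairs with the negative log-likelihood loss).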

    if loss == Loss.MSE:
        output = None
        loss_str = "mse"
    elif loss == Loss.CCE:
        output = lambda: nn.LogSoftmax(-1)
        loss_str = "nll"
    else:
        raise ValueError("Unrecognised loss :", loss)

    default_net_args = {
        'n_hid': [128] * n_hid,
        'n_in': 784,
        'n_out': 10 if dataset != Dataset.EMNIST else 47,
        'activation': activation,
        'output': output,
    }

    ####################################################
    # Train classifiers
    ####################################################

    n_seeds = 5

    losses = {}
    corrects = {}
    valid_scores = {}

    for i in range(n_seeds):

        lab = 'seed{}'.format(i)

        network = LinNet(**default_net_args)

        train_loader, test_loader, validation_loader = get_dataset_loaders(
            dataset=dataset,
            train_batch=batch_size_train,
            test_batch=batch_size_test,
            unroll_img=True,
            max_value=input_scaling,
            get_validation=True)

        classifier = Classifier(network,
                                train_loader,
                                test_loader,
                                n_epochs=50,
                                learning_rate=5e-4,
                                init_weight_mean=0,
                                init_weight_std=0.1,
                                loss=loss_str,
                                weight_range=None,
                                weight_normalisation=weight_norm.NONE,
                                log_interval=25,
                                n_test_per_epoch=0,
                                save_path=os.path.join(save_loc, lab))

        train_losses, test_correct = classifier.train()

        losses[lab] = train_losses
        corrects[lab] = test_correct

        ####################################################
        # Validation
        ####################################################

        classifier.load(classifier.network_save_path)

        valid_loss, valid_correct = classifier.validate(validation_loader)

        print("Validation accuracy : {:.2f}%".format(
            100. * valid_correct / len(validation_loader.dataset)))
        valid_scores[lab] = 100. * valid_correct / len(
            validation_loader.dataset)

        validation_save_path = os.path.join(classifier.save_path,
                                            "validation_score.pkl")
        with open(validation_save_path, 'wb+') as output:
            pickle.dump(np.array([valid_loss, valid_correct]), output,
                        pickle.HIGHEST_PROTOCOL)
            print('Validation scores saved to {}'.format(validation_save_path))

    print("Validation scores are:")
    for lab, score in valid_scores.items():
        print("\t{} : {:.2f}%".format(lab, score))

    ####################################################
    # Plot results
    ####################################################

    fig_fname = os.path.join(save_loc, "training_performance")

    with plt.style.context('seaborn-paper', after_reset=True):

        fig, (ax1, ax2) = plt.subplots(1,
                                       2,
                                       figsize=(7, 2.5),
                                       gridspec_kw={'wspace': 0.3})

        window = 25
        avg_mask = np.ones(window) / window

        for lab, data in losses.items():
            ax1.plot(np.convolve(data[:, 0], avg_mask, 'valid'),
                     np.convolve(data[:, 1], avg_mask, 'valid'),
                     label=lab,
                     linewidth=0.75,
                     alpha=0.8)
        ax1.legend()
        ax1.set_xlabel("Epoch")
        ax1.set_ylabel("Losses")

        for lab, data in corrects.items():
            ax2.plot(data[:, 0],
                     data[:, 1] / len(test_loader.dataset),
                     label=lab)
            print("{}: Best score {}/{}".format(lab, np.max(data),
                                                len(test_loader)))
        ax2.legend()
        ax2.set_xlabel("Epoch")
        ax2.set_ylabel("Accuracy")

        plt.savefig(fig_fname + ".png", bbox_inches='tight')
        plt.savefig(fig_fname + ".pdf", bbox_inches='tight')
Code Example #17
File: run_train.py Project: MLPRA/embedded_resnet
def run_train():
    parser = ArgumentParser()
    parser.add_argument(
        '--paths',
        type=str,
        nargs='+',
        required=True,
        help='Root paths of folders that contain images and pascal voc files')
    parser.add_argument('--label_names',
                        type=str,
                        required=True,
                        help='Path to label names file')
    parser.add_argument('--training_splitsize',
                        type=float,
                        default=0.9,
                        help='Splitsize of training data')
    parser.add_argument('--batchsize',
                        type=int,
                        default=20,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='Numbers of epochs to train')
    parser.add_argument('--gpu',
                        type=int,
                        default=-1,
                        help='GPU ID, negative value indicates CPU')
    parser.add_argument('--out',
                        default='trainer_output',
                        help='Output directory of trainer')
    parser.add_argument('--val_batchsize',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    args = parser.parse_args()

    # create model
    predictor = ResNet50Layers(None)
    model = Classifier(predictor)

    # TODO: initmodel

    # use selected gpu by id
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # build datasets from paths
    label_handler = LabelHandler(args.label_names)

    # builder = LabeledImageDatasetBuilder(args.paths, label_handler)
    # train_dataset, val_dataset = builder.get_labeled_image_dataset_split(args.training_splitsize)
    builder = SortedImageDatasetBuilder(args.paths, label_handler)
    train_dataset, val_dataset = builder.get_sorted_image_dataset_split(
        args.training_splitsize)

    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val_dataset,
                                                args.val_batchsize,
                                                repeat=False)

    # optimizer
    learning_rate = 0.01
    momentum = 0.9
    optimizer = chainer.optimizers.MomentumSGD(learning_rate, momentum)
    optimizer.setup(model)

    # trainer
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       args.out)

    trainer.extend(extensions.LogReport())
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=10))

    trainer.run()

    # save model
    output_file_path = '{0}/resnet_{1}_{2}.model'.format(
        args.out, args.batchsize, args.epoch)
    chainer.serializers.save_npz(output_file_path, predictor)
Code Example #18
"""Generate keypoints and descriptions for template images and dump them."""
import cv2 as cv
import os
from os.path import join

from tqdm import tqdm

from src.classifier import Classifier
from src.preprocessing import prepare_image
from util.utils import CLASS_NAMES, FEATURE_DETECTORS, FEATURE_DESCRIPTORS, corner_case
from util.constants import DATA_PATH

if __name__ == '__main__':
    template_images = [(name,
                        prepare_image(cv.imread(join(DATA_PATH,
                                                     f"{name}.jpg"))))
                       for name in CLASS_NAMES]
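    # Cache template features on disk for every supported (detector, descriptor) pair.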

    for detector in FEATURE_DETECTORS:
        for descriptor in FEATURE_DESCRIPTORS:
            if corner_case(detector, descriptor):
                continue
            clf = Classifier(detector, descriptor)
            for cls_name, image in tqdm(
                    template_images,
                    desc=f"Dumping features for {detector}_{descriptor}"):
                clf.dump_features(cls_name, image)
Code Example #19
import sys

from src.stockProcessor import StockProcessor
from src.models import Models
from src.newsProcessor import NewsProcessor
from src.classifier import Classifier

if __name__ == '__main__':
    print('load Stock chart data')
    base_file_path = sys.argv[1]
    stockCharts = StockProcessor(base_file_path)
    print('Input stock price interval in minutes - 5, 15, 30, 60, 240, 1440')
    time_interval = input()
    amazon_stock_prices, apple_stock_prices = stockCharts.loadDataForInterval(time_interval)
    # For training only
    # document_vectors_amazon, document_vectors_apple = NewsProcessor(base_file_path).loadNewsArticles()
    # AMAZON
    # classify = Classifier(base_file_path, 'amazon', time_interval, amazon_stock_prices, document_vectors_amazon)
    # classify.label_documents()
    # APPLE
    # classify = Classifier(base_file_path, 'apple', time_interval, apple_stock_prices, document_vectors_apple)
    # classify.label_documents()
    amazon_model = Models(base_file_path, 'amazon', amazon_stock_prices, time_interval)
    amazon_model.naive_bayes_classifier()
    amazon_model.SVM_classifier()
    amazon_model.DT_classifier()
    amazon_model.SVM_poly_classifier()
    amazon_model.Logistic_Regression11_classifier()
    amazon_model.Logistic_Regression12_classifier()
    amazon_model.KNN_classifier()
    amazon_model.SGDC_classifier()
    amazon_model.accounting_factor()
Code Example #20
import os

import cv2 as cv
from os.path import join

from src.classifier import Classifier
from src.inference import draw_bboxes_with_classes
from src.preprocessing import prepare_image, extract_detection, IMG_WIDTH_SIZE
from util.constants import TEST_IMG_PATH, ORB, YOLO_CONFIDENCE, NMS_THRESHOLD
from src.object_detector import Yolo

TEMPLATE_IMGS = ["1uah.jpg", "2uah.jpg", "5uah.jpg", "10uah.jpg"]

if __name__ == '__main__':
    for test_image in os.listdir(join(TEST_IMG_PATH, "5uah_heads")):
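        # Note: the YOLO detector is re-created and its weights reloaded for every test image.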
        yolo = Yolo(confidence=YOLO_CONFIDENCE, nms_threshold=NMS_THRESHOLD)
        yolo.load_model()
        image = cv.imread(join(TEST_IMG_PATH, "5uah_heads", test_image))
        prepared_image = prepare_image(image.copy())
        img_for_viz = image.copy()
        # prepared_image = image
        yolo.load_data(prepared_image)
        layer_outputs = yolo.detect()
        bboxes = yolo.process_outputs(layer_outputs)
        clf = Classifier(ORB)
        draw_bboxes_with_classes(img_for_viz, bboxes, clf)
        cv.imshow("Detections", img_for_viz)
        cv.waitKey(0)
    cv.destroyAllWindows()
Code Example #21
File: train.py Project: CindyAloui/MasComp2
import os
import sys
from src.classifier import Classifier
import tensorflow as tf

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def print_usage_and_exit():
    print("Usage: " + sys.argv[0] +
          " <fileClass1> <fileClass2> <corpusDir> <modelFileName>")
    sys.exit(0)


if __name__ == '__main__':
    if len(sys.argv) != 5:
        print_usage_and_exit()
    classifier = Classifier(class1_file=sys.argv[1], class2_file=sys.argv[2])
    classifier.train(sys.argv[3], sys.argv[4])
Code Example #22
from os.path import join

import cv2 as cv

from src.classifier import Classifier
from src.detector_descriptor import DetectorDescriptor
from util.constants import DATA_PATH, TEST_IMG_PATH, ORB

if __name__ == '__main__':
    template_img = cv.imread(join(DATA_PATH, "1uah_heads.jpg"))
    test_img = cv.imread(join(TEST_IMG_PATH, "1uah_heads", "top.jpeg"))

    detector_descriptor = DetectorDescriptor(ORB, ORB)

    template_keypoints, template_descriptions = detector_descriptor.detect_describe(
        template_img)
    test_keypoints, test_descriptions = detector_descriptor.detect_describe(
        test_img)

    bf_matcher = cv.BFMatcher_create(cv.NORM_HAMMING, True)
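    # Hamming distance with cross-check enabled suits ORB's binary descriptors.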

    matches = bf_matcher.match(template_descriptions, test_descriptions)

    clf = Classifier()

    print(len(matches))

    best_matches = clf.ransac_outlier_rejection(template_keypoints,
                                                test_keypoints, matches)

    print(len(best_matches))
Code Example #23
    return image


if __name__ == '__main__':
    parser = _parse_args()
    args = parser.parse_args()

    if args.image is None and args.video is None:
        raise ValueError(
            "Neither image nor video arguments are provided! Please,"
            " specify either the path to an image or video or both.")

    # TODO: supply nms threshold and confidence for YOLO as script arguments
    object_detector = Yolo(args.object_detector)
    classifier = Classifier(args.feature_detector, args.feature_descriptor,
                            args.feature_matcher,
                            args.outlier_rejection_method)
    if args.image:
        if not isfile(args.image):
            raise ValueError(
                "The provided path to an image does not exist! "
                "Please, provide a path relative to the project:) "
                "The directory with test images is test_images/")

        image = cv.imread(args.image)
        # Image resizing is not necessary,
        # OpenCV's YOLO interface takes care of that.
        prepared_image = prepare_image(image.copy(), resize=False)
        img_for_viz = image.copy()

        object_detector.load_data(prepared_image)
Code Example #24
File: main.py Project: Bitwise-01/Cambrian
# Date: 11/04/2018
# Description: Main file

import os
from src.classifier import Classifier
from werkzeug.utils import secure_filename
from flask import Flask, render_template, request, session, jsonify, redirect, url_for, flash

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['SECRET_KEY'] = os.urandom(0x200)
ALLOWED_EXTENSIONS = set(['png', 'jpg', 'jpeg'])
PATH_TO_MODEL = 'ai/trained_models/cat_dog_3.h5'

classifier = Classifier(PATH_TO_MODEL)


@app.route('/')
def index():
    return render_template('index.html')


def allowed_file(filename):
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['POST'])
def upload():
    if 'file' not in request.files:
        flash('Upload an image')
Code Example #25
    set_reproducible()
    n_runs = 5
    if len(sys.argv) > 1:
        n_runs = int(sys.argv[1])
    datadir = "../data/"
    trainfile = datadir + "traindata.csv"
    devfile = datadir + "devdata.csv"
    testfile = None
    # testfile = datadir + "testdata.csv"

    # Runs
    start_time = time.perf_counter()
    devaccs = []
    testaccs = []
    for i in range(1, n_runs+1):
        classifier = Classifier()
        devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
        devaccs.append(np.round(devacc, 2))
        testaccs.append(np.round(testacc, 2))
    print('\nCompleted %d runs.' % n_runs)
    total_exec_time = (time.perf_counter() - start_time)
    print("Dev accs:", devaccs)
    print("Test accs:", testaccs)
    print()
    print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
    print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
    print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))



Code Example #26
File: runAndTest.py Project: jorge190588/facenet
    def clasify(self):
        print("step 6. classifier")
        from src.classifier import Classifier
        classifier = Classifier()
        classifier.parse_arguments(None)
        classifier.main()
Code Example #27
client = MongoClient('localhost', 27017)
db = client['tcc']
collection = db['tweets']

events = collection.find({"classified": True})

dataset = pickle.load(open(conf.project_path + r'data\dataset_preprocessed.pickle', 'rb'))
dataset = [x for x in dataset if len(x[0].split()) > 0]

# positives = [x for x in dataset if x[1] == 'positive']
# negatives = [x for x in dataset if x[1] == 'negative']
#
# dataset = negatives + positives[0:len(negatives)]

vectorizer = TfidfVectorizer(min_df=0.0, max_df=1.0, sublinear_tf=True, use_idf=True)
classifier = SVC(kernel='rbf', C=2.9, gamma=1)
p = Preprocessor()

clf = Classifier(vectorizer=vectorizer, classifier=classifier)

x_train = [x[0] for x in dataset]
y_train = [x[1] for x in dataset]

clf.train(x_train=x_train, y_train=y_train)

print(clf.predict(p.preprocess('''''')))

# for ev in events:
#     print(ev['text'].replace('\n', '') + ' --> ' + clf.predict(p.preprocess(ev['text'])))