Example #1
# the train()/classify() API used below matches NLTK's classifier (assumed import)
from nltk.classify import NaiveBayesClassifier

from helpers import listdir_fullpath
from features import featuresForMail


def trainNaiveBayes(spamdir, hamdir):
    allFeatures = []
    classifications = []

    # every mail in the spam directory is labelled True (spam)
    for f in listdir_fullpath(spamdir):
        allFeatures.append(featuresForMail(f))
        classifications.append(True)

    # every mail in the ham directory is labelled False (not spam)
    for f in listdir_fullpath(hamdir):
        allFeatures.append(featuresForMail(f))
        classifications.append(False)

    # train() takes an iterable of (featureset, label) pairs
    features = list(zip(allFeatures, classifications))
    classi = NaiveBayesClassifier.train(features)
    return classi
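The featuresForMail helper imported from the features module is never shown in these examples. Assuming the classifier is NLTK's, train() and classify() operate on feature dictionaries, so a minimal word-presence sketch of such a function could look like the following (an illustration only, not the project's actual implementation):

def featuresForMail(path):
    # hypothetical sketch: mark every word occurring in the mail as a present feature
    with open(path, errors='ignore') as mail:
        words = mail.read().lower().split()
    return {word: True for word in set(words)}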
Example #2
    def __init__(self,
                 image_path,
                 mask_path,
                 batch_size=1,
                 dim=(400, 400),
                 n_channels=3,
                 augmentation=None,
                 shuffle=True):

        self.image_path = image_path
        self.mask_path = mask_path
        self.image_filenames = helpers.listdir_fullpath(image_path)
        self.mask_filenames = helpers.listdir_fullpath(mask_path)
        self.batch_size = batch_size
        self.dim = dim
        self.out_dim = (
            (dim[0] // 32) * 32, (dim[1] // 32) * 32
        )  # output size must be a multiple of 32 (because of the structure of the NN)
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.augmentation = augmentation
        self.on_epoch_end()
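The constructor above ends by calling self.on_epoch_end(), which suggests a Keras Sequence-style batch generator; the remaining methods are not part of this excerpt. A minimal, simplified sketch of how such a class is typically completed, assuming it subclasses keras.utils.Sequence and ignoring the cropping/augmentation handled by the real project code:

import numpy as np
from tensorflow.keras.utils import Sequence  # or keras.utils.Sequence, depending on the Keras flavour
import matplotlib.image as mpimg


class DataGenerator(Sequence):
    # __init__ as in the excerpt above

    def __len__(self):
        # number of full batches per epoch
        return len(self.image_filenames) // self.batch_size

    def on_epoch_end(self):
        # reshuffle the sample order after every epoch when shuffle is enabled
        self.indexes = np.arange(len(self.image_filenames))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        # indices of the samples belonging to this batch
        batch = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        images = [mpimg.imread(self.image_filenames[i]) for i in batch]
        masks = [mpimg.imread(self.mask_filenames[i]) for i in batch]
        return np.stack(images), np.stack(masks)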
Example #3
    # load in the model checkpoints ###################################################################################
    encoder.load_model(args.enc_model_fpath)
    synthesizer = Synthesizer(args.syn_model_dir.joinpath("taco_pretrained"),
                              low_mem=args.low_mem)
    vocoder.load_model(args.voc_model_fpath)

    # get 10 samples from each speaker  ###############################################################################
    # load in samples from dev set
    data_root = os.path.join(dataset_root, 'dev-clean', 'LibriSpeech',
                             'dev-clean')
    speakers = list(np.random.choice(os.listdir(data_root), size=num_speaker))
    speaker_directories = [os.path.join(data_root, spk) for spk in speakers]

    spk_sample_dir_pair = [
        (spk, listdir_fullpath(spk_dir)[0])
        for spk, spk_dir in zip(speakers, speaker_directories)
    ]
    spk_sample_files_pair = [(spk, listdir_fullpath(smpl_dir))
                             for spk, smpl_dir in spk_sample_dir_pair]
    # filter out txt files
    spk_sample_files_pair = [(spk, [x for x in smpl_files if '.flac' in x])
                             for spk, smpl_files in spk_sample_files_pair]
    # randomly choose a few
    spk_sample_files_pair = [(spk,
                              np.random.choice(smpl_files,
                                               size=num_sample_per_spk))
                             for spk, smpl_files in spk_sample_files_pair]

    # encode the samples
    print('encoding samples, this might take a while...')
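The excerpt stops right before the actual encoding step. A sketch of how each selected .flac sample could be turned into a speaker embedding, assuming the encoder module follows the Real-Time-Voice-Cloning inference API (preprocess_wav, embed_utterance); the variable names below are illustrative only:

    # hypothetical continuation: embed every selected sample for each speaker
    spk_embedding_pair = []
    for spk, smpl_files in spk_sample_files_pair:
        embeddings = []
        for fpath in smpl_files:
            wav = encoder.preprocess_wav(fpath)               # load, resample, trim silence
            embeddings.append(encoder.embed_utterance(wav))   # fixed-size speaker embedding
        spk_embedding_pair.append((spk, np.stack(embeddings)))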
Example #4
def main():
    #########################################
    # input parser
    #########################################
    parser = argparse.ArgumentParser(
        description='Road Segmentation Challenge - EPFL.')

    group_data = parser.add_argument_group('model arguments')
    group_data.add_argument(
        '--model',
        type=str,
        default="unet",
        choices=["unet", "manunet4", "manunet5", "manunet6"],
        help='select the neural network model to use.')
    args = parser.parse_args()
    for arg in vars(args):
        print(arg, getattr(args, arg))
    modelname = args.model

    #########################################
    # generate data
    #########################################
    # 1: Divide the data
    data_division.make_folders()

    # 2 : Load entire images

    # Generators
    training_generator = generator.DataGenerator(
        constants.train_image_path,
        constants.train_mask_path,
        augmentation=helpers.aug_with_crop,
        batch_size=1,
    )
    validation_generator = generator.DataGenerator(constants.val_image_path,
                                                   constants.val_mask_path)

    #########################################
    # Model and training
    #########################################
    if modelname == "manunet4":
        model = unetManual()
        model.summary()
    elif modelname == "manunet5":
        model = unetManualFiveDeep()
        model.summary()
    elif modelname == "manunet6":
        model = unetManualSixDeep()
        model.summary()
    else:
        model = Unet(backbone_name='efficientnetb7',
                     encoder_weights='imagenet',
                     encoder_freeze=False)
        model.compile(optimizer='Adam',
                      loss=bce_jaccard_loss,
                      metrics=[sm.metrics.FScore(threshold=0.5)])

    history = model.fit_generator(training_generator,
                                  shuffle=True,
                                  epochs=30,
                                  workers=4,
                                  use_multiprocessing=True,
                                  validation_data=validation_generator,
                                  verbose=1,
                                  callbacks=[callbacks.lr_reducer])
    # plotting history
    #helpers.plot_training_history(history)

    # Save model
    model.save(constants.PATH + "saved_" + modelname + ".h5")
    print("Trained model was successfully saved on disk.")
    #model = load_model(constants.PATH + "saved_"+modelname+".h5")

    #########################################
    # Testing and make predictions
    #########################################
    test = helpers.listdir_fullpath(constants.IMAGE_PATH)
    os.makedirs(constants.MASK_TEST_PATH)

    for pth in test:
        name = os.listdir(pth)[0]
        path = os.path.join(pth, name)
        print(path)
        image = mpimg.imread(path) / 255
        if modelname in ("manunet4", "manunet5", "manunet6"):
            image = cv2.resize(
                image, dsize=(384, 384), interpolation=cv2.INTER_CUBIC
            )  # resize test images to (384,384) to feed to manual Unet
            prediction = cv2.resize(model.predict(np.expand_dims(
                image, axis=0)).reshape(384, 384),
                                    dsize=(608, 608),
                                    interpolation=cv2.INTER_CUBIC
                                    )  # resize the predictions to (608,608)
        else:
            prediction = model.predict(np.expand_dims(image, axis=0)).reshape(
                608, 608)
        mpimg.imsave(constants.MASK_TEST_PATH + name, prediction)
        print("Image " + name + " saved")

    submission_filename = constants.PATH + "test_final_" + modelname + ".csv"
    image_filenames = helpers.listdir_fullpath(constants.MASK_TEST_PATH)
    make_submission.masks_to_submission(submission_filename, *image_filenames)
Example #5
from helpers import listdir_fullpath
from features import featuresForMail
from sys import argv

# evaluate the classifier: train on the training directory, test on the held-out testing directory

if __name__ == '__main__':
    if len(argv) == 3:
        # argv[1] holds the training data, argv[2] the held-out testing data
        hamdir = argv[1] + '/ham'
        spamdir = argv[1] + '/spam'
        # trainNaiveBayes (Example #1) is assumed to be defined or imported in this script
        classi = trainNaiveBayes(spamdir, hamdir)

        total = 0
        good = 0

        # a ham mail is classified correctly when the classifier returns False (not spam)
        for f in listdir_fullpath(argv[2] + '/ham'):
            features = featuresForMail(f)
            total += 1
            if not classi.classify(features):
                good += 1

        # a spam mail is classified correctly when the classifier returns True
        for f in listdir_fullpath(argv[2] + '/spam'):
            features = featuresForMail(f)
            total += 1
            if classi.classify(features):
                good += 1

        print("accuracy: {:.2f}%".format(100.0 * good / total))

    else:
        print("usage: python applyCrossval.py <training directory> <testing directory>")