Example #1
def main(opt):
    # Check you can write to output path directory
    if not os.access(os.path.split(opt.model_path)[0], os.W_OK):
        raise OSError("--model_path is not a writeable path: %s" % opt.model_path)

    # Import dataset
    dataset = import_ham_dataset(dataset_root=opt.dataroot, training=opt.training,
                                 model_path=os.path.split(opt.model_path)[0])
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=True,
                                             num_workers=opt.workers)
    n_class = dataset.NUM_CLASS

    # Load InceptionV3 network
    model = models.inception_v3(pretrained=True)

    # Freeze all layers
    for params in model.parameters():
        params.requires_grad = False

    # Stage 2: keep all layers up to and including "Conv2d_4a_3x3" frozen; unfreeze everything after it
    ct = []
    for name, child in model.named_children():
        if "Conv2d_4a_3x3" in ct:
            for params in child.parameters():
                params.requires_grad = True
        ct.append(name)

    # Replace final layer
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, n_class)

    # Print network layer architecture
    for name, child in model.named_children():
        for name2, params in child.named_parameters():
            print(name, name2, 'trainable=%r' % params.requires_grad)

    if opt.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Using", device)
    model.to(device)  # Move model to device

    # Model training parameters
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(list(filter(lambda p: p.requires_grad, model.parameters())), lr=0.001, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Initiate TensorBoard logger
    logger_tensorboard = TensorboardLogger(log_dir=os.path.split(opt.model_path)[0])

    # # Training
    if opt.training:
        train_model(model, dataloader, len(dataset), criterion, optimizer, scheduler, device, opt.model_path,
                    logger_tensorboard, num_epochs=opt.epochs)

    # # Testing
    else:
        model.load_state_dict(torch.load(opt.model_path))
        test_model(model, dataloader, len(dataset), criterion, device)
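A minimal sketch of the same selective-unfreezing idea, expressed with a boolean flag instead of the accumulating `ct` list; the helper name below is an illustrative assumption, not part of the example above.
import torch.nn as nn


def unfreeze_after(model: nn.Module, boundary: str = "Conv2d_4a_3x3") -> None:
    """Freeze every child up to and including `boundary`; unfreeze everything after it."""
    past_boundary = False
    for name, child in model.named_children():
        for param in child.parameters():
            param.requires_grad = past_boundary
        if name == boundary:
            past_boundary = True


# e.g. unfreeze_after(models.inception_v3(pretrained=True)) before replacing model.fc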
Example #2
def main(args):
    set_seeds(322)
    args = get_args(args)
    dataset = Dataset(args)
    layers = [200, 600, dataset.n_items]
    args.z_dim = layers[0]
    args.l2_coeff = 0.

    if args.model == 'MultiVAE':
        model = MultiVAE(layers, args=args).to(args.device)
        metric_values = train_model(model, dataset, args)
    elif args.model == 'MultiDAE':
        args.l2_coeff = 0.01 / args.train_batch_size
        model = MultiDAE([200, dataset.n_items], args=args).to(args.device)
        metric_values = train_model(model, dataset, args)
    elif args.model == 'Multi_our_VAE':
        model = Multi_our_VAE(layers, args=args).to(args.device)
        metric_values = train_met_model(model, dataset, args)
    elif args.model == 'MultiHoffmanVAE':
        model = MultiHoffmanVAE(layers, args=args).to(args.device)
        metric_values = train_hoffman_model(model, dataset, args)
    elif args.model == 'Multi_ourHoffman_VAE':
        model = Multi_ourHoffman_VAE(layers, args=args).to(args.device)
        metric_values = train_methoffman_model(model, dataset, args)
    else:
        raise ValueError("Unknown model: {}".format(args.model))  # avoids using metric_values uninitialised

    np.savetxt(
        "../logs/metrics_{}_{}_K_{}_N_{}_learnreverse_{}_anneal_{}_lrdec_{}_lrenc_{}.txt"
        .format(args.data, args.model, args.K, args.N, args.learnable_reverse,
                args.annealing, args.lrdec, args.lrenc),
        np.array(metric_values))

    with open("../logs/log.txt", "a") as myfile:
        myfile.write("!!Success!! {}\n\n\n\n".format(args))  # include the run arguments in the log entry
    print('Success!')
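The snippet writes into ../logs/ without checking that the directory exists; a defensive variant (an addition for illustration, not part of the original code) would create it first.
import os

os.makedirs("../logs", exist_ok=True)  # create the log directory if it is missing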
Example #3
def main(overwrite=False):
    # convert input images into an hdf5 file
    if overwrite or not os.path.exists(config["data_file"]):
        training_files, subject_ids = fetch_training_data_files(
            return_subject_ids=True)

        write_data_to_file(training_files,
                           config["data_file"],
                           image_shape=config["image_shape"],
                           subject_ids=subject_ids)
    data_file_opened = open_data_file(config["data_file"])

    if not overwrite and os.path.exists(config["model_file"]):
        model = load_old_model(config["model_file"])
    else:
        # instantiate new model
        model = is_model(input_shape=config["input_shape"],
                         n_labels=config["n_labels"],
                         initial_learning_rate=config["initial_learning_rate"],
                         n_base_filters=config["n_base_filters"])

    # get training and testing generators
    train_generator, validation_generator, n_train_steps, n_validation_steps = get_training_and_validation_generators(
        data_file_opened,
        batch_size=config["batch_size"],
        data_split=config["validation_split"],
        overwrite=overwrite,
        validation_keys_file=config["validation_file"],
        training_keys_file=config["training_file"],
        n_labels=config["n_labels"],
        labels=config["labels"],
        patch_shape=config["patch_shape"],
        validation_batch_size=config["validation_batch_size"],
        validation_patch_overlap=config["validation_patch_overlap"],
        training_patch_start_offset=config["training_patch_start_offset"],
        permute=config["permute"],
        augment=config["augment"],
        skip_blank=config["skip_blank"],
        augment_flip=config["flip"],
        augment_distortion_factor=config["distort"])

    # run training
    train_model(model=model,
                model_file=config["model_file"],
                training_generator=train_generator,
                validation_generator=validation_generator,
                steps_per_epoch=n_train_steps,
                validation_steps=n_validation_steps,
                initial_learning_rate=config["initial_learning_rate"],
                learning_rate_drop=config["learning_rate_drop"],
                learning_rate_patience=config["patience"],
                early_stopping_patience=config["early_stop"],
                n_epochs=config["n_epochs"])
    data_file_opened.close()
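A minimal sketch of how a Keras train_model with these config keys is commonly wired up: learning_rate_drop and learning_rate_patience feed ReduceLROnPlateau, early_stopping_patience feeds EarlyStopping, and model_file is the checkpoint target. The signature and the exact mapping are assumptions for illustration, not the repository's actual implementation.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


def sketch_train_model(model, model_file, training_generator, validation_generator,
                       steps_per_epoch, validation_steps, learning_rate_drop,
                       learning_rate_patience, early_stopping_patience, n_epochs):
    callbacks = [
        ModelCheckpoint(model_file, save_best_only=True),    # keep the best weights seen so far
        ReduceLROnPlateau(factor=learning_rate_drop,         # shrink the LR when validation loss stalls
                          patience=learning_rate_patience),
        EarlyStopping(patience=early_stopping_patience),     # stop when no further improvement
    ]
    return model.fit(training_generator,
                     steps_per_epoch=steps_per_epoch,
                     epochs=n_epochs,
                     validation_data=validation_generator,
                     validation_steps=validation_steps,
                     callbacks=callbacks)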
Example #4
def main(_):
    print("Start preprocessing data...")
    if FLAGS.actions == "random":
        data = data_preprocessing.data_prep(FLAGS.dataset, FLAGS.num_actions,
                                            FLAGS.total_num_actions)
    else:
        actions = FLAGS.actions.split(",")
        data = data_preprocessing.data_prep(FLAGS.dataset, FLAGS.num_actions,
                                            FLAGS.total_num_actions, actions)

    video_dir = os.path.normpath(
        os.path.join(FLAGS.video_dir, FLAGS.dataset,
                     "{0}_actions".format(FLAGS.num_actions),
                     '_'.join(data.actions)))
    tf.reset_default_graph()
    print("Create model...")
    model = seq2seq_model.Seq2SeqModel(len(data.dim_use), FLAGS.source_seq_len,
                                       FLAGS.target_seq_len, FLAGS.rnn_size,
                                       FLAGS.batch_size, FLAGS.learning_rate)
    sess = training.get_session()
    # perform segmentation with a pre-trained model
    if FLAGS.load > 0:
        model = training.load_model(sess, model, train_dir, FLAGS.load)
    else:
        # train a new model
        if FLAGS.load <= 0:
            current_step = 0
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())
        # continue training from a checkpoint
        else:
            current_step = FLAGS.load + 1
            model = training.load_model(sess, model, train_dir, FLAGS.load)
        print("Training is in process...")
        training.train_model(sess, model, train_dir, data, model.batch_size,
                             current_step, FLAGS.iterations)
    print("Finding temporal segments...")
    test_states, all_states, cuts = segmentation.find_cuts(
        sess, model, data.norm_complete_train)
    print("Doing clustering...")
    pred_labels, reduced_states, labels_true, all_labels = segmentation.clustering(
        model, cuts, test_states, all_states, data.trainData,
        FLAGS.num_actions)
    labels_true, labels_pred = segmentation.order_labels(
        labels_true, all_labels)
    colors = viz.get_color(labels_true)
    xyz_gt = get_gt(data.complete_train)
    print("Generate results...")
    print("length of gt:{0}, pred:{1},reduced:{2}:".format(
        len(labels_true), len(labels_pred), len(reduced_states)))
    generate_video(video_dir, labels_true, labels_pred, xyz_gt, reduced_states,
                   colors, FLAGS.gen_video)
Example #5
def main(argv=None):
    print("begin:")
    model = unet_2d.unet_model_2d_attention([256, 256, 3],
                                            21,
                                            batch_normalization=True)
    voc = voc_reader.voc_reader(256, 256, 8, 8)
    batch_size = 8
    steps_per_epoch = 1464 // batch_size
    validation_steps = 1449 // batch_size
    training.train_model(model, "model_File",
                         training.train_generator_data(voc),
                         training.val_generator_data(voc), steps_per_epoch,
                         validation_steps)
Example #6
def run_autoencoder_model_experiment(model, dataset_dir, exp_name,
                batch_size=32, epochs=200, steps_per_epoch=100,
                validation_steps=20, early_stop_patience=15, evaluation_steps=500,
                experiments_dir="experiments", tensorboard_logs_dir="tensorboard_logs",
                earlystop_metric="loss", checkpoint_metric="val_binary_accuracy"):
    outfiles_dir = os.path.join(experiments_dir, exp_name)
    tensorboard_logdir = os.path.join(tensorboard_logs_dir, exp_name)

    metrics_cols = [' '.join([word.capitalize() for word in metric.split('_')]) for metric in model.metrics_names]
    metrics_df = pd.DataFrame(columns=["Fold", "Set"] + metrics_cols)
    n_folds = len(os.listdir(dataset_dir))
    with tempfile.TemporaryDirectory() as temp_dir:
        init_weights_file = os.path.join(temp_dir, "init_weights.h5")
        temp_weights_file = os.path.join(temp_dir, "temp_weights.h5")
        model.save_weights(init_weights_file)
        for i in range(n_folds):
            fold_name = "fold_{}".format(i)
            folds_files = glob(os.path.join(dataset_dir, fold_name, "fold_*.txt"))
            model.load_weights(init_weights_file)

            cur_outfiles_dir = os.path.join(outfiles_dir, fold_name)
            if not os.path.exists(cur_outfiles_dir):
                os.makedirs(cur_outfiles_dir)
            cur_tensorboard_logdir = os.path.join(tensorboard_logdir, fold_name)
            if not os.path.exists(cur_tensorboard_logdir):
                os.makedirs(cur_tensorboard_logdir)

            train_datagen = FacePairGenerator([folds_files[j] for j in range(len(folds_files)) if j != i], batch_size=batch_size)
            test_datagen = FacePairGenerator([folds_files[i]], batch_size=batch_size)

            train_model(model, train_datagen, test_datagen, 
                    epochs=epochs, steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps, early_stop_patience=early_stop_patience,
                    tensorboard_logdir=cur_tensorboard_logdir, best_model_filepath=temp_weights_file,
                    earlystop_metric=earlystop_metric, checkpoint_metric=checkpoint_metric)

            model.load_weights(temp_weights_file)
            train_metrics, val_metrics = evaluate_model(model, train_datagen, test_datagen, evaluation_steps=evaluation_steps)
            model.save(os.path.join(cur_outfiles_dir, "best_model.h5"),
                    overwrite=True, include_optimizer=False, save_format='h5')

            cur_metrics_df = pd.DataFrame([train_metrics, val_metrics], columns=metrics_cols)
            cur_metrics_df.insert(0, "Set", ["Train", "Val"])
            cur_metrics_df.to_csv(os.path.join(cur_outfiles_dir, "metrics.csv"), index=False)

            cur_metrics_df.insert(0, "Fold", [i, i])
            metrics_df = pd.concat([metrics_df, cur_metrics_df], ignore_index=True)
        
    metrics_df.to_csv(os.path.join(outfiles_dir, "metrics.csv"), index=False)
Example #7
def optuna_objective(trial):
    # args.lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    # args.optimizer = trial.suggest_categorical('optimizer', ['Adam', 'SGD', 'RMSprop', 'Nadam'])
    args.batch_size = trial.suggest_categorical('batch_size', [4, 8, 16])

    args.layers = trial.suggest_categorical('layers', [1, 2, 3])
    args.units = trial.suggest_categorical('units', [25, 50, 100])
    args.dropout = trial.suggest_categorical('dropout', [0.1, 0.2, 0.5])
    # args.dropout_only_last = trial.suggest_categorical('dropout_only_last', [True, False])
    args.attention_first = trial.suggest_categorical('attention_first', [True, False])
    # args.attention_middle = trial.suggest_categorical('attention_middle', [True, False])

    test_results, val_results = train_model(
        args,
        x,
        y,
        args.file_desc,
        args.patience,
        args.folds,
        units=args.units,
        undersample=not args.no_undersample,
        augment_train=not args.no_augment,
        optimizing=True)

    # optimise the mean of the fourth validation metric across folds
    return np.mean(val_results[:, 3])
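An objective like this is normally handed to an Optuna study; the lines below are an illustrative sketch only (the optimisation direction and trial count are assumptions, since the snippet does not show how optuna_objective is invoked).
import optuna

study = optuna.create_study(direction="maximize")  # assumes the returned validation metric should be maximised
study.optimize(optuna_objective, n_trials=50)      # run 50 hyperparameter trials
print(study.best_params)                           # e.g. best batch_size / layers / units / dropout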
Example #8
def main():
    print('loading training data.....')
    data_file_opened, train_list, train_name = get_data(config['data_fpath'])
    config_file = open(config['training_config_file'], 'a')
    config_file.writelines('train_nii: ' + ' / '.join(train_name))
    del train_name
    config_file.close()

    if config["model_file"]:
        print('loading pre-trained model...')
        model = load_old_model(config["model_file"])
    else:
        print('building the new model...')
        model = Unet(input_size=config['input_size'],
                     first_filter_num=config['initial_filter_num'],
                     weight_decay=config['l2_regular'],
                     batch_norm=config['bn'],
                     deconv=config['deconv'],
                     dropout_rate=config['drop'],
                     deeper=config['deeper_4'],
                     nb_classes=config['nb_classes'],
                     initial_learning_rate=config['initial_lr'],
                     activate_fun=config['activation_fun'],
                     multi_gpu=multi_gpu_num,
                     use_Adam=config["use_Adam"])

    # get training and testing generators
    print('get training and testing generators....')
    train_generator, validation_generator, n_train_steps, n_validation_steps = get_training_and_validation_generators(
        data_file_opened,
        patch_shape=config["patch_shape"],
        batch_size=config["batch_size"],
        n_labels=config['nb_classes'],
        training_list=train_list,
        validation_list=None,
        val_split=config["validation_split"],
        binary_num_rate=config["binary_num_rate"],
        min_point=config["min_point"],
        pos_num_rate=config["pos_num_rate"],
        nag_num_rate=config["nag_num_rate"],
        validation_batch_size=config["validation_batch_size"],
        augment=config["augment"],
        augment_flip=config["flip"],
        augment_distortion_factor=config["distort"])
    # run training
    print('training is going on...')
    model = train_model(model=model,
                        save_path=config['save_fpath'],
                        training_generator=train_generator,
                        validation_generator=validation_generator,
                        steps_per_epoch=n_train_steps,
                        validation_steps=n_validation_steps,
                        initial_learning_rate=config["initial_lr"],
                        learning_rate_drop=config["learning_rate_drop"],
                        learning_rate_epochs=config['learning_rate_epochs'],
                        n_epochs=config["n_epochs"],
                        early_stopping_patience=config["early_stop"])
Example #9
def train(directory):
    """
    Creates a features dataset from raw data and trains a model on the transformed data
    
    Parameters
    directory: string with absolute directory path of training data files
    
    """
    #load training data and aggregate from "time" series to regular data
    train = prep.create_featureframe(raw_path=filespath + "\\data\\features",
                                     label_path=filespath + "\\data\\labels",
                                     labels=True)

    # export created file to csv and reload it
    train.to_csv('fframe.csv')
    train = pd.read_csv('fframe.csv', index_col=0)  # pd.DataFrame.from_csv was removed from pandas

    #create and save model file(s)
    training.train_model(train)
Example #10
def main(overwrite=False):
    # open the previously converted hdf5 data file
    data_file_opened = open_data_file(config["data_file"])

    model = model_3d_1(input_shape=config["input_shape"],
                       initial_learning_rate=config["initial_learning_rate"],
                       opt=args.opt
                       )
    if not overwrite and os.path.exists(config["model_file"]):
        print('load model !!')
        load_old_model(config["model_file"], model)


    # get training and testing generators
    train_generator, validation_generator, n_train_steps, n_validation_steps = get_training_and_validation_generators(
        data_file_opened,
        batch_size=config["batch_size"],
        data_split=config["validation_split"],
        overwrite=overwrite,
        validation_keys_file=config["validation_file"],
        training_keys_file=config["training_file"],
        patch_shape=config["patch_shape"],
        validation_batch_size=config["validation_batch_size"],
        validation_patch_overlap=config["validation_patch_overlap"],
        training_patch_start_offset=config["training_patch_start_offset"],
        )

    # run training
    train_model(model=model,
                model_file=config["model_file"],
                training_generator=train_generator,
                validation_generator=validation_generator,
                steps_per_epoch=n_train_steps,
                validation_steps=n_validation_steps,
                initial_learning_rate=config["initial_learning_rate"],
                learning_rate_drop=config["learning_rate_drop"],
                learning_rate_patience=config["patience"],
                early_stopping_patience=config["early_stop"],
                n_epochs=config["n_epochs"])
    data_file_opened.close()
Example #11
def run_experiment(parser, use_gpu):
    # parse experiment specific command line arguments
    parser.add_argument('--learning-rate', dest='learning_rate', type=float,
                        default=0.001, help='Learning rate to use during training.')
    args, _unknown = parser.parse_known_args()

    # pre-process data
    process_raw_data(use_gpu, force_pre_processing_overwrite=False)
    start_compute_grad = time.time()
    for path in data_paths:
        # run experiment
        training_file = "data/preprocessed/training_" + path + ".hdf5"
        validation_file = "data/preprocessed/validation.hdf5"

        if args.skenario == 1:
            model = RGN(embedding_size=42, use_gpu=use_gpu, minibatch_size=args.minibatch_size, pretraining=-1)
        elif args.skenario == 2:
            model = UTGN(embedding_size=42, use_gpu=use_gpu, batch_size=args.minibatch_size, pretraining=-1)
        elif args.skenario == 3:
            model = RGN(embedding_size=768 + 21, use_gpu=use_gpu, minibatch_size=args.minibatch_size, use_pssm=False, use_token=True)
        elif args.skenario == 4:
            model = UTGN(embedding_size=768 + 21, use_gpu=use_gpu, batch_size=args.minibatch_size, use_pssm=False, use_token=True)
        elif args.skenario == 5:
            model = RGN(embedding_size=21, use_gpu=use_gpu, minibatch_size=args.minibatch_size, pretraining=-1, use_pssm=False)
        elif args.skenario == 6:
            model = UTGN(embedding_size=21, use_gpu=use_gpu, batch_size=args.minibatch_size, pretraining=-1, use_pssm=False)
        elif args.skenario == 7:
            model = RGN(embedding_size=768, use_gpu=use_gpu, minibatch_size=args.minibatch_size, use_aa=False, use_pssm=False, use_token=True)
        elif args.skenario == 8:
            model = UTGN(embedding_size=768, use_gpu=use_gpu, batch_size=args.minibatch_size, use_aa=False, use_pssm=False, use_token=True)


        train_loader = contruct_dataloader_from_disk(training_file, args.minibatch_size)
        validation_loader = contruct_dataloader_from_disk(validation_file, args.minibatch_size)
        identifier = "skenario{0}".format(args.skenario)
        train_model_path = train_model(
            data_set_identifier=identifier,
            model=model,
            train_loader=train_loader,
            validation_loader=validation_loader,
            learning_rate=args.learning_rate,
            minibatch_size=args.minibatch_size,
            eval_interval=args.eval_interval,
            hide_ui=args.hide_ui,
            use_gpu=use_gpu,
            optimizer_type=args.optimizer_type,
            restart=args.restart,
            minimum_updates=args.minimum_updates)

        print(train_model_path)
    end = time.time()
    print("Training time:", end - start_compute_grad)
Example #12
	def train(self, data):
		es = keras.callbacks.EarlyStopping(monitor="val_loss", mode='min', patience=self.config["PATIENCE"])

		self.loss, self.acc, self.history = train_model(self.model, 
			data,
			self.config,
			has_acc=self.predictModel,
			cb_list=[es])

		best_epoch(self.history, has_acc=self.predictModel)

		if self.predictModel:
			print('{} test accuracy: {:.3f}'.format(self.model.name, self.acc))
Example #13
def training_stage(action, args):
    try:
        dataset = DATASETS[args.dataset]
    except KeyError:
        if not os.path.exists(args.dataset):
            raise Exception("Please specify a valid dataset id or path.")
        filename = os.path.split(args.dataset)[-1]
        dataset = Dataset(
            filename=filename,
            filepath=args.dataset,
            lang=args.language
        )
        
    
    if action == "preprocess":
        if args.out:
            dataset = dataset._replace(preprocessed_filepath=args.out)

        get_training_file(
            dataset=dataset,
            use_cache=False,
            lemmatize=False,
            remove_stopwords=True,
            progressbar=False if args.no_pb else True,
            tokenizer=args.tokenizer,
            workers=args.workers,
            chunk_size=args.chunk_size
        )
    elif action == "train":
        train_model(
            dataset=dataset,
            model_type=args.model,
            use_cache=True,
            dims=args.dims,
            window=args.window,
            workers=args.workers,
            model_filepath=args.out
        )
Example #14
def job():

    students_image_count_obj = {}
    try:
        for username in os.listdir(constants.DATASET_PATH):
            concatenated_dir = constants.DATASET_PATH + "/" + username
            if os.path.isdir(concatenated_dir):
                students_image_count_obj[username] = len([
                    file for file in os.listdir(concatenated_dir)
                    if (os.path.isfile(concatenated_dir + "/" +
                                       file) and os.path.splitext(file)
                        [1].lower() == constants.SUPPORTED_FILE_TYPE)
                ])
        start_training = True
        for username in students_image_count_obj.keys():
            if (students_image_count_obj[username] !=
                    constants.USER_TRAINING_IMAGE_COUNT):
                start_training = False
                break
        # check whether any new user was added, i.e. whether training is actually needed again
        if start_training:
            students_count = len(students_image_count_obj.keys())
            if students_count > constants.ALREADY_TRAINED_STUDENTS_COUNT:
                constants.ALREADY_TRAINED_STUDENTS_COUNT = students_count
            else:
                log.info("Training terminated to avoid redundant trainings.")
                start_training = False
        else:
            log.info(
                "Training didn't start because of insufficient number of images."
            )
            log.info("Metadata: " + str(students_image_count_obj))
        if start_training:
            log.info("Initialising model's training.")
            extract_embeddings()
            train_model()
    except KeyboardInterrupt:
        log.error("Training process' polling job interrupted.")
Example #15
def main():
    raw_lst = processing.get_raw_data()
    names_lst = processing.get_raw_names()

    for idx in range(len(raw_lst)):
        dataset_name = names_lst[idx]

        X, y, target_names = processing.process_data(raw_lst[idx])

        model_dict = training.train_model(X, y, target_names, dataset_name)

        training.print_elapsed_time(model_dict)

        reporting.produce_report(model_dict)
Example #16
def execute():

    model = create_model()

    base_dir: str = '/Users/Jan/Developer/ML/dogs/dataset'

    train_generator, val_generator = create_generators(f'{base_dir}/train',
                                                       f'{base_dir}/val',
                                                       f'{base_dir}/test')

    history = train_model(train_generator, val_generator, model)

    display_progress(history)

    print(history)
Example #17
def run_experiment(parser, use_gpu):
    # parse experiment specific command line arguments
    parser.add_argument('--learning-rate',
                        dest='learning_rate',
                        type=float,
                        default=0.01,
                        help='Learning rate to use during training.')
    parser.add_argument('--min-updates',
                        dest='minimum_updates',
                        type=int,
                        default=1000,
                        help='Minimum number of minibatch iterations.')
    parser.add_argument('--minibatch-size',
                        dest='minibatch_size',
                        type=int,
                        default=1,
                        help='Size of each minibatch.')
    args, _unknown = parser.parse_known_args()

    # pre-process data
    process_raw_data(use_gpu, force_pre_processing_overwrite=False)

    # run experiment
    training_file = "data/preprocessed/single_protein.txt.hdf5"
    validation_file = "data/preprocessed/single_protein.txt.hdf5"

    model = ExampleModel(21, args.minibatch_size,
                         use_gpu=use_gpu)  # embed size = 21

    train_loader = contruct_dataloader_from_disk(training_file,
                                                 args.minibatch_size)
    validation_loader = contruct_dataloader_from_disk(validation_file,
                                                      args.minibatch_size)

    train_model_path = train_model(data_set_identifier="TRAIN",
                                   model=model,
                                   train_loader=train_loader,
                                   validation_loader=validation_loader,
                                   learning_rate=args.learning_rate,
                                   minibatch_size=args.minibatch_size,
                                   eval_interval=args.eval_interval,
                                   hide_ui=args.hide_ui,
                                   use_gpu=use_gpu,
                                   minimum_updates=args.minimum_updates)

    print("Completed training, trained model stored at:")
    print(train_model_path)
Example #18
def train_in_batch(server_name, suff, kbase_id1, lang_code, org_id, token):

    training_id_fname = "training_ids.txt"

    print("---------------------TRAINING KBASE----------------------")
    training_id = training.train_model(server_name, suff, kbase_id1, lang_code,
                                       org_id, token)
    print(training_id)

    out_string = training_id + "\n"
    if not os.path.exists(training_id_fname):
        print("Case 1")
        with open(training_id_fname, 'w') as file:
            file.write(out_string)
    else:
        with open(training_id_fname, 'a') as file:
            file.write(out_string)

    status_code = ""
    while status_code.lower() != "succeeded":
        print("---------------------VIEW TRAINING STATUS-------------------")
        resp_json = training.view_trained_model(server_name, suff, kbase_id1,
                                                lang_code, training_id, org_id,
                                                token)
        status_code = resp_json["status"].lower()
        if status_code == 'failed':
            print("resp_json: ", resp_json)
            print("status_code: ", status_code)
            print("errorMessage: ", resp_json["errorMessage"])
            error_message = resp_json["errorMessage"]
            index_words = error_message.index("-words")
            index_start = index_words + len("-words")
            substr_doc_id = error_message[index_start:]

            resp_doc = documents.view_doc(server_name, suff, kbase_id1,
                                          lang_code, substr_doc_id, org_id,
                                          token)
            print("Document failed question: ", resp_doc["faq"]["question"])
            print("Document failed answer: ", resp_doc["faq"]["answer"])

            return
        else:
            print("status_code: ", status_code)
        time.sleep(10)
Example #19
def run_experiment(parser, use_gpu):
    # parse experiment specific command line arguments
    parser.add_argument('--learning-rate',
                        dest='learning_rate',
                        type=float,
                        default=0.01,
                        help='Learning rate to use during training.')

    parser.add_argument(
        '--input-file',
        dest='input_file',
        type=str,
        default='data/preprocessed/protein_net_testfile.txt.hdf5')

    args, _unknown = parser.parse_known_args()

    # pre-process data
    process_raw_data(use_gpu, force_pre_processing_overwrite=False)

    # run experiment
    training_file = args.input_file
    validation_file = args.input_file

    model = MyModel(21, use_gpu=use_gpu)  # embed size = 21

    train_loader = contruct_dataloader_from_disk(training_file,
                                                 args.minibatch_size)
    validation_loader = contruct_dataloader_from_disk(validation_file,
                                                      args.minibatch_size)

    train_model_path = train_model(data_set_identifier="TRAIN",
                                   model=model,
                                   train_loader=train_loader,
                                   validation_loader=validation_loader,
                                   learning_rate=args.learning_rate,
                                   minibatch_size=args.minibatch_size,
                                   eval_interval=args.eval_interval,
                                   hide_ui=args.hide_ui,
                                   use_gpu=use_gpu,
                                   minimum_updates=args.minimum_updates)

    print("Completed training, trained model stored at:")
    print(train_model_path)
Example #20
def run_experiment(parser, use_gpu):
    # parse experiment specific command line arguments
    parser.add_argument('--learning-rate',
                        dest='learning_rate',
                        type=float,
                        default=0.01,
                        help='Learning rate to use during training.')
    args, _unknown = parser.parse_known_args()

    # pre-process data
    process_raw_data(use_gpu,
                     raw_data_root="data/raw/*",
                     force_pre_processing_overwrite=False)

    # run experiment
    training_file = "data/preprocessed/train_sample.txt.hdf5"
    validation_file = "data/preprocessed/test_sample.txt.hdf5"

    model = ExampleModel(21, args.minibatch_size,
                         use_gpu=use_gpu)  # embed size = 21

    train_loader = contruct_dataloader_from_disk(training_file,
                                                 args.minibatch_size)
    validation_loader = contruct_dataloader_from_disk(validation_file,
                                                      args.minibatch_size)

    train_model_path = train_model(data_set_identifier="TRAIN",
                                   model=model,
                                   train_loader=train_loader,
                                   validation_loader=validation_loader,
                                   learning_rate=args.learning_rate,
                                   minibatch_size=args.minibatch_size,
                                   eval_interval=args.eval_interval,
                                   hide_ui=args.hide_ui,
                                   use_gpu=use_gpu,
                                   minimum_updates=args.minimum_updates)

    print(train_model_path)
Example #21
EPOCHS = 20
LIM = 1000

X_train, y_train = preprocess(train['price'], categories=CATEGORIES, units=UNITS, lookahead=LOOKAHEAD, vol_adjust=VOL_ADJUST)
X_dev, y_dev = preprocess(dev['price'], categories=CATEGORIES, units=UNITS, lookahead=LOOKAHEAD, vol_adjust=VOL_ADJUST)

if LIM:
    X_train = X_train[:LIM]
    y_train = y_train[:LIM]
    X_dev = X_dev[:LIM]
    y_dev = y_dev[:LIM]

model = build_model(X_train.shape[1], n_categories=CATEGORIES, loss='categorical_crossentropy')


train_model(RUN_ID, model, X_train, y_train, epochs=EPOCHS)

import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score


def report_performance():
    y_pred = model.predict(X_dev.to_numpy(), batch_size=2000, verbose=2)
    pred_classes = np.argmax(y_pred, axis=1)  # class with the highest predicted probability
    conf_matrix = confusion_matrix(y_dev, pred_classes)
    print(conf_matrix)
    print("accuracy", accuracy_score(y_dev, pred_classes))
    print("f1", f1_score(y_dev, pred_classes, average='weighted'))
    # sns.heatmap(conf_matrix)
    # plt.show()
Example #22
use_gpu = False
if torch.cuda.is_available():
    write_out("CUDA is available, using GPU")
    use_gpu = True

# start web server
start_dashboard_server()

process_raw_data(use_gpu, force_pre_processing_overwrite=False)

training_file = "data/preprocessed/sample.txt.hdf5"
validation_file = "data/preprocessed/sample.txt.hdf5"
testing_file = "data/preprocessed/testing.hdf5"

model = ExampleModel(21, args.minibatch_size, use_gpu=use_gpu)  # embed size = 21

train_loader = contruct_dataloader_from_disk(training_file, args.minibatch_size)
validation_loader = contruct_dataloader_from_disk(validation_file, args.minibatch_size)

train_model_path = train_model(data_set_identifier="TRAIN",
                               model=model,
                               train_loader=train_loader,
                               validation_loader=validation_loader,
                               learning_rate=args.learning_rate,
                               minibatch_size=args.minibatch_size,
                               eval_interval=args.eval_interval,
                               hide_ui=args.hide_ui,
                               use_gpu=use_gpu,
                               minimum_updates=args.minimum_updates)

print(train_model_path)
Example #23
def run_experiment(parser, use_gpu):
    parser.add_argument('--minibatch-size-validation',
                        dest='minibatch_size_validation',
                        type=int,
                        default=8,
                        help='Size of each minibatch during evaluation.')
    parser.add_argument('--hidden-size',
                        dest='hidden_size',
                        type=int,
                        default=64,
                        help='Hidden size.')
    parser.add_argument('--learning-rate',
                        dest='learning_rate',
                        type=float,
                        default=0.0002,
                        help='Learning rate to use during training.')
    parser.add_argument('--cv-partition',
                        dest='cv_partition',
                        type=int,
                        default=0,
                        help='Run a particular cross validation rotation.')
    parser.add_argument('--model-mode',
                        dest='model_mode',
                        type=int,
                        default=2,
                        help='Which model to use.')
    parser.add_argument('--input-data',
                        dest='input_data',
                        type=str,
                        default='data/raw/TMHMM3.train.3line.latest',
                        help='Path of input data file.')
    parser.add_argument('--pre-trained-model-paths',
                        dest='pre_trained_model_paths',
                        type=str,
                        default=None,
                        help='Paths of pre-trained models.')
    parser.add_argument('--profile-path', dest='profile_path',
                        type=str, default="",
                        help='Profiles to use for embedding.')
    args, _unknown = parser.parse_known_args()

    result_matrices = np.zeros((5, 5), dtype=np.int64)

    if args.model_mode == 0:
        model_mode = TMHMM3Mode.LSTM
    elif args.model_mode == 1:
        model_mode = TMHMM3Mode.LSTM_CRF
    elif args.model_mode == 2:
        model_mode = TMHMM3Mode.LSTM_CRF_HMM
    elif args.model_mode == 3:
        model_mode = TMHMM3Mode.LSTM_CRF_MARG
    else:
        raise ValueError("No model defined for --model-mode {}".format(args.model_mode))

    print("Using model:", model_mode)

    if args.profile_path != "":
        embedding = "PROFILE"
    else:
        embedding = "BLOSUM62"
    use_marg_prob = False
    all_prediction_data = []

    for cv_partition in [0, 1, 2, 3, 4]:
        # prepare data sets
        train_set, val_set, test_set = load_data_from_disk(filename=args.input_data,
                                                           partition_rotation=cv_partition)

        # topology data set
        train_set_topology = list(filter(lambda x: x[3] == 0 or x[3] == 1, train_set))
        val_set_topology = list(filter(lambda x: x[3] == 0 or x[3] == 1, val_set))
        test_set_topology = list(filter(lambda x: x[3] == 0 or x[3] == 1, test_set))

        if not args.silent:
            print("Loaded ",
                  len(train_set), "training,",
                  len(val_set), "validation and",
                  len(test_set), "test samples")

        print("Processing data...")
        pre_processed_path = "data/preprocessed/preprocessed_data_" + str(
            hashlib.sha256(args.input_data.encode()).hexdigest())[:8] + "_cv" \
                             + str(cv_partition) + ".pickle"
        if not os.path.isfile(pre_processed_path):
            input_data_processed = [TMDataset.from_disk(subset, use_gpu) for subset in
                                    [train_set, val_set, test_set,
                                     train_set_topology, val_set_topology,
                                     test_set_topology]]
            with open(pre_processed_path, "wb") as pre_processed_file:
                pickle.dump(input_data_processed, pre_processed_file)
        with open(pre_processed_path, "rb") as pre_processed_file:
            input_data_processed = pickle.load(pre_processed_file)
        train_preprocessed_set = input_data_processed[0]
        validation_preprocessed_set = input_data_processed[1]
        test_preprocessed_set = input_data_processed[2]
        train_preprocessed_set_topology = input_data_processed[3]
        validation_preprocessed_set_topology = input_data_processed[4]
        _test_preprocessed_set_topology = input_data_processed[5]

        print("Completed preprocessing of data...")

        train_loader = tm_contruct_dataloader_from_disk(train_preprocessed_set,
                                                        args.minibatch_size,
                                                        balance_classes=True)
        validation_loader = tm_contruct_dataloader_from_disk(validation_preprocessed_set,
                                                             args.minibatch_size_validation,
                                                             balance_classes=True)
        test_loader = tm_contruct_dataloader_from_disk(
            test_preprocessed_set if args.evaluate_on_test else validation_preprocessed_set,
            args.minibatch_size_validation)

        train_loader_topology = \
            tm_contruct_dataloader_from_disk(train_preprocessed_set_topology,
                                             args.minibatch_size)
        validation_loader_topology = \
            tm_contruct_dataloader_from_disk(validation_preprocessed_set_topology,
                                             args.minibatch_size_validation)

        type_predictor_model_path = None

        if args.pre_trained_model_paths is None:
            for (experiment_id, train_data, validation_data) in [
                    ("TRAIN_TYPE_CV" + str(cv_partition) + "-" + str(model_mode)
                     + "-HS" + str(args.hidden_size) + "-F" + str(args.input_data.split(".")[-2])
                     + "-P" + str(args.profile_path.split("_")[-1]), train_loader,
                     validation_loader),
                    ("TRAIN_TOPOLOGY_CV" + str(cv_partition) + "-" + str(model_mode)
                     + "-HS" + str(args.hidden_size) + "-F" + str(args.input_data.split(".")[-2])
                     + "-P" + str(args.profile_path.split("_")[-1]),
                     train_loader_topology, validation_loader_topology)]:

                type_predictor = None
                if type_predictor_model_path is not None:
                    type_predictor = load_model_from_disk(type_predictor_model_path,
                                                          force_cpu=False)
                    model = load_model_from_disk(type_predictor_model_path,
                                                 force_cpu=False)
                    model.type_classifier = type_predictor
                    model.type_01loss_values = []
                    model.topology_01loss_values = []
                else:
                    model = TMHMM3(
                        embedding,
                        args.hidden_size,
                        use_gpu,
                        model_mode,
                        use_marg_prob,
                        type_predictor,
                        args.profile_path)

                model_path = train_model(data_set_identifier=experiment_id,
                                         model=model,
                                         train_loader=train_data,
                                         validation_loader=validation_data,
                                         learning_rate=args.learning_rate,
                                         minibatch_size=args.minibatch_size,
                                         eval_interval=args.eval_interval,
                                         hide_ui=args.hide_ui,
                                         use_gpu=use_gpu,
                                         minimum_updates=args.minimum_updates)

                # let the GC collect the model
                del model

                write_out(model_path)

                # if we just trained a type predictor, save it for later
                if "TRAIN_TYPE" in experiment_id:
                    type_predictor_model_path = model_path
        else:
            # use the pre-trained model
            model_path = args.pre_trained_model_paths.split(",")[cv_partition]

        # test model
        write_out("Testing model...")
        model = load_model_from_disk(model_path, force_cpu=False)
        _loss, json_data, prediction_data = model.evaluate_model(test_loader)

        all_prediction_data.append(post_process_prediction_data(prediction_data))
        result_matrix = np.array(json_data['confusion_matrix'])
        result_matrices += result_matrix
        write_out(result_matrix)

    set_experiment_id(
        "TEST-" + str(model_mode) + "-HS" + str(args.hidden_size) + "-F"
        + str(args.input_data.split(".")[-2]),
        args.learning_rate,
        args.minibatch_size)
    write_out(result_matrices)
    write_prediction_data_to_disk("\n".join(all_prediction_data))
Example #24
def run_experiment(parser, use_gpu):
    # parse experiment specific command line arguments
    parser.add_argument('--learning-rate',
                        dest='learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning rate to use during training.')
    parser.add_argument('--embed-size',
                        dest='embed_size',
                        type=int,
                        default=21,
                        help='Embedding size.')
    args, _unknown = parser.parse_known_args()

    all_prediction_data = []
    result_matrices = []
    # pre-process data
    preprocessed_training_file = process_single_raw_data(
        training_file, use_gpu=use_gpu, force_pre_processing_overwrite=False)
    preprocessed_validation_file = process_single_raw_data(
        validation_file, use_gpu=use_gpu, force_pre_processing_overwrite=False)
    preprocessed_test_file = process_single_raw_data(
        test_file, use_gpu=use_gpu, force_pre_processing_overwrite=False)

    # run experiment

    # model = ExampleModel(args.embed_size, args.minibatch_size, use_gpu=use_gpu)  # embed size = 21
    # model = SimpleRCNN(args.embed_size, args.minibatch_size, use_gpu=use_gpu)  # embed size = 21
    model = DeepResRCNN_100(args.embed_size,
                            args.minibatch_size,
                            use_gpu=use_gpu)  # embed size = 21

    train_loader = contruct_dataloader_from_disk(preprocessed_training_file,
                                                 args.minibatch_size)
    validation_loader = contruct_dataloader_from_disk(
        preprocessed_validation_file, args.minibatch_size)

    train_model_path = train_model(data_set_identifier="TRAIN",
                                   model=model,
                                   train_loader=train_loader,
                                   validation_loader=validation_loader,
                                   learning_rate=args.learning_rate,
                                   minibatch_size=args.minibatch_size,
                                   eval_interval=args.eval_interval,
                                   hide_ui=args.hide_ui,
                                   use_gpu=use_gpu,
                                   minimum_updates=args.minimum_updates)

    print(train_model_path)

    # test model
    test_loader = contruct_dataloader_from_disk(preprocessed_test_file,
                                                args.minibatch_size)
    write_out("Testing model...")
    model = load_model_from_disk(train_model_path, force_cpu=False)
    _loss, json_data, _ = model.evaluate_model(test_loader)

    all_prediction_data.append(json_data)
    # all_prediction_data.append(model.post_process_prediction_data(prediction_data))
    result_matrix = np.array(json_data['confusion_matrix'])
    result_matrices += result_matrix
    write_out(result_matrix)

    set_experiment_id(
        "TEST-" + str(args.hidden_size) + "-F" +
        str(args.input_data.split(".")[-2]), args.learning_rate,
        args.minibatch_size)
    write_out(result_matrices)
    write_prediction_data_to_disk("\n".join(all_prediction_data))
Example #25
from argparse import ArgumentParser
from training import train_model

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--use_gpu', type=int, choices=[0, 1], default=0)
    parser.add_argument('--pre_trained', type=str, default=None)
    parser.add_argument('--label_dict',
                        type=str,
                        default='./data/y2_id_dic.txt')

    # parser.add_argument('--train_dataset', type=str, default='data/train.json')
    # parser.add_argument('--dev_dataset', type=str, default='data/dev.json')

    parser.add_argument('--embedding_size', type=int, default=300)
    parser.add_argument('--hidden_size', type=int, default=25)
    parser.add_argument('--dict_number', type=int, default=21128)
    parser.add_argument('--num_labels', type=int, default=53)

    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--epochs', type=int, default=50)

    #parser.add_argument('--save_model', type=str, default='models/bert.pt')
    option = parser.parse_args()

    train_model(option)
Example #26
import os
import torch.optim

from data_loader import LabelledTextDS
from models import FastText
from plotting import *
from training import train_model
import pandas as pd

num_epochs = 50
num_hidden = 40  # Number of hidden neurons in model

dev = 'cuda' if torch.cuda.is_available() else 'cpu'  # use a GPU if one is installed, otherwise the CPU
dataset = LabelledTextDS(os.path.join('data', 'labelled_movie_reviews.csv'),
                         dev=dev)

imdb_data = pd.read_csv('data/labelled_movie_reviews.csv')

model = FastText(
    len(dataset.token_to_id) + 2, num_hidden, len(dataset.class_to_id)).to(dev)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

losses, accuracies = train_model(dataset, model, optimizer, num_epochs)
torch.save(model, os.path.join('saved_models', 'classifier.pth'))

print('')
print_accuracies(accuracies)
plot_losses(losses)
Example #27
elif model == 'BasicLSTM':
    lstm = BasicLSTM(input_dim, 5, goal_dim, output_length=goal_length)
    lstm.to(device)

    lstm_optim = torch.optim.SGD(lstm.parameters(), lr=0.001, momentum=0.9)

    setup_kwargs['model'] = lstm
    setup_kwargs['optim'] = lstm_optim

elif model == 'NTM_LSTM':
    memory_banks = 10
    memory_dim = 20

    ntm = NTM_LSTM(input_dim,
                   5,
                   goal_dim,
                   memory_banks,
                   memory_dim,
                   output_length=goal_length)
    ntm_optim = torch.optim.SGD(ntm.parameters(), lr=0.001)

    setup_kwargs['model'] = ntm
    setup_kwargs['optim'] = ntm_optim
    setup_kwargs['print_interval'] = 1e2

else:
    raise ValueError("Model '{}' is not recognized!".format(model))

# Begin Training the Model!
train_model(**setup_kwargs)
Example #28
                                          **kwargs)

manualSeed = 9302  #random.randint(1, 10000) # fix seed
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

g_config = get_config()

model_dir = args.model_dir
setupLogger(os.path.join(model_dir, 'log.txt'))
g_config.model_dir = model_dir

criterion = nn.HingeEmbeddingLoss()
model = Siamese()

# load model snapshot
load_path = args.load_path
if load_path != '':
    snapshot = torch.load(load_path)
    # loadModelState(model, snapshot)
    model.load_state_dict(snapshot['state_dict'])
    logging('Model loaded from {}'.format(load_path))

train_model(model,
            criterion,
            train_loader,
            test_loader,
            g_config,
            use_cuda=False)
Example #29
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weightDecay, momentum=0.9)  # lr should be set to 0.1
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1, verbose=True)
  else:
    weightDecay = 5e-5
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weightDecay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=1, factor=0.1, verbose=True)

  if resume_from_ckp:
    ckp_path = checkpoint_path
    ckp = torch.load(ckp_path, map_location=device)
    model.load_state_dict(ckp['state_dict'])
    optimizer.load_state_dict(ckp['optimizer'])
    scheduler.load_state_dict(ckp['scheduler'])
    start_epoch = ckp['epoch']
    print("Resuming from checkpoint...")
    del ckp
  else:
    start_epoch = 0
  

  if args.model == 'VDSR':
    model = train_model(model, optimizer, scheduler, num_epochs=num_epochs, start_epoch=start_epoch,
                        checkpoint_dir=checkpoint_path, dataloaders=dataloaders, clip_norm=0.4, scheduler_no_arg=True)
  else:
    model = train_model(model, optimizer, scheduler, num_epochs=num_epochs, start_epoch=start_epoch,
                        checkpoint_dir=checkpoint_path, dataloaders=dataloaders)

  filename = "./model/"+"trial"+str(trialNumber)+".pth"
  torch.save(model.state_dict(), filename)


# %%
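A counterpart sketch of how a checkpoint with the keys loaded above ('state_dict', 'optimizer', 'scheduler', 'epoch') would typically be written; this helper is an assumption for illustration, not part of the original training loop.
import torch


def save_checkpoint(model, optimizer, scheduler, epoch, ckp_path):
    # Persist everything needed to resume: weights, optimizer/scheduler state and the epoch index.
    torch.save({
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
        'epoch': epoch,
    }, ckp_path)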
Example #30
    def fit(self, *args, **kwargs):
        ''' Train the model. See training.train_model for more details. '''
        from training import train_model
        return train_model(self, *args, **kwargs)