Example #1
    def load_training_dataset(self):
        '''Load the training and validation data generators and save the class names.'''
        utils.print_head('TRAINING dataset loaded...', 'darkcyan')
        self.train_datagen = utils.load_train_dataset(self.paths['train_dataset_path'], self.config)
        self.val_datagen = utils.load_validation_dataset(self.paths['train_dataset_path'], self.config)
        # Save the class names so they can be reused at prediction time
        classes = np.array(list(self.train_datagen.class_indices.keys()))
        np.save(f"{self.paths['class_file_path']}/{self.paths['class_file_name']}", classes)
Example #2
def main():
    parser = ArgumentParser()
    parser.add_argument('--config', default='./configs/lgb_template.yaml')
    parser.add_argument('--create-features', action='store_true')
    options = parser.parse_args()
    with open(options.config) as f:
        config = yaml.safe_load(f)

    kfold = get_kfold(config)

    if options.create_features:
        train_path = get_dataset_filename(config, 'train')
        test_path = get_dataset_filename(config, 'test')

        with timer('Load train/test dump files'):
            train_df = load_dump(train_path)
            test_df = load_dump(test_path)

        create_features(config, train_df, test_df, kfold)

        del train_df, test_df
        gc.collect()

    target_col = config['target']
    target_path = get_dataset_filename(config, 'target')
    use_features = extract_use_features(config)
    x_train = load_train_dataset(use_features)
    y_train = load_dump(target_path)[target_col]

    output_dir = config['dataset']['output_directory']
    basename = Path(options.config).stem
    logger = setup_logger(output_dir, basename)

    clfs, importances = train_model(x_train, y_train, kfold, config, logger)

    save_feature_importances(importances, basename, output_dir)

    del x_train, y_train, importances
    gc.collect()

    pred = predict_model(clfs, use_features, config)

    print('Creating a submission csv file...')
    submission_path = get_dataset_filename(config, 'sample_submit')
    submission = pd.read_csv(submission_path)
    submission[target_col] = pred
    submission.to_csv(f'{output_dir}/submit_{basename}.csv.gz', index=False)
    print('Done.')
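In the pipeline above, load_train_dataset(use_features) is called but not defined in the excerpt. The sketch below shows one plausible shape for such a helper, assuming each selected feature has been cached to its own dump file by create_features; the features/<name>_train.pkl layout is an illustrative assumption, not the repository's actual convention, and load_dump is the same helper used in the snippet.

import pandas as pd

def load_train_dataset(use_features):
    # Assumed behaviour: read one cached dump per selected feature and
    # concatenate the columns into a single training matrix.
    frames = [load_dump(f'features/{name}_train.pkl') for name in use_features]
    return pd.concat(frames, axis=1)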
Example #3
def worker(args):
    alg, max_bit, model_file = args
    model = load_pretrain_model(model_file)
    data_loader = load_data_for_adv(
        os.path.join(data_dir, 'baseline_dataset.pkl'))
    train_samples = load_train_dataset(
        os.path.join(data_dir, 'baseline_dataset.pkl'))

    print("=============attack algorithm:{} max_bit:{} ======START=========".
          format(alg, max_bit))
    r_codes = adv_attack(model, train_samples, data_loader, max_bit, alg)
    report = {
        'alg': alg,
        'max_bit': max_bit,
        'model_file': model_file,
        'r_codes': r_codes,
    }
    print("=============attack algorithm:{} max_bit:{} ======END=========".
          format(alg, max_bit))
    return report
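The worker function above takes a single (alg, max_bit, model_file) tuple, which makes it easy to fan out over several attack configurations. A minimal driver sketch follows; the attack names, bit budgets, model path, and pool size are assumptions for illustration, and data_dir is expected to be defined at module level as in the example.

from itertools import product
from multiprocessing import Pool

if __name__ == '__main__':
    algs = ['fgsm', 'bim']                # assumed attack identifiers
    max_bits = [1, 2, 4]                  # assumed bit budgets
    model_file = 'models/baseline.model'  # assumed checkpoint path
    jobs = [(alg, bit, model_file) for alg, bit in product(algs, max_bits)]
    with Pool(processes=4) as pool:       # run the attack configurations in parallel
        reports = pool.map(worker, jobs)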
Example #4
    # Load test dataset
    if args.data_dir is not None:
        # Get test images
        print('Getting and resizing test images... ')
        test_df = load_test_dataset(data_dir=args.data_dir,
                                    img_width=t_img_width,
                                    img_height=t_img_height,
                                    img_chan=t_img_chan,
                                    partial=args.partial,
                                    part_size=args.partial_size,
                                    debug=False)
        # Load training dataset
        x_train, x_valid, y_train, y_valid = load_train_dataset(
            data_dir=args.data_dir,
            img_width=t_img_width,
            img_height=t_img_height,
            img_chan=t_img_chan,
            debug=True)
        if args.partial:
            x_valid = x_valid[:args.partial_size, :, :, :]
            y_valid = y_valid[:args.partial_size, :, :, :]

        print('Done!')
    else:
        raise ValueError(
            "The input data directory or dataset file was not specified")

    start_inference(model=args.model,
                    test_df=test_df,
                    X_valid=x_valid,
                    y_valid=y_valid,
Example #5
cost_val = []
FLAGS.num_products = FLAGS.num_products * 2

# Create graph object
graph = tf.Graph()
with graph.as_default():

    with tf.device('/cpu:0'):

        tf.set_random_seed(FLAGS.seed)

        # Load the model
        model = CausalProd2Vec2i(FLAGS)

        # Get train data batch from queue
        next_batch = ut.load_train_dataset(train_data_set_location,
                                           FLAGS.batch_size, FLAGS.num_epochs)
        test_user_batch, test_product_batch, test_label_batch, test_cr = ut.load_test_dataset(
            test_data_set_location)
        val_test_user_batch, val_test_product_batch, val_test_label_batch, val_cr = ut.load_test_dataset(
            validation_test_set_location)
        val_train_user_batch, val_train_product_batch, val_train_label_batch, val_cr = ut.load_test_dataset(
            validation_train_set_location)

        # Create the empirical CR test logits
        test_logits = np.empty(len(test_label_batch))
        test_logits.fill(test_cr)

# Launch the Session
with tf.Session(graph=graph,
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:
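The example stops right after the session is created. For orientation, a generic TF1-style continuation is sketched below; model.train_op and model.loss are assumed attribute names of CausalProd2Vec2i and may not match the real class, and the queue-runner calls assume the training batches above come from a TF1 input-queue pipeline.

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            # One optimisation step per dequeued training batch
            _, loss = sess.run([model.train_op, model.loss])
            cost_val.append(loss)
    except tf.errors.OutOfRangeError:
        pass  # the input queue raises this once num_epochs is exhausted
    finally:
        coord.request_stop()
        coord.join(threads)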