test_ext=repr_test_x_ext, dtype=args.repr_x_dtype) if decode: repr_fold_y_splits = load_train_val_test_splits(args.repr_y, dataset_name, y_only=True, train_ext=repr_train_y_ext, valid_ext=repr_valid_y_ext, test_ext=repr_test_y_ext, dtype=args.repr_y_dtype) # # printing logging.info('Original folds') print_fold_splits_shapes(fold_splits) logging.info('Repr X folds') print_fold_splits_shapes(repr_fold_x_splits) if decode: logging.info('Repr Y folds') print_fold_splits_shapes(repr_fold_y_splits) # # Opening the file for test prediction # if args.exp_name: out_path = os.path.join(args.output, dataset_name + '_' + args.exp_name) else: date_string = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") out_path = os.path.join(args.output, dataset_name + '_' + date_string)
n_folds, train_ext=train_ext, valid_ext=valid_ext, test_ext=test_ext, dtype=args.dtype) else: fold_splits = load_train_val_test_splits(args.dataset, dataset_name, x_only=False, y_only=False, train_ext=train_ext, valid_ext=valid_ext, test_ext=test_ext, dtype=args.dtype) print_fold_splits_shapes(fold_splits) merged_fold_splits = [] for i, splits in enumerate(fold_splits): logging.info('Processing fold {}\n'.format(i)) merged_splits = [] for j, split in enumerate(splits): if split is not None: split_x, split_y = split logging.info('\tProcessing split {} ({}, {})'.format( SPLIT_NAMES[j], split_x.shape, split_y.shape)) if args.n_classes: logging.info('classes : {} -> {}'.format(
y_only=y_only, dtype=args.dtype) else: fold_splits = load_train_val_test_splits(args.dataset, dataset_name, train_ext=train_ext, valid_ext=valid_ext, test_ext=test_ext, x_only=x_only, y_only=y_only, dtype=args.dtype) # # printing print_fold_splits_shapes(fold_splits) # n_instances = train.shape[0] # n_test_instances = test.shape[0] # # estimating the frequencies for the features logging.info('Estimating features on training set...') # freqs, features = dataset.data_2_freqs(train) n_features = fold_splits[0][0].shape[1] features = None if args.feature_scheme is None: features = numpy.array([2 for i in range(n_features)]) else: raise ValueError('Loading feature schema not implemented yet') #