def open_datasets(train_path, train_ref_path, test_path, test_ref_path, delim, labels_path=None, tostring=False):
    """Load train/test feature and reference files and validate their shapes.

    Args:
        train_path: Path to the training features file.
        train_ref_path: Path to the training references file.
        test_path: Path to the test features file.
        test_ref_path: Path to the test references file.
        delim: Delimiter forwarded to the reader helpers.
        labels_path: Optional path to a labels file; when ``None`` the
            returned labels are an empty list.
        tostring: Forwarded as a keyword argument to ``read_features_file``
            and ``read_reference_file``.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, labels)``.

    Raises:
        IOError: If any given path is not an existing file, if a feature
            set is not two-dimensional, if features and references disagree
            on the number of rows, or if train and test disagree on the
            number of columns.
    """
    # Checked in this order so the first invalid path is the one reported.
    required_files = (
        (train_path, "training dataset path is not valid: %s"),
        (train_ref_path, "training references path is not valid: %s"),
        (test_path, "test dataset path is not valid: %s"),
        (test_ref_path, "test references path is not valid: %s"),
    )
    for path, message in required_files:
        if not os.path.isfile(os.path.abspath(path)):
            raise IOError(message % path)

    labels = []
    if labels_path is not None:
        if not os.path.isfile(os.path.abspath(labels_path)):
            raise IOError("labels file is not valid: %s" % labels_path)
        labels = read_labels_file(labels_path, delim)

    X_train = read_features_file(train_path, delim, tostring=tostring)
    y_train = read_reference_file(train_ref_path, delim, tostring=tostring)
    X_test = read_features_file(test_path, delim, tostring=tostring)
    y_test = read_reference_file(test_ref_path, delim, tostring=tostring)

    if len(X_train.shape) != 2:
        raise IOError(
            "the training dataset must be in the format of a matrix with M lines and N columns."
        )
    if len(X_test.shape) != 2:
        raise IOError(
            "the test dataset must be in the format of a matrix with M lines and N columns."
        )

    if X_train.shape[0] != y_train.shape[0]:
        # The two counts used to be dumped to stdout with a Python 2 print
        # statement; they now travel with the exception instead.
        raise IOError(
            "the number of instances in the train features file does not match "
            "the number of references given. Got %s instances and %s references."
            % (X_train.shape[0], y_train.shape[0])
        )
    if X_test.shape[0] != y_test.shape[0]:
        raise IOError(
            "the number of instances in the test features file does not match the number of references given."
        )

    if X_train.shape[1] != X_test.shape[1]:
        raise IOError(
            "the number of features in train and test datasets is different.")

    return X_train, y_train, X_test, y_test, labels
def open_eval_datasets(eval_path, delim, shape):
    """Load an evaluation features file and check its feature count.

    Args:
        eval_path: Path to the evaluation features file.
        delim: Delimiter forwarded to ``read_features_file``.
        shape: Expected number of feature columns; compared against
            ``X_eval.shape[1]``.

    Returns:
        The value returned by ``read_features_file`` for ``eval_path``.

    Raises:
        IOError: If ``eval_path`` is not an existing file, if the loaded
            data is not two-dimensional, or if its number of columns
            differs from ``shape``.
    """
    if not os.path.isfile(os.path.abspath(eval_path)):
        raise IOError("eval dataset path is not valid: %s" % eval_path)

    X_eval = read_features_file(eval_path, delim)

    # Check the rank first so non-matrix data is reported as an IOError
    # rather than surfacing as an IndexError from shape[1].
    if len(X_eval.shape) != 2:
        raise IOError(
            "the eval dataset must be in the format of a matrix with M lines and N columns."
        )

    # `!=` instead of `<>`: the latter is not valid syntax on Python 3.
    if X_eval.shape[1] != shape:
        raise IOError("the number of features in train and eval datasets is different.")

    return X_eval
def open_eval_datasets(eval_path, delim, shape):
    """Read the evaluation features file and validate its column count.

    Args:
        eval_path: Path to the evaluation features file.
        delim: Delimiter passed through to ``read_features_file``.
        shape: Number of feature columns the caller expects; it is
            compared with ``X_eval.shape[1]``.

    Returns:
        Whatever ``read_features_file`` produced for ``eval_path``.

    Raises:
        IOError: If ``eval_path`` does not point to an existing file, if
            the loaded data does not have exactly two dimensions, or if
            its column count is not equal to ``shape``.
    """
    if not os.path.isfile(os.path.abspath(eval_path)):
        raise IOError("eval dataset path is not valid: %s" % eval_path)

    X_eval = read_features_file(eval_path, delim)

    # Without this guard, data that is not 2-D fails with an IndexError on
    # shape[1] instead of a descriptive IOError.
    if len(X_eval.shape) != 2:
        raise IOError(
            "the eval dataset must be in the format of a matrix with M lines and N columns."
        )

    # The original `<>` comparison does not parse on Python 3; `!=` is
    # equivalent and works on both major versions.
    if X_eval.shape[1] != shape:
        raise IOError(
            "the number of features in train and eval datasets is different.")

    return X_eval
def open_datasets(train_path, train_ref_path, test_path, test_ref_path, delim, labels_path=None, tostring=False):
    """Load train/test feature and reference files and validate their shapes.

    Args:
        train_path: Path to the training features file.
        train_ref_path: Path to the training references file.
        test_path: Path to the test features file.
        test_ref_path: Path to the test references file.
        delim: Delimiter forwarded to the reader helpers.
        labels_path: Optional path to a labels file; when ``None`` the
            returned labels are an empty list.
        tostring: When truthy, forwarded as ``tostring=`` to
            ``read_features_file`` and ``read_reference_file``. When left
            at its default the readers are called with only the path and
            delimiter.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, labels)``.

    Raises:
        IOError: If any given path is not an existing file, if a feature
            set is not two-dimensional, if features and references disagree
            on the number of rows, or if train and test disagree on the
            number of columns.
    """
    # NOTE(review): this file also contains an earlier open_datasets that
    # takes `tostring`; this later definition is the one bound at import
    # time, so it accepts the same parameter to keep such callers working.
    # Confirm which definition is meant to survive.

    # Checked in this order so the first invalid path is the one reported.
    required_files = (
        (train_path, "training dataset path is not valid: %s"),
        (train_ref_path, "training references path is not valid: %s"),
        (test_path, "test dataset path is not valid: %s"),
        (test_ref_path, "test references path is not valid: %s"),
    )
    for path, message in required_files:
        if not os.path.isfile(os.path.abspath(path)):
            raise IOError(message % path)

    labels = []
    if labels_path is not None:
        if not os.path.isfile(os.path.abspath(labels_path)):
            raise IOError("labels file is not valid: %s" % labels_path)
        labels = read_labels_file(labels_path, delim)

    # Only pass `tostring` through when it was requested, so the default
    # call to the readers is exactly what it was before the parameter
    # existed here.
    reader_kwargs = {"tostring": tostring} if tostring else {}

    X_train = read_features_file(train_path, delim, **reader_kwargs)
    y_train = read_reference_file(train_ref_path, delim, **reader_kwargs)
    X_test = read_features_file(test_path, delim, **reader_kwargs)
    y_test = read_reference_file(test_ref_path, delim, **reader_kwargs)

    if len(X_train.shape) != 2:
        raise IOError("the training dataset must be in the format of a matrix with M lines and N columns.")
    if len(X_test.shape) != 2:
        raise IOError("the test dataset must be in the format of a matrix with M lines and N columns.")

    if X_train.shape[0] != y_train.shape[0]:
        # The two counts used to be dumped to stdout with a Python 2 print
        # statement; they now travel with the exception instead.
        raise IOError(
            "the number of instances in the train features file does not match "
            "the number of references given. Got %s instances and %s references."
            % (X_train.shape[0], y_train.shape[0])
        )
    if X_test.shape[0] != y_test.shape[0]:
        raise IOError("the number of instances in the test features file does not match the number of references given.")

    if X_train.shape[1] != X_test.shape[1]:
        raise IOError("the number of features in train and test datasets is different.")

    return X_train, y_train, X_test, y_test, labels