Ejemplo n.º 1
0
def open_datasets(train_path,
                  train_ref_path,
                  test_path,
                  test_ref_path,
                  delim,
                  labels_path=None,
                  tostring=False):
    """Load the train/test feature and reference files and cross-check them.

    All mandatory paths are validated before any file is parsed. Parsing is
    delegated to ``read_features_file``, ``read_reference_file`` and
    ``read_labels_file`` (defined elsewhere); the objects they return are
    only required here to expose a ``.shape`` attribute (presumably
    array-like -- confirm against those helpers).

    Args:
        train_path: path of the training features file.
        train_ref_path: path of the training references file.
        test_path: path of the test features file.
        test_ref_path: path of the test references file.
        delim: delimiter forwarded unchanged to every reader.
        labels_path: optional path of a labels file; when ``None`` no labels
            are read.
        tostring: forwarded unchanged as ``tostring=`` to the features and
            reference readers.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test, labels)``. ``labels``
        is an empty list when ``labels_path`` is ``None``.

    Raises:
        IOError: if any given path is not an existing file, if a features
            object is not two-dimensional, if the number of feature rows
            differs from the number of references, or if train and test
            have a different number of columns.
    """
    # Checked in this order so the first invalid path is the one reported.
    required_files = (
        (train_path, "training dataset path is not valid: %s"),
        (train_ref_path, "training references path is not valid: %s"),
        (test_path, "test dataset path is not valid: %s"),
        (test_ref_path, "test references path is not valid: %s"),
    )
    for path, message in required_files:
        if not os.path.isfile(os.path.abspath(path)):
            raise IOError(message % path)

    labels = []
    if labels_path is not None:
        if not os.path.isfile(os.path.abspath(labels_path)):
            raise IOError("labels file is not valid: %s" % labels_path)

        labels = read_labels_file(labels_path, delim)

    X_train = read_features_file(train_path, delim, tostring=tostring)
    y_train = read_reference_file(train_ref_path, delim, tostring=tostring)

    X_test = read_features_file(test_path, delim, tostring=tostring)
    y_test = read_reference_file(test_ref_path, delim, tostring=tostring)

    # The 2-D checks must come first: the column comparison at the end
    # indexes shape[1].
    if len(X_train.shape) != 2:
        raise IOError(
            "the training dataset must be in the format of a matrix with M lines and N columns."
        )

    if len(X_test.shape) != 2:
        raise IOError(
            "the test dataset must be in the format of a matrix with M lines and N columns."
        )

    # The mismatching counts are carried in the exception itself instead of
    # being printed to stdout right before raising.
    if X_train.shape[0] != y_train.shape[0]:
        raise IOError(
            "the number of instances in the train features file does not match the number of references given."
            " (instances: %s, references: %s)" %
            (X_train.shape[0], y_train.shape[0]))

    if X_test.shape[0] != y_test.shape[0]:
        raise IOError(
            "the number of instances in the test features file does not match the number of references given."
            " (instances: %s, references: %s)" %
            (X_test.shape[0], y_test.shape[0]))

    if X_train.shape[1] != X_test.shape[1]:
        raise IOError(
            "the number of features in train and test datasets is different.")

    return X_train, y_train, X_test, y_test, labels
Ejemplo n.º 2
0
def open_eval_datasets(eval_path, delim, shape):
    """Load the evaluation features file and verify its column count.

    Parsing is delegated to ``read_features_file`` (defined elsewhere); the
    object it returns is only required here to expose a ``.shape``
    attribute.

    Args:
        eval_path: path of the evaluation features file.
        delim: delimiter forwarded unchanged to ``read_features_file``.
        shape: expected number of feature columns. Despite the name it is
            compared directly against ``X_eval.shape[1]``, so it must be a
            single number rather than a shape tuple.

    Returns:
        The object returned by ``read_features_file``.

    Raises:
        IOError: if ``eval_path`` is not an existing file, if the loaded
            data is not two-dimensional, or if its column count differs
            from ``shape``.
    """
    if not os.path.isfile(os.path.abspath(eval_path)):
        raise IOError("eval dataset path is not valid: %s" % eval_path)

    X_eval = read_features_file(eval_path, delim)

    # Guard the shape[1] lookup below: without it, non-2-D data surfaces as
    # a bare IndexError instead of the IOError used for every other
    # malformed-input case.
    if len(X_eval.shape) != 2:
        raise IOError("the eval dataset must be in the format of a matrix with M lines and N columns.")

    # `!=` replaces the deprecated `<>` operator (removed in Python 3).
    if X_eval.shape[1] != shape:
        raise IOError("the number of features in train and eval datasets is different.")
    return X_eval
Ejemplo n.º 3
0
def open_eval_datasets(eval_path, delim, shape):
    """Read the evaluation features file and validate its width.

    The file is parsed by ``read_features_file`` (defined elsewhere); this
    function only relies on the ``.shape`` attribute of the result.

    Args:
        eval_path: path of the evaluation features file.
        delim: delimiter passed through to ``read_features_file``.
        shape: expected number of feature columns; it is compared directly
            against ``X_eval.shape[1]``, so it is a single number, not a
            shape tuple.

    Returns:
        The object returned by ``read_features_file``.

    Raises:
        IOError: if ``eval_path`` is not an existing file, if the loaded
            data is not two-dimensional, or if its number of columns is
            not equal to ``shape``.
    """
    if not os.path.isfile(os.path.abspath(eval_path)):
        raise IOError("eval dataset path is not valid: %s" % eval_path)

    X_eval = read_features_file(eval_path, delim)

    # Check dimensionality before touching shape[1]; otherwise non-2-D data
    # fails with an IndexError rather than the IOError callers get for
    # every other invalid-input case.
    if len(X_eval.shape) != 2:
        raise IOError(
            "the eval dataset must be in the format of a matrix with M lines and N columns."
        )

    # `<>` is deprecated and not valid in Python 3; `!=` is equivalent.
    if X_eval.shape[1] != shape:
        raise IOError(
            "the number of features in train and eval datasets is different.")
    return X_eval
Ejemplo n.º 4
0
def open_datasets(train_path, train_ref_path, test_path, 
                  test_ref_path, delim, labels_path=None):
    """Load the train/test feature and reference files and cross-check them.

    All mandatory paths are validated before any file is parsed. Parsing is
    delegated to ``read_features_file``, ``read_reference_file`` and
    ``read_labels_file`` (defined elsewhere); the objects they return are
    only required here to expose a ``.shape`` attribute (presumably
    array-like -- confirm against those helpers).

    Args:
        train_path: path of the training features file.
        train_ref_path: path of the training references file.
        test_path: path of the test features file.
        test_ref_path: path of the test references file.
        delim: delimiter forwarded unchanged to every reader.
        labels_path: optional path of a labels file; when ``None`` no labels
            are read.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test, labels)``. ``labels``
        is an empty list when ``labels_path`` is ``None``.

    Raises:
        IOError: if any given path is not an existing file, if a features
            object is not two-dimensional, if the number of feature rows
            differs from the number of references, or if train and test
            have a different number of columns.
    """
    # Checked in this order so the first invalid path is the one reported.
    required_files = (
        (train_path, "training dataset path is not valid: %s"),
        (train_ref_path, "training references path is not valid: %s"),
        (test_path, "test dataset path is not valid: %s"),
        (test_ref_path, "test references path is not valid: %s"),
    )
    for path, message in required_files:
        if not os.path.isfile(os.path.abspath(path)):
            raise IOError(message % path)

    labels = []
    if labels_path is not None:
        if not os.path.isfile(os.path.abspath(labels_path)):
            raise IOError("labels file is not valid: %s" % labels_path)

        labels = read_labels_file(labels_path, delim)

    X_train = read_features_file(train_path, delim)
    y_train = read_reference_file(train_ref_path, delim)

    X_test = read_features_file(test_path, delim)
    y_test = read_reference_file(test_ref_path, delim)

    # The 2-D checks must come first: the column comparison at the end
    # indexes shape[1].
    if len(X_train.shape) != 2:
        raise IOError("the training dataset must be in the format of a matrix with M lines and N columns.")

    if len(X_test.shape) != 2:
        raise IOError("the test dataset must be in the format of a matrix with M lines and N columns.")

    # The mismatching counts are carried in the exception itself instead of
    # being printed to stdout right before raising.
    if X_train.shape[0] != y_train.shape[0]:
        raise IOError("the number of instances in the train features file does not match the number of references given."
                      " (instances: %s, references: %s)" % (X_train.shape[0], y_train.shape[0]))

    if X_test.shape[0] != y_test.shape[0]:
        raise IOError("the number of instances in the test features file does not match the number of references given."
                      " (instances: %s, references: %s)" % (X_test.shape[0], y_test.shape[0]))

    if X_train.shape[1] != X_test.shape[1]:
        raise IOError("the number of features in train and test datasets is different.")

    return X_train, y_train, X_test, y_test, labels