Esempio n. 1
0
def test_errorfuncs_dims1():
    """Should fail with more than 2 dimensions"""
    import ann

    # Rank-3 inputs are not supported by get_error.
    a = np.zeros((2, 2, 2))
    b = np.zeros((2, 2, 2))

    raised = False
    try:
        ann.get_error(ann.ERROR_MSE, a, b)
    except ValueError:
        raised = True

    assert raised, "Should not work with more than 2 dimensions"
Esempio n. 2
0
def test_errorfuncs_dims1():
    """Should fail with more than 2 dimensions"""
    import ann

    shape = (2, 2, 2)
    x, y = np.zeros(shape), np.zeros(shape)

    # get_error must reject rank-3 arrays with a ValueError.
    try:
        ann.get_error(ann.ERROR_MSE, x, y)
    except ValueError:
        return  # expected failure occurred
    assert False, "Should not work with more than 2 dimensions"
Esempio n. 3
0
def test_errorfuncs_dims2():
    """Should fail if arrays have different dimensions"""
    import ann

    # Mismatched shapes (2,2) vs (3,3) must be rejected.
    small = np.zeros((2, 2))
    large = np.zeros((3, 3))

    raised = False
    try:
        ann.get_error(ann.ERROR_MSE, small, large)
    except ValueError:
        raised = True

    assert raised, "Dimensions should not match!"
Esempio n. 4
0
def test_errorfuncs_dims2():
    """Should fail if arrays have different dimensions"""
    import ann

    # A (2,2) array against a (3,3) array: shapes disagree, so
    # get_error is required to raise ValueError.
    try:
        ann.get_error(ann.ERROR_MSE, np.zeros((2, 2)), np.zeros((3, 3)))
    except ValueError:
        return  # the expected outcome
    assert False, "Dimensions should not match!"
Esempio n. 5
0
def get_error_nn(model, X, y):
    """Evaluate *model* on X without gradients and return its error vs y."""
    # Inference only: disable autograd while computing predictions.
    with torch.no_grad():
        predictions = get_sign(model(X))
    return get_error(predictions, np.array(y))
Esempio n. 6
0
def test_parameter_values(net_constructor,
                          data,
                          inputcols,
                          targetcols,
                          name,
                          values,
                          ntimes=10):
    '''
    Given a parameter and a list of values, will test ntimes for each
    parameter value. This trains on the entire data set each time so
    is suitable for parameters which should be treated as unrelated
    to overtraining.
    Returns the result as a dict, defined as dict[value] = resultlist

    Keyword arguments:
    net_constructor - A function that should return a new neural network with
    all properties set to suitable values. Will be called as:
    net_constructor(name, value)

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the input
    data.

    targetcols - A tuple/list expected to have two members. First being the
    column number of the survival times. The second being the column number
    of the event column.

    name - Name of the property to test different values for. Is up to
    net_constructor to set this property on the network

    values - A list of values to test 'name' with.

    ntimes - The number of times to train the network.
    '''
    # One list of mean errors per tested parameter value
    results = {}

    for value in values:
        print("Training with {} = {}".format(name, value))
        net = net_constructor(name, value)
        results[value] = []

        # Train ntimes for each value. Loop variable renamed from "x" to
        # "_": the old name was shadowed by the comprehension variable
        # below (confusing, and an outright bug under Python 2 scoping).
        for _ in range(ntimes):
            net.learn(data[:, inputcols], data[:, targetcols])
            predictions = np.array([net.output(row)
                                    for row in data[:, inputcols]])
            results[value].append(
                np.mean(
                    ann.get_error(net.error_function, data[:, targetcols],
                                  predictions)))

    return results
Esempio n. 7
0
def test_parameter_values(net_constructor, data, inputcols, targetcols,
                           name, values, ntimes=10):
    '''
    Train on the entire data set ntimes for each candidate value of the
    named parameter and collect the resulting mean errors. Because every
    run uses all rows, this is suitable for parameters that should be
    treated as unrelated to overtraining.
    Returns a dict mapping each value to its list of mean errors.

    Keyword arguments:
    net_constructor - A function returning a fresh network, called as
    net_constructor(name, value), with all other properties already set.

    data - Two-dimensional numpy array (or compatible) to train on.

    inputcols - Tuple/list of column numbers holding the input data.

    targetcols - Tuple/list with two members: the column number of the
    survival times, and the column number of the event column.

    name - Name of the property to vary; net_constructor is responsible
    for setting it on the network.

    values - The values of 'name' to test.

    ntimes - How many times to train per value.
    '''
    results = {}

    for value in values:
        print("Training with {} = {}".format(name, value))
        net = net_constructor(name, value)
        runs = []
        results[value] = runs

        # Repeat training ntimes and record the mean error of each run
        for _run in range(ntimes):
            net.learn(data[:, inputcols], data[:, targetcols])
            outputs = [net.output(sample) for sample in data[:, inputcols]]
            predictions = np.array(outputs)
            err = ann.get_error(net.error_function,
                                data[:, targetcols],
                                predictions)
            runs.append(np.mean(err))

    return results
Esempio n. 8
0
def test_errorfuncs_data1dlistsminimal():
    """Verify dimensions and value of result"""
    import ann

    rows, cols = 1, 1
    x = [0.0 for i in range(rows)]
    y = [2.0 for i in range(rows)]
    MSE = ann.ERROR_MSE

    error = ann.get_error(MSE, x, y)

    assert len(error.shape) == 1, "Should be one-dimensional result"
    assert error.shape[0] == cols, "Count should match column number"

    # Expected per-element MSE: 0.5 * (2 - 0)**2.  Use abs() so values
    # that are too SMALL fail as well; the previous one-sided check
    # (tol > e - expected) accepted any e below the expected error.
    for e in error:
        assert abs(e - 0.5 * (2 - 0)**2) < 0.000001, "Error is incorrect"
Esempio n. 9
0
def test_errorfuncs_data1d():
    """Result must match dimensions of input"""
    import ann

    rows, cols = 5, 1
    x = np.zeros(rows)
    y = np.ones(rows) * 2
    MSE = ann.ERROR_MSE

    error = ann.get_error(MSE, x, y)

    for s1, s2 in zip(error.shape, y.shape):
        assert s1 == s2, "Dimensions of result should match input"

    # Expected per-element MSE: 0.5 * (2 - 0)**2.  Use abs() so values
    # that are too SMALL fail as well; the previous one-sided check
    # (tol > e - expected) accepted any e below the expected error.
    for e in error:
        assert abs(e - 0.5 * (2 - 0)**2) < 0.000001, "Error is incorrect"
Esempio n. 10
0
def test_errorfuncs_data1dlistsminimal():
    """Verify dimensions and value of result"""
    import ann

    rows, cols = 1, 1
    x = [0.0 for i in range(rows)]
    y = [2.0 for i in range(rows)]
    MSE = ann.ERROR_MSE

    error = ann.get_error(MSE, x, y)

    assert len(error.shape) == 1, "Should be one-dimensional result"
    assert error.shape[0] == cols, "Count should match column number"

    # Expected per-element MSE: 0.5 * (2 - 0)**2.  abs() makes the check
    # two-sided; the old form (tol > e - expected) silently passed for
    # any error value below the expected one.
    for e in error:
        assert abs(e - 0.5 * (2 - 0)**2) < 0.000001, "Error is incorrect"
Esempio n. 11
0
def test_errorfuncs_data1d():
    """Result must match dimensions of input"""
    import ann

    rows, cols = 5, 1
    x = np.zeros(rows)
    y = np.ones(rows) * 2
    MSE = ann.ERROR_MSE

    error = ann.get_error(MSE, x, y)

    for s1, s2 in zip(error.shape, y.shape):
        assert s1 == s2, "Dimensions of result should match input"

    # Expected per-element MSE: 0.5 * (2 - 0)**2.  abs() makes the check
    # two-sided; the old form (tol > e - expected) silently passed for
    # any error value below the expected one.
    for e in error:
        assert abs(e - 0.5 * (2 - 0)**2) < 0.000001, "Error is incorrect"
Esempio n. 12
0
def test_surv_likelihood():
    """Smoke test: survival-likelihood error and derivative calls complete."""
    import ann

    shape = (20, 2)
    targets = np.ones(shape)
    censlvl = 0.0
    # Randomly censor a fraction (censlvl) of the rows; at 0.0 none are.
    for row in range(len(targets)):
        if np.random.uniform() < censlvl:
            targets[row, 1] = 0
    outputs = np.random.normal(1, 10, size=shape)

    # Masks for censored / uncensored rows (not asserted on yet)
    cens = targets[:, 1] < 1
    uncens = targets[:, 1] == 1

    errors = ann.get_error(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
    derivs = ann.get_deriv(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
Esempio n. 13
0
def test_surv_likelihood():
    """Smoke test for the survival-likelihood error function.

    NOTE(review): contains no assertions — it only verifies that
    get_error/get_deriv run without raising on random data.
    """
    import ann

    # 20 rows of (time, event) targets, all events initially uncensored
    dim = (20, 2)
    targets = np.ones(dim)
    censlvl = 0.0
    # Censor each row with probability censlvl (0.0 here, so none)
    for i in range(len(targets)):
        if np.random.uniform() < censlvl:
            targets[i, 1] = 0
    outputs = np.random.normal(1, 10, size=dim)

    #timesorting = outputs[:, 0].argsort()

    # Boolean masks for censored / uncensored rows (currently unused)
    cens = targets[:, 1] < 1
    uncens = targets[:, 1] == 1

    errors = ann.get_error(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
    derivs = ann.get_deriv(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
Esempio n. 14
0
def _model_predictions(net, inputs):
    """Return the model's outputs for *inputs* as a numpy array.

    Prefers the batch method output_all; falls back to per-row output()
    calls when output_all is unavailable or fails. Uses `except Exception`
    instead of the previous bare `except:` so KeyboardInterrupt and
    SystemExit still propagate.
    """
    try:
        return np.array(net.output_all(inputs))
    except Exception:
        return np.array([net.output(x) for x in inputs])


def crossvalidate(net_constructor, data, inputcols, targetcols, ntimes=5,
                  kfold=3, evalfunc=None, evalresults=None):
    '''
    Does crossvalidation testing on a network the designated
    number of times. Random divisions are stratified for events.

    Keyword arguments:
    net_constructor - A function that should return a new neural network with
    all properties set to suitable values. The model must a "learn" method,
    and one of the following: output or output_all, where they are called as:
    output(x), for x in data[:, inputcols]
    output_all(data[:, inputcols)

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the input
    data.

    targetcols - A tuple/list expected to have two members. First being the
    column number of the survival times. The second being the column number
    of the event column. The data sets are stratified for the event column.

    ntimes - The number of times to divide the data.

    kfold - The number of folds to divide the data in. Total number of results
    will equal ntimes * kfold. Where each row has featured in a test set ntimes.

    evalfunc - Function to apply at end of training. Called with the following
    signature:
    evalfunc(net, data, inputcols, targetcols, trnindices, valindices, evalresults)
    Results are expected to be placed in evalresults. Only called if both
    evalfunc and evalresults is not None.

    Returns a tuple: (trnresultlist, valresultlist)
    where each list is ntimes * kfold long.
    '''
    trnresults = []
    valresults = []

    indices = np.arange(len(data))

    # Group row indices by event class so every fold keeps the same
    # event/censoring proportions (stratification).
    classes = np.unique(data[:, targetcols[1]])
    classindices = {}
    for c in classes:
        classindices[c] = indices[data[:, targetcols[1]] == c]

    for n in range(ntimes):
        # Re-shuffle within each class every repetition
        for c in classes:
            np.random.shuffle(classindices[c])

        for k in range(kfold):
            valindices = []
            trnindices = []

            # Assemble fold k as validation, all other folds as training.
            # NOTE(review): plength is rounded, so a few trailing rows per
            # class may be dropped when len(idx) % kfold != 0.
            for p in range(kfold):
                for idx in classindices.values():
                    plength = int(round(len(idx) / kfold))
                    piece = idx[p * plength:(p + 1) * plength]
                    if k == p:
                        valindices.extend(piece)
                    else:
                        trnindices.extend(piece)

            # Ready to train
            net = net_constructor()
            net.learn(data[trnindices][:, inputcols],
                      data[trnindices][:, targetcols])

            # Training result
            predictions = _model_predictions(net,
                                             data[trnindices][:, inputcols])
            trnresults.append(np.mean(
                ann.get_error(net.error_function,
                              data[trnindices][:, targetcols],
                              predictions)))

            # Validation result
            predictions = _model_predictions(net,
                                             data[valindices][:, inputcols])
            valresults.append(np.mean(
                ann.get_error(net.error_function,
                              data[valindices][:, targetcols],
                              predictions)))

            if evalfunc is not None and evalresults is not None:
                evalfunc(net, data, inputcols, targetcols,
                         trnindices, valindices, evalresults)

    return (trnresults, valresults)
Esempio n. 15
0
def crossvalidate(net_constructor,
                  data,
                  inputcols,
                  targetcols,
                  ntimes=5,
                  kfold=3,
                  evalfunc=None,
                  evalresults=None):
    '''
    Does crossvalidation testing on a network the designated
    number of times. Random divisions are stratified for events.

    Keyword arguments:
    net_constructor - A function that should return a new neural network with
    all properties set to suitable values. The model must a "learn" method,
    and one of the following: output or output_all, where they are called as:
    output(x), for x in data[:, inputcols]
    output_all(data[:, inputcols)

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the input
    data.

    targetcols - A tuple/list expected to have two members. First being the
    column number of the survival times. The second being the column number
    of the event column. The data sets are stratified for the event column.

    ntimes - The number of times to divide the data.

    kfold - The number of folds to divide the data in. Total number of results
    will equal ntimes * kfold. Where each row has featured in a test set ntimes.

    evalfunc - Function to apply at end of training. Called with the following
    signature:
    evalfunc(net, data, inputcols, targetcols, trnindices, valindices, evalresults)
    Results are expected to be placed in evalresults. Only called if both
    evalfunc and evalresults is not None.

    Returns a tuple: (trnresultlist, valresultlist)
    where each list is ntimes * kfold long.
    '''
    trnresults = []
    valresults = []

    # This might be a decimal number, remember to round it off
    indices = np.arange(len(data))

    # One index array per distinct event-column value, so folds can be
    # built per class and stay stratified.
    classes = np.unique(data[:, targetcols[1]])
    classindices = {}
    for c in classes:
        classindices[c] = indices[data[:, targetcols[1]] == c]

    for n in range(ntimes):
        # Re-shuffle the data every time
        for c in classes:
            np.random.shuffle(classindices[c])

        for k in range(kfold):
            valindices = []
            trnindices = []

            # Join the data pieces: fold k becomes validation, the other
            # kfold-1 folds become training.
            # NOTE(review): plength is rounded, so trailing rows of each
            # class may be dropped when len(idx) % kfold != 0.
            for p in range(kfold):
                # validation piece
                if k == p:
                    for idx in classindices.values():
                        # Calc piece length
                        plength = int(round(len(idx) / kfold))
                        valindices.extend(idx[p * plength:(p + 1) * plength])
                else:
                    for idx in classindices.values():
                        # Calc piece length
                        plength = int(round(len(idx) / kfold))
                        trnindices.extend(idx[p * plength:(p + 1) * plength])

            # Ready to train
            net = net_constructor()
            net.learn(data[trnindices][:, inputcols],
                      data[trnindices][:, targetcols])

            # Training result
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit — consider `except Exception:` instead.
            try:
                predictions = np.array(
                    net.output_all(data[trnindices][:, inputcols]))
            except:
                # Fall back to per-row output() when output_all fails
                predictions = np.array(
                    [net.output(x) for x in data[trnindices][:, inputcols]])

            err = np.mean(
                ann.get_error(net.error_function,
                              data[trnindices][:, targetcols], predictions))
            #c_index = get_C_index(data[trnindices][:, targetcols], predictions)
            trnresults.append(err)

            # Validation result
            try:
                predictions = np.array(
                    net.output_all(data[valindices][:, inputcols]))
            except:
                # Same per-row fallback for the validation set
                predictions = np.array(
                    [net.output(x) for x in data[valindices][:, inputcols]])

            err = np.mean(
                ann.get_error(net.error_function,
                              data[valindices][:, targetcols], predictions))

            #c_index = get_C_index(data[valindices][:, targetcols], predictions)
            valresults.append(err)

            if evalfunc is not None and evalresults is not None:
                evalfunc(net, data, inputcols, targetcols, trnindices,
                         valindices, evalresults)

    return (trnresults, valresults)