def test_errorfuncs_dims1():
    """Should fail with more than 2 dimensions"""
    import ann

    # get_error only accepts 1D/2D arrays; 3D input must raise ValueError.
    a = np.zeros((2, 2, 2))
    b = np.zeros((2, 2, 2))
    try:
        ann.get_error(ann.ERROR_MSE, a, b)
    except ValueError:
        raised = True
    else:
        raised = False
    assert raised, "Should not work with more than 2 dimensions"
def test_errorfuncs_dims1():
    """Should fail with more than 2 dimensions"""
    import ann

    # Build two 3-dimensional arrays; get_error must reject them.
    shape = (2, 2, 2)
    failed = False
    try:
        ann.get_error(ann.ERROR_MSE, np.zeros(shape), np.zeros(shape))
    except ValueError:
        failed = True
    assert failed, "Should not work with more than 2 dimensions"
def test_errorfuncs_dims2():
    """Should fail if arrays have different dimensions"""
    import ann

    # Arrays whose shapes disagree must be rejected with ValueError.
    first = np.zeros((2, 2))
    second = np.zeros((3, 3))
    try:
        ann.get_error(ann.ERROR_MSE, first, second)
    except ValueError:
        mismatch_caught = True
    else:
        mismatch_caught = False
    assert mismatch_caught, "Dimensions should not match!"
def test_errorfuncs_dims2():
    """Should fail if arrays have different dimensions"""
    import ann

    # First, should fail if arrays differ in dimension
    failed = False
    try:
        ann.get_error(ann.ERROR_MSE, np.zeros((2, 2)), np.zeros((3, 3)))
    except ValueError:
        failed = True
    assert failed, "Dimensions should not match!"
def get_error_nn(model, X, y):
    """Return the error of *model*'s sign-thresholded predictions against *y*.

    Runs the forward pass with gradients disabled; *y* is converted to a
    numpy array before being compared via get_error.
    """
    with torch.no_grad():
        predictions = get_sign(model(X))
        targets = np.array(y)
        return get_error(predictions, targets)
def test_parameter_values(net_constructor, data, inputcols, targetcols, name,
                          values, ntimes=10):
    '''
    Given a parameter and a list of values, will test ntimes for each
    parameter value. This trains on the entire data set each time so is
    suitable for parameters which should be treated as unrelated to
    overtraining.

    Returns the result as a dict, defined as dict[value] = resultlist

    Keyword arguments:
    net_constructor - A function that should return a new neural network
    with all properties set to suitable values. Will be called as:
    net_constructor(name, value)

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the
    input data.

    targetcols - A tuple/list expected to have two members. First being
    the column number of the survival times. The second being the column
    number of the event column.

    name - Name of the property to test different values for. Is up to
    net_constructor to set this property on the network.

    values - A list of values to test 'name' with.

    ntimes - The number of times to train the network.
    '''
    # One result list per tested value.
    results = {}
    for value in values:
        print("Training with {} = {}".format(name, value))
        net = net_constructor(name, value)
        results[value] = []
        # Train n times for each value.
        for _ in range(ntimes):
            net.learn(data[:, inputcols], data[:, targetcols])
            predictions = np.array(
                [net.output(row) for row in data[:, inputcols]])
            err = np.mean(
                ann.get_error(net.error_function, data[:, targetcols],
                              predictions))
            results[value].append(err)
    return results
def test_parameter_values(net_constructor, data, inputcols, targetcols, name,
                          values, ntimes=10):
    '''
    Train ntimes for each candidate value of one network parameter, using
    the entire data set every time. Suitable for parameters that should be
    treated as unrelated to overtraining.

    Returns a dict mapping each tested value to its list of mean errors:
    dict[value] = resultlist

    Keyword arguments:
    net_constructor - A function returning a new neural network with all
    properties set to suitable values. Called as net_constructor(name, value).

    data - Two-dimensional numpy array (or compatible) to train on.

    inputcols - Tuple/list of column numbers holding the input data.

    targetcols - Tuple/list with two members: the survival-time column and
    the event column.

    name - Name of the property to vary; net_constructor is responsible for
    setting it on the network.

    values - Values to test 'name' with.

    ntimes - Number of training repetitions per value.
    '''
    results = {}
    for value in values:
        print("Training with {} = {}".format(name, value))
        net = net_constructor(name, value)
        runs = []
        for _trial in range(ntimes):
            net.learn(data[:, inputcols], data[:, targetcols])
            preds = np.array([net.output(row)
                              for row in data[:, inputcols]])
            runs.append(
                np.mean(ann.get_error(net.error_function,
                                      data[:, targetcols], preds)))
        results[value] = runs
    return results
def test_errorfuncs_data1dlistsminimal():
    """Verify dimensions and value of result"""
    import ann

    rows, cols = 1, 1
    inputs = [0.0] * rows
    targets = [2.0] * rows
    error = ann.get_error(ann.ERROR_MSE, inputs, targets)
    assert len(error.shape) == 1, "Should be one-dimensional result"
    assert error.shape[0] == cols, "Count should match column number"
    # Presumably MSE here is 0.5 * (target - output)**2 per element.
    for e in error:
        assert 0.000001 > e - 0.5 * (2 - 0)**2, "Error is incorrect"
def test_errorfuncs_data1d():
    """Result must match dimensions of input"""
    import ann

    rows, cols = 5, 1
    outputs = np.zeros(rows)
    targets = np.ones(rows) * 2
    errs = ann.get_error(ann.ERROR_MSE, outputs, targets)
    # Shape of the result must mirror the shape of the targets.
    for got, want in zip(errs.shape, targets.shape):
        assert got == want, "Dimensions of result should match input"
    expected = 0.5 * (2 - 0)**2
    for e in errs:
        assert 0.000001 > e - expected, "Error is incorrect"
def test_errorfuncs_data1dlistsminimal():
    """Verify dimensions and value of result"""
    import ann

    rows, cols = 1, 1
    x = [0.0 for _ in range(rows)]
    y = [2.0 for _ in range(rows)]
    result = ann.get_error(ann.ERROR_MSE, x, y)
    assert len(result.shape) == 1, "Should be one-dimensional result"
    assert result.shape[0] == cols, "Count should match column number"
    expected = 0.5 * (2 - 0)**2
    for value in result:
        assert 0.000001 > value - expected, "Error is incorrect"
def test_errorfuncs_data1d():
    """Result must match dimensions of input"""
    import ann

    rows, cols = 5, 1
    x = np.zeros(rows)
    y = 2 * np.ones(rows)
    error = ann.get_error(ann.ERROR_MSE, x, y)
    for result_dim, input_dim in zip(error.shape, y.shape):
        assert result_dim == input_dim, \
            "Dimensions of result should match input"
    for e in error:
        assert 0.000001 > e - 0.5 * (2 - 0)**2, "Error is incorrect"
def test_surv_likelihood():
    """Smoke-test the survival-likelihood error and derivative functions."""
    import ann

    dim = (20, 2)
    targets = np.ones(dim)
    censlvl = 0.0
    # Randomly mark rows as censored (a no-op while censlvl is 0.0, but the
    # per-row uniform draw is kept so RNG consumption is unchanged).
    for i in range(len(targets)):
        if np.random.uniform() < censlvl:
            targets[i, 1] = 0
    outputs = np.random.normal(1, 10, size=dim)
    cens = targets[:, 1] < 1
    uncens = targets[:, 1] == 1
    errors = ann.get_error(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
    derivs = ann.get_deriv(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
def test_surv_likelihood():
    """Exercise ERROR_SURV_LIKELIHOOD error/derivative on random outputs."""
    import ann

    shape = (20, 2)
    targets = np.ones(shape)
    censor_level = 0.0
    for row in range(len(targets)):
        # Censoring probability is zero here, but the draw is preserved.
        if np.random.uniform() < censor_level:
            targets[row, 1] = 0
    outputs = np.random.normal(1, 10, size=shape)
    censored = targets[:, 1] < 1
    uncensored = targets[:, 1] == 1
    errors = ann.get_error(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
    derivs = ann.get_deriv(ann.ERROR_SURV_LIKELIHOOD, targets, outputs)
def crossvalidate(net_constructor, data, inputcols, targetcols, ntimes=5,
                  kfold=3, evalfunc=None, evalresults=None):
    '''
    Does crossvalidation testing on a network the designated number of
    times. Random divisions are stratified for events.

    Keyword arguments:
    net_constructor - A function that should return a new neural network
    with all properties set to suitable values. The model must have a
    "learn" method, and one of the following: output or output_all,
    where they are called as:
        output(x), for x in data[:, inputcols]
        output_all(data[:, inputcols])

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the
    input data.

    targetcols - A tuple/list expected to have two members. First being
    the column number of the survival times. The second being the column
    number of the event column. The data sets are stratified for the
    event column.

    ntimes - The number of times to divide the data.

    kfold - The number of folds to divide the data in. Total number of
    results will equal ntimes * kfold, where each row has featured in a
    test set ntimes.

    evalfunc - Function to apply at end of training. Called with the
    following signature:
        evalfunc(net, data, inputcols, targetcols, trnindices,
                 valindices, evalresults)
    Results are expected to be placed in evalresults. Only called if
    both evalfunc and evalresults is not None.

    Returns a tuple: (trnresultlist, valresultlist)
    where each list is ntimes * kfold long.
    '''
    trnresults = []
    valresults = []

    indices = np.arange(len(data))
    # Stratify on the event column: keep per-class index pools that are
    # shuffled and sliced independently.
    classes = np.unique(data[:, targetcols[1]])
    classindices = {}
    for c in classes:
        classindices[c] = indices[data[:, targetcols[1]] == c]

    for n in range(ntimes):
        # Re-shuffle the data every time
        for c in classes:
            np.random.shuffle(classindices[c])
        for k in range(kfold):
            valindices = []
            trnindices = []
            # Join the data pieces
            for p in range(kfold):
                if k == p:
                    # validation piece
                    for idx in classindices.values():
                        # Piece length might be a decimal number; round it off
                        plength = int(round(len(idx) / kfold))
                        valindices.extend(idx[p * plength:(p + 1) * plength])
                else:
                    for idx in classindices.values():
                        plength = int(round(len(idx) / kfold))
                        trnindices.extend(idx[p * plength:(p + 1) * plength])

            # Ready to train
            net = net_constructor()
            net.learn(data[trnindices][:, inputcols],
                      data[trnindices][:, targetcols])

            # Training result. Prefer the vectorized output_all; fall back
            # to per-row output only when the model lacks it (the bare
            # except here previously swallowed every error, hiding bugs).
            try:
                predictions = np.array(
                    net.output_all(data[trnindices][:, inputcols]))
            except AttributeError:
                predictions = np.array(
                    [net.output(x) for x in data[trnindices][:, inputcols]])
            err = np.mean(
                ann.get_error(net.error_function,
                              data[trnindices][:, targetcols], predictions))
            trnresults.append(err)

            # Validation result
            try:
                predictions = np.array(
                    net.output_all(data[valindices][:, inputcols]))
            except AttributeError:
                predictions = np.array(
                    [net.output(x) for x in data[valindices][:, inputcols]])
            err = np.mean(
                ann.get_error(net.error_function,
                              data[valindices][:, targetcols], predictions))
            valresults.append(err)

            if evalfunc is not None and evalresults is not None:
                evalfunc(net, data, inputcols, targetcols, trnindices,
                         valindices, evalresults)

    return (trnresults, valresults)
def crossvalidate(net_constructor, data, inputcols, targetcols, ntimes=5,
                  kfold=3, evalfunc=None, evalresults=None):
    '''
    Does crossvalidation testing on a network the designated number of
    times. Random divisions are stratified for events.

    Keyword arguments:
    net_constructor - A function that should return a new neural network
    with all properties set to suitable values. The model must have a
    "learn" method, and one of the following: output or output_all,
    where they are called as:
        output(x), for x in data[:, inputcols]
        output_all(data[:, inputcols])

    data - The data to do crossvalidation on. Should be a two-dimensional
    numpy array (or compatible).

    inputcols - A tuple/list of the column numbers which represent the
    input data.

    targetcols - A tuple/list expected to have two members. First being
    the column number of the survival times. The second being the column
    number of the event column. The data sets are stratified for the
    event column.

    ntimes - The number of times to divide the data.

    kfold - The number of folds to divide the data in. Total number of
    results will equal ntimes * kfold, where each row has featured in a
    test set ntimes.

    evalfunc - Function to apply at end of training. Called with the
    following signature:
        evalfunc(net, data, inputcols, targetcols, trnindices,
                 valindices, evalresults)
    Results are expected to be placed in evalresults. Only called if
    both evalfunc and evalresults is not None.

    Returns a tuple: (trnresultlist, valresultlist)
    where each list is ntimes * kfold long.
    '''
    trnresults = []
    valresults = []

    indices = np.arange(len(data))
    # Per-class index pools give event-stratified folds.
    classes = np.unique(data[:, targetcols[1]])
    classindices = {}
    for c in classes:
        classindices[c] = indices[data[:, targetcols[1]] == c]

    for n in range(ntimes):
        # Re-shuffle the data every time
        for c in classes:
            np.random.shuffle(classindices[c])
        for k in range(kfold):
            valindices = []
            trnindices = []
            # Join the data pieces
            for p in range(kfold):
                if k == p:
                    # validation piece
                    for idx in classindices.values():
                        # Piece length might be a decimal number; round it off
                        plength = int(round(len(idx) / kfold))
                        valindices.extend(idx[p * plength:(p + 1) * plength])
                else:
                    for idx in classindices.values():
                        plength = int(round(len(idx) / kfold))
                        trnindices.extend(idx[p * plength:(p + 1) * plength])

            # Ready to train
            net = net_constructor()
            net.learn(data[trnindices][:, inputcols],
                      data[trnindices][:, targetcols])

            # Training result. The fallback to per-row output is only for
            # models without output_all; catching AttributeError (instead
            # of a bare except) keeps genuine prediction errors visible.
            try:
                predictions = np.array(
                    net.output_all(data[trnindices][:, inputcols]))
            except AttributeError:
                predictions = np.array(
                    [net.output(x) for x in data[trnindices][:, inputcols]])
            err = np.mean(
                ann.get_error(net.error_function,
                              data[trnindices][:, targetcols], predictions))
            trnresults.append(err)

            # Validation result
            try:
                predictions = np.array(
                    net.output_all(data[valindices][:, inputcols]))
            except AttributeError:
                predictions = np.array(
                    [net.output(x) for x in data[valindices][:, inputcols]])
            err = np.mean(
                ann.get_error(net.error_function,
                              data[valindices][:, targetcols], predictions))
            valresults.append(err)

            if evalfunc is not None and evalresults is not None:
                evalfunc(net, data, inputcols, targetcols, trnindices,
                         valindices, evalresults)

    return (trnresults, valresults)