# Example 1
def learn(n_vow, N_reservoir=100, leaky=True, classification=True, **kwargs):
    """Train an Echo State Network on vowel data and evaluate it on a held-out test set.

    Args:
        n_vow: total number of vowels used (NOTE: immediately overwritten by
            kwargs['n_vowel'] below — kept positional for caller compatibility)
        N_reservoir: number of reservoir units in the ESN
        leaky: if True, build a leaky-integrator reservoir
        classification: if True, sensory classification is performed instead of
            motor prediction (not referenced in this body)

    Required kwargs:
        output_folder, regularization, logistic, leak_rate, spectral_radius,
        n_channels, n_vowel, n_samples, n_training, verbose, flow, rank

    Returns:
        (error, c_matrix): mean 0/1 loss over the test set and the normalized
        confusion matrix as a numpy array.
    """
    output_folder = kwargs['output_folder']
    regularization = kwargs['regularization']
    logistic = kwargs['logistic']
    leak_rate = kwargs['leak_rate']
    spectral_radius = kwargs['spectral_radius']
    n_channels = kwargs['n_channels']
    n_vow = kwargs['n_vowel']           # shadows the positional argument on purpose (original behavior)
    n_samples = kwargs['n_samples']
    n_training = kwargs['n_training']
    output = kwargs['verbose']
    flow = kwargs['flow']               # NOTE: dead read — overwritten below; kept so a missing
                                        #  'flow' key still raises KeyError as before
    rank = kwargs['rank']

    training_set, test_set = get_training_and_test_sets(n_samples, n_training, n_vow)

    if output:
        print('samples_test = '+str(test_set))
        print('len(samples_train) = '+str(len(training_set)))

    N_classes = n_vow+1                 # number of classes is total number of vowels + null class
    input_dim = n_channels              # input dimension is number of used channels

    if output:
        print('constructing reservoir')

    # construct individual nodes
    if leaky:
        # leaky reservoir: leak_rate controls the integration time constant
        reservoir = Oger.nodes.LeakyReservoirNode(input_dim=input_dim, output_dim=N_reservoir,
            input_scaling=1., spectral_radius=spectral_radius, leak_rate=leak_rate)
    else:
        # non-leaky reservoir with the same input/output dimensions
        reservoir = Oger.nodes.ReservoirNode(input_dim=input_dim, output_dim=N_reservoir,
            input_scaling=1.)

    if logistic:
        readout = Oger.nodes.LogisticRegressionNode()
    else:
        # output units trained with ridge regression
        readout = Oger.nodes.RidgeRegressionNode(regularization)

    flow = mdp.Flow([reservoir, readout])
                                        # connect reservoir and output nodes

    if output:
        print("Training...")

    # FIX: removed leftover debugging breakpoint (import pdb; pdb.set_trace())
    # that halted every training run.
    flow.train([[], training_set])
                                        # reservoir needs no training data, readout gets training_set

    if output:
        print("Applying to testset...")

    losses = []                         # discrete 0/1 recognition loss per test item
    ymean = []                          # true class of each test item
    ytestmean = []                      # class vote of the trained flow per test item

    for i_sample, sample in enumerate(test_set):
        if output:
            print('testing with sample '+str(i_sample))

        xtest = sample[0]               # AN activations of current test item
        ytarget = sample[1]             # teacher signal of current test item
        ytest = flow(xtest)             # trained output units' responses over time

        mean_sample_vote = mdp.numx.mean(ytest, axis=0)
                                        # average each output neuron's response over time
        if output:
            print('mean_sample_vote = '+str(mean_sample_vote))
        target = mdp.numx.mean(ytarget, axis=0)
                                        # average teacher signals over time
        if output:
            print('target = '+str(target))

        argmax_vote = sp.argmax(mean_sample_vote)
                                        # winner-take-all vote for final classification
        ytestmean.append(argmax_vote)
        argmax_target = sp.argmax(target)
                                        # true class of current test item
        ymean.append(argmax_target)

        loss = Oger.utils.loss_01(mdp.numx.atleast_2d(argmax_vote), mdp.numx.atleast_2d(argmax_target))
                                        # 0 if vote matches true class, 1 otherwise
        if output:
            print('loss = '+str(loss))
        losses.append(loss)

        xtest = None                    # release references to free memory before next item
        ytest = None
        ytarget = None

    error = mdp.numx.mean(losses)       # error rate is the fraction of mismatches
    # FIX: collapsed the two duplicated error printouts into one block
    if output:
        print('error = '+str(error))
        print('ymean: '+str(ymean))
        print('ytestmean: '+str(ytestmean))

    ytestmean = np.array(ytestmean)     # numpy arrays required by ConfusionMatrix
    ymean = np.array(ymean)

    confusion_matrix = ConfusionMatrix.from_data(N_classes, ytestmean, ymean)
                                        # confusion matrix from class votes and true classes
    c_matrix = confusion_matrix.balance()
                                        # normalize confusion matrix
    c_matrix = np.array(c_matrix)

    if output:
        print('confusion_matrix = '+str(c_matrix))

    save_flow(flow, N_reservoir, leaky, rank, output_folder)

    return error, c_matrix              # error rate and normalized confusion matrix
# Example 2
def learn(n_vow, N_reservoir=100, leaky=True, classification=True, **kwargs):
    """Train an Echo State Network on vowel data and evaluate it on a held-out test set.

    Args:
        n_vow: total number of vowels used (NOTE: immediately overwritten by
            kwargs['n_vowel'] below — kept positional for caller compatibility)
        N_reservoir: number of reservoir units in the ESN
        leaky: if True, build a leaky-integrator reservoir
        classification: if True, sensory classification is performed instead of
            motor prediction (not referenced in this body)

    Required kwargs:
        output_folder, regularization, logistic, leak_rate, spectral_radius,
        n_channels, n_vowel, n_samples, n_training, verbose, flow, rank

    Returns:
        (error, c_matrix): mean 0/1 loss over the test set and the normalized
        confusion matrix as a numpy array.
    """
    output_folder = kwargs['output_folder']
    regularization = kwargs['regularization']
    logistic = kwargs['logistic']
    leak_rate = kwargs['leak_rate']
    spectral_radius = kwargs['spectral_radius']
    n_channels = kwargs['n_channels']
    n_vow = kwargs['n_vowel']           # shadows the positional argument on purpose (original behavior)
    n_samples = kwargs['n_samples']
    n_training = kwargs['n_training']
    output = kwargs['verbose']
    flow = kwargs['flow']               # NOTE: dead read — overwritten below; kept so a missing
                                        #  'flow' key still raises KeyError as before
    rank = kwargs['rank']

    training_set, test_set = get_training_and_test_sets(n_samples, n_training, n_vow)

    if output:
        print('samples_test = '+str(test_set))
        print('len(samples_train) = '+str(len(training_set)))

    N_classes = n_vow+1                 # number of classes is total number of vowels + null class
    input_dim = n_channels              # input dimension is number of used channels

    if output:
        print('constructing reservoir')

    # construct individual nodes
    if leaky:
        # leaky reservoir: leak_rate controls the integration time constant
        reservoir = Oger.nodes.LeakyReservoirNode(input_dim=input_dim, output_dim=N_reservoir,
            input_scaling=1., spectral_radius=spectral_radius, leak_rate=leak_rate)
    else:
        # non-leaky reservoir with the same input/output dimensions
        reservoir = Oger.nodes.ReservoirNode(input_dim=input_dim, output_dim=N_reservoir,
            input_scaling=1.)

    if logistic:
        readout = Oger.nodes.LogisticRegressionNode()
    else:
        # output units trained with ridge regression
        readout = Oger.nodes.RidgeRegressionNode(regularization)

    flow = mdp.Flow([reservoir, readout])
                                        # connect reservoir and output nodes

    if output:
        print("Training...")

    flow.train([[], training_set])
                                        # reservoir needs no training data, readout gets training_set

    if output:
        print("Applying to testset...")

    losses = []                         # discrete 0/1 recognition loss per test item
    ymean = []                          # true class of each test item
    ytestmean = []                      # class vote of the trained flow per test item

    for i_sample, sample in enumerate(test_set):
        if output:
            print('testing with sample '+str(i_sample))

        xtest = sample[0]               # AN activations of current test item
        ytarget = sample[1]             # teacher signal of current test item
        ytest = flow(xtest)             # trained output units' responses over time

        mean_sample_vote = mdp.numx.mean(ytest, axis=0)
                                        # average each output neuron's response over time
        if output:
            print('mean_sample_vote = '+str(mean_sample_vote))
        target = mdp.numx.mean(ytarget, axis=0)
                                        # average teacher signals over time
        if output:
            print('target = '+str(target))

        argmax_vote = sp.argmax(mean_sample_vote)
                                        # winner-take-all vote for final classification
        ytestmean.append(argmax_vote)
        argmax_target = sp.argmax(target)
                                        # true class of current test item
        ymean.append(argmax_target)

        loss = Oger.utils.loss_01(mdp.numx.atleast_2d(argmax_vote), mdp.numx.atleast_2d(argmax_target))
                                        # 0 if vote matches true class, 1 otherwise
        if output:
            print('loss = '+str(loss))
        losses.append(loss)

        xtest = None                    # release references to free memory before next item
        ytest = None
        ytarget = None

    error = mdp.numx.mean(losses)       # error rate is the fraction of mismatches
    # FIX: collapsed the two duplicated error printouts into one block
    if output:
        print('error = '+str(error))
        print('ymean: '+str(ymean))
        print('ytestmean: '+str(ytestmean))

    ytestmean = np.array(ytestmean)     # numpy arrays required by ConfusionMatrix
    ymean = np.array(ymean)

    confusion_matrix = ConfusionMatrix.from_data(N_classes, ytestmean, ymean)
                                        # confusion matrix from class votes and true classes
    c_matrix = confusion_matrix.balance()
                                        # normalize confusion matrix
    c_matrix = np.array(c_matrix)

    if output:
        print('confusion_matrix = '+str(c_matrix))

    save_flow(flow, N_reservoir, leaky, rank, output_folder)

    return error, c_matrix              # error rate and normalized confusion matrix