Example #1
0
    def TrainModel(self, msg):
        """Train the two-label model for the next run and save it to a file.

        Loads the two block groups named in ``msg.fields.cfg.blkGrpRefs``,
        fits one logistic regression per label column on their combined
        patterns, caches the result in ``self.modelCache`` under the current
        run id, optionally validates it, and writes it as a .mat file under
        ``self.dirs.dataDir``.

        Args:
            msg: request message. ``msg.fields.cfg.blkGrpRefs`` must hold two
                entries, each providing ``run`` and ``phase``.

        Returns:
            The reply produced by the parent class's ``TrainModel`` with
            timing and bias lines appended to ``reply.fields.outputlns``, or
            an error reply (``MsgResult.Error``) when a block group cannot be
            loaded or the model file cannot be written.
        """
        reply = super().TrainModel(msg)
        # The timer is started before the data is loaded, so the reported
        # time includes block-group loading as well as fitting.
        trainStart = time.time()

        # load data to train model
        trainCfg = msg.fields.cfg
        bgRef1 = StructDict(trainCfg.blkGrpRefs[0])
        bgRef2 = StructDict(trainCfg.blkGrpRefs[1])
        # Pre-bind bg1 so the handler below can tell which load failed. Without
        # this, a failure in the first call left bg1 unbound and the handler
        # itself raised UnboundLocalError instead of returning an error reply.
        bg1 = None
        try:
            bg1 = self.getPrevBlkGrp(self.id_fields.sessionId, bgRef1.run,
                                     bgRef1.phase)
            bg2 = self.getPrevBlkGrp(self.id_fields.sessionId, bgRef2.run,
                                     bgRef2.phase)
        except Exception as err:
            failedRef = bgRef1 if bg1 is None else bgRef2
            errorReply = self.createReplyMessage(msg, MsgResult.Error)
            errorReply.data = "Error: getPrevBlkGrp(%r, %r, %r): %r" % (
                self.id_fields.sessionId, failedRef.run, failedRef.phase, err)
            return errorReply

        # From each block group keep only the TRs whose regressor column has
        # a non-zero entry, together with the labels for those TRs.
        patsPerGroup = []
        labelsPerGroup = []
        for blkGrp in (bg1, bg2):
            trainIdx = utils.find(np.any(blkGrp.patterns.regressor, axis=0))
            labelsPerGroup.append(
                np.transpose(blkGrp.patterns.regressor[:, trainIdx]))
            patsPerGroup.append(blkGrp.patterns.raw_sm_filt_z[trainIdx, :])

        trainPats = np.concatenate(patsPerGroup)
        trainLabels = np.concatenate(labelsPerGroup).astype(np.uint8)

        # train the model
        # sklearn LogisticRegression takes one set of labels and returns one
        # set of weights. The version implemented in Matlab can take multiple
        # sets of labels and return multiple weights. To reproduce that
        # behavior here, use one LogisticRegression instance per set of
        # labels (2 in this case).
        lrc1 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
        lrc2 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
        lrc1.fit(trainPats, trainLabels[:, 0])
        lrc2.fit(trainPats, trainLabels[:, 1])
        newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
        newTrainedModel.trainedModel = StructDict({})
        # One weight column and one bias entry per label column.
        newTrainedModel.trainedModel.weights = np.concatenate(
            (lrc1.coef_.T, lrc2.coef_.T), axis=1)
        newTrainedModel.trainedModel.biases = np.concatenate(
            (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
        newTrainedModel.trainPats = trainPats
        newTrainedModel.trainLabels = trainLabels
        newTrainedModel.FWHM = self.session.FWHM
        newTrainedModel.cutoff = self.session.cutoff
        newTrainedModel.gitCodeId = utils.getGitCodeId()

        trainingOnlyTime = time.time() - trainStart

        # print training timing and results
        outputLines = reply.fields.outputlns
        outputLines.append('Model training completed')
        outputLines.append(
            'Model training time: \t{:.3f}'.format(trainingOnlyTime))
        # NOTE(review): biases were stored under newTrainedModel.trainedModel;
        # this top-level lookup presumably relies on MatlabStructDict falling
        # back to its named sub-struct - confirm against utils.
        if newTrainedModel.biases is not None:
            outputLines.append('Model biases: \t{:.3f}\t{:.3f}'.format(
                newTrainedModel.biases[0, 0], newTrainedModel.biases[0, 1]))

        # cache the trained model
        self.modelCache[self.id_fields.runId] = newTrainedModel

        if self.session.validate:
            try:
                self.validateModel(newTrainedModel, reply.fields.outputlns)
            except Exception as err:
                # Validation is best-effort: log the failure and carry on so
                # the trained model is still saved.
                logging.error("validateModel: %r", err)

        # write trained model to a file
        filename = getModelFilename(self.id_fields.sessionId,
                                    self.id_fields.runId)
        trainedModel_fn = os.path.join(self.dirs.dataDir, filename)
        try:
            sio.savemat(trainedModel_fn, newTrainedModel, appendmat=False)
        except Exception as err:
            errorReply = self.createReplyMessage(msg, MsgResult.Error)
            errorReply.data = "Error: Unable to save trainedModel %s: %s" % (
                filename, str(err))
            return errorReply
        return reply
    # NOTE(review): this loop is a fragment - the enclosing definition is not
    # visible here, and nRuns, pyDataDir and nTRTest are bound outside this view.
    # For each run it refits a two-label logistic-regression model from the
    # patterns/labels stored in that run's trained-model file, then feeds rows
    # of the next run's stored training patterns through Test_L2_RLR_realtime.
    for runId in np.arange(1,nRuns):
        print(runId)
        # NOTE(review): runDir is not used in the visible part of the loop.
        runDir = 'run'+str(runId)+'/'
        pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r'+str(runId)+'*_py.mat')
        # To see what a loaded model contains use its keys() --> trainedModel, trainPats, trainLabels.
        # For each model we have W [nVoxel x 2 classes] and biases [1 x 2 classes].
        # We can't apply this model to any of the examples in this run, so it is applied
        # to data stored with the next run's model instead (loaded further below).
        pyModel_train = utils.loadMatFile(pyModelFn)
        # Rather than using the stored weights, fit a new model on the stored
        # training patterns: one classifier per label column.
        lrc1 = LogisticRegression(penalty='l2', solver='saga',max_iter=300)
        lrc2 = LogisticRegression(penalty='l2', solver='saga',max_iter=300)

        lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
        lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
        # Package both classifiers as one struct: weights side by side as two
        # columns, biases as a 1 x 2 row.
        newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
        newTrainedModel.trainedModel = StructDict({})
        newTrainedModel.trainedModel.weights = np.concatenate((lrc1.coef_.T, lrc2.coef_.T), axis=1)
        newTrainedModel.trainedModel.biases = np.concatenate((lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
        newTrainedModel.trainPats = pyModel_train.trainPats
        newTrainedModel.trainLabels = pyModel_train.trainLabels

        # Now load testing data for cross-validation: the model file of the next run.
        pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r'+str(runId + 1)+'*_py.mat')
        pyModel_test = utils.loadMatFile(pyModelFn)
        # Test rows are everything from row nTRTest onward of the next run's
        # stored training patterns and labels.
        py_test_data = pyModel_test.trainPats[nTRTest:,:]
        test_labels = pyModel_test.trainLabels[nTRTest:,:]
        # NOTE(review): py_cs and activations are allocated but not written in
        # the visible part of the loop - the fragment appears to be cut off.
        py_cs = np.zeros((nTRTest, 1))
        activations = np.zeros((nTRTest,2))
        for t in np.arange(nTRTest):
            # Only the fourth return value of Test_L2_RLR_realtime is kept.
            _, _, _, activations_py = Test_L2_RLR_realtime(newTrainedModel,py_test_data[t,:],test_labels[t,:])
Example #3
0
def validateModelsMatlabPython(subjectNum, subjectDay, usesamedata):
    """Compare MATLAB and Python classifiers on data from the following run.

    For every run r from 1 up to (but not including) the last configured
    run, the model trained for run r is applied to rows ``nTRTest:`` of the
    training patterns stored with the run r+1 model. For each test TR the
    category separation (activation of the labelled category minus the
    activation of the other one) is recorded for the MATLAB model and for
    the Python model. The two sets of values are then shown as a scatter
    plot and as overlaid histograms, each titled with their Pearson
    correlation.

    Args:
        subjectNum: subject number, used to locate the config file.
        subjectDay: day number, used to locate the config file.
        usesamedata: when truthy, the Python model is fit on and tested with
            the MATLAB patterns instead of the Python ones.
    """
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = (dataPath + 'subject' + str(subjectNum) +
                  '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml')
    cfg = loadConfigFile(configFile)
    subjectDayDir = (dataPath + 'subject' + str(cfg.session.subjectNum) +
                     '/day' + str(cfg.session.subjectDay))
    # MATLAB and Python outputs live in the same day directory.
    matDataDir = subjectDayDir
    pyDataDir = subjectDayDir

    nTRTest = 100  # number of TRs scored per run
    lastRun = cfg.session.Runs[-1]
    nModels = lastRun - 1  # the last run has no following run to test on
    # axis 0: test TR, axis 1: 0 = MATLAB / 1 = Python, axis 2: run index
    all_vals = np.zeros((nTRTest, 2, nModels))
    # 1: refit the Python model here; 0: use the weights stored in the file
    usenewmodel = 1

    for runId in np.arange(1, lastRun):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # Loaded models expose trainedModel, trainPats and trainLabels; each
        # has W [nVoxel x 2 classes] and biases [1 x 2 classes].
        matModel_train = utils.loadMatFile(matModelFn)
        pyModel_train = utils.loadMatFile(pyModelFn)
        print(runId)

        if usenewmodel:
            # Refit one classifier per label column. The labels always come
            # from the Python file; only the patterns depend on usesamedata.
            if usesamedata:
                fitPats = matModel_train.trainPats
            else:
                fitPats = pyModel_train.trainPats
            lrc1 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc1.fit(fitPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(fitPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
            pyModel_eval = newTrainedModel
        else:
            pyModel_eval = pyModel_train

        # A model cannot be applied to the run it was trained on, so the test
        # data is taken from the files of the following run.
        nextRun = str(runId + 1)
        matModelFn = utils.findNewestFile(
            matDataDir,
            'run' + nextRun + '/' + 'trainedModel_' + nextRun + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + nextRun + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        py_eval_data = mat_test_data if usesamedata else py_test_data

        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            # categ: index of the labelled class; otherCateg: the other one.
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModel_eval, py_eval_data[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]

    all_mat_ev = np.reshape(all_vals[:, 0, :], (nModels * nTRTest, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :], (nModels * nTRTest, 1))
    # pearsonr is documented for 1-D input; pass flat vectors rather than
    # (N, 1) columns so the result is a plain scalar on every SciPy version.
    corr = scipy.stats.pearsonr(all_mat_ev.ravel(), all_py_ev.ravel())[0]
    plotTitle = 'S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, corr)

    plt.subplots(figsize=(12, 7))
    plt.plot(all_mat_ev, all_py_ev, '.')
    plt.plot([-5, 5], [-5, 5], '--k')  # identity line for reference
    plt.title(plotTitle)
    plt.xlabel('MATLAB')
    plt.ylabel('PYTHON')
    plt.xlim([-1.5, 1.5])
    plt.ylim([-1.5, 1.5])
    plt.show()

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title(plotTitle)
    plt.legend()
    plt.show()
Example #4
0
def train_test_python_classifier(subjectNum):
    """Refit each run's classifier offline and score it on the next run.

    For each of three days and each run r of that day (except the last):

    1. Fit two logistic regressions (one per label column) on the patterns
       and labels stored in run r's ``trainedModel_r<r>*_py.mat`` file.
    2. Compute the ROC AUC of (activation[1] - activation[0]) against label
       column 1 on rows ``nTRTest:`` of the training patterns stored in the
       run r+1 model file.
    3. Compute the per-TR category separation (activation of the labelled
       category minus the other) on the labelled TRs of the
       ``blkGroup_r<r+1>_p2_*_py.mat`` file.

    The AUCs, per-run mean separations and per-TR separations are saved with
    ``np.savez`` to ``offlineAUC_RTCS`` in the subject directory.

    Args:
        subjectNum: subject number used to build the data paths. Subject 106
            has 6 runs on day 1 instead of 7.
    """
    ndays = 3
    nTRTest = 100  # number of TRs scored per run
    # Number of runs collected on each day.
    runsPerDay = {1: 7, 2: 9, 3: 8}

    auc_score = np.zeros((8, ndays))  # save larger to fit all days in
    RT_cs = np.zeros((8, ndays))
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    subjectDir = dataPath + '/' + 'subject' + str(subjectNum)
    print(subjectNum)
    # time course of classifier evidence: run x TR x day
    all_python_evidence = np.zeros((9, nTRTest, ndays))

    for d in range(ndays):
        print(d)
        subjectDay = d + 1
        configFile = (dataPath + 'subject' + str(subjectNum) +
                      '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml')
        cfg = loadConfigFile(configFile)
        subjectDayDir = (dataPath + 'subject' + str(cfg.session.subjectNum) +
                         '/day' + str(cfg.session.subjectDay))
        pyDataDir = subjectDayDir

        nRuns = runsPerDay[subjectDay]
        if subjectDay == 1 and str(subjectNum) == '106':
            nRuns = 6
        print('total number of runs: %i' % nRuns)
        print(subjectNum)
        print(subjectDay)
        print(nRuns)

        for r in range(nRuns - 1):
            runId = r + 1  # r is the 0-based array index, runId the 1-based run
            print(runId)
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
            # Loaded models expose trainedModel, trainPats and trainLabels;
            # each has W [nVoxel x 2 classes] and biases [1 x 2 classes].
            pyModel_train = utils.loadMatFile(pyModelFn)

            # Refit from the stored training data instead of reusing the
            # stored weights: one classifier per label column.
            lrc1 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)
            lrc2 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)
            lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels

            # A model cannot be applied to the run it was trained on, so the
            # test data comes from the model file of the following run.
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
            pyModel_test = utils.loadMatFile(pyModelFn)
            py_test_data = pyModel_test.trainPats[nTRTest:, :]
            test_labels = pyModel_test.trainLabels[nTRTest:, :]
            activations = np.zeros((nTRTest, 2))
            for t in range(nTRTest):
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, py_test_data[t, :], test_labels[t, :])
                activations[t, :] = activations_py

            # AUC of this model applied to the first half of the next run.
            fpr2, tpr2, _ = metrics.roc_curve(test_labels[:, 1],
                                              activations[:, 1] -
                                              activations[:, 0],
                                              pos_label=1)
            auc_score[r, d] = metrics.auc(fpr2, tpr2)

            # now apply to block data -- realtime values
            pyDataFn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId + 1) + '_p2_*_py.mat')
            pyData_test = utils.loadMatFile(pyDataFn)
            regressor = pyData_test.regressor
            # argwhere returns an (n, 1) column of indices, so indexing with
            # it adds a singleton axis; the reshape/flatten below remove it.
            # The reshape also requires exactly nTRTest labelled TRs.
            # (assumes regressor is a 2-D [2 x nTR] array - TODO confirm)
            TRs_to_test = np.argwhere(np.sum(regressor, axis=0))
            RT_data = pyData_test.raw_sm_filt_z[TRs_to_test, :]
            RT_regressor = regressor[:, TRs_to_test].T.reshape(nTRTest, 2)

            # Test every TR and record the category separation.
            cs = np.zeros((nTRTest, 1))
            for t in range(nTRTest):
                categ = np.flatnonzero(RT_regressor[t, :])
                otherCateg = (categ + 1) % 2
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, RT_data[t, :].flatten(),
                    RT_regressor[t, :])
                cs[t] = activations_py[categ] - activations_py[otherCateg]

            # take average for this run
            RT_cs[r, d] = np.mean(cs)
            all_python_evidence[r, :, d] = cs[:, 0]

    outfile = subjectDir + '/' + 'offlineAUC_RTCS'
    np.savez(outfile, auc=auc_score, cs=RT_cs, all_ev=all_python_evidence)