def TrainModel(self, msg):
    """Load block group patterns data from this and the previous run and
    create the ML model for the next run. Save the model to a file.
    """
    reply = super().TrainModel(msg)
    trainStart = time.time()  # start timing

    # load data to train the model
    trainCfg = msg.fields.cfg
    bgRef1 = StructDict(trainCfg.blkGrpRefs[0])
    bgRef2 = StructDict(trainCfg.blkGrpRefs[1])
    # initialize to None so the except block can tell which load failed
    bg1 = None
    bg2 = None
    try:
        bg1 = self.getPrevBlkGrp(self.id_fields.sessionId, bgRef1.run,
                                 bgRef1.phase)
        bg2 = self.getPrevBlkGrp(self.id_fields.sessionId, bgRef2.run,
                                 bgRef2.phase)
    except Exception as err:
        errorReply = self.createReplyMessage(msg, MsgResult.Error)
        if bg1 is None:
            errorReply.data = "Error: getPrevBlkGrp(%r, %r, %r): %r" %\
                (self.id_fields.sessionId, bgRef1.run, bgRef1.phase, err)
        else:
            errorReply.data = "Error: getPrevBlkGrp(%r, %r, %r): %r" %\
                (self.id_fields.sessionId, bgRef2.run, bgRef2.phase, err)
        return errorReply

    # find the TRs with a non-zero regressor, then retrieve the labels and
    # patterns at those indices
    trainIdx1 = utils.find(np.any(bg1.patterns.regressor, axis=0))
    trainLabels1 = np.transpose(bg1.patterns.regressor[:, trainIdx1])
    trainPats1 = bg1.patterns.raw_sm_filt_z[trainIdx1, :]
    trainIdx2 = utils.find(np.any(bg2.patterns.regressor, axis=0))
    trainLabels2 = np.transpose(bg2.patterns.regressor[:, trainIdx2])
    trainPats2 = bg2.patterns.raw_sm_filt_z[trainIdx2, :]

    trainPats = np.concatenate((trainPats1, trainPats2))
    trainLabels = np.concatenate((trainLabels1, trainLabels2))
    trainLabels = trainLabels.astype(np.uint8)

    # train the model
    # sklearn LogisticRegression takes one set of labels and returns one set
    # of weights. The version implemented in Matlab can take multiple sets of
    # labels and return multiple sets of weights. To reproduce that behavior
    # here, we use one LogisticRegression instance per set of labels
    # (2 in this case).
    lrc1 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
    lrc2 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
    lrc1.fit(trainPats, trainLabels[:, 0])
    lrc2.fit(trainPats, trainLabels[:, 1])

    newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
    newTrainedModel.trainedModel = StructDict({})
    newTrainedModel.trainedModel.weights = np.concatenate(
        (lrc1.coef_.T, lrc2.coef_.T), axis=1)
    newTrainedModel.trainedModel.biases = np.concatenate(
        (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
    newTrainedModel.trainPats = trainPats
    newTrainedModel.trainLabels = trainLabels
    newTrainedModel.FWHM = self.session.FWHM
    newTrainedModel.cutoff = self.session.cutoff
    newTrainedModel.gitCodeId = utils.getGitCodeId()

    trainEnd = time.time()  # end timing
    trainingOnlyTime = trainEnd - trainStart

    # print training timing and results
    reply.fields.outputlns.append('Model training completed')
    outStr = 'Model training time: \t{:.3f}'.format(trainingOnlyTime)
    reply.fields.outputlns.append(outStr)
    if newTrainedModel.biases is not None:
        outStr = 'Model biases: \t{:.3f}\t{:.3f}'.format(
            newTrainedModel.biases[0, 0], newTrainedModel.biases[0, 1])
        reply.fields.outputlns.append(outStr)

    # cache the trained model
    self.modelCache[self.id_fields.runId] = newTrainedModel

    if self.session.validate:
        try:
            self.validateModel(newTrainedModel, reply.fields.outputlns)
        except Exception as err:
            # just log that an error happened during validation
            logging.error("validateModel: %r", err)

    # write the trained model to a file
    filename = getModelFilename(self.id_fields.sessionId,
                                self.id_fields.runId)
    trainedModel_fn = os.path.join(self.dirs.dataDir, filename)
    try:
        sio.savemat(trainedModel_fn, newTrainedModel, appendmat=False)
    except Exception as err:
        errorReply = self.createReplyMessage(msg, MsgResult.Error)
        errorReply.data = "Error: Unable to save trainedModel %s: %s" % (
            filename, str(err))
        return errorReply
    return reply
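
# --- Illustrative sketch (not part of the original file) ---
# A minimal, self-contained demonstration of the two-classifier approach used
# in TrainModel above: one sklearn LogisticRegression is fit per label column,
# and the coefficients/intercepts are concatenated into the [nVoxels x 2]
# weights and [1 x 2] biases that mirror the multi-label Matlab model. The
# data shapes and the helper name fit_two_class_weights are assumptions
# chosen for illustration.
import numpy as np
from sklearn.linear_model import LogisticRegression

def fit_two_class_weights(trainPats, trainLabels):
    # fit one L2-penalized logistic regression per label column
    lrc1 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
    lrc2 = LogisticRegression(solver='saga', penalty='l2', max_iter=300)
    lrc1.fit(trainPats, trainLabels[:, 0])
    lrc2.fit(trainPats, trainLabels[:, 1])
    weights = np.concatenate((lrc1.coef_.T, lrc2.coef_.T), axis=1)  # [nVoxels x 2]
    biases = np.concatenate((lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
    return weights, biases

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    pats = rng.randn(50, 20)                    # 50 TRs x 20 voxels (synthetic)
    labels = np.zeros((50, 2), dtype=np.uint8)  # one-hot category labels
    labels[np.arange(50), rng.randint(0, 2, 50)] = 1
    w, b = fit_two_class_weights(pats, labels)
    print(w.shape, b.shape)  # expected: (20, 2) (1, 2)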
for runId in np.arange(1, nRuns):
    print(runId)
    runDir = 'run' + str(runId) + '/'
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    # to see what the model includes, use model.keys() --> trainedModel, trainPats, trainLabels
    # for each model we have W [nVoxel x 2 classes], biases [1 x 2 classes]
    # we can't apply this model to any of the examples in this run, but we can
    # apply it to the first 4 blocks of the next run
    # now load testing data from the next run to test it on
    pyModel_train = utils.loadMatFile(pyModelFn)
    # INSTEAD MAKE NEW MODEL
    lrc1 = LogisticRegression(penalty='l2', solver='saga', max_iter=300)
    lrc2 = LogisticRegression(penalty='l2', solver='saga', max_iter=300)
    lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
    lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
    newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
    newTrainedModel.trainedModel = StructDict({})
    newTrainedModel.trainedModel.weights = np.concatenate(
        (lrc1.coef_.T, lrc2.coef_.T), axis=1)
    newTrainedModel.trainedModel.biases = np.concatenate(
        (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
    newTrainedModel.trainPats = pyModel_train.trainPats
    newTrainedModel.trainLabels = pyModel_train.trainLabels
    # now load testing data for CV
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
    pyModel_test = utils.loadMatFile(pyModelFn)
    nTRTest = 100  # as in the sibling functions below
    py_test_data = pyModel_test.trainPats[nTRTest:, :]
    test_labels = pyModel_test.trainLabels[nTRTest:, :]
    py_cs = np.zeros((nTRTest, 1))
    activations = np.zeros((nTRTest, 2))
    for t in np.arange(nTRTest):
        _, _, _, activations_py = Test_L2_RLR_realtime(
            newTrainedModel, py_test_data[t, :], test_labels[t, :])
        activations[t, :] = activations_py  # store per-TR activations
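
# --- Illustrative sketch (assumption, not the project's implementation) ---
# Test_L2_RLR_realtime is defined elsewhere in this codebase and returns,
# among other values, per-class `activations` for a single TR. A plausible
# core of that computation, given weights [nVoxels x 2] and biases [1 x 2],
# is the logistic activation of each class's linear score; the sigmoid and
# the exact outputs here are assumptions for illustration only.
import numpy as np

def activations_sketch(weights, biases, pattern):
    # linear score per class for one TR, then logistic activation
    z = pattern @ weights + biases        # shape [1 x 2]
    return 1.0 / (1.0 + np.exp(-z))       # per-class activation in (0, 1)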
def validateModelsMatlabPython(subjectNum, subjectDay, usesamedata):
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    #subjectDayDir = getSubjectDayDir(cfg.session.subjectNum, cfg.session.subjectDay)
    subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir  #os.path.join(cfg.session.dataDir, subjectDayDir)
    pyDataDir = matDataDir
    all_vals = np.zeros((100, 2, cfg.session.Runs[-1] - 1))
    usenewmodel = 1
    #usesamedata = 1  # whether or not to use the same data as with matlab
    for runId in np.arange(1, cfg.session.Runs[-1]):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        matModel_train = utils.loadMatFile(matModelFn)
        # to see what matModel includes, use matModel.keys() --> trainedModel, trainPats, trainLabels
        # for each model we have W [nVoxel x 2 classes], biases [1 x 2 classes]
        # we can't apply this model to any of the examples in this run, but we
        # can apply it to the first 4 blocks of the next run
        # now load testing data from the next run to test it on
        pyModel_train = utils.loadMatFile(pyModelFn)
        # INSTEAD MAKE NEW MODEL
        print(runId)
        if usenewmodel:
            lrc1 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            if usesamedata:
                lrc1.fit(matModel_train.trainPats,
                         pyModel_train.trainLabels[:, 0])
                lrc2.fit(matModel_train.trainPats,
                         pyModel_train.trainLabels[:, 1])
            else:
                lrc1.fit(pyModel_train.trainPats,
                         pyModel_train.trainLabels[:, 0])
                lrc2.fit(pyModel_train.trainPats,
                         pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
        # now load the models to test on
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        nTRTest = 100
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            if not usenewmodel:
                if not usesamedata:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        pyModel_train, py_test_data[t, :], test_labels[t, :])
                else:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        pyModel_train, mat_test_data[t, :], test_labels[t, :])
            else:
                if not usesamedata:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        newTrainedModel, py_test_data[t, :],
                        test_labels[t, :])
                else:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        newTrainedModel, mat_test_data[t, :],
                        test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]
        #plt.figure()
        #if usenewmodel:
        #    plt.plot(matModel_train.weights[:, 0], newTrainedModel.weights[:, 0], '.')
        #else:
        #    plt.plot(matModel_train.weights[:, 0], pyModel_train.weights[:, 0], '.')
        #plt.xlim([-.02, .02])
        #plt.ylim([-.02, .02])
        #plt.xlabel('MATLAB')
        #plt.ylabel('PYTHON')
        #plt.show()
    all_mat_ev = np.reshape(all_vals[:, 0, :],
                            ((cfg.session.Runs[-1] - 1) * 100, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :],
                           ((cfg.session.Runs[-1] - 1) * 100, 1))
    fig, ax = plt.subplots(figsize=(12, 7))
    plt.plot(all_mat_ev, all_py_ev, '.')
    plt.plot([-5, 5], [-5, 5], '--k')
    plt.title('S%i MAT x PY CORR = %4.4f' %
              (cfg.session.subjectNum,
               scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0][0]))
    plt.xlabel('MATLAB')
    plt.ylabel('PYTHON')
    plt.xlim([-1.5, 1.5])
    plt.ylim([-1.5, 1.5])
    plt.show()
    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title('S%i MAT x PY CORR = %4.4f' %
              (cfg.session.subjectNum,
               scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0][0]))
    plt.legend()
    plt.show()
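
# --- Illustrative sketch (synthetic data) ---
# Minimal example of the statistic that titles the plots above: the Pearson
# correlation between matched Matlab and Python classifier-evidence vectors.
# The stand-in vectors here are synthetic. scipy.stats.pearsonr returns
# (correlation, p-value); the original code indexes with [0][0] because it
# passes (N, 1) column vectors rather than 1-D arrays.
import numpy as np
import scipy.stats

rng = np.random.RandomState(0)
mat_ev = rng.randn(600)                 # e.g. 6 runs x 100 test TRs
py_ev = mat_ev + 0.1 * rng.randn(600)   # closely matching python evidence
r, p = scipy.stats.pearsonr(mat_ev, py_ev)
print('MAT x PY CORR = %4.4f' % r)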
def train_test_python_classifier(subjectNum):
    ndays = 3
    auc_score = np.zeros((8, ndays))  # sized large enough to fit all days
    RT_cs = np.zeros((8, ndays))
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    subjectDir = dataPath + '/' + 'subject' + str(subjectNum)
    print(subjectNum)
    all_python_evidence = np.zeros(
        (9, 100, 3))  # time course of classifier evidence
    for d in np.arange(ndays):
        print(d)
        subjectDay = d + 1
        configFile = dataPath + 'subject' + str(
            subjectNum) + '/usedscripts/PennCfg_Day' + str(
                subjectDay) + '.toml'
        cfg = loadConfigFile(configFile)
        subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
            cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
        pyDataDir = subjectDayDir
        if subjectDay == 1:
            nRuns = 7
            print('here')
            if str(subjectNum) == '106':
                nRuns = 6
                print('here')
            else:
                print(subjectNum)
                if subjectNum == 106:
                    print('finding it here')
                print('nothere')
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        print('total number of runs: %i' % nRuns)
        print(subjectNum)
        print(subjectDay)
        print(nRuns)
        #nruns = len(cfg.session.Runs) - 1
        for r in np.arange(0, nRuns - 1):
            runId = r + 1  # now it goes from 0 : nRuns - 1
            print(runId)
            runDir = 'run' + str(runId) + '/'
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
            # to see what the model includes, use model.keys() --> trainedModel, trainPats, trainLabels
            # for each model we have W [nVoxel x 2 classes], biases [1 x 2 classes]
            # we can't apply this model to any of the examples in this run,
            # but we can apply it to the first 4 blocks of the next run
            # now load testing data from the next run to test it on
            pyModel_train = utils.loadMatFile(pyModelFn)
            # INSTEAD MAKE NEW MODEL
            lrc1 = LogisticRegression(penalty='l2', solver='saga',
                                      max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='saga',
                                      max_iter=300)
            lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
            # now load testing data for CV
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
            pyModel_test = utils.loadMatFile(pyModelFn)
            nTRTest = 100
            py_test_data = pyModel_test.trainPats[nTRTest:, :]
            test_labels = pyModel_test.trainLabels[nTRTest:, :]
            py_cs = np.zeros((nTRTest, 1))
            activations = np.zeros((nTRTest, 2))
            for t in np.arange(nTRTest):
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, py_test_data[t, :], test_labels[t, :])
                activations[t, :] = activations_py
            fpr2, tpr2, thresholds2 = metrics.roc_curve(
                test_labels[:, 1],
                activations[:, 1] - activations[:, 0],
                pos_label=1)
            # auc of this model applied to the first half of the next run
            auc_score[r, d] = metrics.auc(fpr2, tpr2)
            # now apply to block data -- realtime values
            pyDataFn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId + 1) + '_p2_*_py.mat')
            pyData_test = utils.loadMatFile(pyDataFn)
            regressor = pyData_test.regressor
            TRs_to_test = np.argwhere(np.sum(regressor, axis=0))
            RT_data = pyData_test.raw_sm_filt_z[TRs_to_test, :]
            RT_regressor = regressor[:, TRs_to_test].T.reshape(nTRTest, 2)
            # now do the same thing and test for every TR --> get category separation
            cs = np.zeros((nTRTest, 1))
            for t in np.arange(nTRTest):
                categ = np.flatnonzero(RT_regressor[t, :])
                otherCateg = (categ + 1) % 2
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, RT_data[t, :].flatten(),
                    RT_regressor[t, :])
                cs[t] = activations_py[categ] - activations_py[otherCateg]
            # take the average for this run
            RT_cs[r, d] = np.mean(cs)
            all_python_evidence[r, :, d] = cs[:, 0]
    outfile = subjectDir + '/' + 'offlineAUC_RTCS'
    np.savez(outfile, auc=auc_score, cs=RT_cs, all_ev=all_python_evidence)
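
# --- Illustrative sketch (synthetic data) ---
# The cross-validated AUC above scores the activation difference between the
# two classes against the one-hot test labels. A self-contained version of
# that metrics.roc_curve / metrics.auc pattern on synthetic scores:
import numpy as np
from sklearn import metrics

rng = np.random.RandomState(1)
labels = rng.randint(0, 2, 100)             # true class of each test TR
scores = labels + 0.8 * rng.randn(100)      # noisy activation difference
fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1)
print('AUC = %.3f' % metrics.auc(fpr, tpr))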