def crossvalidateModels(matDataDir, pyDataDir, runId):
    """Cross-validate classifiers on the Matlab and Python training data.

    Loads the newest Matlab and Python trained-model files for ``runId`` and
    runs a 4-fold cross-validation over four consecutive groups of 50
    training rows (assumes each model holds 200 training rows -- TODO
    confirm against the data files). For every fold a fresh
    LogisticRegression is fit on three groups and scored on the fourth.

    Args:
        matDataDir: directory containing ``run<runId>/trainedModel_<runId>*.mat``.
        pyDataDir: directory containing ``trainedModel_r<runId>*_py.mat``.
        runId: run number used to build both file patterns.

    Returns:
        (mat_corr, py_corr): two flat arrays of length 200. Each value is the
        predicted probability of the labelled category minus the probability
        of the other category, for the held-out rows of each fold in order.
    """
    nfold = 4
    nTRsPerFold = 50
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)

    # selector[k] is the group (0..3) that training row k belongs to
    selector = np.repeat(np.arange(nfold), nTRsPerFold)
    kf = KFold(nfold)
    mat_cs = np.zeros((nfold, nTRsPerFold))
    py_cs = np.zeros((nfold, nTRsPerFold))
    # KFold only needs one item per group; the indices it yields are group ids
    for i, (train_index, test_index) in enumerate(kf.split(np.arange(nfold))):
        print("TRAIN:", train_index, "TEST:", test_index)
        trTrain = np.isin(selector, train_index)
        trTest = np.isin(selector, test_index)
        # matlab first, python second
        mat_cs[i, :] = _foldCorrectSubtraction(matModel, trTrain, trTest)
        py_cs[i, :] = _foldCorrectSubtraction(pyModel, trTrain, trTest)
    return mat_cs.flatten(), py_cs.flatten()


def _foldCorrectSubtraction(model, trTrain, trTest):
    """Fit on the trTrain rows of model and return correct-minus-incorrect
    class probability for each trTest row."""
    lrc = LogisticRegression()
    trainCateg = np.argmax(model.trainLabels[trTrain, :], axis=1)
    lrc.fit(model.trainPats[trTrain, :], trainCateg)
    proba = lrc.predict_proba(model.trainPats[trTest, :])
    # -diff over the two probability columns gives P(class 0) - P(class 1)
    correct_subtraction = (-1 * np.diff(proba, axis=1)).flatten()
    testCateg = np.argmax(model.trainLabels[trTest, :], axis=1)
    # for rows labelled class 1 the correct-minus-incorrect value is the negation
    correct_subtraction[testCateg == 1] *= -1
    return correct_subtraction
def validatePatternsData(matDataDir, pyDataDir, runId):
    """Check that one run's Python outputs agree with the Matlab outputs.

    Two comparisons are made, each using vutils.pearsons_mean_corr:
      1. ``raw_sm_filt_z`` from the Matlab patterns file against the two
         Python block-group files stacked in order (p1 then p2).
      2. ``weights`` of the Matlab trained model against the Python one.

    Raises:
        ValidationError: if the TR/voxel counts differ, or if either
            correlation is below 0.99.
    """
    runStr = str(runId)
    runDir = 'run' + runStr + '/'

    # Check how well raw_sm_filt_z values match
    matPatternsFn = utils.findNewestFile(
        matDataDir, runDir + 'patternsdata_' + runStr + '*.mat')
    pyBlkGrp1Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + runStr + '_p1_*_py.mat')
    pyBlkGrp2Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + runStr + '_p2_*_py.mat')
    print("Validating patternrs: Matlab {}, Python {} {}".format(
        matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

    matPatterns = utils.loadMatFile(matPatternsFn)
    pyBlkGrp1 = utils.loadMatFile(pyBlkGrp1Fn)
    pyBlkGrp2 = utils.loadMatFile(pyBlkGrp2Fn)

    matShape = matPatterns.raw.shape
    mat_nTRs = matShape[0]
    nTRsPart1 = pyBlkGrp1.raw.shape[0]
    nTRsPart2 = pyBlkGrp2.raw.shape[0]
    py_nTRs = nTRsPart1 + nTRsPart2
    mat_nVoxels = matShape[1]
    py_nVoxels = pyBlkGrp1.raw.shape[1]
    sizesAgree = (mat_nTRs == py_nTRs) and (mat_nVoxels == py_nVoxels)
    if not sizesAgree:
        raise ValidationError(
            "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
            .format(mat_nTRs, py_nTRs, mat_nVoxels, py_nVoxels))

    # Stack the two Python block groups into one array, p1 rows first
    pyStacked = np.full((py_nTRs, py_nVoxels), np.nan)
    pyStacked[0:nTRsPart1] = pyBlkGrp1.raw_sm_filt_z
    pyStacked[nTRsPart1:] = pyBlkGrp2.raw_sm_filt_z
    patternCorr = vutils.pearsons_mean_corr(matPatterns.raw_sm_filt_z,
                                            pyStacked)
    print("raw_sm_filt_z correlation: {}".format(patternCorr))
    if patternCorr < 0.99:
        raise ValidationError(
            "Pearson correlation low for raw_sm_filt_z: {}".format(patternCorr))

    # Check how well the models match
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + runStr + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + runStr + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)
    weightCorr = vutils.pearsons_mean_corr(matModel.weights, pyModel.weights)
    print("model weights correlation: {}".format(weightCorr))
    if weightCorr < 0.99:
        raise ValidationError(
            "Pearson correlation low for model weights: {}".format(weightCorr))
def findPatternsDesignFile(session, dataDir, runId):
    """Locate the newest patterns design file for a run.

    The pattern that includes the run directory is tried first; if it yields
    nothing, the pattern without the run directory is tried.

    Raises:
        FileNotFoundError: if neither pattern matches a file.
    """
    # First attempt: pattern built with addRunDir=True, split into dir + name
    runDirPattern = getPatternsFileRegex(session, dataDir, runId,
                                         addRunDir=True)
    searchDir, namePattern = os.path.split(runDirPattern)
    found = findNewestFile(searchDir, namePattern)
    if not (found is None or found == ''):
        return found

    # Fallback: the default pattern, passed whole with an empty base directory
    fullPathRegex = getPatternsFileRegex(session, dataDir, runId)
    found = findNewestFile('', fullPathRegex)
    if found is not None and found != '':
        return found
    raise FileNotFoundError(
        "No files found matching {}".format(fullPathRegex))
def getNewestFile(self, filePattern):
    """Return the raw bytes of the newest file matching ``filePattern``.

    ``filePattern`` is split into a directory part and a name pattern, which
    are handed to findNewestFile.

    Raises:
        FileNotFoundError: if nothing matches, or if the matched file no
            longer exists when it is about to be read. Both messages report
            only the name part of the pattern.
    """
    searchDir, namePattern = os.path.split(filePattern)
    # TODO - handle relative paths (a non-absolute directory is used as-is)
    newest = findNewestFile(searchDir, namePattern)
    if newest is None:
        # No file matching pattern
        raise FileNotFoundError(
            'No file found matching pattern {}'.format(namePattern))
    if not os.path.exists(newest):
        raise FileNotFoundError(
            'File missing after match {}'.format(namePattern))
    with open(newest, 'rb') as fp:
        return fp.read()
def RetrieveData(self, msg):
    """Retrieve a specific data file.

    Resolves the file path from the session's server data directory and the
    subject number/day in the request, then delegates to the base-class
    RetrieveData.

    If the request carries a non-empty ``findNewestPattern``, the newest file
    matching that pattern in the subject data directory is used; otherwise
    ``filename`` is joined onto that directory.

    Returns:
        The base-class reply with ``fields.filename`` set to the basename of
        the resolved file, or an Error reply (from createReplyMessage) when a
        pattern was given but matched nothing.
    """
    fileInfo = msg.fields.cfg
    dataDir = getSubjectDataDir(self.session.serverDataDir,
                                fileInfo.subjectNum, fileInfo.subjectDay)
    pattern = fileInfo.findNewestPattern
    if pattern not in (None, ''):
        fullFileName = utils.findNewestFile(dataDir, pattern)
        if fullFileName is None:
            reply = self.createReplyMessage(msg, MsgResult.Error)
            # Report what was searched for; fullFileName is None here and
            # would make the message useless.
            reply.data = "FindNewestFile failed: %s: no matches found" % (
                os.path.join(dataDir, pattern))
            return reply
    else:
        # Only build the path from filename when no pattern was requested,
        # so a pattern request does not depend on filename being set.
        fullFileName = os.path.join(dataDir, fileInfo.filename)
    # The base-class call receives the resolved path in place of the cfg struct
    msg.fields.cfg = fullFileName
    reply = super().RetrieveData(msg)
    reply.fields.filename = os.path.basename(fullFileName)
    return reply
def getTrainedModel(self, sessionId, runId):
    """Return the ML model trained in an earlier run (runId).

    The in-memory cache is consulted first. On a miss the model is loaded
    from the data directory and, when it belongs to the current session,
    stored in the cache.

    Raises:
        StateError: if loading the model file yields None.
    """
    cached = self.modelCache.get(runId, None)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("modelCache miss on runId %d", runId)
    dataDir = self.dirs.dataDir
    modelPath = os.path.join(dataDir, getModelFilename(sessionId, runId))
    if self.session.useSessionTimestamp is True:
        # Replace everything after the 'T' in the session id with a wildcard
        # and pick the newest file matching the resulting name.
        wildcardId = re.sub('T.*', 'T*', sessionId)
        modelPath = utils.findNewestFile(
            dataDir, getModelFilename(wildcardId, runId))
    loaded = utils.loadMatFile(modelPath)
    # loadMatFile should either raise an exception or return a value
    if loaded is None:
        raise StateError("Load model returned None: {}".format(modelPath))
    if sessionId == self.id_fields.sessionId:
        self.modelCache[runId] = loaded
    return loaded
def getPrevBlkGrp(self, sessionId, runId, blkGrpId):
    """Return the patterns data of an earlier block group.

    The in-memory cache is consulted first. On a miss the block group is
    loaded from the data directory and, when it belongs to the current
    session, stored in the cache.

    Raises:
        StateError: if loading the block-group file yields None.
    """
    cacheKey = getBlkGrpKey(runId, blkGrpId)
    cached = self.blkGrpCache.get(cacheKey, None)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("blkGrpCache miss on <runId, blkGrpId> %s", cacheKey)
    dataDir = self.dirs.dataDir
    bgPath = os.path.join(dataDir,
                          getBlkGrpFilename(sessionId, runId, blkGrpId))
    if self.session.useSessionTimestamp is True:
        # Replace everything after the 'T' in the session id with a wildcard
        # and pick the newest file matching the resulting name.
        wildcardId = re.sub('T.*', 'T*', sessionId)
        bgPath = utils.findNewestFile(
            dataDir, getBlkGrpFilename(wildcardId, runId, blkGrpId))
    loaded = utils.loadMatFile(bgPath)
    # loadMatFile should either raise an exception or return a value
    if loaded is None:
        raise StateError("Load blkGrp returned None: {}".format(bgPath))
    if sessionId == self.id_fields.sessionId:
        self.blkGrpCache[cacheKey] = loaded
    return loaded
def getCategSepData(subjectNum):
    """Collect real-time category-separation values for one subject.

    For each of the 3 days and each run from 2 up to that day's run count,
    loads the Matlab patterns file and the two Python block-group files,
    checks that their TR/voxel counts agree, and extracts the category
    separation at the second half of the TRs that have a non-zero regressor
    column sum. Results go into two zero-initialised (9, 100, 3) arrays
    indexed [run - 2, TR, day]; unused run slots stay zero.

    Both arrays are saved with np.savez to ``<subjectDir>/realtimeevidence``
    under the keys ``mat`` and ``py``.

    Raises:
        ValidationError: if the Matlab and Python TR or voxel counts differ.
    """
    rtAttenPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'
    subjectDir = rtAttenPath + '/' + 'subject' + str(subjectNum)
    # Fixed-size arrays are simpler to handle, but remember the zero padding
    # for runs that do not exist when averaging.
    all_matlab_evidence = np.zeros((9, 100, 3))
    all_python_evidence = np.zeros((9, 100, 3))
    for d in np.arange(3):
        subjectDay = d + 1
        # Matlab and Python outputs live in the same per-day directory
        matDataDir = subjectDir + '/' + 'day' + str(subjectDay)
        pyDataDir = matDataDir
        if subjectDay == 1:
            # day 1 has 7 runs, except subject 106 who has 6; compare as a
            # string so the check works for both int and str subject numbers
            nRuns = 6 if str(subjectNum) == '106' else 7
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        runIds = np.arange(2, nRuns + 1)  # the first run is skipped
        print(runIds)
        for runId in runIds:
            runDir = 'run' + str(runId) + '/'
            matPatternsFn = utils.findNewestFile(
                matDataDir, runDir + 'patternsdata_' + str(runId) + '*.mat')
            pyBlkGrp1Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p1_*_py.mat')
            pyBlkGrp2Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p2_*_py.mat')
            print("Getting rt classifer data from: Matlab {}, Python {} {}".
                  format(matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

            matPatterns = utils.loadMatFile(matPatternsFn)
            pyBlkGrp1 = utils.loadMatFile(pyBlkGrp1Fn)
            pyBlkGrp2 = utils.loadMatFile(pyBlkGrp2Fn)
            mat_nTRs = matPatterns.raw.shape[0]
            pyp1_nTRs = pyBlkGrp1.raw.shape[0]
            pyp2_nTRs = pyBlkGrp2.raw.shape[0]
            py_nTRs = pyp1_nTRs + pyp2_nTRs
            mat_nVoxels = matPatterns.raw.shape[1]
            py_nVoxels = pyBlkGrp1.raw.shape[1]
            if mat_nTRs != py_nTRs or mat_nVoxels != py_nVoxels:
                raise ValidationError(
                    "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
                    .format(mat_nTRs, py_nTRs, mat_nVoxels, py_nVoxels))

            # TRs whose regressor columns sum to non-zero; keep the second half
            relevant_TR = np.argwhere(np.sum(matPatterns.regressor, 0))[:, 0]
            RT_TR = relevant_TR[len(relevant_TR) // 2:]
            mat_RT_CS = matPatterns.categoryseparation[:, RT_TR][0, :]

            # Stack the two Python block groups, p1 first, then pick the same TRs
            pyCombined_categoryseparation = np.full((py_nTRs, ), np.nan)
            pyCombined_categoryseparation[
                0:pyp1_nTRs] = pyBlkGrp1.categoryseparation
            pyCombined_categoryseparation[
                pyp1_nTRs:] = pyBlkGrp2.categoryseparation
            py_RT_CS = pyCombined_categoryseparation[RT_TR]

            all_matlab_evidence[runId - 2, :, d] = mat_RT_CS
            all_python_evidence[runId - 2, :, d] = py_RT_CS
    outfile = subjectDir + '/' + 'realtimeevidence'
    np.savez(outfile, mat=all_matlab_evidence, py=all_python_evidence)
def validateModelsMatlabPython(subjectNum, subjectDay, usesamedata):
    """Compare Matlab and Python classifier evidence across runs and plot it.

    For every run id from 1 up to (but excluding) the last configured run,
    the model trained on that run is applied to the rows of the next run's
    training patterns from index 100 onward. The Matlab model comes from its
    file; the Python model is refit here with scikit-learn LogisticRegression
    (one binary classifier per label column). Per-TR evidence is the
    activation of the labelled category minus that of the other category.

    A scatter plot and a histogram of the Matlab vs. Python evidence are
    shown, titled with their Pearson correlation.

    Args:
        subjectNum: subject number used to locate the config file.
        subjectDay: day number used to locate the config file.
        usesamedata: if truthy, the Python model is fit and tested on the
            Matlab patterns instead of the Python patterns.
    """
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    subjectDayDir = dataPath + 'subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir
    pyDataDir = matDataDir
    nTRTest = 100
    lastRun = cfg.session.Runs[-1]
    all_vals = np.zeros((nTRTest, 2, lastRun - 1))
    # When set, the Python model is refit here instead of read from file
    usenewmodel = 1
    for runId in np.arange(1, lastRun):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # Each model file holds trainedModel, trainPats and trainLabels
        matModel_train = utils.loadMatFile(matModelFn)
        pyModel_train = utils.loadMatFile(pyModelFn)
        print(runId)
        pyModelToTest = pyModel_train
        if usenewmodel:
            lrc1 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            fitPats = (matModel_train.trainPats if usesamedata
                       else pyModel_train.trainPats)
            lrc1.fit(fitPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(fitPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            # one weight column and one bias per binary classifier
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
            pyModelToTest = newTrainedModel

        # The model cannot be tested on its own run, so load the next run's
        # model file and use its training patterns as test data
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        # The data source for the Python model is the same for every TR
        pyDataToTest = mat_test_data if usesamedata else py_test_data
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModelToTest, pyDataToTest[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]

    nSamples = (lastRun - 1) * nTRTest
    all_mat_ev = np.reshape(all_vals[:, 0, :], (nSamples, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :], (nSamples, 1))
    # pearsonr is documented for 1-D inputs, so pass the single column and
    # compute the coefficient once for both titles
    corr = scipy.stats.pearsonr(all_mat_ev[:, 0], all_py_ev[:, 0])[0]
    title = 'S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, corr)

    plt.subplots(figsize=(12, 7))
    plt.plot(all_mat_ev, all_py_ev, '.')
    plt.plot([-5, 5], [-5, 5], '--k')
    plt.title(title)
    plt.xlabel('MATLAB')
    plt.ylabel('PYTHON')
    plt.xlim([-1.5, 1.5])
    plt.ylim([-1.5, 1.5])
    plt.show()

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title(title)
    plt.legend()
    plt.show()
def test_pathInPattern(self):
    """findNewestFile when the pattern repeats the full base directory."""
    print("Test findNewestFile path embedded in pattern:")
    baseDir = '/tmp/testdir'
    pattern = '/tmp/testdir/file1_20170101*'
    self.assert_result_matches_filename(
        utils.findNewestFile(baseDir, pattern))
# NOTE(review): this fragment reads configFile, subjectDay and subjectNum,
# none of which are defined in the visible code -- presumably it is the
# interior of a function whose header is not shown; confirm before editing.
cfg = loadConfigFile(configFile)
subjectDayDir = '/data/jux/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
pyDataDir = subjectDayDir
# Hard-coded run count per day; subject 106 has one run fewer on day 1.
# nRuns stays undefined if subjectDay is not 1, 2 or 3.
if subjectDay == 1:
    nRuns = 7
    if subjectNum == 106:
        nRuns = 6
elif subjectDay == 2:
    nRuns = 9
elif subjectDay == 3:
    nRuns = 8
#nruns = len(cfg.session.Runs) - 1
# Iterates run ids 1 .. nRuns-1 (the last run is excluded).
for runId in np.arange(1,nRuns):
    print(runId)
    runDir = 'run'+str(runId)+'/'  # not used in the visible part of the loop
    pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r'+str(runId)+'*_py.mat')
    # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
    # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
    # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
    # now load testing data from the next run to test it on
    pyModel_train = utils.loadMatFile(pyModelFn)
    # INSTEAD MAKE NEW MODEL
    # Refit one binary logistic regression per label column of the loaded
    # training data rather than using the weights stored in the file.
    lrc1 = LogisticRegression(penalty='l2', solver='saga',max_iter=300)
    lrc2 = LogisticRegression(penalty='l2', solver='saga',max_iter=300)
    lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
    lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
    # Package the two classifiers as a single model struct: the coefficient
    # vectors become the two weight columns, the intercepts a 1 x 2 bias row.
    newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
    newTrainedModel.trainedModel = StructDict({})
    newTrainedModel.trainedModel.weights = np.concatenate((lrc1.coef_.T, lrc2.coef_.T), axis=1)
    newTrainedModel.trainedModel.biases = np.concatenate((lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
def test_pathPartiallyInPattern(self):
    """findNewestFile when part of the directory path is in the pattern."""
    print("Test findNewestFile path partially in pattern:")
    baseDir = '/tmp'
    pattern = 'testdir/file1_20170101*'
    self.assert_result_matches_filename(
        utils.findNewestFile(baseDir, pattern))
def test_noMatchingFiles(self):
    """findNewestFile returns None when the pattern matches nothing."""
    print("Test findNewestFile no matching files:")
    baseDir = '/tmp/testdir/'
    pattern = 'no_such_file'
    result = utils.findNewestFile(baseDir, pattern)
    assert result is None
def validateModelsMatlabPython(subjectNum, subjectDay):
    """Compare Matlab and Python classifier evidence across runs and plot it.

    For every run id from 1 up to (but excluding) the last configured run,
    the Matlab and Python models trained on that run are each applied to the
    rows of the next run's training patterns from index 100 onward (Matlab
    model on Matlab patterns, Python model on Python patterns; labels are
    taken from the Matlab file). Per-TR evidence is the activation of the
    labelled category minus that of the other category.

    A histogram of both evidence distributions is shown, titled with their
    Pearson correlation.

    Args:
        subjectNum: subject number used to locate the config file.
        subjectDay: day number used to locate the config file.
    """
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    subjectDayDir = dataPath + 'subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir
    pyDataDir = matDataDir
    nTRTest = 100
    lastRun = cfg.session.Runs[-1]
    all_vals = np.zeros((nTRTest, 2, lastRun - 1))
    for runId in np.arange(1, lastRun):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # Each model file holds trainedModel, trainPats and trainLabels
        matModel_train = utils.loadMatFile(matModelFn)
        pyModel_train = utils.loadMatFile(pyModelFn)

        # The model cannot be tested on its own run, so load the next run's
        # model file and use its training patterns as test data
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModel_train, py_test_data[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]

    nSamples = (lastRun - 1) * nTRTest
    all_mat_ev = np.reshape(all_vals[:, 0, :], (nSamples, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :], (nSamples, 1))
    # pearsonr is documented for 1-D inputs, so pass the single column
    corr = scipy.stats.pearsonr(all_mat_ev[:, 0], all_py_ev[:, 0])[0]

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title('S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, corr))
    plt.legend()
    plt.show()
def test_emptyPath(self):
    """findNewestFile with an empty base directory and a full-path pattern."""
    print("Test findNewestFile empty path:")
    baseDir = ''
    pattern = '/tmp/testdir/file1_20170101*'
    self.assert_result_matches_filename(
        utils.findNewestFile(baseDir, pattern))
def on_message(client, message):
    """Handle one JSON request received on the web socket and send a reply.

    The request's ``cmd`` field selects the action: ``initWatch``,
    ``watchFile``, ``getFile``, ``getNewestFile``, ``ping``, ``putTextFile``,
    ``dataLog`` or ``error``. Every file or directory named in a request is
    checked with WebSocketFileWatcher.validateRequestedFile before use.

    The reply is the request dictionary updated with the response fields
    (``status`` plus ``error`` or result data), JSON-encoded and sent with
    ``client.send`` under WebSocketFileWatcher.clientLock. An ``error``
    command is only logged and gets no reply. Any exception while handling a
    request is reported back as a status 400 reply; if it happens while
    handling an ``error`` command the process exits.

    Args:
        client: connection object providing ``send(str)``.
        message: JSON text of the request.
    """
    fileWatcher = WebSocketFileWatcher.fileWatcher
    response = {'status': 400, 'error': 'unhandled request'}
    # Bind these before parsing so the exception handler and the reply merge
    # below still work when the message is not valid JSON or has no 'cmd'.
    request = {}
    cmd = None

    def fail(errStr, status=400):
        """Log errStr as a warning and return the matching error response."""
        logging.log(logging.WARNING, errStr)
        return {'status': status, 'error': errStr}

    def fileReply(path):
        """Return a success response carrying the base64-encoded file."""
        with open(path, 'rb') as fp:
            data = fp.read()
        return {
            'status': 200,
            'filename': path,
            'data': b64encode(data).decode('utf-8')
        }

    def isAllowed(dirName, fileName, **kwargs):
        """True unless validateRequestedFile explicitly returns False."""
        return WebSocketFileWatcher.validateRequestedFile(
            dirName, fileName, **kwargs) is not False

    try:
        request = json.loads(message)
        cmd = request['cmd']
        if cmd == 'initWatch':
            watchDir = request['dir']
            filePattern = request['filePattern']
            minFileSize = request['minFileSize']
            demoStep = request.get('demoStep')
            logging.log(DebugLevels.L3, "initWatch: %s, %s, %d",
                        watchDir, filePattern, minFileSize)
            if watchDir is None or filePattern is None or minFileSize is None:
                response = fail(
                    "InitWatch: Missing file information: {} {}".format(
                        watchDir, filePattern))
            elif not isAllowed(watchDir, None):
                response = fail(
                    'InitWatch: Non-allowed directory {}'.format(watchDir))
            elif not os.path.exists(watchDir):
                response = fail(
                    'InitWatch: No such directory: {}'.format(watchDir))
            else:
                WebSocketFileWatcher.fileWatchLock.acquire()
                try:
                    fileWatcher.initFileNotifier(watchDir, filePattern,
                                                 minFileSize, demoStep)
                finally:
                    WebSocketFileWatcher.fileWatchLock.release()
                response = {'status': 200}
        elif cmd == 'watchFile':
            filename = request['filename']
            timeout = request['timeout']
            logging.log(DebugLevels.L3, "watchFile: %s", filename)
            if filename is None:
                response = fail('WatchFile: Missing filename')
            elif not isAllowed(None, filename):
                response = fail(
                    'WatchFile: Non-allowed file {}'.format(filename))
            else:
                WebSocketFileWatcher.fileWatchLock.acquire()
                try:
                    retVal = fileWatcher.waitForFile(filename,
                                                     timeout=timeout)
                finally:
                    WebSocketFileWatcher.fileWatchLock.release()
                if retVal is None:
                    response = fail(
                        "WatchFile: 408 Timeout {}s: {}".format(
                            timeout, filename), status=408)
                else:
                    # TODO - may need some retry logic here if the file was
                    # read before it was completely written. Maybe checking
                    # filesize against data size.
                    response = fileReply(filename)
        elif cmd == 'getFile':
            filename = request['filename']
            if filename is not None and not os.path.isabs(filename):
                # relative path to the watch dir
                filename = os.path.join(fileWatcher.watchDir, filename)
            logging.log(DebugLevels.L3, "getFile: %s", filename)
            if filename is None:
                response = fail("GetFile: Missing filename")
            elif not isAllowed(None, filename):
                response = fail(
                    'GetFile: Non-allowed file {}'.format(filename))
            elif not os.path.exists(filename):
                response = fail(
                    "GetFile: File not found {}".format(filename))
            else:
                response = fileReply(filename)
        elif cmd == 'getNewestFile':
            filename = request['filename']
            logging.log(DebugLevels.L3, "getNewestFile: %s", filename)
            if filename is None:
                response = fail("GetNewestFile: Missing filename")
            elif not isAllowed(None, filename):
                response = fail(
                    'GetNewestFile: Non-allowed file {}'.format(filename))
            else:
                # NOTE(review): unlike getFile, validation here runs before
                # the path is made relative to the watch dir, and the file
                # actually found is not re-validated -- confirm this is
                # intended.
                baseDir, filePattern = os.path.split(filename)
                if not os.path.isabs(baseDir):
                    # relative path to the watch dir
                    baseDir = os.path.join(fileWatcher.watchDir, baseDir)
                filename = findNewestFile(baseDir, filePattern)
                if filename is None or not os.path.exists(filename):
                    response = fail(
                        'GetNewestFile: file not found: {}'.format(
                            os.path.join(baseDir, filePattern)))
                else:
                    response = fileReply(filename)
        elif cmd == 'ping':
            response = {'status': 200}
        elif cmd == 'putTextFile':
            filename = request['filename']
            text = request['text']
            logging.log(DebugLevels.L3, "putTextFile: %s", filename)
            if filename is None:
                response = fail('PutTextFile: Missing filename field')
            elif text is None:
                response = fail('PutTextFile: Missing text field')
            elif not isAllowed(None, filename, textFileTypeOnly=True):
                response = fail(
                    'PutTextFile: Non-allowed file {}'.format(filename))
            elif not isinstance(text, str):
                response = fail("PutTextFile: Only text allowed")
            else:
                outputDir = os.path.dirname(filename)
                if not os.path.exists(outputDir):
                    os.makedirs(outputDir)
                with open(filename, 'w+') as volFile:
                    volFile.write(text)
                response = {'status': 200}
        elif cmd == 'dataLog':
            filename = request['filename']
            logging.log(DebugLevels.L3, "dataLog: %s", filename)
            logLine = request['logLine']
            if filename is None:
                response = fail('DataLog: Missing filename field')
            elif logLine is None:
                response = fail('DataLog: Missing logLine field')
            elif not isAllowed(None, filename, textFileTypeOnly=True):
                response = fail(
                    'DataLog: Non-allowed file {}'.format(filename))
            else:
                with open(filename, 'a+') as logFile:
                    logFile.write(logLine + '\n')
                response = {'status': 200}
        elif cmd == 'error':
            errorCode = request['status']
            if errorCode == 401:
                # Not authorized: force a fresh login on the next connection
                WebSocketFileWatcher.needLogin = True
                WebSocketFileWatcher.sessionCookie = None
            errStr = 'Error {}: {}'.format(errorCode, request['error'])
            # Log the message together with its status code
            logging.log(logging.ERROR, errStr)
            return
        else:
            response = fail('OnMessage: Unrecognized command {}'.format(cmd))
    except Exception as err:
        errStr = "OnMessage Exception: {}: {}".format(cmd, err)
        logging.log(logging.WARNING, errStr)
        response = {'status': 400, 'error': errStr}
        if cmd == 'error':
            sys.exit()

    # merge response into the request dictionary; a request that parsed to
    # something other than a JSON object cannot be merged into, so reply with
    # the response fields alone
    if not isinstance(request, dict):
        request = {}
    request.update(response)
    response = request
    WebSocketFileWatcher.clientLock.acquire()
    try:
        client.send(json.dumps(response))
    finally:
        WebSocketFileWatcher.clientLock.release()
def crossvalidateModels(matDataDir, pyDataDir, runId):
    """Cross-validate classifiers on the Matlab and Python training data (AUC).

    Loads the newest Matlab and Python trained-model files for ``runId`` and
    runs a 4-fold cross-validation over four consecutive groups of 50
    training rows (assumes each model holds 200 training rows -- TODO
    confirm against the data files). For every fold a fresh
    LogisticRegression (sag solver, L2 penalty) is fit on three groups and
    the ROC AUC is computed on the fourth. Per-fold and average AUCs are
    printed.

    Args:
        matDataDir: directory containing ``run<runId>/trainedModel_<runId>*.mat``.
        pyDataDir: directory containing ``trainedModel_r<runId>*_py.mat``.
        runId: run number used to build both file patterns.

    Returns:
        (mat_roc, py_roc): two arrays of length 4 with the AUC of each fold.
    """
    nfold = 4
    nTRsPerFold = 50
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)

    # selector[k] is the group (0..3) that training row k belongs to
    selector = np.repeat(np.arange(nfold), nTRsPerFold)
    kf = KFold(nfold)
    mat_roc = np.zeros((nfold))
    py_roc = np.zeros((nfold))
    # KFold only needs one item per group; the indices it yields are group ids
    for i, (train_index, test_index) in enumerate(kf.split(np.arange(nfold))):
        print("TRAIN:", train_index, "TEST:", test_index)
        trTrain = np.isin(selector, train_index)
        trTest = np.isin(selector, test_index)
        # matlab first
        mat_roc[i] = _foldRocAuc(matModel, trTrain, trTest)
        print("MAT AUC for iteration %i is: %.2f" % (i, mat_roc[i]))
        # python second
        py_roc[i] = _foldRocAuc(pyModel, trTrain, trTest)
        print("PY AUC for iteration %i is: %.2f\n" % (i, py_roc[i]))
    print("AVG AUC MAT,PY is: %.2f,%.2f\n" % (np.mean(mat_roc),
                                              np.mean(py_roc)))
    return mat_roc, py_roc


def _foldRocAuc(model, trTrain, trTest):
    """Fit on the trTrain rows of model and return the ROC AUC on trTest."""
    lrc = LogisticRegression(solver='sag', penalty='l2', max_iter=300)
    trainCateg = np.argmax(model.trainLabels[trTrain, :], axis=1)
    lrc.fit(model.trainPats[trTrain, :], trainCateg)
    proba = lrc.predict_proba(model.trainPats[trTest, :])
    # -diff over the two probability columns gives P(class 0) - P(class 1)
    categ_sep = -1 * np.diff(proba, axis=1)
    testCateg = np.argmax(model.trainLabels[trTest, :], axis=1)
    # class 0 is scored as the positive label (+1), class 1 as -1
    correctLabels = np.ones((len(categ_sep)))
    correctLabels[testCateg == 1] = -1
    return roc_auc_score(correctLabels, categ_sep)
def train_test_python_classifier(subjectNum):
    """Retrain per-run classifiers offline and score them on the next run.

    For each of 3 days and each run r of that day except the last, two
    binary LogisticRegression models are fit on the run's saved training
    patterns (one per label column) and packed into a model struct. That
    model is then evaluated twice on data from run r+1:
      * on the rows of the next run's training patterns from index 100
        onward, giving a ROC AUC stored in ``auc``;
      * on the ``_p2_`` block-group data at the TRs with a non-zero
        regressor column sum, giving per-TR category separation (stored in
        ``all_ev``) and its mean (stored in ``cs``).

    The three arrays are written with np.savez to
    ``<subjectDir>/offlineAUC_RTCS``. Nothing is returned.
    """
    ndays = 3
    auc_score = np.zeros((8, ndays))  # save larger to fit all days in
    RT_cs = np.zeros((8, ndays))
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    # dataPath already ends in '/', so this path contains a double slash
    subjectDir = dataPath + '/' + 'subject' + str(subjectNum)
    print(subjectNum)
    all_python_evidence = np.zeros(
        (9, 100, 3))  # time course of classifier evidence
    for d in np.arange(ndays):
        print(d)
        subjectDay = d + 1
        configFile = dataPath + 'subject' + str(
            subjectNum) + '/usedscripts/PennCfg_Day' + str(
                subjectDay) + '.toml'
        cfg = loadConfigFile(configFile)
        subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
            cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
        pyDataDir = subjectDayDir
        # Hard-coded run count per day; subject 106 has one run fewer on
        # day 1. The string comparison matches both int and str subject
        # numbers; the extra prints are debugging output.
        if subjectDay == 1:
            nRuns = 7
            print('here')
            if str(subjectNum) == '106':
                nRuns = 6
                print('here')
            else:
                print(subjectNum)
                if subjectNum == 106:
                    print('finding it here')
                print('nothere')
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        print('total number of runs: %i' % nRuns)
        print(subjectNum)
        print(subjectDay)
        print(nRuns)
        #nruns = len(cfg.session.Runs) - 1
        for r in np.arange(0, nRuns - 1):
            runId = r + 1  # r goes from 0 to nRuns - 2, so runId is 1-based
            print(runId)
            runDir = 'run' + str(runId) + '/'  # not used below
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
            # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
            # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
            # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
            # now load testing data from the next run to test it on
            pyModel_train = utils.loadMatFile(pyModelFn)
            # INSTEAD MAKE NEW MODEL
            # One binary classifier per label column, refit from the saved
            # training data rather than using the stored weights.
            lrc1 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)
            lrc2 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)
            lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
            # Pack both classifiers into one model struct: coefficient
            # vectors become the two weight columns, intercepts a 1 x 2 row.
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
            # now load testing data for CV
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
            pyModel_test = utils.loadMatFile(pyModelFn)
            nTRTest = 100
            # Test rows are those from index nTRTest onward; the loops below
            # read exactly nTRTest of them (assumes 200 rows -- TODO confirm)
            py_test_data = pyModel_test.trainPats[nTRTest:, :]
            test_labels = pyModel_test.trainLabels[nTRTest:, :]
            py_cs = np.zeros((nTRTest, 1))  # not used below
            activations = np.zeros((nTRTest, 2))
            for t in np.arange(nTRTest):
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, py_test_data[t, :], test_labels[t, :])
                activations[t, :] = activations_py
            # ROC score is activation column 1 minus column 0, with label
            # column 1 as the positive class
            fpr2, tpr2, thresholds2 = metrics.roc_curve(test_labels[:, 1],
                                                        activations[:, 1] -
                                                        activations[:, 0],
                                                        pos_label=1)
            auc_score[r, d] = metrics.auc(
                fpr2, tpr2
            )  # auc of this data applied to the first half of the next run
            # now apply to block data-- realtime values
            pyDataFn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId + 1) + '_p2_*_py.mat')
            pyData_test = utils.loadMatFile(pyDataFn)
            regressor = pyData_test.regressor
            # argwhere returns a column of indices, so the fancy indexing
            # below adds an extra axis; the flatten()/reshape calls undo it.
            # The reshape requires exactly nTRTest selected TRs.
            TRs_to_test = np.argwhere(np.sum(regressor, axis=0))
            RT_data = pyData_test.raw_sm_filt_z[TRs_to_test, :]
            RT_regressor = regressor[:, TRs_to_test].T.reshape(nTRTest, 2)
            # now do the same thing and test for every TR --> get category separation
            cs = np.zeros((nTRTest, 1))
            for t in np.arange(nTRTest):
                # index of the labelled category and of the other one
                categ = np.flatnonzero(RT_regressor[t, :])
                otherCateg = (categ + 1) % 2
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, RT_data[t, :].flatten(),
                    RT_regressor[t, :])
                cs[t] = activations_py[categ] - activations_py[otherCateg]
            # take average for this run
            RT_cs[r, d] = np.mean(cs)
            all_python_evidence[r, :, d] = cs[:, 0]
    # adjacent string literals concatenate: '/' 'offlineAUC_RTCS' is '/offlineAUC_RTCS'
    outfile = subjectDir + '/' 'offlineAUC_RTCS'
    np.savez(outfile, auc=auc_score, cs=RT_cs, all_ev=all_python_evidence)
def test_normalCase(self):
    """findNewestFile with a plain directory and a bare filename pattern."""
    print("Test findNewestFile normal case:")
    baseDir = '/tmp/testdir'
    pattern = 'file1_20170101*'
    self.assert_result_matches_filename(
        utils.findNewestFile(baseDir, pattern))