def crossvalidateModels(matDataDir, pyDataDir, runId):
    """Cross-validate classifiers on the Matlab and Python training patterns.

    The newest trained-model file for ``runId`` is loaded from each directory.
    Samples are addressed as four consecutive groups of 50 (200 samples in
    total); a 4-fold split over those groups selects the training and
    held-out samples. For each fold a fresh ``LogisticRegression`` is fit on
    the training groups and, for every held-out sample, the difference
    ``P(class 0) - P(class 1)`` is computed, with the sign flipped for samples
    whose label is class 1.

    Args:
        matDataDir: directory searched for ``run<runId>/trainedModel_<runId>*.mat``.
        pyDataDir: directory searched for ``trainedModel_r<runId>*_py.mat``.
        runId: run number used to build both file patterns.

    Returns:
        Tuple ``(mat_corr, py_corr)`` of flattened per-sample values, fold by
        fold, for the Matlab and Python models respectively.
    """
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)

    nFolds = 4
    samplesPerGroup = 50
    # Group id (0..3) of every sample: 50 zeros, 50 ones, 50 twos, 50 threes.
    groupOfSample = np.concatenate(
        [g * np.ones((samplesPerGroup)) for g in range(4)], axis=0)
    # Only the length of this array matters to KFold.split.
    groups = np.array([1, 2, 3, 4])

    def signedSeparation(model, trainMask, testMask):
        """Fit on the training rows; return signed separation for test rows."""
        classifier = LogisticRegression()
        fitLabels = np.argmax(model.trainLabels[trainMask, :], axis=1)
        classifier.fit(model.trainPats[trainMask, :], fitLabels)
        probabilities = classifier.predict_proba(model.trainPats[testMask, :])
        # -diff over the class axis is P(class 0) - P(class 1)
        separation = (-1 * np.diff(probabilities, axis=1)).flatten()
        heldOutLabels = np.argmax(model.trainLabels[testMask, :], axis=1)
        classOneRows = np.flatnonzero(heldOutLabels == 1)
        # Flip the sign for class-1 samples
        separation[classOneRows] = -1 * separation[classOneRows]
        return separation

    mat_cs = np.zeros((nFolds, samplesPerGroup))
    py_cs = np.zeros((nFolds, samplesPerGroup))
    splitter = KFold(nFolds)
    for fold, (trainGroups, testGroups) in enumerate(splitter.split(groups)):
        print("TRAIN:", trainGroups, "TEST:", testGroups)
        trainMask = np.in1d(groupOfSample, trainGroups)
        testMask = np.in1d(groupOfSample, testGroups)
        # matlab first, python second
        matSeparation = signedSeparation(matModel, trainMask, testMask)
        pySeparation = signedSeparation(pyModel, trainMask, testMask)
        mat_cs[fold, :] = matSeparation
        py_cs[fold, :] = pySeparation

    return mat_cs.flatten(), py_cs.flatten()
def validatePatternsData(matDataDir, pyDataDir, runId):
    """Check that Python outputs for a run agree with the Matlab outputs.

    Two comparisons are made, each using ``vutils.pearsons_mean_corr``:

    1. ``raw_sm_filt_z`` from the Matlab patterns file against the two Python
       block-group files (phase 1 rows followed by phase 2 rows).
    2. The ``weights`` of the Matlab and Python trained models.

    Args:
        matDataDir: directory holding ``run<runId>/`` Matlab output files.
        pyDataDir: directory holding the Python ``*_py.mat`` output files.
        runId: run number used to build the file patterns.

    Raises:
        ValidationError: if the TR or voxel counts differ, or if either
            correlation is below 0.99.
    """
    runTag = str(runId)
    runDir = 'run' + runTag + '/'

    # --- raw_sm_filt_z comparison ---
    matPatternsFn = utils.findNewestFile(
        matDataDir, runDir + 'patternsdata_' + runTag + '*.mat')
    pyBlkGrp1Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + runTag + '_p1_*_py.mat')
    pyBlkGrp2Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + runTag + '_p2_*_py.mat')
    print("Validating patternrs: Matlab {}, Python {} {}".format(
        matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

    matPatterns = utils.loadMatFile(matPatternsFn)
    pyPhase1 = utils.loadMatFile(pyBlkGrp1Fn)
    pyPhase2 = utils.loadMatFile(pyBlkGrp2Fn)

    matTRs = matPatterns.raw.shape[0]
    phase1TRs = pyPhase1.raw.shape[0]
    phase2TRs = pyPhase2.raw.shape[0]
    pyTRs = phase1TRs + phase2TRs
    matVoxels = matPatterns.raw.shape[1]
    pyVoxels = pyPhase1.raw.shape[1]
    sizesAgree = (matTRs == pyTRs) and (matVoxels == pyVoxels)
    if not sizesAgree:
        raise ValidationError(
            "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
            .format(matTRs, pyTRs, matVoxels, pyVoxels))

    # Stack phase 1 then phase 2 into one NaN-initialised array
    pyCombined = np.full((pyTRs, pyVoxels), np.nan)
    pyCombined[0:phase1TRs] = pyPhase1.raw_sm_filt_z
    pyCombined[phase1TRs:] = pyPhase2.raw_sm_filt_z

    patternsCorr = vutils.pearsons_mean_corr(matPatterns.raw_sm_filt_z,
                                             pyCombined)
    print("raw_sm_filt_z correlation: {}".format(patternsCorr))
    if patternsCorr < 0.99:
        raise ValidationError(
            "Pearson correlation low for raw_sm_filt_z: {}".format(
                patternsCorr))

    # --- model weights comparison ---
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + runTag + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + runTag + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)

    weightsCorr = vutils.pearsons_mean_corr(matModel.weights, pyModel.weights)
    print("model weights correlation: {}".format(weightsCorr))
    if weightsCorr < 0.99:
        raise ValidationError(
            "Pearson correlation low for model weights: {}".format(
                weightsCorr))
def getNextTRData(self, run, fileNum):
    """Wait for the image file of one TR and return its volume data.

    The file name comes from ``self.getDicomFileName(run.scanNum, fileNum)``.
    If the file is not present yet, creation events are read from
    ``self.fileNotifyQ`` until one names this file. The file size is then
    polled every 10 ms until it reaches
    ``self.cfg.session.minExpectedDicomSize`` or the accumulated wait exceeds
    0.3 s. At least one 10 ms sleep always happens.

    Args:
        run: object providing ``scanNum``.
        fileNum: index of the file within the scan.

    Returns:
        ``vol`` from the loaded struct for ``.mat`` files, otherwise the
        volume parsed from the DICOM image.

    Raises:
        FileNotFoundError: if the file is absent and ``self.observer`` is None.
    """
    dicomPath = self.getDicomFileName(run.scanNum, fileNum)
    if self.printFirstFilename:
        print("Loading first file: {}".format(dicomPath))
        self.printFirstFilename = False

    if not os.path.exists(dicomPath):
        if self.observer is None:
            raise FileNotFoundError(
                "No fileNotifier and dicom file not found %s" % (dicomPath))
        # Consume creation events until the one for this file shows up
        while True:
            event, _ = self.fileNotifyQ.get()
            if event.src_path == dicomPath:
                break

    # Give the writer time to finish: poll the size in 10 ms steps
    sizeSeen = 0
    waited = 0.0
    pollInterval = 0.01
    while sizeSeen < self.cfg.session.minExpectedDicomSize and waited <= 0.3:
        time.sleep(pollInterval)
        waited += pollInterval
        sizeSeen = os.path.getsize(dicomPath)
    self.logtimeFile.write(
        "FileWait: fileNum {}: size {}: wait {:.3f}s\n".format(
            fileNum, sizeSeen, waited))

    extension = os.path.splitext(dicomPath)[1]
    if extension == '.mat':
        return utils.loadMatFile(dicomPath).vol
    dicomImg = ReadDicom.readDicomFromFile(dicomPath)
    return ReadDicom.parseDicomVolume(dicomImg, self.cfg.session.sliceDim)
def validateTrainBlkGrp(self, target_i1, target_i2, outputlns):
    """Compare this run's block group patterns with those of a previous run.

    The previous run (i.e. using the Matlab software) must have had the same
    raw input. The reference patterns are loaded from
    ``self.run.validationDataFile`` and restricted with ``strip_patterns`` to
    ``range(target_i1, target_i2)`` before the comparison.

    Args:
        target_i1: start index (inclusive) of the reference range.
        target_i2: end index (exclusive) of the reference range.
        outputlns: list that receives the human-readable result lines.

    Side effects:
        Appends the per-field comparison means and the ``raw_sm_filt_z``
        Pearson correlation to ``outputlns``; logs a warning when that
        correlation is below 0.995.
    """
    patterns = MatlabStructDict(self.blkGrp.patterns)
    # load the replay file for target outcomes
    target_patternsdata = utils.loadMatFile(self.run.validationDataFile)
    target_patterns = target_patternsdata.patterns
    strip_patterns(target_patterns, range(target_i1, target_i2))
    cmp_fields = [
        'raw', 'raw_sm', 'raw_sm_filt', 'raw_sm_filt_z', 'phase1Mean',
        'phase1Y', 'phase1Std', 'phase1Var', 'regressor'
    ]
    res = vutils.compareMatStructs(patterns, target_patterns,
                                   field_list=cmp_fields)
    res_means = {key: value['mean'] for key, value in res.items()}
    outputlns.append("Validation Means: {}".format(res_means))

    # calculate the Pearson correlation for raw_sm_filt_z
    pearson_mean = vutils.pearsons_mean_corr(patterns.raw_sm_filt_z,
                                             target_patterns.raw_sm_filt_z)
    outputlns.append(
        "Phase1 sm_filt_z mean pearsons correlation {}".format(
            pearson_mean))
    if pearson_mean < .995:
        # A low correlation is reported, not treated as fatal.
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning("Pearson mean for raw_sm_filt_z low, %f",
                        pearson_mean)
def getLocalPatternsFile(session, subjectDataDir, runId):
    """Locate and load the patterns design file for a run.

    When ``session.findNewestPatterns`` is set, the file is chosen by
    ``findPatternsDesignFile``. Otherwise the run's index is looked up with
    ``getRunIndex`` and the matching entry of ``session.patternsDesignFiles``
    is used, keeping only its base name and placing it under
    ``subjectDataDir``.

    Args:
        session: session configuration object.
        subjectDataDir: directory that contains the patterns design files.
        runId: run whose patterns file is wanted.

    Returns:
        Tuple ``(patterns, patternsFilename)``.

    Raises:
        ValidationError: if the run index is negative, or if
            ``session.patternsDesignFiles`` has no entry at that index.
    """
    if session.findNewestPatterns:
        # load the newest file patterns
        designPath = findPatternsDesignFile(session, subjectDataDir, runId)
    else:
        runIndex = getRunIndex(session, runId)
        if runIndex < 0:
            raise ValidationError(
                "Insufficient runs specified in config file session: "
                "run {} idx {}".format(runId, runIndex))
        if not len(session.patternsDesignFiles) > runIndex:
            raise ValidationError(
                "Insufficient patternsDesignFiles specified in "
                "config file session for run {}".format(runId))
        configuredName = session.patternsDesignFiles[runIndex]
        designPath = os.path.join(subjectDataDir,
                                  os.path.basename(configuredName))

    # load and parse the patternsDesign file
    logging.info("Using Local Patterns file: %s", designPath)
    return loadMatFile(designPath), designPath
def validateModel(self, newTrainedModel, outputlns):
    """Compare the trained model for this block group to a reference model.

    The reference is a trained model created from a previous run using the
    same data (i.e. from the Matlab version), loaded from
    ``self.run.validationModel``.

    Args:
        newTrainedModel: the model produced in this run; must provide
            ``trainLabels``, ``weights``, ``biases`` and ``trainPats``.
        outputlns: list that receives the human-readable result lines.

    Side effects:
        Appends the per-field comparison means and the Pearson correlations
        for ``trainPats`` and ``weights`` to ``outputlns``. A ``trainPats``
        correlation below 0.995 is logged as a warning; a ``weights``
        correlation below 0.995 is appended to ``outputlns`` as a WARN line.
    """
    target_model = utils.loadMatFile(self.run.validationModel)
    cmp_fields = ['trainLabels', 'weights', 'biases', 'trainPats']
    res = vutils.compareMatStructs(newTrainedModel, target_model,
                                   field_list=cmp_fields)
    res_means = {key: value['mean'] for key, value in res.items()}
    outputlns.append("TrainModel Validation Means: {}".format(res_means))

    # calculate the Pearson correlation for trainPats
    pearson_mean = vutils.pearsons_mean_corr(newTrainedModel.trainPats,
                                             target_model.trainPats)
    outputlns.append(
        "trainPats mean pearsons correlation {}".format(pearson_mean))
    if pearson_mean < .995:
        # Reported, not fatal. logging.warning replaces the deprecated
        # logging.warn alias.
        logging.warning("Pearson mean for trainPats low, %f", pearson_mean)

    # calculate the Pearson correlation for model weights
    pearson_mean = vutils.pearsons_mean_corr(newTrainedModel.weights,
                                             target_model.weights)
    outputlns.append(
        "trainedWeights mean pearsons correlation {}".format(pearson_mean))
    if pearson_mean < .995:
        outputlns.append(
            "WARN: Pearson mean for trainWeights low, {}".format(
                pearson_mean))
def get_blockData(subjNumb, day, run):
    """Load ``blockData`` from a run's behavioural block-data file.

    Files matching ``blockdata_<run>*.mat`` are searched for under the
    hard-coded behavioural data root, in ``subject<subjNumb>/day<day>/run<run>``.
    When several files match, the one whose path sorts last is used.

    Args:
        subjNumb: subject number.
        day: day number.
        run: run number.

    Returns:
        The ``'blockData'`` entry of the loaded file.

    Raises:
        IndexError: if no file matches the pattern.
    """
    pattern = ('/data/jag/cnds/amennen/rtAttenPenn/behavgonogo' + '/subject' +
               str(subjNumb) + '/day' + str(day) + '/run' + str(run) +
               '/blockdata_' + str(run) + '*.mat')
    # glob returns matches in arbitrary order; sort so that taking the last
    # element is deterministic.
    data_files = sorted(glob.glob(pattern))
    if not data_files:
        # Same exception type as indexing an empty list, but naming the
        # pattern that failed.
        raise IndexError("No blockdata file found matching {}".format(pattern))
    filename = data_files[-1]
    behav = utils.loadMatFile(filename)
    return behav['blockData']
def getOpacityFromFile(subjectNum,day,run,block):
    """Return the ``smoothAttImgProp`` entry of one block from a block-data file.

    day, run, block are in MATLAB 1-based indices, but block is the run
    starting with 1 as first real-time run. The column read is
    ``block - 1 + 4``.

    Uses the first ``blockdata_*.mat`` file that glob reports in the run
    directory; raises IndexError when there is none.
    """
    rtAttenPath = '/data/jux/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'
    runDir = '/'.join([
        rtAttenPath,
        'subject' + str(subjectNum),
        'day' + str(day),
        'run' + str(run),
    ])
    matches = glob.glob(os.path.join(runDir,'blockdata_*.mat'))
    blockFile = matches[0]
    loaded = utils.loadMatFile(blockFile)
    column = block-1 +4
    opacityAllBlocks = loaded.blockData.smoothAttImgProp
    return opacityAllBlocks[:,column][0][0]
def test_loadMatlabFile(self, testStruct, matTestFilename):
    """Check both Matlab-file loaders against the reference struct.

    The file is loaded once by path with ``utils.loadMatFile`` and once from
    its raw bytes with ``utils.loadMatFileFromBuffer``. Each result must
    compare to ``testStruct`` with a mean difference within a threshold of 0,
    and the path-loaded struct must carry the same ``__name__``.
    """
    print("Test LoadMatlabFile")

    # Load by file name
    fromPath = utils.loadMatFile(matTestFilename)
    assert testStruct.__name__ == fromPath.__name__
    pathDiff = vutils.compareMatStructs(testStruct, fromPath)
    assert vutils.isMeanWithinThreshold(pathDiff, 0)

    # Load from an in-memory copy of the same file
    with open(matTestFilename, 'rb') as matFile:
        rawBytes = matFile.read()
    fromBuffer = utils.loadMatFileFromBuffer(rawBytes)
    bufferDiff = vutils.compareMatStructs(testStruct, fromBuffer)
    assert vutils.isMeanWithinThreshold(bufferDiff, 0)
def getCSFromBlockData(subjectNum, day, run, block):
    """Return the non-NaN category-separation values of one block.

    day, run, block are in MATLAB 1-based indices, but block is the run
    starting with 1 as first real-time run. The column read is
    ``block - 1 + 4``.

    Uses the first ``blockdata_*.mat`` file that glob reports in the run
    directory; raises IndexError when there is none.

    Returns:
        The first value returned by ``nonNan`` for the block's ``categsep``.
    """
    rtAttenPath = '/data/jux/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'
    runDir = '/'.join([
        rtAttenPath,
        'subject' + str(subjectNum),
        'day' + str(day),
        'run' + str(run),
    ])
    blockFile = glob.glob(os.path.join(runDir, 'blockdata_*.mat'))[0]
    loaded = utils.loadMatFile(blockFile)
    column = block - 1 + 4
    blockSeparation = loaded.blockData.categsep[:, column][0][0]
    # Read but not used further; kept so a missing ``newestFile`` field still
    # fails here as before.
    fileRead = loaded.blockData.newestFile[:, column][0][0]
    values, _ = nonNan(blockSeparation, [])
    return values
def loadImageData(self, filename):
    """Load an image file, dispatching on its extension.

    ``.mat`` files are loaded with ``utils.loadMatFile``. ``.dcm`` files are
    read with ``readDicomFromFile`` and their ``pixel_array`` is accessed once
    so that unreadable pixel data fails here.

    Raises:
        StateError: if the extension is neither ``.mat`` nor ``.dcm``.
    """
    suffix = Path(filename).suffix
    if suffix == '.mat':
        return utils.loadMatFile(filename)

    # Anything else must be a Dicom file
    if suffix != '.dcm':
        raise StateError(
            'loadImageData: fileExtension not .dcm: {}'.format(suffix))
    dicomImg = readDicomFromFile(filename)
    # Check that pixeldata can be read, will throw exception if not
    _ = dicomImg.pixel_array
    return dicomImg
def getTrainedModel(self, sessionId, runId):
    """Return the ML model trained in a previous run (runId).

    The in-memory ``self.modelCache`` is consulted first. On a miss the model
    is loaded from the file named by ``getModelFilename(sessionId, runId)``
    inside ``self.dirs.dataDir``; it is added to the cache only when
    ``sessionId`` is the current session's id.
    """
    cached = self.modelCache.get(runId)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("modelCache miss on runId %d", runId)
    modelPath = os.path.join(self.dirs.dataDir,
                             getModelFilename(sessionId, runId))
    loaded = utils.loadMatFile(modelPath)
    # loadMatFile should either raise an exception or return a value
    assert loaded is not None, "Load model returned None: %s" % (modelPath)
    if sessionId == self.id_fields.sessionId:
        self.modelCache[runId] = loaded
    return loaded
def getCSFromBlockData(subjectNum,day,run,block):
    """Return one block's category-separation values and a load-success flag.

    day, run, block are in MATLAB 1-based indices, but block is the run
    starting with 1 as first real-time run. The column read is
    ``block - 1 + 4``.

    Uses the first ``blockdata_*.mat`` file that glob reports in the run
    directory; raises IndexError when there is none.

    Returns:
        Tuple ``(x, test)``: ``x`` is the first value returned by ``nonNan``
        for the block's ``categsep``; ``test`` is False when any entry of the
        block's ``classOutputFile`` contains ``'notload'``, True otherwise.
    """
    rtAttenPath = '/data/jux/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'
    runDir = '/'.join([
        rtAttenPath,
        'subject' + str(subjectNum),
        'day' + str(day),
        'run' + str(run),
    ])
    blockFile = glob.glob(os.path.join(runDir,'blockdata_*.mat'))[0]
    loaded = utils.loadMatFile(blockFile)
    column = block-1 +4
    blockSeparation = loaded.blockData.categsep[:,column][0][0]
    outputFiles = loaded.blockData.classOutputFile[:,column][0][0]

    # Count the entries whose file name marks a failed load
    failedLoads = sum(
        1 for idx in np.arange(len(outputFiles))
        if 'notload' in outputFiles[idx][0])
    test = not failedLoads > 0

    x,_ = nonNan(blockSeparation,[])
    return x,test
def getPrevBlkGrp(self, sessionId, runId, blkGrpId):
    """Return the patterns data of a previous block group.

    The in-memory ``self.blkGrpCache`` is consulted first, keyed by
    ``getBlkGrpKey(runId, blkGrpId)``. On a miss the data is loaded from the
    file named by ``getBlkGrpFilename(sessionId, runId, blkGrpId)`` inside
    ``self.dirs.dataDir``; it is added to the cache only when ``sessionId``
    is the current session's id.
    """
    cacheKey = getBlkGrpKey(runId, blkGrpId)
    cached = self.blkGrpCache.get(cacheKey)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("blkGrpCache miss on <runId, blkGrpId> %s", cacheKey)
    blkGrpPath = os.path.join(self.dirs.dataDir,
                              getBlkGrpFilename(sessionId, runId, blkGrpId))
    loaded = utils.loadMatFile(blkGrpPath)
    # loadMatFile should either raise an exception or return a value
    assert loaded is not None, "Load blkGrp returned None: %s" % (
        blkGrpPath)
    if sessionId == self.id_fields.sessionId:
        self.blkGrpCache[cacheKey] = loaded
    return loaded
def getTrainedModel(self, sessionId, runId):
    """Return the ML model trained in a previous run (runId).

    The in-memory ``self.modelCache`` is consulted first. On a miss the model
    is loaded from ``self.dirs.dataDir``. When
    ``self.session.useSessionTimestamp`` is True, everything after the first
    ``T`` in ``sessionId`` is replaced by a ``*`` wildcard and the newest
    matching file is used. Otherwise the file named for the exact
    ``sessionId`` is used. The model is added to the cache only when
    ``sessionId`` is the current session's id.

    Raises:
        StateError: if the loader returns None.
    """
    cached = self.modelCache.get(runId)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("modelCache miss on runId %d", runId)
    modelPath = os.path.join(self.dirs.dataDir,
                             getModelFilename(sessionId, runId))
    if self.session.useSessionTimestamp is True:
        wildcardSession = re.sub('T.*', 'T*', sessionId)
        modelPath = utils.findNewestFile(
            self.dirs.dataDir, getModelFilename(wildcardSession, runId))
    loaded = utils.loadMatFile(modelPath)
    # loadMatFile should either raise an exception or return a value
    if loaded is None:
        raise StateError("Load model returned None: {}".format(modelPath))
    if sessionId == self.id_fields.sessionId:
        self.modelCache[runId] = loaded
    return loaded
def getPrevBlkGrp(self, sessionId, runId, blkGrpId):
    """Return the patterns data of a previous block group.

    The in-memory ``self.blkGrpCache`` is consulted first, keyed by
    ``getBlkGrpKey(runId, blkGrpId)``. On a miss the data is loaded from
    ``self.dirs.dataDir``. When ``self.session.useSessionTimestamp`` is True,
    everything after the first ``T`` in ``sessionId`` is replaced by a ``*``
    wildcard and the newest matching file is used. Otherwise the file named
    for the exact ``sessionId`` is used. The data is added to the cache only
    when ``sessionId`` is the current session's id.

    Raises:
        StateError: if the loader returns None.
    """
    cacheKey = getBlkGrpKey(runId, blkGrpId)
    cached = self.blkGrpCache.get(cacheKey)
    if cached is not None:
        return cached

    # Cache miss: load it from file
    logging.info("blkGrpCache miss on <runId, blkGrpId> %s", cacheKey)
    blkGrpPath = os.path.join(self.dirs.dataDir,
                              getBlkGrpFilename(sessionId, runId, blkGrpId))
    if self.session.useSessionTimestamp is True:
        wildcardSession = re.sub('T.*', 'T*', sessionId)
        blkGrpPath = utils.findNewestFile(
            self.dirs.dataDir,
            getBlkGrpFilename(wildcardSession, runId, blkGrpId))
    loaded = utils.loadMatFile(blkGrpPath)
    # loadMatFile should either raise an exception or return a value
    if loaded is None:
        raise StateError("Load blkGrp returned None: {}".format(blkGrpPath))
    if sessionId == self.id_fields.sessionId:
        self.blkGrpCache[cacheKey] = loaded
    return loaded
def initSession(self, cfg):
    """Prepare a session: ids, directories, file watching and the ROI mask.

    Steps, in order:

    1. Validate ``cfg`` and resolve the session date (``cfg.session.date`` is
       parsed unless it is ``'now'``/``'today'``).
    2. Resolve ``cfg.session.sessionId`` and ``cfg.session.useSessionTimestamp``.
    3. Set ``self.dirs.dataDir``, ``serverDataDir`` and ``imgDir`` (plus
       ``remoteDataDir`` when remote files are used), creating local
       directories as needed.
    4. Start watching the image directory, either through the web pipe or
       through the local ``self.fileWatcher``.
    5. Load the ROI mask and store ``roiInds``, ``roiDims`` and ``nVoxels``
       on ``cfg.session``.
    6. Delegate to the parent class' ``initSession``.

    Args:
        cfg: configuration object; ``cfg.session`` is modified in place.

    Raises:
        RequestError: if ``cfg.session.date`` cannot be parsed.
        StateError: if no file watcher is available in local mode, or if the
            loaded mask is not a numpy array.
    """
    self.cfgValidation(cfg)

    sessionDate = datetime.datetime.now()
    dateStr = cfg.session.date.lower()
    if dateStr != 'now' and dateStr != 'today':
        try:
            sessionDate = parser.parse(cfg.session.date)
        except ValueError as err:
            # Chain the cause so the original parse failure stays visible
            raise RequestError('Unable to parse date string {} {}'.format(
                cfg.session.date, err)) from err

    if cfg.session.sessionId in (
            None, '') or cfg.session.useSessionTimestamp is True:
        # we didn't specify a session id because we want the session id to be
        # the same as the scan date
        cfg.session.useSessionTimestamp = True
        cfg.session.sessionId = dateStr30(sessionDate.timetuple())
    else:
        # A non-empty session id was given and useSessionTimestamp is not True.
        # cfg.session.useSessionTimestamp if true, then it will look for the
        # newest files from that same date; if false, you just want to use
        # only specific files from the exact session id.
        sessionStr = cfg.session.sessionId.lower()
        if sessionStr == 'now' or sessionStr == 'today':
            # we do specify now as the session id because we want the session
            # files to be saved with today's date even if it's different from
            # the scanning date
            currentDate = datetime.datetime.now()
            cfg.session.sessionId = dateStr30(currentDate.timetuple())
            cfg.session.useSessionTimestamp = True
        else:
            # we specified an exact session id that we want the files to be
            # saved as; the exact string will then be used to find the files
            # for training
            cfg.session.useSessionTimestamp = False

    logging.log(DebugLevels.L1, "## Start Session: %s, subNum%d subDay%d",
                cfg.session.sessionId, cfg.session.subjectNum,
                cfg.session.subjectDay)
    logging.log(DebugLevels.L1, "Config: %r", cfg)

    # Set Directories
    self.dirs.dataDir = getSubjectDataDir(cfg.session.dataDir,
                                          cfg.session.subjectNum,
                                          cfg.session.subjectDay)
    if self.webUseRemoteFiles:
        # Remote fileWatcher dataDir will be the same, but locally we want
        # the data directory to be a subset of a common output directory.
        self.dirs.remoteDataDir = self.dirs.dataDir
        cmd = {'cmd': 'webCommonDir'}
        retVals = wcutils.clientWebpipeCmd(self.webpipes, cmd)
        self.webCommonDir = retVals.filename
        self.dirs.dataDir = os.path.normpath(self.webCommonDir +
                                             self.dirs.dataDir)
    self.dirs.serverDataDir = getSubjectDataDir(cfg.session.serverDataDir,
                                                cfg.session.subjectNum,
                                                cfg.session.subjectDay)
    # Strip any leading separator to make it a relative path. This was
    # previously guarded by `if os.path.abspath(...)`, which is always truthy,
    # so the strip has always been unconditional; lstrip is a no-op when there
    # is no leading separator.
    self.dirs.serverDataDir = self.dirs.serverDataDir.lstrip(os.sep)
    if not os.path.exists(self.dirs.serverDataDir):
        os.makedirs(self.dirs.serverDataDir)

    if cfg.session.buildImgPath:
        datestr = sessionDate.strftime("%Y%m%d")
        # The subject name appears twice in the directory name
        imgDirName = "{}.{}.{}".format(datestr, cfg.session.subjectName,
                                       cfg.session.subjectName)
        self.dirs.imgDir = os.path.join(cfg.session.imgDir, imgDirName)
    else:
        self.dirs.imgDir = cfg.session.imgDir
    print("fMRI files being read from: {}".format(self.dirs.imgDir))

    if self.webUseRemoteFiles:
        # send initWatch via webpipe
        initWatchCmd = wcutils.initWatchReqStruct(
            self.dirs.imgDir, cfg.session.watchFilePattern,
            cfg.session.minExpectedDicomSize, cfg.session.demoStep)
        wcutils.clientWebpipeCmd(self.webpipes, initWatchCmd)
    else:
        if not os.path.exists(self.dirs.imgDir):
            os.makedirs(self.dirs.imgDir)
        if self.fileWatcher is None:
            raise StateError('initSession: fileWatcher is None')
        self.fileWatcher.initFileNotifier(self.dirs.imgDir,
                                          cfg.session.watchFilePattern,
                                          cfg.session.minExpectedDicomSize,
                                          cfg.session.demoStep)

    # Load ROI mask - an array with 1s indicating the voxels of interest
    maskData = None
    maskFileName = 'mask_' + str(cfg.session.subjectNum) + '_' + str(
        cfg.session.subjectDay) + '.mat'
    if self.webUseRemoteFiles and cfg.session.getMasksFromControlRoom:
        # get the mask from remote site
        maskFileName = os.path.join(self.dirs.remoteDataDir, maskFileName)
        logging.info("Getting Remote Mask file: %s", maskFileName)
        getFileCmd = wcutils.getFileReqStruct(maskFileName)
        retVals = wcutils.clientWebpipeCmd(self.webpipes, getFileCmd)
        maskData = retVals.data
        print("Using remote mask {}".format(retVals.filename))
    else:
        # read mask locally
        maskFileName = os.path.join(self.dirs.dataDir, maskFileName)
        logging.info("Getting Local Mask file: %s", maskFileName)
        maskData = utils.loadMatFile(maskFileName)
        print("Using mask {}".format(maskFileName))
    roi = maskData.mask
    if not isinstance(roi, np.ndarray):
        raise StateError('initSession: ROI type {} is not ndarray'.format(
            type(roi)))
    # find indices of non-zero elements in roi in row-major order but sorted
    # by col-major order
    cfg.session.roiInds = utils.find(roi)
    cfg.session.roiDims = roi.shape
    cfg.session.nVoxels = cfg.session.roiInds.size
    super().initSession(cfg)
def validateTestBlkGrp(self, target_i1, target_i2, outputlns):
    """Compare this run's test block group patterns with a previous run's.

    The previous run (i.e. using the Matlab software) must have had the same
    raw input. The reference patterns are loaded from
    ``self.run.validationDataFile`` and restricted with ``strip_patterns`` to
    ``range(target_i1, target_i2)``.

    Result lines appended to ``outputlns``:

    * the per-field comparison means,
    * whether the ``predict`` arrays match exactly (NaNs counted as equal),
      or in how many non-NaN reference positions they differ,
    * the Pearson correlations for ``categoryseparation`` and
      ``raw_sm_filt_z``, each followed by a WARN line when below 0.995.

    Side effects:
        When the reference ``predict`` values are not all NaN and their
        maximum exceeds 1, 1 is subtracted from them (Matlab uses (1, 2),
        Python uses (0, 1)).
    """
    current = MatlabStructDict(self.blkGrp.patterns)
    # load the replay file for target outcomes
    reference = utils.loadMatFile(self.run.validationDataFile).patterns
    strip_patterns(reference, range(target_i1, target_i2))

    fieldsToCompare = [
        'raw', 'raw_sm', 'raw_sm_filt', 'raw_sm_filt_z', 'phase1Mean',
        'phase1Y', 'phase1Std', 'phase1Var', 'categoryseparation',
        'regressor'
    ]
    comparison = vutils.compareMatStructs(current, reference,
                                          field_list=fieldsToCompare)
    fieldMeans = {}
    for fieldName, stats in comparison.items():
        fieldMeans[fieldName] = stats['mean']
    outputlns.append("Validation Means: {}".format(fieldMeans))

    # Make sure the predict array values are identical.
    # Check if we need to convert from matlab to python values.
    referenceHasValues = not np.all(np.isnan(reference.predict))
    if referenceHasValues and np.nanmax(reference.predict) > 1:
        # convert the reference predictions to zero based indexing
        reference.predict = reference.predict - 1

    predictionsMatch = np.allclose(reference.predict, current.predict,
                                   rtol=0, atol=0, equal_nan=True)
    if not predictionsMatch:
        # Only positions where the reference has a value are counted
        valid = ~np.isnan(reference.predict)
        mismatches = np.sum(current.predict[valid] != reference.predict[valid])
        outputlns.append(
            "WARNING: predictions differ in {} TRs".format(mismatches))
    else:
        outputlns.append(
            "All predictions match: {}".format(predictionsMatch))

    # Pearson correlation for categoryseparation
    separationCorr = vutils.pearsons_mean_corr(
        current.categoryseparation, reference.categoryseparation)
    outputlns.append(
        "Phase2 categoryseparation mean pearsons correlation {}".format(
            separationCorr))
    if separationCorr < .995:
        outputlns.append(
            "WARN: Pearson mean for categoryseparation low, {}".format(
                separationCorr))

    # Pearson correlation for raw_sm_filt_z
    filteredCorr = vutils.pearsons_mean_corr(current.raw_sm_filt_z,
                                             reference.raw_sm_filt_z)
    outputlns.append(
        "Phase2 sm_filt_z mean pearsons correlation {}".format(
            filteredCorr))
    if filteredCorr < .995:
        outputlns.append(
            "WARN: Pearson mean for raw_sm_filt_z low, {}".format(
                filteredCorr))
def validateModelsMatlabPython(subjectNum, subjectDay, usesamedata):
    """Plot how Matlab- and Python-derived models classify the next run's data.

    For every run except the last one listed in ``cfg.session.Runs``, the
    Matlab and Python trained-model files are loaded. Two logistic regressions
    (one per label column) are then re-fit to build a new Python model, and
    both models are applied with ``Test_L2_RLR_realtime`` to 100 samples taken
    from the following run's training patterns (rows from index 100 onward).
    For each sample the activation of the labelled category minus that of the
    other category is stored.

    Two figures are shown: a scatter of Matlab versus Python values and
    overlaid histograms, both titled with their Pearson correlation.

    Args:
        subjectNum: subject number; selects the config file to load.
        subjectDay: day number; selects the config file to load.
        usesamedata: when truthy, the Python model is both trained and tested
            on the Matlab patterns instead of the Python patterns.

    Returns:
        None.
    """
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    # Matlab and Python outputs live in the same directory
    matDataDir = subjectDayDir
    pyDataDir = matDataDir

    nTRTest = 100  # samples evaluated per run
    lastRun = cfg.session.Runs[-1]
    # axis 0: sample, axis 1: (matlab, python), axis 2: training run
    all_vals = np.zeros((nTRTest, 2, lastRun - 1))
    # Re-fit a new model instead of using the stored Python model
    usenewmodel = 1

    for runId in np.arange(1, lastRun):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # to find what matModel includes use matModel.keys() -->
        # trainedModel, trainPats, trainLabels
        # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
        # we can't apply this model to any of the examples in this run, but
        # let's apply it to the first 4 blocks of the next run
        matModel_train = utils.loadMatFile(matModelFn)
        pyModel_train = utils.loadMatFile(pyModelFn)
        print(runId)

        pyModelToTest = pyModel_train
        if usenewmodel:
            fitPats = (matModel_train.trainPats
                       if usesamedata else pyModel_train.trainPats)
            lrc1 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc1.fit(fitPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(fitPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
            pyModelToTest = newTrainedModel

        # now load the models of the next run; their training patterns are
        # the test data
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        py_inputs = mat_test_data if usesamedata else py_test_data

        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModelToTest, py_inputs[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]

    # Flatten to 1-D: pearsonr's handling of (N, 1) inputs differs between
    # SciPy versions, while 1-D inputs are accepted by all of them.
    all_mat_ev = all_vals[:, 0, :].reshape(-1)
    all_py_ev = all_vals[:, 1, :].reshape(-1)
    corr = scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0]
    title = 'S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, corr)

    plt.subplots(figsize=(12, 7))
    plt.plot(all_mat_ev, all_py_ev, '.')
    plt.plot([-5, 5], [-5, 5], '--k')  # identity line
    plt.title(title)
    plt.xlabel('MATLAB')
    plt.ylabel('PYTHON')
    plt.xlim([-1.5, 1.5])
    plt.ylim([-1.5, 1.5])
    plt.show()

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title(title)
    plt.legend()
    plt.show()
def crossvalidateModels(matDataDir, pyDataDir, runId):
    """Cross-validate classifiers on the Matlab and Python patterns by AUC.

    The newest trained-model file for ``runId`` is loaded from each directory.
    Samples are addressed as four consecutive groups of 50 (200 samples in
    total); a 4-fold split over those groups selects the training and
    held-out samples. For each fold a fresh ``LogisticRegression`` (sag
    solver, L2 penalty, 300 iterations) is fit on the training groups. The
    held-out score ``P(class 0) - P(class 1)`` is then rated with
    ``roc_auc_score`` against targets of +1 (class 0) and -1 (class 1).
    Per-fold and average AUCs are printed.

    Args:
        matDataDir: directory searched for ``run<runId>/trainedModel_<runId>*.mat``.
        pyDataDir: directory searched for ``trainedModel_r<runId>*_py.mat``.
        runId: run number used to build both file patterns.

    Returns:
        Tuple ``(mat_roc, py_roc)`` of per-fold AUC arrays.
    """
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)

    nFolds = 4
    # Group id (0..3) of every sample: 50 zeros, 50 ones, 50 twos, 50 threes.
    groupOfSample = np.concatenate(
        [g * np.ones((50)) for g in range(4)], axis=0)
    # Only the length of this array matters to KFold.split.
    groups = np.array([1, 2, 3, 4])

    def foldAuc(model, trainMask, testMask):
        """Fit on the training rows and return the AUC on the test rows."""
        classifier = LogisticRegression(solver='sag', penalty='l2',
                                        max_iter=300)
        fitLabels = np.argmax(model.trainLabels[trainMask, :], axis=1)
        classifier.fit(model.trainPats[trainMask, :], fitLabels)
        probabilities = classifier.predict_proba(model.trainPats[testMask, :])
        # -diff over the class axis is P(class 0) - P(class 1)
        separation = -1 * np.diff(probabilities, axis=1)
        classOneRows = np.argwhere(
            np.argmax(model.trainLabels[testMask, :], axis=1) == 1)
        targets = np.ones((len(separation)))
        targets[classOneRows] = -1
        return roc_auc_score(targets, separation)

    mat_roc = np.zeros((nFolds))
    py_roc = np.zeros((nFolds))
    splitter = KFold(nFolds)
    for fold, (trainGroups, testGroups) in enumerate(splitter.split(groups)):
        print("TRAIN:", trainGroups, "TEST:", testGroups)
        trainMask = np.in1d(groupOfSample, trainGroups)
        testMask = np.in1d(groupOfSample, testGroups)

        # matlab first
        mat_roc[fold] = foldAuc(matModel, trainMask, testMask)
        print("MAT AUC for iteration %i is: %.2f" % (fold, mat_roc[fold]))

        # python second
        py_roc[fold] = foldAuc(pyModel, trainMask, testMask)
        print("PY AUC for iteration %i is: %.2f\n" % (fold, py_roc[fold]))

    print("AVG AUC MAT,PY is: %.2f,%.2f\n" %
          (np.mean(mat_roc), np.mean(py_roc)))
    return mat_roc, py_roc
if subjectNum == 106: nRuns = 6 elif subjectDay == 2: nRuns = 9 elif subjectDay == 3: nRuns = 8 #nruns = len(cfg.session.Runs) - 1 for runId in np.arange(1,nRuns): print(runId) runDir = 'run'+str(runId)+'/' pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r'+str(runId)+'*_py.mat') # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes] # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run # now load testing data from the next run to test it on pyModel_train = utils.loadMatFile(pyModelFn) # INSTEAD MAKE NEW MODEL lrc1 = LogisticRegression(penalty='l2', solver='saga',max_iter=300) lrc2 = LogisticRegression(penalty='l2', solver='saga',max_iter=300) lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0]) lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1]) newTrainedModel = utils.MatlabStructDict({}, 'trainedModel') newTrainedModel.trainedModel = StructDict({}) newTrainedModel.trainedModel.weights = np.concatenate((lrc1.coef_.T, lrc2.coef_.T), axis=1) newTrainedModel.trainedModel.biases = np.concatenate((lrc1.intercept_, lrc2.intercept_)).reshape(1, 2) newTrainedModel.trainPats = pyModel_train.trainPats newTrainedModel.trainLabels = pyModel_train.trainLabels # now load testing data for CV pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r'+str(runId + 1)+'*_py.mat')
def start_session(self, cfg):
    """Validate the session config, set up directories and start the session.

    Steps, in order:
      * validate ``cfg`` and fill in ``cfg.session.sessionId`` when empty;
      * copy the experiment/session identifiers into ``self.id_fields``;
      * resolve (and create if needed) the data, server-data and image
        directories, then start the file notifier on the image directory;
      * open ``logtime.txt`` in the data directory for appending and write
        a session header line (the handle stays open on ``self.logtimeFile``);
      * load the ROI mask and store its indices, shape and voxel count on
        ``cfg.session``;
      * issue the remote ``StartSession`` call and assert that it succeeded.

    Args:
        cfg: configuration object with ``experiment`` and ``session`` sections.

    Returns:
        None. Returns early (before the image directory is set up) when the
        configured date cannot be parsed and the user does not answer 'Y'.
    """
    self.cfg = cfg
    validateSessionCfg(cfg)
    if cfg.session.sessionId is None or cfg.session.sessionId == '':
        cfg.session.sessionId = utils.dateStr30(time.localtime())

    self.modelName = cfg.experiment.model
    self.id_fields.experimentId = cfg.experiment.experimentId
    self.id_fields.sessionId = cfg.session.sessionId
    self.id_fields.subjectNum = cfg.session.subjectNum
    self.id_fields.subjectDay = cfg.session.subjectDay

    # Set Directories
    self.dirs.dataDir = getSubjectDataDir(cfg.session.dataDir,
                                          cfg.session.subjectNum,
                                          cfg.session.subjectDay)
    print("Mask and patterns files being read from: {}".format(
        self.dirs.dataDir))
    # The original joined serverDataDir with `subjectDayDir`, a name that is
    # never defined in this method (NameError). Build the path with the same
    # helper used for dataDir instead.
    # NOTE(review): assumes getSubjectDataDir(base, num, day) returns `base`
    # joined with the subject/day subdirectory - confirm against its definition.
    self.dirs.serverDataDir = getSubjectDataDir(cfg.session.serverDataDir,
                                                cfg.session.subjectNum,
                                                cfg.session.subjectDay)
    os.makedirs(self.dirs.serverDataDir, exist_ok=True)

    if cfg.session.buildImgPath:
        imgDirDate = datetime.datetime.now()
        dateStr = cfg.session.date.lower()
        if dateStr != 'now' and dateStr != 'today':
            try:
                imgDirDate = parser.parse(cfg.session.date)
            except ValueError:
                # Fall back to today's date, but only with user consent.
                imgDirDate = datetime.datetime.now()
                resp = input(
                    "Unable to parse date string, use today's date for image dir? Y/N [N]: "
                )
                if resp.upper() != 'Y':
                    return
        datestr = imgDirDate.strftime("%Y%m%d")
        # The subject name appears twice in the directory name on purpose
        # (kept exactly as in the original).
        imgDirName = "{}.{}.{}".format(datestr, cfg.session.subjectName,
                                       cfg.session.subjectName)
        self.dirs.imgDir = os.path.join(cfg.session.imgDir, imgDirName)
    else:
        self.dirs.imgDir = cfg.session.imgDir
    os.makedirs(self.dirs.imgDir, exist_ok=True)
    print("fMRI files being read from: {}".format(self.dirs.imgDir))
    self.initFileNotifier(self.dirs.imgDir, cfg.session.watchFilePattern)

    # Open file for logging processing time measurements
    logtimeFilename = os.path.join(self.dirs.dataDir, "logtime.txt")
    self.logtimeFile = open(logtimeFilename, "a", 1)  # linebuffered=1
    initLogStr = "## Start Session: date:{} subNum:{} subDay:{} ##\n".format(
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        cfg.session.subjectNum, cfg.session.subjectDay)
    self.logtimeFile.write(initLogStr)

    # Load ROI mask - an array with 1s indicating the voxels of interest
    maskFileName = 'mask_' + str(cfg.session.subjectNum) + '_' + str(
        cfg.session.subjectDay) + '.mat'
    maskFileName = os.path.join(self.dirs.dataDir, maskFileName)
    temp = utils.loadMatFile(maskFileName)
    roi = temp.mask
    assert isinstance(roi, np.ndarray)
    # find indices of non-zero elements in roi in row-major order but sorted by col-major order
    cfg.session.roiInds = utils.find(roi)
    cfg.session.roiDims = roi.shape
    cfg.session.nVoxels = cfg.session.roiInds.size
    print("Using mask {}".format(maskFileName))

    replyId = self.rtatten.StartSession.remote(cfg.session)
    reply = ray.get(replyId)
    assert reply.success is True
def getCategSepData(subjectNum):
    """Collect real-time category separation for every feedback run of a subject.

    For each of 3 days and each run from 2 up to that day's run count, loads
    the MATLAB ``patternsdata`` file and the two Python ``blkGroup`` files,
    checks that TR and voxel counts agree, and extracts the category
    separation at the TRs belonging to the second half of the run's
    regressor-marked TRs.

    The results are stored in two zero-padded ``(9, 100, 3)`` arrays indexed
    ``[run - 2, TR, day]`` and saved with ``np.savez`` (keys ``mat`` and
    ``py``) to ``<subjectDir>/realtimeevidence``.

    Args:
        subjectNum: subject number; used to build the data paths. Subject 106
            has 6 runs on day 1 instead of 7.

    Raises:
        ValidationError: if the MATLAB and Python data disagree on the number
            of TRs or voxels.
    """
    rtAttenPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'
    subjectDir = rtAttenPath + '/' + 'subject' + str(subjectNum)
    # this will be less annoying but will just have to average and remember padding zeros
    all_matlab_evidence = np.zeros((9, 100, 3))
    all_python_evidence = np.zeros((9, 100, 3))

    for d in np.arange(3):
        subjectDay = d + 1
        matDataDir = rtAttenPath + '/' + 'subject' + str(
            subjectNum) + '/' + 'day' + str(subjectDay)
        pyDataDir = rtAttenPath + '/' + 'subject' + str(
            subjectNum) + '/' + 'day' + str(subjectDay)

        if subjectDay == 1:
            # then we have 7 runs, except for subject 106. Compare as strings
            # so both 106 and '106' are recognised, matching the check in
            # train_test_python_classifier.
            nRuns = 6 if str(subjectNum) == '106' else 7
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        print(np.arange(2, nRuns + 1))

        # no feedback in the first run, so start at run 2
        for RUN in np.arange(2, nRuns + 1):
            runId = RUN
            runDir = 'run' + str(runId) + '/'
            # Check how well raw_sm_filt_z values match
            matPatternsFn = utils.findNewestFile(
                matDataDir, runDir + 'patternsdata_' + str(runId) + '*.mat')
            pyBlkGrp1Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p1_*_py.mat')
            pyBlkGrp2Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p2_*_py.mat')
            print("Getting rt classifer data from: Matlab {}, Python {} {}".
                  format(matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

            matPatterns = utils.loadMatFile(matPatternsFn)
            pyBlkGrp1 = utils.loadMatFile(pyBlkGrp1Fn)
            pyBlkGrp2 = utils.loadMatFile(pyBlkGrp2Fn)
            mat_nTRs = matPatterns.raw.shape[0]
            pyp1_nTRs = pyBlkGrp1.raw.shape[0]
            pyp2_nTRs = pyBlkGrp2.raw.shape[0]
            py_nTRs = pyp1_nTRs + pyp2_nTRs
            mat_nVoxels = matPatterns.raw.shape[1]
            py_nVoxels = pyBlkGrp1.raw.shape[1]

            if mat_nTRs != py_nTRs or mat_nVoxels != py_nVoxels:
                raise ValidationError(
                    "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
                    .format(mat_nTRs, py_nTRs, mat_nVoxels, py_nVoxels))

            # TRs with a non-zero regressor column; the second half of them
            # is taken as the real-time (feedback) TRs.
            relevant_TR = np.argwhere(np.sum(matPatterns.regressor, 0))[:, 0]
            RT_TR = relevant_TR[int(len(relevant_TR) / 2):]
            mat_RT_CS = matPatterns.categoryseparation[:, RT_TR][0, :]

            # Stitch the two Python block groups into one run-length vector.
            pyCombined_categoryseparation = np.full((py_nTRs, ), np.nan)
            pyCombined_categoryseparation[
                0:pyp1_nTRs] = pyBlkGrp1.categoryseparation
            pyCombined_categoryseparation[
                pyp1_nTRs:] = pyBlkGrp2.categoryseparation
            py_RT_CS = pyCombined_categoryseparation[RT_TR]

            # now you have 2 (100,) arrays with the category separation from that feedback run
            all_matlab_evidence[RUN - 2, :, d] = mat_RT_CS
            all_python_evidence[RUN - 2, :, d] = py_RT_CS

    outfile = subjectDir + '/realtimeevidence'
    np.savez(outfile, mat=all_matlab_evidence, py=all_python_evidence)
def train_test_python_classifier(subjectNum):
    """Retrain per-run classifiers offline and evaluate them on the next run.

    For each of 3 days and each run ``r`` that has a following run:
      1. load the Python ``trainedModel`` file of run ``r`` and fit two new
         L2 logistic regressions (one per label column) on its training data;
      2. compute the ROC AUC of that model on the rows after the first 100
         of the next run's training patterns;
      3. apply the model TR by TR to the regressor-marked TRs of the next
         run's phase-2 block group and record the category separation
         (activation of the labelled category minus the other one).

    Results are saved with ``np.savez`` to ``<subjectDir>/offlineAUC_RTCS``
    under the keys ``auc`` (8 x 3), ``cs`` (8 x 3, run-mean separation) and
    ``all_ev`` (9 x 100 x 3, per-TR separation); unused slots stay zero.

    Args:
        subjectNum: subject number used to locate the config and data files.
            Subject 106 has 6 runs on day 1 instead of 7.
    """
    ndays = 3
    auc_score = np.zeros((8, ndays))  # save larger to fit all days in
    RT_cs = np.zeros((8, ndays))
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    subjectDir = dataPath + '/' + 'subject' + str(subjectNum)
    print(subjectNum)
    # time course of classifier evidence
    all_python_evidence = np.zeros((9, 100, 3))

    for d in np.arange(ndays):
        print(d)
        subjectDay = d + 1
        configFile = dataPath + 'subject' + str(
            subjectNum) + '/usedscripts/PennCfg_Day' + str(
                subjectDay) + '.toml'
        cfg = loadConfigFile(configFile)
        subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
            cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
        pyDataDir = subjectDayDir

        if subjectDay == 1:
            # String comparison so that both 106 and '106' are recognised.
            nRuns = 6 if str(subjectNum) == '106' else 7
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        print('total number of runs: %i' % nRuns)

        for r in np.arange(0, nRuns - 1):
            runId = r + 1  # now it goes from 0 : n Runs - 1
            print(runId)
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
            # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
            # we can't apply this model to any of the examples in this run,
            # but let's apply it to the first 4 blocks of the next run
            pyModel_train = utils.loadMatFile(pyModelFn)

            # INSTEAD MAKE NEW MODEL: one binary classifier per label column
            lrc1 = LogisticRegression(penalty='l2', solver='saga',
                                      max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='saga',
                                      max_iter=300)
            lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels

            # now load testing data for CV (training file of the next run)
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
            pyModel_test = utils.loadMatFile(pyModelFn)
            nTRTest = 100
            py_test_data = pyModel_test.trainPats[nTRTest:, :]
            test_labels = pyModel_test.trainLabels[nTRTest:, :]
            activations = np.zeros((nTRTest, 2))
            for t in np.arange(nTRTest):
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, py_test_data[t, :], test_labels[t, :])
                activations[t, :] = activations_py

            fpr2, tpr2, _ = metrics.roc_curve(
                test_labels[:, 1],
                activations[:, 1] - activations[:, 0],
                pos_label=1)
            # auc of this data applied to the first half of the next run
            auc_score[r, d] = metrics.auc(fpr2, tpr2)

            # now apply to block data-- realtime values
            pyDataFn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId + 1) + '_p2_*_py.mat')
            pyData_test = utils.loadMatFile(pyDataFn)
            regressor = pyData_test.regressor
            # argwhere yields a column of indices, hence the flatten/reshape
            # when the selected data is used below.
            TRs_to_test = np.argwhere(np.sum(regressor, axis=0))
            RT_data = pyData_test.raw_sm_filt_z[TRs_to_test, :]
            RT_regressor = regressor[:, TRs_to_test].T.reshape(nTRTest, 2)

            # now do the same thing and test for every TR --> get category separation
            cs = np.zeros((nTRTest, 1))
            for t in np.arange(nTRTest):
                categ = np.flatnonzero(RT_regressor[t, :])
                otherCateg = (categ + 1) % 2
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, RT_data[t, :].flatten(),
                    RT_regressor[t, :])
                cs[t] = activations_py[categ] - activations_py[otherCateg]

            # take average for this run
            RT_cs[r, d] = np.mean(cs)
            all_python_evidence[r, :, d] = cs[:, 0]

    outfile = subjectDir + '/offlineAUC_RTCS'
    np.savez(outfile, auc=auc_score, cs=RT_cs, all_ev=all_python_evidence)
def runRun(self, runId, scanNum=-1):
    """Process one run: stream every TR to the server, then train the model.

    Creates ``run<runId>/classoutput`` under the data directory, opens the
    per-run log file, obtains the patterns design (remote or local), builds
    and validates the run config and then sends, in order: StartRun; for
    each block group StartBlockGroup; for each block StartBlock; one TRData
    message per TR; EndBlock; EndBlockGroup; TrainModel; EndRun. Reply output
    lines and predictions are written through ``outputInfo``.

    Args:
        runId: run number to process.
        scanNum: scan number passed on to ``createRunConfig`` (default -1).

    Returns:
        None. Returns early, without sending anything, when DICOM files for
        this scan already exist and the user does not confirm reprocessing.

    Raises:
        RequestError: the remote patterns request did not return status 200.
        ValidationError: replay/validate mode is on but the config lacks a
            validation model or data file for this run.
        RTError: the first TR of the run has no data.
    """
    # Setup output directory and output file
    runDataDir = os.path.join(self.dirs.dataDir, 'run' + str(runId))
    if not os.path.exists(runDataDir):
        os.makedirs(runDataDir)
    outputInfo = StructDict()
    outputInfo.runId = runId
    outputInfo.classOutputDir = os.path.join(runDataDir, 'classoutput')
    if not os.path.exists(outputInfo.classOutputDir):
        os.makedirs(outputInfo.classOutputDir)
    outputInfo.logFilename = os.path.join(runDataDir,
                                          'fileprocessing_py.txt')
    outputInfo.logFileHandle = open(outputInfo.logFilename, 'w+')
    # Everything below runs under try/finally so the log file is closed on
    # every exit path, including the early return and raised errors.
    try:
        if self.webpipes is not None:
            outputInfo.webpipes = self.webpipes
        if self.webUseRemoteFiles:
            outputInfo.webUseRemoteFiles = True
            remoteRunDataDir = os.path.join(self.dirs.remoteDataDir,
                                            'run' + str(runId))
            outputInfo.remoteClassOutputDir = os.path.join(
                remoteRunDataDir, 'classoutput')
            outputInfo.remoteLogFilename = os.path.join(
                remoteRunDataDir, 'fileprocessing_py.txt')

        # Get patterns design file for this run
        patterns = None
        if self.webUseRemoteFiles and self.cfg.session.getPatternsFromControlRoom:
            fileRegex = getPatternsFileRegex(self.cfg.session,
                                             self.dirs.remoteDataDir,
                                             runId,
                                             addRunDir=True)
            getNewestFileCmd = wcutils.getNewestFileReqStruct(fileRegex)
            retVals = wcutils.clientWebpipeCmd(self.webpipes,
                                               getNewestFileCmd)
            if retVals.statusCode != 200:
                raise RequestError('runRun: statusCode not 200: {}'.format(
                    retVals.statusCode))
            patterns = retVals.data
            logging.info("Using Remote Patterns file: %s", retVals.filename)
            print("Using remote patterns {}".format(retVals.filename))
        else:
            patterns, filename = getLocalPatternsFile(
                self.cfg.session, self.dirs.dataDir, runId)
            print("Using patterns {}".format(filename))

        run = createRunConfig(self.cfg.session, patterns, runId, scanNum)
        validateRunCfg(run)
        self.id_fields.runId = run.runId
        logging.log(DebugLevels.L4, "Run: %d, scanNum %d", runId,
                    run.scanNum)
        if self.cfg.session.rtData:
            # Check if images already exist and warn and ask to continue
            firstFileName = self.getDicomFileName(run.scanNum, 1)
            if os.path.exists(firstFileName):
                logging.log(DebugLevels.L3, "Dicoms already exist")
                skipCheck = self.cfg.session.skipConfirmForReprocess
                if skipCheck is None or skipCheck is False:
                    resp = input(
                        'Files with this scan number already exist. Do you want to continue? Y/N [N]: '
                    )
                    if resp.upper() != 'Y':
                        return
            else:
                logging.log(DebugLevels.L3, "Dicoms - waiting for")
        elif self.cfg.session.replayMatFileMode or self.cfg.session.validate:
            idx = getRunIndex(self.cfg.session, runId)
            if idx >= 0 and len(self.cfg.session.validationModels) > idx:
                run.validationModel = os.path.join(
                    self.dirs.dataDir,
                    self.cfg.session.validationModels[idx])
            else:
                # The original passed the template and its values as separate
                # exception args, so the message was never formatted.
                raise ValidationError(
                    "Insufficient config runs or validationModels specified: "
                    "runId {}, validationModel idx {}".format(runId, idx))
            if idx >= 0 and len(self.cfg.session.validationData) > idx:
                run.validationDataFile = os.path.join(
                    self.dirs.dataDir, self.cfg.session.validationData[idx])
            else:
                raise ValidationError(
                    "Insufficient config runs or validationDataFiles specified: "
                    "runId {}, validationData idx {}".format(runId, idx))

        # ** Experimental Parameters ** #
        run.seed = time.time()
        # Feedback is given on every run after the first.
        if run.runId > 1:
            run.rtfeedback = 1
        else:
            run.rtfeedback = 0

        runCfg = copy_toplevel(run)
        reply = self.sendCmdExpectSuccess(MsgEvent.StartRun, runCfg)
        outputReplyLines(reply.fields.outputlns, outputInfo)

        if self.cfg.session.replayMatFileMode and not self.cfg.session.rtData:
            # load previous patterns data for this run
            p = utils.loadMatFile(run.validationDataFile)
            run.replay_data = p.patterns.raw

        # Begin BlockGroups (phases)
        for blockGroup in run.blockGroups:
            self.id_fields.blkGrpId = blockGroup.blkGrpId
            blockGroupCfg = copy_toplevel(blockGroup)
            logging.log(DebugLevels.L4, "BlkGrp: %d", blockGroup.blkGrpId)
            reply = self.sendCmdExpectSuccess(MsgEvent.StartBlockGroup,
                                              blockGroupCfg)
            outputReplyLines(reply.fields.outputlns, outputInfo)
            for block in blockGroup.blocks:
                self.id_fields.blockId = block.blockId
                blockCfg = copy_toplevel(block)
                logging.log(DebugLevels.L4, "Blk: %d", block.blockId)
                reply = self.sendCmdExpectSuccess(MsgEvent.StartBlock,
                                                  blockCfg)
                outputReplyLines(reply.fields.outputlns, outputInfo)
                for TR in block.TRs:
                    self.id_fields.trId = TR.trId
                    fileNum = TR.vol + run.disdaqs // run.TRTime
                    logging.log(DebugLevels.L3, "TR: %d, fileNum %d",
                                TR.trId, fileNum)
                    if self.cfg.session.rtData:
                        # Assuming the output file volumes are still 1's based
                        trVolumeData = self.getNextTRData(run, fileNum)
                        if trVolumeData is None:
                            if TR.trId == 0:
                                errStr = "First TR {} of run {} missing data, aborting...".format(
                                    TR.trId, runId)
                                raise RTError(errStr)
                            # logging.warn is a deprecated alias of warning
                            logging.warning(
                                "TR {} missing data, sending empty data".
                                format(TR.trId))
                            TR.data = np.full((self.cfg.session.nVoxels),
                                              np.nan)
                            reply = self.sendCmdExpectSuccess(
                                MsgEvent.TRData, TR)
                            continue
                        TR.data = applyMask(trVolumeData,
                                            self.cfg.session.roiInds)
                    else:
                        # TR.vol is 1's based to match matlab, so we want vol-1 for zero based indexing
                        TR.data = run.replay_data[TR.vol - 1]

                    processingStartTime = time.time()
                    imageAcquisitionTime = 0.0
                    pulseBroadcastTime = 0.0
                    trStartTime = 0.0
                    gotTTLTime = False
                    if (self.cfg.session.enforceDeadlines is not None
                            and self.cfg.session.enforceDeadlines is True):
                        # capture TTL pulse from scanner to calculate next deadline
                        trStartTime = self.ttlPulseClient.getTimestamp()
                        # NOTE(review): imageAcquisitionTime is still 0.0
                        # here, so the staleness half of this test cannot
                        # fire for a positive TRTime - confirm the intent.
                        if trStartTime == 0 or imageAcquisitionTime > run.TRTime:
                            # Either no TTL Pulse time signal or stale time signal
                            # Approximate trStart as current time minus 500ms
                            # because scan reconstruction takes about 500ms
                            gotTTLTime = False
                            trStartTime = time.time() - 0.5
                        else:
                            gotTTLTime = True
                            imageAcquisitionTime = time.time() - trStartTime
                            pulseBroadcastTime = trStartTime - self.ttlPulseClient.getServerTimestamp()
                        # Deadline is TR_Start_Time + time between TRs +
                        # clockSkew adjustment - 1/2 Max Net Round_Trip_Time -
                        # Min RTT because clock skew calculation can be off
                        # by the RTT used for calculation which is Min RTT.
                        TR.deadline = (trStartTime + self.cfg.clockSkew +
                                       run.TRTime -
                                       (0.5 * self.cfg.maxRTT) -
                                       self.cfg.minRTT)
                    reply = self.sendCmdExpectSuccess(MsgEvent.TRData, TR)
                    processingEndTime = time.time()
                    missedDeadline = False
                    if (reply.fields.missedDeadline is not None
                            and reply.fields.missedDeadline is True):
                        # TODO - store reply.fields.threadId in order to get completed reply later
                        # TODO - add a message type that retrieves previous thread results
                        missedDeadline = True
                    else:
                        # classification result
                        outputPredictionFile(reply.fields.predict,
                                             outputInfo)

                    # log the TR processing time
                    serverProcessTime = processingEndTime - processingStartTime
                    elapsedTRTime = 0.0
                    if gotTTLTime is True:
                        elapsedTRTime = time.time() - trStartTime
                    logStr = "TR:{}:{}:{:03}, fileNum {}, server_process_time {:.3f}s, " \
                             "elapsedTR_time {:.3f}s, image_time {:.3f}s, " \
                             "pulse_time {:.3f}s, gotTTLPulse {}, missed_deadline {}, " \
                             "dicom_arrival {:.5f}" \
                             .format(runId, block.blockId, TR.trId, fileNum,
                                     serverProcessTime, elapsedTRTime,
                                     imageAcquisitionTime, pulseBroadcastTime,
                                     gotTTLTime, missedDeadline,
                                     processingStartTime)
                    logging.log(DebugLevels.L3, logStr)
                    outputReplyLines(reply.fields.outputlns, outputInfo)
                    del self.id_fields.trId
                # End Block
                if self.webpipes is not None:
                    cmd = {'cmd': 'subjectDisplay', 'bgcolor': '#808080'}
                    wcutils.clientWebpipeCmd(self.webpipes, cmd)
                reply = self.sendCmdExpectSuccess(MsgEvent.EndBlock,
                                                  blockCfg)
                outputReplyLines(reply.fields.outputlns, outputInfo)
                del self.id_fields.blockId
            reply = self.sendCmdExpectSuccess(MsgEvent.EndBlockGroup,
                                              blockGroupCfg)
            outputReplyLines(reply.fields.outputlns, outputInfo)
            # self.retrieveBlkGrp(self.id_fields.sessionId, self.id_fields.runId, self.id_fields.blkGrpId)
            del self.id_fields.blkGrpId
        # End Run
        if self.webpipes is not None:
            # send instructions to subject window display
            cmd = {
                'cmd': 'subjectDisplay',
                'text': 'Waiting for next run to start...'
            }
            wcutils.clientWebpipeCmd(self.webpipes, cmd)

        # Train the model for this Run; the block groups used for training
        # depend on the run number.
        trainCfg = StructDict()
        if run.runId == 1:
            trainCfg.blkGrpRefs = [{
                'run': 1,
                'phase': 1
            }, {
                'run': 1,
                'phase': 2
            }]
        elif run.runId == 2:
            trainCfg.blkGrpRefs = [{
                'run': 1,
                'phase': 2
            }, {
                'run': 2,
                'phase': 1
            }]
        else:
            trainCfg.blkGrpRefs = [{
                'run': run.runId - 1,
                'phase': 1
            }, {
                'run': run.runId,
                'phase': 1
            }]
        outlns = []
        outlns.append('*********************************************')
        outlns.append("Train Model {} {}".format(trainCfg.blkGrpRefs[0],
                                                 trainCfg.blkGrpRefs[1]))
        outputReplyLines(outlns, outputInfo)
        processingStartTime = time.time()
        reply = self.sendCmdExpectSuccess(MsgEvent.TrainModel, trainCfg)
        processingEndTime = time.time()
        # log the model generation time
        logStr = "Model:{} training time {:.3f}s\n".format(
            runId, processingEndTime - processingStartTime)
        logging.log(DebugLevels.L3, logStr)
        outputReplyLines(reply.fields.outputlns, outputInfo)

        reply = self.sendCmdExpectSuccess(MsgEvent.EndRun, runCfg)
        outputReplyLines(reply.fields.outputlns, outputInfo)
        if self.cfg.session.retrieveServerFiles:
            self.retrieveRunFiles(runId)
        del self.id_fields.runId
    finally:
        outputInfo.logFileHandle.close()
def validateModelsMatlabPython(subjectNum, subjectDay):
    """Compare MATLAB and Python models on the next run's data and plot them.

    For every run ``r`` from 1 to ``cfg.session.Runs[-1] - 1``, loads the
    MATLAB and Python models trained on run ``r`` and applies each to the
    rows after the first 100 of the training patterns stored with run
    ``r + 1``. For every tested TR the category separation (activation of
    the labelled category minus the other one) is recorded. The pooled
    values are shown as two overlaid histograms whose title reports the
    Pearson correlation between the MATLAB and Python values.

    Args:
        subjectNum: subject number used to locate the config file.
        subjectDay: day number used to locate the config file.

    Returns:
        None; the figure is displayed with ``plt.show()``.
    """
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir
    pyDataDir = matDataDir

    nTRTest = 100
    lastRun = cfg.session.Runs[-1]
    # all_vals[TR, 0 = matlab / 1 = python, run - 1]
    all_vals = np.zeros((nTRTest, 2, lastRun - 1))

    for runId in np.arange(1, lastRun):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
        # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
        # we can't apply this model to any of the examples in this run,
        # but let's apply it to the first 4 blocks of the next run
        matModel_train = utils.loadMatFile(matModelFn)
        pyModel_train = utils.loadMatFile(pyModelFn)

        # now load the models to test on (data stored with the next run)
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)

        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        # The MATLAB labels are used as the reference for both models.
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModel_train, py_test_data[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]

    # Pool all runs into 1-D vectors. pearsonr is documented for 1-D input;
    # the original (N, 1) columns combined with `[0][0]` indexing only
    # worked on some SciPy versions.
    all_mat_ev = all_vals[:, 0, :].reshape(-1)
    all_py_ev = all_vals[:, 1, :].reshape(-1)
    corr = scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0]

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title('S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, corr))
    plt.legend()
    plt.show()