def process_token(self, ref, token):
    """Run the cross-validation experiment described by ``token``.

    ``token['input']`` carries the experiment coordinates (dataset,
    network, method, specific, repeat, fold, shuffleNr) and, for an
    inner-CV token, additionally ``innerFold`` / ``innerRepeat``.

    NOTE(review): this variant only handles the outer-CV case and never
    stores ``token['output']`` -- it looks like a truncated duplicate of
    the fuller ``process_token`` implementations elsewhere in this file;
    confirm which copy is actually live.
    """
    # Unpack the experiment coordinates from the token.
    dataset = token['input']['dataset']
    network = token['input']['network']
    method = token['input']['method']
    specific = token['input']['specific']
    repeat = token['input']['repeat']
    fold = token['input']['fold']
    shuffleNr = token['input']['shuffleNr']
    # Progress logging (Python 2 print statements).
    print 'dataset:', dataset
    print 'network', network
    print 'method', method
    print 'specific', specific
    print 'repeat', repeat
    print 'fold', fold
    print 'shuffleNr', shuffleNr
    # An inner-CV token is recognised by the presence of 'innerFold'.
    # NOTE(review): innerfold/innerrepeat are read but never used below
    # in this variant.
    innerCV = False
    if 'innerFold' in token['input']:
        innerCV = True
        innerfold = token['input']['innerFold']
        innerrepeat = token['input']['innerRepeat']
    (data, net, featureSelector, classifiers,
     Dataset2Time) = SetUpRun(dataset, network, method)
    # For the outer loop we can use RunInstance immediately, which will
    # split the data into training and test itself.
    if not innerCV:
        # When 'specific' is a plain boolean it is forwarded once more as
        # the trailing RunInstance argument -- TODO confirm intent.
        if specific == True or specific == False:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(data, net, featureSelector, specific,
                                     classifiers, repeat, self.nrFolds,
                                     fold, shuffleNr, Dataset2Time,
                                     specific)
        else:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(data, net, featureSelector, specific,
                                     classifiers, repeat, self.nrFolds,
                                     fold, shuffleNr, Dataset2Time)
# Set up the run once, then process every token with the same
# data / network / feature selector.
(data, net, featureSelector, classifiers, Dataset2Time) = \
    SetUpRun(item[0], item[1][1], item[1][0][0],
             datafile=os.path.basename(os.environ['DATAPATH']),
             datapath=dataDir)
for token in tokens:
    # Experiment coordinates for this token.
    dataset = token['input']['dataset']
    network = token['input']['network']
    method = token['input']['method']
    repeat = token['input']['repeat']
    fold = token['input']['fold']
    print('dataset:', dataset)
    print('network', network)
    print('method', method)
    print('repeat', repeat)
    print('fold', fold)
    # Number of folds is fixed to 5 here; 'specific' and 'shuffleNr' are
    # not used in this batch variant (passed as None).
    (dataName, featureExtractorproductName, netName, shuffle,
     featureExtractor, AucAndCi) = RunInstance(data, net, featureSelector,
                                               None, classifiers, repeat,
                                               5, fold, None, Dataset2Time,
                                               None)
    token['output'] = (dataName, featureExtractorproductName, netName,
                       None, shuffle, featureExtractor, AucAndCi)

# Store raw output data in iRODS.
try:
    filebase = item[0] + '_' + item[1][0][0] + '_' + item[1][1]
except (IndexError, TypeError):
    # Best effort: fall back to the short name when the last component of
    # the item description is missing (was a silent bare `except:`).
    filebase = item[0] + '_' + item[1][0][0]
raw_name = filebase + '_raw.json'
with open(os.path.join(resultsDir, raw_name), 'w') as outfile:
    json.dump(tokens, outfile)
# upload raw data
print('Write raw data to iRODS: ' + coll.path + '/' + raw_name)
def localProcess(token, db=None): """ Processes the token. Number of folds is fixed to 5! db : specific for couchDB, leave empty or adopt the section after NOTE for a specific output. """ dataset = token['input']['dataset'] network = token['input']['network'] method = token['input']['method'] specific = token['input']['specific'] repeat = token['input']['repeat'] fold = token['input']['fold'] shuffleNr = token['input']['shuffleNr'] print 'dataset:', dataset print 'network', network print 'method', method print 'specific', specific print 'repeat', repeat print 'fold', fold print 'shuffleNr', shuffleNr innerCV = False if 'innerFold' in token['input']: innerCV = True innerfold = token['input']['innerFold'] innerrepeat = token['input']['innerRepeat'] (data, net, featureSelector, classifiers, Dataset2Time) = SetUpRun(dataset, network, method) if not innerCV: if specific == True or specific == False: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(data, net, featureSelector, specific, classifiers, repeat, 5, fold, shuffleNr, Dataset2Time, specific) else: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(data, net, featureSelector, specific, classifiers, repeat, 5, fold, shuffleNr, Dataset2Time) else: dsOuterTraining, dsOuterTesting, _ = splitData(data, repeat, fold, 5) if specific == True or specific == False: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance( dsOuterTraining, net, featureSelector, specific, classifiers, innerrepeat, 5, innerfold, shuffleNr, Dataset2Time, specific) else: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(dsOuterTraining, net, featureSelector, specific, classifiers, innerrepeat, 5, innerfold, shuffleNr, Dataset2Time) token['output'] = (dataName, featureExtractorproductName, netName, shuffleNr, shuffle, featureExtractor, AucAndCi) 
token['done'] = 1 token['lock'] = 1 #NOTE: specific for couchDB if db != None: db.update([token]) return token
def process_token(self, ref, token): dataset = token['input']['dataset'] network = token['input']['network'] method = token['input']['method'] specific = token['input']['specific'] repeat = token['input']['repeat'] fold = token['input']['fold'] shuffleNr = token['input']['shuffleNr'] print 'dataset:', dataset print 'network', network print 'method', method print 'specific', specific print 'repeat', repeat print 'fold', fold print 'shuffleNr', shuffleNr innerCV = False if 'innerFold' in token['input']: innerCV = True innerfold = token['input']['innerFold'] innerrepeat = token['input']['innerRepeat'] (data, net, featureSelector, classifiers, Dataset2Time) = SetUpRun(dataset, network, method) #For the outer loop wecan use RunInstance immediatley, which will split the data into training and test if not innerCV: if specific == True or specific == False: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance( data, net, featureSelector, specific, classifiers, repeat, self.nrFolds, fold, shuffleNr, Dataset2Time, specific) else: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(data, net, featureSelector, specific, classifiers, repeat, self.nrFolds, fold, shuffleNr, Dataset2Time) #For the inner loop we first have to split the data and then pass the training data to RunInstance else: dsOuterTraining, dsOuterTesting, _ = splitData( data, repeat, fold, self.nrFolds) print 'dsOuterTraining', dsOuterTraining print 'dsOuterTesting', dsOuterTesting if specific == True or specific == False: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(dsOuterTraining, net, featureSelector, specific, classifiers, innerrepeat, self.nrFolds, innerfold, shuffleNr, Dataset2Time, specific) else: (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) = RunInstance(dsOuterTraining, net, featureSelector, specific, classifiers, 
innerrepeat, self.nrFolds, innerfold, shuffleNr, Dataset2Time) # close the token after the computations, setting flag done token = self.modifier.close(token) # store the featureExtractor and AUCs in a new dictionary output token['output'] = (dataName, featureExtractorproductName, netName, shuffleNr, shuffle, featureExtractor, AucAndCi) self.db[token['_id']] = token
def process_token(self, ref, token):
    """Run one cross-validation work item, close the token and store it.

    Reads the experiment coordinates from ``token['input']``, runs the
    outer- or inner-CV instance, writes the result tuple to
    ``token['output']`` and saves the token into ``self.db``.
    """
    dataset = token['input']['dataset']
    network = token['input']['network']
    method = token['input']['method']
    specific = token['input']['specific']
    repeat = token['input']['repeat']
    fold = token['input']['fold']
    shuffleNr = token['input']['shuffleNr']
    print 'dataset:', dataset
    print 'network', network
    print 'method', method
    print 'specific', specific
    print 'repeat', repeat
    print 'fold', fold
    print 'shuffleNr', shuffleNr
    # Inner-CV tokens are marked by the presence of 'innerFold'.
    innerCV = False
    if 'innerFold' in token['input']:
        innerCV = True
        innerfold = token['input']['innerFold']
        innerrepeat = token['input']['innerRepeat']
    (data, net, featureSelector, classifiers,
     Dataset2Time) = SetUpRun(dataset, network, method)
    if not innerCV:
        # Outer loop: RunInstance splits data into training and test.
        # A plain boolean 'specific' is forwarded once more as the
        # trailing argument -- TODO confirm intent.
        if specific == True or specific == False:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor, AucAndCi) = RunInstance(
                data, net, featureSelector, specific, classifiers, repeat,
                self.nrFolds, fold, shuffleNr, Dataset2Time, specific)
        else:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(data, net, featureSelector, specific,
                                     classifiers, repeat, self.nrFolds,
                                     fold, shuffleNr, Dataset2Time)
    else:
        # Inner loop: split off the outer training data first, then run
        # the inner CV on the training part only.
        dsOuterTraining, dsOuterTesting, _ = splitData(
            data, repeat, fold, self.nrFolds)
        print 'dsOuterTraining', dsOuterTraining
        print 'dsOuterTesting', dsOuterTesting
        if specific == True or specific == False:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(dsOuterTraining, net, featureSelector,
                                     specific, classifiers, innerrepeat,
                                     self.nrFolds, innerfold, shuffleNr,
                                     Dataset2Time, specific)
        else:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(dsOuterTraining, net, featureSelector,
                                     specific, classifiers, innerrepeat,
                                     self.nrFolds, innerfold, shuffleNr,
                                     Dataset2Time)
    # Close the token (sets the done flag) and store the result tuple.
    token = self.modifier.close(token)
    token['output'] = (dataName, featureExtractorproductName, netName,
                       shuffleNr, shuffle, featureExtractor,
                       AucAndCi)
    self.db[token['_id']] = token
# NOTE(review): this chunk begins mid-way through a process_token body --
# the enclosing `def` line and the preceding `if not innerCV:` guard are
# outside this view.  Indentation below is reconstructed accordingly.
        if specific == True or specific == False:
            # A plain boolean 'specific' is forwarded once more as the
            # trailing RunInstance argument -- TODO confirm intent.
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(data, net, featureSelector, specific,
                                     classifiers, repeat, self.nrFolds,
                                     fold, shuffleNr, Dataset2Time,
                                     specific)
        else:
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor,
             AucAndCi) = RunInstance(data, net, featureSelector, specific,
                                     classifiers, repeat, self.nrFolds,
                                     fold, shuffleNr, Dataset2Time)
        # For the inner loop we first have to split the data and then
        # pass the training data to RunInstance.
        else:
            dsOuterTraining, dsOuterTesting, _ = splitData(data, repeat,
                                                           fold,
                                                           self.nrFolds)
            print 'dsOuterTraining', dsOuterTraining
            print 'dsOuterTesting', dsOuterTesting
            if specific == True or specific == False:
                (dataName, featureExtractorproductName, netName, shuffle,
                 featureExtractor,
                 AucAndCi) = RunInstance(dsOuterTraining, net,
                                         featureSelector, specific,
                                         classifiers, innerrepeat,
                                         self.nrFolds, innerfold,
                                         shuffleNr, Dataset2Time, specific)
            else:
                (dataName, featureExtractorproductName, netName, shuffle,
                 featureExtractor,
                 AucAndCi) = RunInstance(dsOuterTraining, net,
                                         featureSelector, specific,
                                         classifiers, innerrepeat,
                                         self.nrFolds, innerfold,
                                         shuffleNr, Dataset2Time)
        # close the token after the computations, setting flag done
        token = self.modifier.close(token)
        # store the featureExtractor and AUCs in a new dictionary output
        token['output'] = (dataName, featureExtractorproductName, netName,
                           shuffleNr, shuffle, featureExtractor, AucAndCi)
        self.db[token['_id']] = token

    def cleanup_run(self, *kargs, **kvargs):
        """No-op cleanup hook; nothing to release after a run."""
        pass