예제 #1
0
    def process_token(self, ref, token):
        """Run the outer cross-validation job described by ``token``.

        Reads the job parameters from ``token['input']``, prepares the run
        with ``SetUpRun`` and, when the token carries no ``innerFold`` entry,
        evaluates it with ``RunInstance``.

        ref   :   not used in the visible part of this method.
        token :   mapping whose 'input' entry provides 'dataset', 'network',
                  'method', 'specific', 'repeat', 'fold' and 'shuffleNr';
                  'innerFold' and 'innerRepeat' are read only when
                  'innerFold' is present.

        NOTE(review): this excerpt ends after the outer-loop branch; nothing
        is done here for tokens that carry 'innerFold'.
        """
        tokenInput = token['input']
        dataset = tokenInput['dataset']
        network = tokenInput['network']
        method = tokenInput['method']
        specific = tokenInput['specific']
        repeat = tokenInput['repeat']
        fold = tokenInput['fold']
        shuffleNr = tokenInput['shuffleNr']

        # '%'-formatting with a 1-tuple prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('dataset: %s' % (dataset,))
        print('network %s' % (network,))
        print('method %s' % (method,))
        print('specific %s' % (specific,))
        print('repeat %s' % (repeat,))
        print('fold %s' % (fold,))
        print('shuffleNr %s' % (shuffleNr,))

        innerCV = 'innerFold' in tokenInput
        if innerCV:
            # Read eagerly so a missing 'innerRepeat' fails before any work
            # is done; neither value is used in the visible part.
            innerfold = tokenInput['innerFold']
            innerrepeat = tokenInput['innerRepeat']

        (data, net, featureSelector, classifiers,
         Dataset2Time) = SetUpRun(dataset, network, method)

        # For the outer loop we can use RunInstance immediately, which will
        # split the data into training and test.
        if not innerCV:
            runArgs = [data, net, featureSelector, specific, classifiers,
                       repeat, self.nrFolds, fold, shuffleNr, Dataset2Time]
            # 'specific' is passed a second time only when it compares equal
            # to True or False (this also matches 1 and 0).
            if specific in (True, False):
                runArgs.append(specific)
            (dataName, featureExtractorproductName, netName, shuffle,
             featureExtractor, AucAndCi) = RunInstance(*runArgs)
예제 #2
0
    # Prepare the run once from the fields of `item`; the returned objects
    # are reused for every token in the loop below.
    # NOTE(review): presumably item[0] is the dataset, item[1][1] the network
    # and item[1][0][0] the method -- confirm against SetUpRun's signature.
    (data, net, featureSelector, classifiers, Dataset2Time) = \
        SetUpRun(item[0], item[1][1], item[1][0][0], datafile = os.path.basename(os.environ['DATAPATH']), datapath=dataDir)
    for token in tokens:
        # These values are read from the token for logging; only `repeat`
        # and `fold` are passed on to RunInstance.
        dataset = token['input']['dataset']
        network = token['input']['network']
        method = token['input']['method']
        repeat = token['input']['repeat']
        fold = token['input']['fold']
        print('dataset:', dataset)
        print('network', network)
        print('method', method)
        print('repeat', repeat)
        print('fold', fold)
        # RunInstance is called with None in three positions and a literal 5.
        # NOTE(review): 5 is presumably the number of folds -- confirm.
        (dataName, featureExtractorproductName, netName, shuffle,
         featureExtractor, AucAndCi) = RunInstance(data, net, featureSelector,
                                                   None, classifiers, repeat,
                                                   5, fold, None, Dataset2Time,
                                                   None)
        # The result is stored on the token itself; the fourth entry of the
        # output tuple is always None here.
        token['output'] = (dataName, featureExtractorproductName, netName,
                           None, shuffle, featureExtractor, AucAndCi)

    # Build the base name of the result file from the fields of `item` and
    # dump all processed tokens as JSON into resultsDir.
    # NOTE(review): the bare `except:` falls back to a name without
    # item[1][1] on ANY error (presumably meant for a missing or non-string
    # item[1][1]); it also hides unrelated failures -- consider narrowing it.
    try:
        filebase = item[0] + '_' + item[1][0][0] + '_' + item[1][1]
    except:
        filebase = item[0] + '_' + item[1][0][0]
    with open(resultsDir + '/' + filebase + '_raw.json', 'w') as outfile:
        json.dump(tokens, outfile)

    # Announce the target path of the raw data in iRODS (this statement only
    # prints the path).
    print('Write raw data to iRODS: ' + coll.path + '/' + filebase +
          '_raw.json')
예제 #3
0
def localProcess(token, db=None, nrFolds=5):
    """
    Processes the token and stores the result on it.

    The job parameters are read from ``token['input']``. Without an
    'innerFold' entry the full data set is handed to ``RunInstance``; with
    one, the data is first split with ``splitData`` and only the outer
    training part is handed over, using 'innerRepeat' / 'innerFold'.

    token   :   mapping whose 'input' entry provides 'dataset', 'network',
                'method', 'specific', 'repeat', 'fold' and 'shuffleNr', and
                optionally 'innerFold' together with 'innerRepeat'.
    db      :   specific for couchDB, leave empty or adapt the section after
                NOTE for a specific output. When given, ``db.update([token])``
                is called with the finished token.
    nrFolds :   number of folds passed to ``splitData`` and ``RunInstance``;
                defaults to 5, the value that used to be hard-coded.

    Returns the same token with 'output' filled in and 'done' and 'lock'
    set to 1.
    """
    tokenInput = token['input']
    dataset = tokenInput['dataset']
    network = tokenInput['network']
    method = tokenInput['method']
    specific = tokenInput['specific']
    repeat = tokenInput['repeat']
    fold = tokenInput['fold']
    shuffleNr = tokenInput['shuffleNr']

    # '%'-formatting with a 1-tuple prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('dataset: %s' % (dataset,))
    print('network %s' % (network,))
    print('method %s' % (method,))
    print('specific %s' % (specific,))
    print('repeat %s' % (repeat,))
    print('fold %s' % (fold,))
    print('shuffleNr %s' % (shuffleNr,))

    innerCV = 'innerFold' in tokenInput
    if innerCV:
        # Read before the set-up so a missing 'innerRepeat' fails early.
        innerfold = tokenInput['innerFold']
        innerrepeat = tokenInput['innerRepeat']

    (data, net, featureSelector, classifiers,
     Dataset2Time) = SetUpRun(dataset, network, method)

    if innerCV:
        # Inner loop: split first and run on the outer training part only.
        dsOuterTraining, _, _ = splitData(data, repeat, fold, nrFolds)
        runData, runRepeat, runFold = dsOuterTraining, innerrepeat, innerfold
    else:
        # Outer loop: RunInstance receives the full data set.
        runData, runRepeat, runFold = data, repeat, fold

    runArgs = [runData, net, featureSelector, specific, classifiers,
               runRepeat, nrFolds, runFold, shuffleNr, Dataset2Time]
    # 'specific' is passed a second time only when it compares equal to True
    # or False (this also matches 1 and 0).
    if specific in (True, False):
        runArgs.append(specific)
    (dataName, featureExtractorproductName, netName, shuffle,
     featureExtractor, AucAndCi) = RunInstance(*runArgs)

    token['output'] = (dataName, featureExtractorproductName, netName,
                       shuffleNr, shuffle, featureExtractor, AucAndCi)
    token['done'] = 1
    token['lock'] = 1
    #NOTE: specific for couchDB
    if db is not None:
        db.update([token])

    return token
예제 #4
0
    def process_token(self, ref, token):
        """Run the cross-validation job described by ``token`` and store it.

        The job parameters are read from ``token['input']``. After the run
        the token is closed through ``self.modifier``, the result tuple is
        written to ``token['output']`` and the token is saved in ``self.db``
        under its ``'_id'``.

        ref   :   not used by this method.
        token :   mapping whose 'input' entry provides 'dataset', 'network',
                  'method', 'specific', 'repeat', 'fold' and 'shuffleNr', and
                  optionally 'innerFold' together with 'innerRepeat'.
        """
        tokenInput = token['input']
        dataset = tokenInput['dataset']
        network = tokenInput['network']
        method = tokenInput['method']
        specific = tokenInput['specific']
        repeat = tokenInput['repeat']
        fold = tokenInput['fold']
        shuffleNr = tokenInput['shuffleNr']

        # '%'-formatting with a 1-tuple prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('dataset: %s' % (dataset,))
        print('network %s' % (network,))
        print('method %s' % (method,))
        print('specific %s' % (specific,))
        print('repeat %s' % (repeat,))
        print('fold %s' % (fold,))
        print('shuffleNr %s' % (shuffleNr,))

        innerCV = 'innerFold' in tokenInput
        if innerCV:
            # Read before the set-up so a missing 'innerRepeat' fails early.
            innerfold = tokenInput['innerFold']
            innerrepeat = tokenInput['innerRepeat']

        (data, net, featureSelector, classifiers,
         Dataset2Time) = SetUpRun(dataset, network, method)

        if innerCV:
            # For the inner loop we first have to split the data and then
            # pass the training data to RunInstance.
            dsOuterTraining, dsOuterTesting, _ = splitData(
                data, repeat, fold, self.nrFolds)
            print('dsOuterTraining %s' % (dsOuterTraining,))
            print('dsOuterTesting %s' % (dsOuterTesting,))
            runData, runRepeat, runFold = (dsOuterTraining, innerrepeat,
                                           innerfold)
        else:
            # For the outer loop we can use RunInstance immediately, which
            # will split the data into training and test.
            runData, runRepeat, runFold = data, repeat, fold

        runArgs = [runData, net, featureSelector, specific, classifiers,
                   runRepeat, self.nrFolds, runFold, shuffleNr, Dataset2Time]
        # 'specific' is passed a second time only when it compares equal to
        # True or False (this also matches 1 and 0).
        if specific in (True, False):
            runArgs.append(specific)
        (dataName, featureExtractorproductName, netName, shuffle,
         featureExtractor, AucAndCi) = RunInstance(*runArgs)

        # close the token after the computations, setting flag done
        token = self.modifier.close(token)
        # store the featureExtractor and AUCs in a new dictionary output
        token['output'] = (dataName, featureExtractorproductName, netName,
                           shuffleNr, shuffle, featureExtractor, AucAndCi)
        self.db[token['_id']] = token
예제 #5
0
    def process_token(self, ref, token):
        """Process one cross-validation token and persist its result.

        Parameters are taken from ``token['input']``. Without an 'innerFold'
        entry the full data set is handed to ``RunInstance``; with one, the
        data is split with ``splitData`` first and only the outer training
        part is evaluated, using 'innerRepeat' / 'innerFold'. The token is
        then closed via ``self.modifier.close``, given an 'output' tuple and
        written to ``self.db`` under its ``'_id'``.

        ref   :   not used by this method.
        token :   mapping whose 'input' entry provides 'dataset', 'network',
                  'method', 'specific', 'repeat', 'fold' and 'shuffleNr', and
                  optionally 'innerFold' together with 'innerRepeat'.
        """
        params = token['input']
        dataset = params['dataset']
        network = params['network']
        method = params['method']
        specific = params['specific']
        repeat = params['repeat']
        fold = params['fold']
        shuffleNr = params['shuffleNr']

        # Formatting through a 1-tuple keeps the printed text identical under
        # the Python 2 print statement and the Python 3 print function.
        print('dataset: %s' % (dataset,))
        print('network %s' % (network,))
        print('method %s' % (method,))
        print('specific %s' % (specific,))
        print('repeat %s' % (repeat,))
        print('fold %s' % (fold,))
        print('shuffleNr %s' % (shuffleNr,))

        innerCV = 'innerFold' in params
        if innerCV:
            # Looked up before SetUpRun so a missing key fails early.
            innerfold = params['innerFold']
            innerrepeat = params['innerRepeat']

        (data, net, featureSelector, classifiers,
         Dataset2Time) = SetUpRun(dataset, network, method)

        if innerCV:
            # Inner run: evaluate on the outer training split only.
            dsOuterTraining, dsOuterTesting, _ = splitData(
                data, repeat, fold, self.nrFolds)
            print('dsOuterTraining %s' % (dsOuterTraining,))
            print('dsOuterTesting %s' % (dsOuterTesting,))
            runData, runRepeat, runFold = (dsOuterTraining, innerrepeat,
                                           innerfold)
        else:
            # Outer run: RunInstance receives the full data set.
            runData, runRepeat, runFold = data, repeat, fold

        runArgs = [runData, net, featureSelector, specific, classifiers,
                   runRepeat, self.nrFolds, runFold, shuffleNr, Dataset2Time]
        # The trailing 'specific' argument is supplied only when the value
        # compares equal to True or False (1 and 0 match as well).
        if specific in (True, False):
            runArgs.append(specific)
        (dataName, featureExtractorproductName, netName, shuffle,
         featureExtractor, AucAndCi) = RunInstance(*runArgs)

        # Close the token first, then attach the result and store it.
        token = self.modifier.close(token)
        token['output'] = (dataName, featureExtractorproductName, netName,
                           shuffleNr, shuffle, featureExtractor, AucAndCi)
        self.db[token['_id']] = token
예제 #6
0
            if specific == True or specific == False:
                (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) =  
                    RunInstance(data, net, featureSelector, specific, 
                        classifiers, repeat, self.nrFolds, fold, shuffleNr, Dataset2Time, specific)
            else: 
                (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) =  
                    RunInstance(data, net, featureSelector, specific, 
                        classifiers, repeat, self.nrFolds, fold, shuffleNr, Dataset2Time) 
        #For the inner loop we first have to split the data and then pass the training data to RunInstance
        else:
            dsOuterTraining, dsOuterTesting,_ = splitData(data, repeat, fold, self.nrFolds)
            print 'dsOuterTraining', dsOuterTraining
            print 'dsOuterTesting', dsOuterTesting
            if specific == True or specific == False:
                (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) =  
                    RunInstance(dsOuterTraining, net, featureSelector, specific, 
                        classifiers, innerrepeat, self.nrFolds, innerfold, shuffleNr, Dataset2Time, specific)
            else:
                (dataName, featureExtractorproductName, netName, shuffle, featureExtractor, AucAndCi) =  
                    RunInstance(dsOuterTraining, net, featureSelector, specific, 
                        classifiers, innerrepeat, self.nrFolds, innerfold, shuffleNr, Dataset2Time)

        # close the token after the computations, setting flag done            
        token = self.modifier.close(token)
        # store the featureExtractor and AUCs in a new dictionary output
        token['output'] = 
            (dataName, featureExtractorproductName, netName, shuffleNr, shuffle, featureExtractor, AucAndCi)
        self.db[token['_id']] = token

    def cleanup_run(self, *kargs, **kvargs):
        """No-op: accept any positional and keyword arguments and ignore them."""
        return None