Ejemplo n.º 1
0
    def makePrediction(self,
                       model,
                       testdatacollection,
                       outputDir,
                       ident='',
                       store_labels=False,
                       monkey_class=''):
        """Predict every sample of ``testdatacollection`` and write the results.

        For each sample the data class reads the sample file,
        ``model.predict`` is called on ``td.x`` and the result is written to
        ``<outputDir>/<origin root basename>_predict[_<ident>].root``.
        Two-dimensional output is written with ``array2root`` as a tree
        called ``"tree"``; output of any other rank is handed to
        ``c_storeTensor.store``.

        Args:
            model: object providing ``predict(features)``.
            testdatacollection: object providing ``samples``,
                ``originRoots``, ``getSamplePath(sample)`` and ``dataclass``.
            outputDir: output directory (converted to an absolute path).
            ident: optional tag; when non-empty it is appended, prefixed by
                an underscore, to the output file name and to the generated
                ``prob_``/``reg_`` branch names.
            store_labels: accepted for interface compatibility; not used by
                this implementation.
            monkey_class: optional ``"module:ClassName"`` string. When given,
                a fresh instance of that class replaces
                ``testdatacollection.dataclass`` for every sample.

        Raises:
            ValueError: if the number of output columns differs from the
                number of branch names.

        Side effects:
            Resets ``self.__sourceroots``, ``self.__predictroots`` and
            ``self.metrics``; when ``self.addnumpyoutput`` is set, the
            accumulated output is saved to ``<outputDir>/allprediction.npy``.
        """
        import copy
        import importlib
        import os

        import numpy as np
        from root_numpy import array2root

        monkey_class_obj = None
        if monkey_class:
            module, classname = monkey_class.split(':')
            # importlib works on Python 2.7 and 3; the former
            # __import__(..., level=-1) raises ValueError on Python 3.
            monkey_class_obj = getattr(importlib.import_module(module),
                                       classname)

        outputDir = os.path.abspath(outputDir)

        if len(ident) > 0:
            ident = '_' + ident

        self.__sourceroots = []
        self.__predictroots = []
        self.metrics = []

        fullnumpyarray = np.array([])

        for idx, sample in enumerate(testdatacollection.samples):
            originroot = testdatacollection.originRoots[idx]
            outrootfilename = os.path.splitext(
                os.path.basename(originroot))[0] + '_predict' + ident + '.root'
            outpath = outputDir + '/' + outrootfilename

            fullpath = testdatacollection.getSamplePath(sample)
            if monkey_class_obj is not None:
                testdatacollection.dataclass = monkey_class_obj()
            td = testdatacollection.dataclass

            td.readIn(fullpath)

            # Branch names: either user supplied or derived from the truth
            # and regression target definitions of the data class.
            if hasattr(td, 'customlabels'):
                formatstring = copy.deepcopy(td.customlabels)
            else:
                truthclasses = td.getUsedTruth()
                formatstring = []
                if len(truthclasses) > 0 and len(truthclasses[0]) > 0:
                    formatstring = [
                        'prob_%s%s' % (name, ident) for name in truthclasses
                    ]
                regressionclasses = getattr(td, 'regressiontargetclasses', [])
                # NOTE (original author): newer implementation; its interplay
                # with the store_labels option still needs to be checked.
                formatstring.extend(
                    ['reg_%s%s' % (name, ident) for name in regressionclasses])

            features = td.x
            # Weights are optional; without them no 'weight' column is added.
            weights = None
            if td.w is not None and len(td.w):
                weights = td.w[0]

            prediction = model.predict(features)
            if self.use_only:
                prediction = [prediction[k] for k in self.use_only]
            if isinstance(prediction, list):
                all_write = np.concatenate(prediction, axis=1)
            else:
                all_write = prediction

            # If a prediction functor was set, its output is also added to
            # the final predictions.
            if hasattr(td, 'predictionFunctor') and hasattr(
                    td, 'predictionFunctorClasses'):
                print(
                    'Extending the output with the configured prediction functor output'
                )
                formatstring.extend(td.predictionFunctorClasses)
                functor = td.predictionFunctor
                all_write = np.concatenate(
                    [all_write, [functor(p) for p in prediction]], axis=1)

            if all_write.ndim == 2:
                if weights is not None:
                    all_write = np.concatenate([all_write, weights], axis=1)
                    formatstring.append('weight')
                if not all_write.shape[1] == len(formatstring):
                    print(formatstring, ' vs ', all_write.shape[1])
                    raise ValueError(
                        'Prediction output does not match with the provided targets!'
                    )

                # One named column per branch; np.rec is the public alias of
                # the record-array helpers.
                all_write = np.rec.fromarrays(np.transpose(all_write),
                                              names=','.join(formatstring))
                array2root(all_write, outpath, "tree", mode="recreate")

                print(formatstring)
                print('\ncreated prediction friend tree ' + outpath +
                      ' for ' + originroot)
            else:
                # Non-2D output cannot be expressed as flat branches.
                c_storeTensor.store(
                    np.ascontiguousarray(all_write,
                                         dtype=np.float32).ctypes.data,
                    list(np.shape(all_write)), outpath)

            self.__sourceroots.append(originroot)
            self.__predictroots.append(outpath)
            if self.addnumpyoutput:
                if len(fullnumpyarray):
                    fullnumpyarray = np.concatenate(
                        (fullnumpyarray, all_write))
                else:
                    fullnumpyarray = np.array(all_write)

        if self.addnumpyoutput:
            np.save(outputDir + '/' + 'allprediction.npy', fullnumpyarray)
Ejemplo n.º 2
0
    def makePrediction_compat(self,
                              model,
                              testdatacollection,
                              outputDir,
                              ident='',
                              store_labels=False,
                              monkey_class=''):

        deprwarning = '''

WARNING! makePrediction is being run in compatibilty mode. This mode will soon be deprecated.
The new prediction mode requires to define a function of the TrainData class that takes as input the prediction of the network \
and as output a list of formatted predictions and associated names.
Each list item will be assigned an own branch name in the output tree
def formatPrediction(self, predicted_list):
    
    format_names = [name1, name2, ...]
    out_pred = [predicted_list[1],predicted_list[0]] #e.g. if the order should be swapped

    return out_pred,  format_names
    
        '''

        print(deprwarning)
        import numpy as np
        from root_numpy import array2root
        import os
        monkey_class_obj = None
        if monkey_class:
            module, classname = tuple(monkey_class.split(':'))
            _temp = __import__(module, globals(), locals(), [classname], -1)
            monkey_class_obj = getattr(_temp, classname)

        outputDir = os.path.abspath(outputDir)

        if len(ident) > 0:
            ident = '_' + ident

        self.__sourceroots = []
        self.__predictroots = []
        self.metrics = []

        fullnumpyarray = np.array([])

        for i in range(len(testdatacollection.samples)):
            sample = testdatacollection.samples[i]
            originroot = testdatacollection.originRoots[i]
            outrootfilename = os.path.splitext(
                os.path.basename(originroot))[0] + '_predict' + ident + '.root'

            fullpath = testdatacollection.getSamplePath(sample)
            if monkey_class_obj is not None:
                testdatacollection.dataclass = monkey_class_obj()
            td = testdatacollection.dataclass

            td.readIn(fullpath)

            if hasattr(td, 'customlabels'):
                import copy
                formatstring = copy.deepcopy(td.customlabels)

            else:
                truthclasses = td.getUsedTruth()
                formatstring = []
                if len(truthclasses) > 0 and len(truthclasses[0]) > 0:
                    formatstring = [
                        'prob_%s%s' % (i, ident) for i in truthclasses
                    ]
                regressionclasses = []
                if hasattr(td, 'regressiontargetclasses'):
                    regressionclasses = td.regressiontargetclasses
                #new implementation. Please check with the store_labels option, Mauro
                formatstring.extend(
                    ['reg_%s%s' % (i, ident) for i in regressionclasses])

            features = td.x
            labels = td.y
            weights = None
            if td.w is not None and len(td.w):
                weights = td.w[0]

            prediction = model.predict(features)
            if self.use_only:
                prediction = [prediction[i] for i in self.use_only]
            if isinstance(prediction, list):
                all_write = np.concatenate(prediction, axis=1)
            else:
                all_write = prediction

            #if a prediction functor was set and the corresponding predictionFunctorClasses
            #its output is also added to the final predictions
            #predictionFunctorClasses: an array of strings labelling extra variables to add to the tree
            #predictionFunctor: receives the numpy array with the predictions for N events
            #                   returns MxN numpy array
            #                   where M=len(predictionFunctorClasses)
            #                   and   N=no. events
            if hasattr(td, 'predictionFunctor') and hasattr(
                    td, 'predictionFunctorClasses'):
                print(
                    'Extending the output with the configured prediction functor output'
                )
                formatstring.extend(getattr(td, 'predictionFunctorClasses'))
                all_write = np.concatenate(
                    [all_write,
                     getattr(td, 'predictionFunctor')(prediction)],
                    axis=1)

            if all_write.ndim == 2:
                if weights is not None:
                    all_write = np.concatenate([all_write, weights], axis=1)
                    formatstring.append('weight')
                if not all_write.shape[1] == len(formatstring):
                    print(formatstring, ' vs ', all_write.shape[1])
                    raise ValueError(
                        'Prediction output does not match with the provided targets!'
                    )

                all_write = np.core.records.fromarrays(
                    np.transpose(all_write), names=','.join(formatstring))
                array2root(all_write,
                           outputDir + '/' + outrootfilename,
                           "tree",
                           mode="recreate")

                #self.metrics.append(metric)
                self.__sourceroots.append(originroot)
                self.__predictroots.append(outputDir + '/' + outrootfilename)
                print(formatstring)
                print('\ncreated prediction friend tree ' + outputDir + '/' +
                      outrootfilename + ' for ' + originroot)
                if self.addnumpyoutput:
                    if len(fullnumpyarray):
                        fullnumpyarray = np.concatenate(
                            (fullnumpyarray, all_write))
                    else:
                        fullnumpyarray = np.array(all_write)
            else:
                raise Exception(
                    "tensor output not supported (anymore) in compatibilty prediction mode!"
                )
                c_storeTensor.store(
                    np.ascontiguousarray(all_write,
                                         dtype=np.float32).ctypes.data,
                    list(np.shape(all_write)),
                    outputDir + '/' + outrootfilename)
                self.__sourceroots.append(originroot)
                self.__predictroots.append(outputDir + '/' + outrootfilename)
                if self.addnumpyoutput:
                    if len(fullnumpyarray):
                        fullnumpyarray = np.concatenate(
                            (fullnumpyarray, all_write))
                    else:
                        fullnumpyarray = np.array(all_write)

        if self.addnumpyoutput:
            np.save(outputDir + '/' + 'allprediction.npy', fullnumpyarray)
Ejemplo n.º 3
0
    def makePrediction(self,
                       model,
                       testdatacollection,
                       outputDir,
                       ident='',
                       store_labels=False,
                       monkey_class='',
                       flatten_everything=True):
        """Predict every sample and store the formatted output per sample.

        If the data class of ``testdatacollection`` has no
        ``formatPrediction`` method, the call is forwarded to
        ``makePrediction_compat`` and its result is returned. Otherwise, for
        each sample the data class reads the sample file, ``model.predict``
        is called on ``td.x``, the result is passed through
        ``td.formatPrediction`` and the resulting arrays are written with
        ``c_storeTensor.store`` to
        ``<outputDir>/<origin root basename>_predict[_<ident>].root``.

        Args:
            model: object providing ``predict(features)``.
            testdatacollection: object providing ``samples``,
                ``originRoots``, ``getSamplePath(sample)`` and ``dataclass``.
            outputDir: output directory (converted to an absolute path).
            ident: optional tag; when non-empty it is appended, prefixed by
                an underscore, to the output file name.
            store_labels: only forwarded to ``makePrediction_compat``.
            monkey_class: optional ``"module:ClassName"`` string. When given,
                a fresh instance of that class replaces
                ``testdatacollection.dataclass`` for every sample.
            flatten_everything: passed through unchanged to
                ``c_storeTensor.store``.

        Side effects:
            Resets ``self.__sourceroots``, ``self.__predictroots`` and
            ``self.metrics``.
        """

        if not hasattr(testdatacollection.dataclass, 'formatPrediction'):
            return self.makePrediction_compat(model, testdatacollection,
                                              outputDir, ident, store_labels,
                                              monkey_class)

        if hasattr(testdatacollection.dataclass, 'customlabels'):
            print(
                'Warning: customlabels are replaced by the formatPrediction function and will be ignored'
            )

        import importlib
        import os

        import numpy as np

        monkey_class_obj = None
        if monkey_class:
            module, classname = monkey_class.split(':')
            # importlib works on Python 2.7 and 3; the former
            # __import__(..., level=-1) raises ValueError on Python 3.
            monkey_class_obj = getattr(importlib.import_module(module),
                                       classname)

        outputDir = os.path.abspath(outputDir)

        if len(ident) > 0:
            ident = '_' + ident

        self.__sourceroots = []
        self.__predictroots = []
        self.metrics = []

        for idx, sample in enumerate(testdatacollection.samples):
            originroot = testdatacollection.originRoots[idx]
            outrootfilename = os.path.splitext(
                os.path.basename(originroot))[0] + '_predict' + ident + '.root'
            outpath = outputDir + '/' + outrootfilename

            fullpath = testdatacollection.getSamplePath(sample)
            if monkey_class_obj is not None:
                testdatacollection.dataclass = monkey_class_obj()
            td = testdatacollection.dataclass

            td.readIn(fullpath)

            features = td.x
            # Labels (td.y) are not stored; the output is meant to be used
            # together with the origin file.
            # Weights are optional; a missing (None) or empty td.w simply
            # means no 'weight' entry is appended.
            weights = None
            if td.w is not None and len(td.w):
                weights = td.w

            prediction = model.predict(features)
            if not isinstance(prediction, list):
                prediction = [prediction]

            prediction, formatstring = td.formatPrediction(prediction)
            # Give unnamed trailing outputs an automatic name.
            missing = len(prediction) - len(formatstring)
            if missing > 0:
                formatstring.extend(
                    ['auto_pred_' + str(k) for k in range(missing)])

            if weights is not None:
                prediction.append(weights)
                formatstring.append('weight')
            # TODO (original author): the prediction functor still needs to
            # be adapted to this mode.

            if self.use_only:
                prediction = [prediction[k] for k in self.use_only]
                formatstring = [formatstring[k] for k in self.use_only]

            # Keep the converted arrays alive while their raw pointers are in
            # use, and take the shapes from the same buffers the pointers
            # refer to.
            cont_pred = [
                np.ascontiguousarray(p, dtype=np.float32) for p in prediction
            ]
            c_storeTensor.store([p.ctypes.data for p in cont_pred],
                                [p.shape for p in cont_pred], outpath,
                                formatstring, flatten_everything)

            self.__sourceroots.append(originroot)
            self.__predictroots.append(outpath)

            print('written prediction output ' + outpath)

        if self.addnumpyoutput:
            print(
                'addnumpyoutput: currently not supported (anymore), please request re-implementation'
            )
Ejemplo n.º 4
0
    def makePrediction(self, model, testdatacollection, outputDir,
                       ident='', store_labels = False, monkey_class=''):
        """Predict every sample, write friend trees and draw summary plots.

        For each sample the data class reads the sample file,
        ``model.predict`` is called on ``td.x`` and the prediction together
        with the labels ``td.y`` is written to
        ``<outputDir>/<origin root basename>_predict[_<ident>].root``.
        Two-dimensional output goes through ``array2root`` as a tree called
        ``"tree"``; output of any other rank is handed to
        ``c_storeTensor.store``. After all samples are processed, the
        accumulated labels and predictions are passed to ``make_confusion``
        and ``make_rocs``, which receive the target paths
        ``<outputDir>/confusion.png`` and ``<outputDir>/roc.png``.

        Args:
            model: object providing ``predict(features)``.
            testdatacollection: object providing ``samples``,
                ``originRoots``, ``getSamplePath(sample)`` and ``dataclass``.
            outputDir: output directory (converted to an absolute path).
            ident: optional tag; when non-empty it is appended, prefixed by
                an underscore, to the output file name and to the generated
                ``prob_``/``reg_`` branch names.
            store_labels: accepted for interface compatibility; not used by
                this implementation.
            monkey_class: optional ``"module:ClassName"`` string. When given,
                a fresh instance of that class replaces
                ``testdatacollection.dataclass`` for every sample.

        Raises:
            ValueError: if the number of output columns differs from the
                number of branch names.

        Side effects:
            Resets ``self.__sourceroots``, ``self.__predictroots`` and
            ``self.metrics``; when ``self.addnumpyoutput`` is set, the
            accumulated output is saved to ``<outputDir>/allprediction.npy``.
        """
        import copy
        import importlib
        import os

        import numpy as np
        from root_numpy import array2root

        monkey_class_obj = None
        if monkey_class:
            module, classname = monkey_class.split(':')
            # importlib works on Python 2.7 and 3; the former
            # __import__(..., level=-1) raises ValueError on Python 3.
            monkey_class_obj = getattr(importlib.import_module(module),
                                       classname)

        outputDir = os.path.abspath(outputDir)

        if len(ident) > 0:
            ident = '_' + ident

        self.__sourceroots = []
        self.__predictroots = []
        self.metrics = []

        fullnumpyarray = np.array([])
        # Accumulated over all samples for the summary plots.
        fullx = None
        fulltest = None
        fullpred = None
        td = None

        for idx, sample in enumerate(testdatacollection.samples):
            originroot = testdatacollection.originRoots[idx]
            outrootfilename = os.path.splitext(
                os.path.basename(originroot))[0] + '_predict' + ident + '.root'
            outpath = outputDir + '/' + outrootfilename

            fullpath = testdatacollection.getSamplePath(sample)
            if monkey_class_obj is not None:
                testdatacollection.dataclass = monkey_class_obj()
            td = testdatacollection.dataclass

            td.readIn(fullpath)

            # Branch names: either user supplied or derived from the truth
            # and regression target definitions of the data class.
            if hasattr(td, 'customlabels'):
                formatstring = copy.deepcopy(td.customlabels)
            else:
                truthclasses = td.getUsedTruth()
                formatstring = []
                if len(truthclasses) > 0 and len(truthclasses[0]) > 0:
                    formatstring = [
                        'prob_%s%s' % (name, ident) for name in truthclasses
                    ]
                regressionclasses = getattr(td, 'regressiontargetclasses', [])
                # NOTE (original author): newer implementation; its interplay
                # with the store_labels option still needs to be checked.
                formatstring.extend(
                    ['reg_%s%s' % (name, ident) for name in regressionclasses])

                # add the truth to the friend tree
                formatstring.extend(truthclasses)

            features = td.x
            labels = td.y
            # Weights are optional; without them no 'weight' column is added.
            weights = None
            if td.w is not None and len(td.w):
                weights = td.w[0]

            prediction = model.predict(features)

            # Accumulate before use_only filtering so the plots see the raw
            # model output.
            if fulltest is not None:
                fullx = np.concatenate([fullx, features[0]], axis=0)
                fulltest = np.concatenate([fulltest, labels[0]], axis=0)
                fullpred = np.concatenate([fullpred, prediction], axis=0)
            else:
                fullx = features[0]
                fulltest = labels[0]
                fullpred = prediction

            if self.use_only:
                prediction = [prediction[k] for k in self.use_only]
            if isinstance(prediction, list):
                all_write = np.concatenate(prediction, axis=1)
            else:
                all_write = prediction

            if isinstance(labels, list):
                all_labels = np.concatenate(labels, axis=1)
            else:
                all_labels = labels

            all_write = np.concatenate([all_write, all_labels], axis=1)

            # If a prediction functor and the matching
            # predictionFunctorClasses are set, the functor output is also
            # added to the final predictions.
            # predictionFunctorClasses: an array of strings labelling extra
            #                           variables to add to the tree
            # predictionFunctor: receives the numpy array with the
            #                    predictions for N events and returns an
            #                    MxN numpy array, where
            #                    M=len(predictionFunctorClasses), N=no. events
            if hasattr(td, 'predictionFunctor') and hasattr(
                    td, 'predictionFunctorClasses'):
                print('Extending the output with the configured prediction functor output')
                formatstring.extend(td.predictionFunctorClasses)
                all_write = np.concatenate(
                    [all_write, td.predictionFunctor(prediction)], axis=1)

            if all_write.ndim == 2:
                if weights is not None:
                    all_write = np.concatenate([all_write, weights], axis=1)
                    formatstring.append('weight')
                if not all_write.shape[1] == len(formatstring):
                    print(formatstring, ' vs ', all_write.shape[1])
                    raise ValueError('Prediction output does not match with the provided targets!')

                # One named column per branch; np.rec is the public alias of
                # the record-array helpers.
                all_write = np.rec.fromarrays(np.transpose(all_write),
                                              names=','.join(formatstring))
                array2root(all_write, outpath, "tree", mode="recreate")

                print(formatstring)
                print('\ncreated prediction friend tree ' + outpath +
                      ' for ' + originroot)
            else:
                # Non-2D output cannot be expressed as flat branches.
                c_storeTensor.store(
                    np.ascontiguousarray(all_write,
                                         dtype=np.float32).ctypes.data,
                    list(np.shape(all_write)), outpath)

            self.__sourceroots.append(originroot)
            self.__predictroots.append(outpath)
            if self.addnumpyoutput:
                if len(fullnumpyarray):
                    fullnumpyarray = np.concatenate(
                        (fullnumpyarray, all_write))
                else:
                    fullnumpyarray = np.array(all_write)

        if self.addnumpyoutput:
            np.save(outputDir + '/' + 'allprediction.npy', fullnumpyarray)

        if fulltest is None:
            # No sample was processed, so there is nothing to summarise.
            print('no samples were processed, skipping confusion matrix and ROC plots')
            return

        # WARNING (original author): needing this filtering probably hints at
        # a problem earlier in the chain that should be debugged; rows with
        # all-zero truth do not work correctly in the output files.
        # skip no truth
        skip = np.all(fulltest == 0, axis=1)
        fullx = fullx[~skip]
        fullpred = fullpred[~skip]
        fulltest = fulltest[~skip]
        # skip multi truth
        skip = np.sum(fulltest, axis=1) > 1
        fullx = fullx[~skip]
        fullpred = fullpred[~skip]
        fulltest = fulltest[~skip]
        make_confusion(td.getUsedTruth(), fulltest.argmax(axis=1),
                       fullpred.argmax(axis=1), outputDir + '/confusion.png')
        make_rocs(td.getUsedTruth(), fulltest, fullpred,
                  outputDir + '/roc.png')