Ejemplo n.º 1
0
    def crossgrid(self, griddic, cv=None):
        """
        Run a grid search over the pipeline parameters and fit it on the data.

        inputs
            griddic: dictionary of the form
                dict(ESTIMATORNAME1__PARAMETER1 = PARAMLST1,
                     ESTIMATORNAME2__PARAMETER2 = PARAMLST2,
                     ...
                     )
            cv: crossvalidation array of the form [IDn1, IDn2,...IDnN];
                when given, it replaces the procedure stored in self.cv.
                When omitted and self.cv is also None, a default is built
                with the module-level cv.leave_x_out(self.Y, 20).

        side effects
            self.cv may be overwritten (see above).
            self._gridsearch is set to a GridSearchCV built from self._pipe
            and griddic, and is fitted on self.X / self.Y.
        """
        # initialize cv procedure
        # Identity checks are required here: comparing an array-like cv
        # object to None with ==/!= is evaluated element-wise and cannot be
        # used as an `if` condition.
        if cv is not None:
            # if cv not empty overwrite existing cv procedure
            self.cv = cv
        elif self.cv is None:
            # if no cv procedure has been specified set the classical l20o
            print('ATTENTION:\tNo CV procedure specified, proceeding with reduced l20o.')
            # The `cv` parameter shadows the module-level `cv` helper and is
            # None in this branch, so the helper has to be looked up through
            # the module globals. The result is stored on the instance so
            # the chosen default is actually remembered.
            self.cv = globals()['cv'].leave_x_out(self.Y, 20)

        # initialize the _gridsearch attribute
        # need to include how to create the dictionary from the input
        # NOTE(review): self.cv is not handed to GridSearchCV, so the search
        # presumably runs with the library's default CV splitting -- confirm
        # whether `cv=self.cv` should be passed here.
        self._gridsearch = sk.grid_search.GridSearchCV(self._pipe, griddic, n_jobs=-1) # @UndefinedVariable

        # fit the CV grid
        self._gridsearch.fit(self.X, self.Y)
Ejemplo n.º 2
0
    pipe.setpipe(['GB'])

    # cvcounter test
    print('Pipe.cvcounter =\t'+str(pipe.cvcounter))

    print("X size: " + str(np.shape(X)))
    print("Y size: " + str(np.shape(Y)))
    # test initialization of grid parameters

    # FFS + RF dic
    # griddic = dict(FFS__k=[50,100],RF__n_estimators=[100,200])
    # FFS + FDA dic
    griddic = dict(GB__n_estimators=[100,200,300],GB__learning_rate=[0.1,0.3,0.5],GB__max_features=["Auto",100.],GB__max_depth=[3,5,10])
    #griddic = dict();
    pipe.crossgrid(griddic,crossval=cv.leave_x_out(pipe.Y, 50, nsamples=100))
    #pipe.crossgrid(griddic,crossval=cv.leave_x_out(pipe.Y, 20, nsamples=300))
    print(pipe.return_score())
    print(pipe._gridsearch.grid_scores_)
    print(pipe._pipe.named_steps.keys())
    print(pipe._pipe)
    pipe.return_rank()
    pipe.return_ranks(.5,printtofile=True)

    # prediction

    df2 = pd.read_csv('data/test.csv')

    X2, IDS, names2 = filtertrain(df2,'test')

    Y2 = pipe._gridsearch.predict_proba(X2)
Ejemplo n.º 3
0
        wids = ['week4','week5','week6','week10']
    elif organ == 'liver' and isTargeted == True:
        wids = ['4w','5w','6w','10w']
    cvweeks = wids[0:2]

    pns,cns,Xdata,Ydata = jmport.importdata(organ,isTargeted=isTargeted,LogConc=LogConc)
    _, X, Y, _ = jmport.filterd(pns,Xdata,Ydata,wids)

    # run automated tests

    #run an initialization test for a pipeline with pca and fda
    pipe = Pipe(X,Y,cns,wids,organ=organ,isTargeted=isTargeted,LogConc=LogConc)
    pipe.setpipe(['FFS','GB'])

    # cvcounter test
    print('Pipe.cvcounter =\t'+str(pipe.cvcounter))

    print(np.shape(np.array(X)))
    # test initialization of grid parameters


    #griddic = dict(FFS__k=[10,20,40,50,100,130,200,750,800],RF__n_estimators=[10,100,200,300],RF__criterion=["gini","entropy"],RF__max_features=["sqrt","log2"])
    griddic = dict(FFS__k=[10,20,40,50,100,130,200,750,800],GB__n_estimators=[100,200,300,600],GB__learning_rate=[0.1,0.3,0.5],GB__max_features=["auto"],GB__max_depth=[3,5,10])
    pipe.crossgrid(griddic,cv=cv.leave_x_out(pipe.Y,20,nsamples=200,testlst=[i for i,n in enumerate(pns) if any(j in n for j in cvweeks)]))

    print(pipe.return_score())
    print(pipe._gridsearch.grid_scores_)
    print(pipe._pipe.named_steps.keys())
    pipe.return_rank()
    pipe.return_ranks(.9,printtofile=True)
Ejemplo n.º 4
0
    # cvcounter test
    print('Pipe.cvcounter =\t' + str(pipe.cvcounter))

    print("X size: " + str(np.shape(X)))
    print("Y size: " + str(np.shape(Y)))
    # test initialization of grid parameters

    # FFS + RF dic
    # griddic = dict(FFS__k=[50,100],RF__n_estimators=[100,200])
    # FFS + FDA dic
    griddic = dict(GB__n_estimators=[100, 200, 300],
                   GB__learning_rate=[0.1, 0.3, 0.5],
                   GB__max_features=["Auto", 100.],
                   GB__max_depth=[3, 5, 10])
    #griddic = dict();
    pipe.crossgrid(griddic, crossval=cv.leave_x_out(pipe.Y, 50, nsamples=100))
    #pipe.crossgrid(griddic,crossval=cv.leave_x_out(pipe.Y, 20, nsamples=300))
    print(pipe.return_score())
    print(pipe._gridsearch.grid_scores_)
    print(pipe._pipe.named_steps.keys())
    print(pipe._pipe)
    pipe.return_rank()
    pipe.return_ranks(.5, printtofile=True)

    # prediction

    df2 = pd.read_csv('data/test.csv')

    X2, IDS, names2 = filtertrain(df2, 'test')

    Y2 = pipe._gridsearch.predict_proba(X2)