Ejemplo n.º 1
0
        """
        Optional alternative loss function for printing or diagnostics.
        """
        return roc_auc_score(y, np.array(self.predict_proba(X))[:, 1])


if __name__ == '__main__':
    # Script entry point: run a numerical gradient check of CNN2 on a
    # handful of training examples.

    from datautil import load_raop_data
    from transformers import ExtractBody, PrepAndVectorize
    from sklearn.pipeline import Pipeline
    all_train_df, all_train_labels, submit_df = load_raop_data()

    # Model hyper-parameters used for the check.
    # Dropout has to be exactly 0 here: dropout is stochastic, so with a
    # non-zero rate the numerical gradient check will not work.
    wdim, hdim, context, drop_p = 50, 10, 1, 0.

    # Only the first few rows are vectorized and checked.
    n_check = 4
    steps = [
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=wdim)),
    ]
    vectorizer = Pipeline(steps)
    X = vectorizer.fit_transform(all_train_df[:n_check], y=1)
    y = all_train_labels[:n_check]

    model = CNN2(wdim=wdim, hdim=hdim, drop_p=drop_p, context=context)

    print("Numerical gradient check...")
    model.grad_check(X, y)
Ejemplo n.º 2
0
if __name__ == '__main__':
    # Script entry point: numerically verify the gradients of NNMX2 on a
    # small slice of the training data.

    from datautil import load_raop_data
    from transformers import ExtractBody, PrepAndVectorize
    from sklearn.pipeline import Pipeline
    all_train_df, all_train_labels, submit_df = load_raop_data()

    wdim = 50
    hdim = 10
    # Keep dropout at 0 for the gradient check; dropout is stochastic, so
    # the check will not work with it enabled.
    drop_p = 0.

    # The check only uses the first rows of the training set.
    sample = slice(None, 4)
    body_step = ('body', ExtractBody())
    vec_step = ('vec', PrepAndVectorize(d=wdim))
    X = Pipeline([body_step, vec_step]).fit_transform(
        all_train_df[sample], y=1)

    y = all_train_labels[sample]

    checker = NNMX2(wdim=wdim, hdim=hdim, drop_p=drop_p)

    print("Numerical gradient check...")
    checker.grad_check(X, y)





Ejemplo n.º 3
0
def run(args=None):
    """Parse options, then train and evaluate one model per CV fold.

    The function:

    1. parses the command-line style options in ``args``;
    2. creates a results folder under ``results/`` whose name is built
       from every option value plus a timestamp;
    3. seeds NumPy, loads the data with ``load_raop_data`` and vectorizes
       it with an ``ExtractBody`` -> ``PrepAndVectorize`` pipeline;
    4. for each of the 10 ``KFold`` splits, builds a fresh model with
       ``init_model(opts)`` and calls ``train_sgd`` on the training
       indices, passing the held-out indices as ``devidx``;
    5. finally calls ``save_all_results`` over the ``results`` folder.

    Args:
        args: list of option strings handed to ``OptionParser.parse_args``.
            ``None`` lets optparse fall back to ``sys.argv[1:]``.

    Returns:
        None.
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    #parser.add_option("--test",action="store_true",dest="test",default=False)
    #parser.add_option("--plotEpochs",action="store_true",dest="plotEpochs",default=False)
    #parser.add_option("--plotWvecDim",action="store_true",dest="plotWvecDim",default=False)

    # Optimizer
    # minibatch of 0 means no minibatches, just iterate through
    parser.add_option("--minibatch", dest="minibatch", type="int", default=0)
    #parser.add_option("--optimizer",dest="optimizer",type="string",
    #    default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    # NOTE: optparse does not convert non-string defaults, so this default
    # stays the float 40000.0 even though type="int". The literal is kept
    # unchanged so the option-derived results folder name stays the same;
    # the value is coerced to int below before it is used as a count.
    parser.add_option("--printevery", dest="printevery", type="int",
                      default=4e4)
    # anneal every this many epochs
    parser.add_option("--annealevery", dest="annealevery", type="int",
                      default=0)

    parser.add_option("--alpha", dest="alpha", type="float", default=0.005)
    parser.add_option("--rho", dest="rho", type="float", default=1e-5)
    parser.add_option("--drop_p", dest="drop_p", type="float", default=0.5)

    parser.add_option("--wdim", dest="wdim", type="int", default=50)
    parser.add_option("--hdim", dest="hdim", type="int", default=200)
    parser.add_option("--odim", dest="odim", type="int", default=2)
    parser.add_option("--rseed", dest="rseed", type="int", default=207)
    parser.add_option("--context", dest="context", type="int", default=1)

    #parser.add_option("--outFile",dest="outFile",type="string",
    #    default="models/test.bin")
    #parser.add_option("--inFile",dest="inFile",type="string",
    #    default="models/test.bin")
    #parser.add_option("--data",dest="data",type="string",default="train")

    parser.add_option("--model", dest="model", type="string", default="NNMX")

    (opts, args) = parser.parse_args(args)

    # name of folder to store results in
    resfolder = '_'.join(
        ['{k}={v}'.format(k=k, v=v) for k, v in vars(opts).items()]
        )

    resfolder += '_timestamp={t}'.format(t=time.strftime('%Y%m%d%H%M%S'))
    resfolder = 'results/' + resfolder
    print(resfolder)

    if not os.path.exists(resfolder):
        os.makedirs(resfolder)

    ### Set up the training and test data to work with throughout the notebook:
    np.random.seed(opts.rseed)

    all_train_df, y, submit_df = load_raop_data()

    # useful for sklearn scoring
    #roc_scorer = make_scorer(roc_auc_score)
    n_all = all_train_df.shape[0]

    # set up kFolds to be used in the rest of the project
    kf = KFold(n_all, n_folds=10, random_state=opts.rseed)

    body_vecs = Pipeline([
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=opts.wdim))
        ]).fit_transform(X=all_train_df, y=1)

    # Iteration counts must be whole numbers; the --printevery default is a
    # float (see note above), so normalise it once here.
    printevery_opt = int(opts.printevery)

    for train, test in kf:
        nn = init_model(opts)
        n_train = len(train)
        if opts.minibatch == 0:
            # No minibatches: visit every training index once per epoch.
            idxiter = list(train) * opts.epochs
            annealevery = n_train * opts.annealevery
            printevery = printevery_opt
        else:
            # Floor division keeps these counts integral under both
            # Python 2 and Python 3 (where "/" is true division), and
            # matches what "/" produced for ints under Python 2.
            idxiter = NNBase.randomiter(
                N=opts.epochs * n_train // opts.minibatch,
                pickfrom=train, batch=opts.minibatch)
            annealevery = n_train * opts.annealevery // opts.minibatch
            printevery = printevery_opt // opts.minibatch

        nn.train_sgd(body_vecs, y, idxiter=idxiter,
                     devidx=test, savepath=resfolder,
                     costevery=printevery, printevery=printevery,
                     annealevery=annealevery)

    save_all_results(resultpath='results', savepath='result_summary')