""" Optional alternative loss function for printing or diagnostics. """ return roc_auc_score(y, np.array(self.predict_proba(X))[:, 1]) if __name__ == '__main__': from datautil import load_raop_data from transformers import ExtractBody, PrepAndVectorize from sklearn.pipeline import Pipeline all_train_df, all_train_labels, submit_df = load_raop_data() wdim = 50 hdim = 10 context = 1 # need to make sure dropout equal to 0 for # gradient check, otherwise will not work # because it is stochastic drop_p = 0. X = Pipeline([('body', ExtractBody()), ('vec', PrepAndVectorize(d=wdim)) ]).fit_transform(all_train_df[:4], y=1) y = all_train_labels[:4] nnmx = CNN2(wdim=wdim, hdim=hdim, drop_p=drop_p, context=context) print("Numerical gradient check...") nnmx.grad_check(X, y)
if __name__ == '__main__':
    from datautil import load_raop_data
    from transformers import ExtractBody, PrepAndVectorize
    from sklearn.pipeline import Pipeline

    all_train_df, all_train_labels, submit_df = load_raop_data()

    wdim = 50
    hdim = 10
    # Dropout must be 0 for the gradient check; a stochastic forward pass makes
    # the numerical and analytic gradients disagree.
    drop_p = 0.

    X = Pipeline([
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=wdim))
    ]).fit_transform(all_train_df[:4], y=1)
    y = all_train_labels[:4]

    nnmx = NNMX2(wdim=wdim, hdim=hdim, drop_p=drop_p)
    print("Numerical gradient check...")
    nnmx.grad_check(X, y)
def run(args=None):
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    # parser.add_option("--test", action="store_true", dest="test", default=False)
    # parser.add_option("--plotEpochs", action="store_true", dest="plotEpochs", default=False)
    # parser.add_option("--plotWvecDim", action="store_true", dest="plotWvecDim", default=False)

    # Optimizer options.
    # A minibatch of 0 means no minibatching: just iterate through the examples.
    parser.add_option("--minibatch", dest="minibatch", type="int", default=0)
    # parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--printevery", dest="printevery", type="int", default=40000)
    # Anneal the learning rate every this many epochs (0 disables annealing).
    parser.add_option("--annealevery", dest="annealevery", type="int", default=0)
    parser.add_option("--alpha", dest="alpha", type="float", default=0.005)
    parser.add_option("--rho", dest="rho", type="float", default=1e-5)
    parser.add_option("--drop_p", dest="drop_p", type="float", default=0.5)
    parser.add_option("--wdim", dest="wdim", type="int", default=50)
    parser.add_option("--hdim", dest="hdim", type="int", default=200)
    parser.add_option("--odim", dest="odim", type="int", default=2)
    parser.add_option("--rseed", dest="rseed", type="int", default=207)
    parser.add_option("--context", dest="context", type="int", default=1)
    # parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin")
    # parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin")
    # parser.add_option("--data", dest="data", type="string", default="train")
    parser.add_option("--model", dest="model", type="string", default="NNMX")
    (opts, args) = parser.parse_args(args)

    # Name of the folder to store results in: one folder per option setting,
    # plus a timestamp so repeated runs do not collide.
    resfolder = '_'.join(
        ['{k}={v}'.format(k=k, v=v) for k, v in vars(opts).items()])
    resfolder += '_timestamp={t}'.format(t=time.strftime('%Y%m%d%H%M%S'))
    resfolder = 'results/' + resfolder
    print(resfolder)
    if not os.path.exists(resfolder):
        os.makedirs(resfolder)

    # Set up the training and test data to work with throughout.
    np.random.seed(opts.rseed)
    all_train_df, y, submit_df = load_raop_data()

    # useful for sklearn scoring
    # roc_scorer = make_scorer(roc_auc_score)

    n_all = all_train_df.shape[0]
    # Set up the k-folds to be used in the rest of the project.
    kf = KFold(n_all, n_folds=10, random_state=opts.rseed)

    body_vecs = Pipeline([
        ('body', ExtractBody()),
        ('vec', PrepAndVectorize(d=opts.wdim))
    ]).fit_transform(X=all_train_df, y=1)

    for train, test in kf:
        nn = init_model(opts)
        if opts.minibatch == 0:
            # One pass over the training indices per epoch; counts are in examples.
            idxiter = list(train) * opts.epochs
            annealevery = len(train) * opts.annealevery
            printevery = opts.printevery
        else:
            # Random minibatches; counts are converted from examples to minibatch steps.
            idxiter = NNBase.randomiter(
                N=opts.epochs * len(train) // opts.minibatch,
                pickfrom=train, batch=opts.minibatch)
            annealevery = len(train) * opts.annealevery // opts.minibatch
            printevery = opts.printevery // opts.minibatch
        nn.train_sgd(body_vecs, y,
                     idxiter=idxiter, devidx=test,
                     savepath=resfolder,
                     costevery=printevery,
                     printevery=printevery,
                     annealevery=annealevery)

    save_all_results(resultpath='results', savepath='result_summary')
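# Usage sketch. The module name `run_raop` and the flag values below are
# illustrative assumptions, not taken from the repository; only the flags
# themselves come from the optparse setup above.
#
#     from run_raop import run
#     run(['--model', 'NNMX', '--epochs', '50', '--minibatch', '32',
#          '--alpha', '0.005', '--drop_p', '0.5'])
#
# Because run() accepts an explicit `args` list, it can be driven from another
# script or a notebook without touching sys.argv; with args=None, optparse
# falls back to the real command line, e.g.
#
#     python run_raop.py --model NNMX --epochs 50 --minibatch 32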