# Reconstructed notebook cell: the original statements were flattened onto a
# single physical line (a SyntaxError as written). Restored one statement per
# line, preserving every call and argument in order.
# Interactive LR-range test + one-cycle training session for the surface model.

# Show source/docs for learn.validate (IPython `??` magic).
get_ipython().run_line_magic('pinfo2', 'learn.validate')

# First LR range test on the fully trainable model.
learn.lr_find(start_lr=1e-6, end_lr=1e-2, no_grad_val=False, num_it=63)
learn.recorder.plot()
learn.fit_one_cycle(cyc_len=8, max_lr=2e-5, moms=(0.95, 0.85), div_factor=10.0,
                    pct_start=0.3, wd=1e-7, no_grad_val=False)

# Inspect the flattened layer list, split into layer groups, and freeze all
# but the last group for discriminative fine-tuning.
flatten_model(learn.model)
learn.split(groupness)
learn.layer_groups
learn.freeze_to(1)

# Second LR range test after freezing, then a longer one-cycle run.
learn.lr_find(start_lr=1e-6, end_lr=1e-2, no_grad_val=False, num_it=63)
learn.recorder.plot()
learn.fit_one_cycle(cyc_len=16, max_lr=2e-4, moms=(0.95, 0.85), div_factor=400.0,
                    pct_start=0.1, wd=1e-7, no_grad_val=False)

model.summary()
learn.save('surfsmall')
learn.sched.plot()

# Re-weight the combined loss (kf: force term, ke: energy term — presumably;
# TODO confirm against MSEloss definition) and fine-tune at a tiny LR.
learn.loss_func = partial(MSEloss, kf=1, ke=0.01)
learn.lr_find(start_lr=1e-10, end_lr=1e-4, no_grad_val=False, num_it=300)
learn.recorder.plot()
learn.fit_one_cycle(cyc_len=6, max_lr=1e-6, moms=(0.95, 0.85), div_factor=10.0,
                    pct_start=0.1, wd=1e-7, no_grad_val=False)
# Reconstructed notebook cell: flattening fused comments and code onto one
# line, so as written the WHOLE line (starting with '#') was dead comment —
# including the live training call and the save logic below. Restored with
# commented-out history on its own lines and the live code executable again.

# else:
#     learn.model.load_state_dict(
#         torch.load(os.path.join(args.modelpath, sys.argv[1] + ".pth"))['model'])
#     print("weights were loaded")
# except:
#     print("no weights were loaded")
# pdb.set_trace()
# with torch.autograd.detect_anomaly():
# learn.fit_one_cycle(cyc_len=int(sys.argv[3]), max_lr=lrs, moms=(0.95, 0.85),
#                     div_factor=50.0, pct_start=0.30, wd=wds, no_grad_val=False)

# Train for sys.argv[3] epochs with the discriminative LRs/WDs prepared earlier.
learn.fit_one_cycle(cyc_len=int(sys.argv[3]), max_lr=lrs, moms=(0.95, 0.85),
                    div_factor=150.0, pct_start=0.05, wd=wds, no_grad_val=False)

# Save under a name encoding which uncertainty mode was trained;
# learn.save(..., return_path=True) returns the checkpoint path, which we print.
if args.uncertainty_forces:
    print(learn.save("bulkVtrain3200" + "_uncertain_forces_" + str(sys.argv[3]),
                     return_path=True))
elif args.uncertainty:
    print(learn.save("bulkVtrain3200" + "_uncertain_" + str(sys.argv[3]) + "_"
                     + str(args.p) + "_" + str(args.var_coeff),
                     return_path=True))
else:
    print(learn.save("bulkVtrain3200" + "_" + str(sys.argv[3]),
                     return_path=True))
# Reconstructed notebook cell: as flattened, only `copyfile(...)` before the
# first '#' actually executed — the loaders, model, Learner setup, and the
# training run were all swallowed into an inline comment. Restored one
# statement per line with the historical commented-out code kept as comments.

copyfile(args.split_path, split_path)

# from sklearn.model_selection import train_test_split
# train, test = train_test_split(df, test_size=0.20, random_state=42,
#                                stratify=df['Ebin'].values)
print(args.batch_size)

# Training loader: random sampling; num_workers deliberately forced to 0
# (the 0* factor) — original value was 9 * torch.cuda.device_count().
train_loader = schnetpack2.custom.data.AtomsLoader(
    data_train, batch_size=args.batch_size, sampler=RandomSampler(data_train),
    num_workers=0 * torch.cuda.device_count(), pin_memory=True)
val_loader = schnetpack2.custom.data.AtomsLoader(
    data_val, batch_size=args.batch_size,
    num_workers=9 * torch.cuda.device_count(), pin_memory=True)

# Normalisation statistics for the energy target (per-structure, not per-atom).
mean, stddev = train_loader.get_statistics('energy', False)
# mean, stddev = torch.tensor([-1.5115]), torch.tensor([1.2643])
# stddev = 29.6958
print(mean, stddev)

model = get_model(train_args, atomref=None, mean=torch.FloatTensor([mean]),
                  stddev=torch.FloatTensor([stddev]),
                  train_loader=train_loader, parallelize=args.parallel)

data = DataBunch(train_loader, val_loader,
                 collate_fn=schnetpack2.custom.data.collate_atoms)
learn = Learner(data, model, model_dir=args.modelpath)
learn.purge()
learn.opt_func = Adam
# Combined loss: kf weights the force term, ke the energy term — presumably;
# TODO confirm against MSEloss definition.
learn.loss_func = partial(MSEloss, kf=1.0, ke=0.1)
learn.metrics = [Emetric, Fmetric]
# learn.load('trainbulkVsurf128epochs')
torch.cuda.empty_cache()
print(sys.argv)

# CLI: sys.argv[3] = number of epochs, sys.argv[4] = peak learning rate,
# sys.argv[1] = checkpoint name.
learn.fit_one_cycle(cyc_len=int(sys.argv[3]), max_lr=float(sys.argv[4]),
                    moms=(0.95, 0.85), div_factor=150.0, pct_start=0.05,
                    wd=1e-2, no_grad_val=False)
print(learn.save(sys.argv[1], return_path=True))
# Reconstructed notebook cell: flattening left only `learn.metrics = [...]`
# live — the cache clear, the training call, and the conditional save were
# all trapped behind the first inline '#'. Restored with historical
# commented-out code on its own lines.

learn.metrics = [Emetric, Fmetric]
# Previously also: DipoleMetric, ChargeMetric, CouplingMetric,
# CouplingMetricKaggle.

# for epoch in range(1, int(sys.argv[2]) + 1):
#     with gpytorch.settings.use_toeplitz(True):
#         train(epoch)
#     test()
# learn.load('trainbulkVsurf128epochs')
torch.cuda.empty_cache()
# print(sys.argv)
# with gpytorch.settings.use_toeplitz(True):

# CLI here is shifted vs. the other script: sys.argv[2] = epochs,
# sys.argv[3] = peak learning rate.
learn.fit_one_cycle(cyc_len=int(sys.argv[2]), max_lr=float(sys.argv[3]),
                    moms=(0.95, 0.85), div_factor=500.0, pct_start=0.05,
                    wd=1e-2, no_grad_val=False)

# print(learn.save(os.path.join(args.modelpath,
#       "PhysNet_{0}_{1}_{2}".format(sys.argv[1], sys.argv[2], sys.argv[3])),
#       return_path=True))
# torch.save(model, os.path.join(args.modelpath,
#       "DKL_weights_{0}_{1}_{2}".format(sys.argv[1], sys.argv[2], sys.argv[3])))
# if args.uncertainty_forces:
#     print(learn.save("bulkVtrain3200" + "_uncertain_forces_" + str(sys.argv[3]),
#                      return_path=True))

# Checkpoint name encodes whether inducing points were used (sparse GP head).
if args.num_inducing_points:
    print(learn.save("bulkVtrain3200" + "_induced_" + str(sys.argv[3]),
                     return_path=True))
else:
    print(learn.save("bulkVtrain3200_" + str(sys.argv[3]), return_path=True))