def get_model(data_dir, mat_prop, classification=False, batch_size=None,
              transfer=None, verbose=True):
    """Build a CrabNet model, train it on one material property and save it.

    Training data is read from ``{data_dir}/{mat_prop}/train.csv`` and
    validation data from ``{data_dir}/{mat_prop}/val.csv``.

    Parameters
    ----------
    data_dir : str
        Folder that contains one sub-folder per material property.
    mat_prop : str
        Name of the property sub-folder; also used as the model name.
    classification : bool, default False
        When true, ``model.classification`` is switched on before the data
        is loaded.
    batch_size : int or None, default None
        Batch size handed to ``model.load_data``. When ``None`` it is derived
        from the number of training rows as ``2**round(log2(n_rows) - 4)``
        and clamped to the range ``[2**7, 2**12]``. An explicit value is used
        as given, without clamping.
    transfer : str or None, default None
        Base name (without ``.pth``) of a saved network to start from.
    verbose : bool, default True
        Forwarded to ``Model``.

    Returns
    -------
    Model
        The fitted model, after ``model.save_network()`` has been called.
    """
    import os  # function-scope import keeps this block self-contained

    # Get the TorchedCrabNet architecture loaded
    model = Model(CrabNet(compute_device=compute_device).to(compute_device),
                  model_name=f'{mat_prop}', verbose=verbose)

    # Train network starting at pretrained weights
    if transfer is not None:
        model.load_network(f'{transfer}.pth')
        # Re-assert the name after loading (presumably load_network can
        # replace it with the pretrained network's name -- TODO confirm).
        model.model_name = f'{mat_prop}'

    # Apply BCEWithLogitsLoss to model output if binary classification is True
    if classification:
        model.classification = True

    # Get the datafiles you will learn from
    train_data = f'{data_dir}/{mat_prop}/train.csv'
    val_data = f'{data_dir}/{mat_prop}/val.csv'

    # Building a path string can never raise, so the old try/except hint was
    # unreachable. Check the files for real instead. This only warns (it does
    # not raise) because pd.read_csv also accepts non-local locations such as
    # URLs, for which os.path.isfile is False.
    if not (os.path.isfile(train_data) and os.path.isfile(val_data)):
        print('Please ensure you have train (train.csv) and validation data',
              f'(val.csv) in folder "{data_dir}/{mat_prop}"')

    # Load the train and validation data before fitting the network
    if batch_size is None:
        data_size = pd.read_csv(train_data).shape[0]
        # int() guards against NumPy versions whose round() returns a float,
        # which would otherwise produce a float batch size.
        batch_size = 2 ** int(round(np.log2(data_size) - 4))
        batch_size = min(max(batch_size, 2**7), 2**12)

    model.load_data(train_data, batch_size=batch_size, train=True)
    print(f'training with batchsize {model.batch_size} '
          f'(2**{np.log2(model.batch_size):0.3f})')
    model.load_data(val_data, batch_size=batch_size)

    # Set the number of epochs, decide if you want a loss curve to be plotted
    model.fit(epochs=40, losscurve=False)

    # Save the network (saved as f"{model_name}.pth")
    model.save_network()
    return model
def get_model(mat_prop, i, classification=False, batch_size=None,
              transfer=None, verbose=True):
    """Build a CrabNet model, train it on one cross-validation fold, save it.

    Training data is read from ``data/matbench_cv/{mat_prop}/train{i}.csv``
    and validation data from ``data/matbench_cv/{mat_prop}/val{i}.csv``,
    relative to the current working directory.

    Parameters
    ----------
    mat_prop : str
        Name of the property sub-folder; combined with ``i`` to form the
        model name ``{mat_prop}{i}``.
    i : int or str
        Fold identifier appended to the data file names and the model name.
    classification : bool, default False
        When true, ``model.classification`` is switched on before the data
        is loaded.
    batch_size : int or None, default None
        Batch size handed to ``model.load_data``. When ``None`` it is derived
        from the number of training rows as ``2**round(log2(n_rows) - 4)``
        and clamped to the range ``[2**7, 2**12]``. An explicit value is used
        as given, without clamping.
    transfer : str or None, default None
        Base name (without ``.pth``) of a saved network to start from.
    verbose : bool, default True
        Forwarded to ``Model``.

    Returns
    -------
    Model
        The fitted model, after ``model.save_network()`` has been called.
    """
    import os  # function-scope import keeps this block self-contained

    fold_name = f'{mat_prop}{i}'

    # Get the TorchedCrabNet architecture loaded
    model = Model(CrabNet(compute_device=compute_device).to(compute_device),
                  model_name=fold_name, verbose=verbose)

    # Train network starting at pretrained weights
    if transfer is not None:
        model.load_network(f'{transfer}.pth')
        # Restore the per-fold name. Resetting it to just mat_prop (as the
        # code did before) would make every fold save under the same name.
        model.model_name = fold_name

    # Apply BCEWithLogitsLoss to model output if binary classification is True
    if classification:
        model.classification = True

    # Get the datafiles you will learn from. os.path.join picks the right
    # separator for the platform; hard-coded backslashes only work on Windows.
    fold_dir = os.path.join('data', 'matbench_cv', f'{mat_prop}')
    train_data = os.path.join(fold_dir, f'train{i}.csv')
    val_data = os.path.join(fold_dir, f'val{i}.csv')

    # Load the train and validation data before fitting the network
    if batch_size is None:
        data_size = pd.read_csv(train_data).shape[0]
        # int() guards against NumPy versions whose round() returns a float,
        # which would otherwise produce a float batch size.
        batch_size = 2 ** int(round(np.log2(data_size) - 4))
        batch_size = min(max(batch_size, 2**7), 2**12)

    model.load_data(train_data, batch_size=batch_size, train=True)
    print(f'training with batchsize {model.batch_size} '
          f'(2**{np.log2(model.batch_size):0.3f})')
    model.load_data(val_data, batch_size=batch_size)

    # Set the number of epochs, decide if you want a loss curve to be plotted
    model.fit(epochs=300, losscurve=False)

    # Save the network (saved as f"{model_name}.pth")
    model.save_network()
    return model