def mock(filename, data, Model, h1, h2, h3):
    """Score one saved checkpoint on the validation split and log the hit count.

    The CSV at ``data`` is loaded, split with ``separate_train_val_test`` and
    the ``'val'`` split is fed through a freshly built ``Model``.  A prediction
    counts as correct when its absolute error is below the module-level
    ``OFF`` threshold.  The count is appended to the module-level ``counters``
    list and, when non-zero, also appended as a row to
    ``Outputs/{name}/understand_array.csv`` (``name`` is a module-level value).

    Args:
        filename: Path of the checkpoint passed to ``torch.load``; it is
            expected to contain a ``'model_state_dict'`` entry.
        data: Path (or buffer) of the CSV file passed to ``pandas.read_csv``.
        Model: Model class, called as
            ``Model(n_inputs, h1, n_outputs, H2=h2, H3=h3)``.
        h1: First hidden-layer size forwarded to ``Model``.
        h2: Forwarded to ``Model`` as ``H2``.
        h3: Forwarded to ``Model`` as ``H3``.

    Returns:
        None.  Results are reported through ``counters``, stdout and the CSV.

    Note:
        Checkpoint loading is best-effort: every failing step prints a message
        and evaluation still proceeds, in which case the model keeps whatever
        weights ``Model(...)`` initialised it with.
    """
    df = pd.read_csv(data)
    data_np = df.to_numpy().astype(dtype='float64')
    # Use a separate name so the ``data`` argument is not shadowed.
    splits = separate_train_val_test(data_np)
    # Despite the "test" naming, evaluation runs on the 'val' split.
    test = splits['val']
    xTest = test['x']
    yTest = test['y'].reshape(-1, 1)

    model = Model(xTest.shape[1], h1, yTest.shape[1], H2=h2, H3=h3)
    print(filename)

    # Best-effort restore.  ``except Exception`` (rather than a bare except)
    # lets KeyboardInterrupt / SystemExit propagate.  The placeholders make a
    # failure in one step surface as an ordinary exception in the next step,
    # so the same sequence of messages is printed as before.
    loaded = None
    model_state_dict = None
    try:
        loaded = torch.load(filename)
    except Exception:
        print('Unable to load model')
    try:
        model_state_dict = loaded['model_state_dict']
    except Exception:
        print('Unable to get model_state_dict')
    try:
        model.load_state_dict(model_state_dict)
    except Exception:
        print('Unable to load model_state_dict')

    model.eval()
    model.double()  # match the float64 inputs built above
    with torch.no_grad():
        plt.ion()
        tensors = torch.from_numpy(xTest)
        preds = model(tensors).double()
        diff = abs(preds - yTest)
        num_correct = len(np.where(diff < OFF)[0])

    counters.append(num_correct)
    print()
    print(*list(range(len(counters))), sep='\t')
    print(*counters, f"length:{len(counters)}", sep='\t')

    if num_correct > 0:  # 10
        # The context manager guarantees the handle is flushed and closed;
        # the original left it open on every call.
        with open(f"Outputs/{name}/understand_array.csv", 'a', newline='') as save_file:
            wr = csv.writer(save_file)
            wr.writerow([filename, num_correct])
            print(f'wrote to file: {save_file}')
dfs = [] # Removing the unwanted features from the df before it's a numpy array if len(remove_features_arr) > 0: for val in remove_features_arr: remove_cols = df.columns[val] dfs.append(df.drop(remove_cols, axis=1)) df = dfs[0] xs = [] ys = [] # for _df in dfs: data_np = df.to_numpy().astype(dtype='float64') data = separate_train_val_test(data_np) test = data['val'] xTest = test['x'] yTest = test['y'].reshape(-1, 1) # xs.append(xTest) # ys.append(yTest) nns = [] for i, model_file in zip(range(len(models)), models): # print(model_file[-10:]) try: # print('Model1') model = Model1(xTest.shape[1], 128, yTest.shape[1], H2=256, H3=None) model.load_state_dict(torch.load(model_file)['model_state_dict']) # for yTest, xTest in zip(ys, xs):
def __init__(self, data_file, epochs=20000, batch_size=10, Model=Model1, H1=128, H2=None, H3=None,
             learning_rate=1e-4, Optim=torch.optim.Adam, y_col=0, goal_loss=0.0000005,
             percent_disp=1.05, round_to=3, fast=True, shuffle=False, index_col=None,
             test_size=50, save=True, DIR=None, NAME=None, scale=False, headers=None, STD=False):
    """Load the data set, split it and build the model, loss and optimizer.

    Args:
        data_file: Either a CSV path (read with ``pandas.read_csv``) or an
            already loaded ``pandas.DataFrame``.
        epochs: Stored as ``self.epochs`` and embedded in ``self.filename``.
        batch_size: Stored as ``self.BATCH_SIZE``.
        Model: Model class, built as
            ``Model(n_inputs, H1, n_outputs, H2=H2, H3=H3)``.
        H1, H2, H3: Hidden-layer settings forwarded to ``Model``.
        learning_rate: Learning rate handed to the optimizer.
        Optim: Optimizer class, built as
            ``Optim(params, lr=learning_rate, weight_decay=0.5)``.
        y_col: Column index used only to print the predicted column's name.
        goal_loss: Stored as ``self.GOAL_LOSS``.
        percent_disp: Stored as ``self.PERCENT_DISP``.
        round_to: Stored as ``self.ROUND_TO``.
        fast: When true, skips the informational prints and the test split
            attributes (``self.test``, ``self.xtest``, ``self.ytest``).
        shuffle: When true, rows are shuffled (using the ``random`` module's
            generator) before splitting.
        index_col: Forwarded to ``pandas.read_csv``.
        test_size: Accepted for interface compatibility; not used here.
        save: When true, the output directory ``DIR`` is created if missing.
        DIR: Output directory prefix used in ``self.filename``.
        NAME: Run name used in ``self.filename``.
        scale: Stored as ``self.scale``.
        headers: Forwarded to ``pandas.read_csv`` as ``header``.
        STD: Stored as ``self.STD``.

    Raises:
        TypeError: If ``data_file`` is neither a ``str`` nor a ``DataFrame``.
    """
    self.fast = fast
    self.DIR = DIR
    self.NAME = NAME
    self.data_file = data_file
    self.scale = scale
    self.STD = STD

    # Running records of predictions and of per-split losses.
    self.preds = {'real': [], 'pred': []}
    self.loss = {'train': [], 'val': []}

    self.filename = f"{DIR}{NAME}_epoch_{epochs}_batch_{batch_size}_Model_{Model.__name__}_h1_{H1}_h2_{H2}_h3_{H3}_lr_{learning_rate}_optim_{Optim.__name__}"

    if save:
        print(save, 'save')
        # DIR defaults to None; os.path.exists(None) raises TypeError and
        # os.makedirs('') raises FileNotFoundError, so only touch the file
        # system when a real directory was supplied.
        if self.DIR and not os.path.exists(self.DIR):
            print('not exists')
            os.makedirs(self.DIR)

    self.START_TIME = datetime.now()

    if isinstance(data_file, str):
        self.df = pd.read_csv(data_file, index_col=index_col, header=headers)
        self.npa = np.array(self.df)
    elif isinstance(data_file, pd.DataFrame):
        self.df = data_file
        self.npa = np.array(data_file)
    else:
        # Previously this fell through and failed later with an opaque
        # AttributeError on self.npa.
        raise TypeError(
            f"data_file must be a CSV path or a pandas DataFrame, got {type(data_file).__name__}"
        )
    self.npa = self.npa.astype(dtype='float64')

    self.shuffle = shuffle
    if self.shuffle:
        # random.shuffle() applied directly to a 2-D ndarray swaps rows
        # through views and ends up duplicating/losing rows.  Shuffle an
        # index list with the same RNG instead and reorder the rows by it.
        order = list(range(len(self.npa)))
        random.shuffle(order)
        self.npa = self.npa[order]

    self.data = separate_train_val_test(self.npa)
    self.info = f"Total: {len(self.npa)}, " \
                f"Train: {len(self.data['train']['x'])}, " \
                f"Validate: {len(self.data['val']['x'])}, " \
                f"Test: {len(self.data['test']['x'])}\n"
    if not self.fast:
        print(self.info)
        print(f"Predicting Column :: {self.df.columns[y_col]} ::")

    # Targets are reshaped to column vectors, shape (n_samples, 1).
    self.train = self.data['train']
    self.x = self.train['x']
    self.y = self.train['y']
    self.y = self.y.reshape(-1, 1)

    self.val = self.data['val']
    self.xVal = self.val['x']
    self.yVal = self.val['y']
    self.yVal = self.yVal.reshape(-1, 1)

    if not self.fast:
        self.test = self.data['test']
        self.xtest = self.test['x']
        self.ytest = self.test['y']
        self.ytest = self.ytest.reshape(-1, 1)

    # Network input/output widths are taken from the training arrays.
    self._in = self.x.shape[1]
    self._out = self.y.shape[1]
    self.H1 = H1
    self.H2 = H2
    self.H3 = H3

    self.learning_rate = learning_rate
    self.epochs = epochs
    self.BATCH_SIZE = batch_size
    self.GOAL_LOSS = goal_loss
    # Used by the accuracy code further down; too small a value causes
    # problems there.
    self.ROUND_TO = round_to
    self.SAVE = save
    self.PERCENT_DISP = percent_disp

    # The loss is not printed every step; it is tracked here and printed
    # only when a new lowest value is reached.
    self.loss_df = pd.DataFrame(columns=["loss"])
    self.lowest_val_loss = 10000000000

    self.MODEL = Model
    self.model = self.MODEL(self._in, self.H1, self._out, H2=self.H2, H3=self.H3)
    # Move the model to the GPU before building the optimizer, as the
    # torch.optim documentation recommends.
    if torch.cuda.is_available():
        self.model.cuda()

    self.OPTIM = Optim
    self.criterion = nn.MSELoss()
    self.optimizer = self.OPTIM(self.model.parameters(), lr=learning_rate, weight_decay=0.5)

    print('Model Initialized')