# Assumes module-level imports (not shown in this fragment):
#   import os
#   import pandas as pd
#   import torch

def __init__(self, archiveRoot, datasetName, labelIndex, idIndex,
             iteration=0, datasetType="TRAIN", transform=None):
    self.samples = []
    self.labels = []
    self.ids = []
    self.transform = transform  # store the transform so __getitem__ can apply it
    print(os.listdir(archiveRoot))

    # Iteration 0 reads the base dataset file directly; later iterations
    # live in an 'iter-<n>' subdirectory.
    iterationName = datasetName
    if iteration != 0:
        iterationName = 'iter-%s' % str(iteration)
    dataset = [i for i in os.listdir(archiveRoot) if i == iterationName]
    print(dataset)

    if dataset:
        print('dataset is found')
        if iteration == 0:
            data = pd.read_csv(os.path.join(archiveRoot, dataset[0]),
                               header=None, index_col=None)
        else:
            data = pd.read_csv(os.path.join(archiveRoot, dataset[0],
                                            datasetName + '_' + datasetType + '.csv'),
                               header=None, index_col=None)
        print(data.shape)
        print(data.head(5))

        # The label and id columns become long tensors; the remaining
        # columns are the feature matrix.
        self.labels = torch.Tensor(data.values[:, labelIndex]).long()
        self.ids = torch.Tensor(data.values[:, idIndex]).long()
        self.targets = self.labels
        self.samples = data.drop(columns=[labelIndex, idIndex]).to_numpy()
        print(self.samples.shape)
        self.data = self.samples

        # Z-normalize each sample (row); replace zero stds with 1.0 to
        # avoid dividing constant rows by zero.
        std_ = self.samples.std(axis=1, keepdims=True)
        std_[std_ == 0] = 1.0
        self.samples = (self.samples - self.samples.mean(axis=1, keepdims=True)) / std_
    else:
        raise FileNotFoundError(f'{iterationName} not found in {archiveRoot}')
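# A minimal sketch of the companion Dataset methods this __init__ implies;
# the bodies below are illustrative assumptions, not part of the original
# source.
def __len__(self):
    # One sample per row of the loaded CSV
    return len(self.samples)

def __getitem__(self, idx):
    # Return (features, label, id), with features as float32 for the model
    sample = torch.tensor(self.samples[idx], dtype=torch.float32)
    if self.transform is not None:
        sample = self.transform(sample)
    return sample, self.labels[idx], self.ids[idx]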
# (Earlier cells are assumed to have imported os, pickle, torch, and mytorch,
# and to have defined datasets_dir.)

# for sequence only
"""
model_stuff = torch.load(os.path.join(datasets_dir, "results/models/model201300.pkl"))
"""

# for metadata
model_stuff = torch.load(
    os.path.join(datasets_dir, "results/models/MLP_full_with_metadata_model300300.pkl"))

# Load the test or validation data
with open(os.path.join(datasets_dir, 'tts/val_x_no_nan.pkl'), "rb") as f:
    data = pickle.load(f)
data.head()
num_rows = len(data)


# In[4]:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_stuff


# In[5]:

# For no metadata, uncomment this block; otherwise use the one below.
"""
model_params = model_stuff['params']
print(model_params)
# model_params['my_device'] = torch.device('cpu')
model = mytorch.myLSTM(model_stuff['params'])
model.load_state_dict(model_stuff['state_dict'])
"""
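# A minimal inference sketch (an assumed workflow, not from the original
# notebook): rebuild the model from the checkpoint and run it on the
# validation frame loaded above. This shows the LSTM (no-metadata) variant;
# the assumption that `data` converts cleanly to a float32 feature matrix
# may not hold for the sequence inputs, which would need reshaping.
model_params = model_stuff['params']
model = mytorch.myLSTM(model_params)
model.load_state_dict(model_stuff['state_dict'])
model.to(device)
model.eval()  # disable dropout / batch-norm updates for evaluation

with torch.no_grad():
    x = torch.tensor(data.to_numpy(dtype='float32'), device=device)
    preds = model(x)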
# (Assumes pandas as pd, and that `selections` and `data_path` were defined earlier.)
print(data_path)
selections.head()
selections.plot(x='quarter_of_day', y='selected_object')

# One-hot encode these categorical fields
dummy_fields = ['directing_page', 'selected_object']
for each in dummy_fields:
    dummies = pd.get_dummies(selections[each], prefix=each, drop_first=False)
    selections = pd.concat([selections, dummies], axis=1)

# Drop the fields that have been converted to one-hot columns
fields_to_drop = ['directing_page', 'selected_object']
data = selections.drop(fields_to_drop, axis=1)
print(data.head())

# Define the quantitative features and normalize them, storing each
# feature's mean and std so the scaling can be inverted later.
quant_features = ['quarter_of_day']
scaled_features = {}
for each in quant_features:
    mean, std = data[each].mean(), data[each].std()
    scaled_features[each] = [mean, std]
    data.loc[:, each] = (data[each] - mean) / std
data.head()
print(data.describe())

# Set aside the test set and keep the rest for training.
# Here the test data is the last 100 rows.
test_data = data[-100:]
# Now remove the test data from the data set
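# Hypothetical continuation (the original script is cut off here): drop the
# test rows from the working set, and use the stored statistics to map
# scaled values back to original units when reporting results.
data = data[:-100]

# scaled_features holds [mean, std] per quantitative feature, so the
# normalization is invertible, e.g. for 'quarter_of_day':
mean, std = scaled_features['quarter_of_day']
unscaled = test_data['quarter_of_day'] * std + mean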