Example #1
    def __init__(self,
                 archiveRoot,
                 datasetName,
                 labelIndex,
                 idIndex,
                 iteration=0,
                 datasetType="TRAIN",
                 transform=None):
        # transform is accepted for API compatibility but not applied here.
        self.samples = []
        self.labels = []
        self.ids = []
        print(os.listdir(archiveRoot))
        # Iteration 0 reads the base dataset file directly; later
        # iterations live in subdirectories named 'iter-<n>'.
        iterationName = datasetName
        if iteration != 0:
            iterationName = 'iter-%s' % iteration
        dataset = [i for i in os.listdir(archiveRoot) if i == iterationName]
        print(dataset)
        if dataset:
            print('dataset is found')
            if iteration == 0:
                data = pd.read_csv(os.path.join(archiveRoot, dataset[0]),
                                   header=None,
                                   index_col=None)
            else:
                data = pd.read_csv(os.path.join(
                    archiveRoot, dataset[0],
                    datasetName + '_' + datasetType + '.csv'),
                    header=None,
                    index_col=None)
            print(data.shape)
            print(data.head(5))
            # Split the label and id columns off from the feature columns.
            self.labels = torch.Tensor(data.values[:, labelIndex]).long()
            self.ids = torch.Tensor(data.values[:, idIndex]).long()
            self.targets = self.labels
            self.samples = data.drop(columns=[labelIndex, idIndex]).to_numpy()
            print(self.samples.shape)
            self.data = self.samples
            # Z-normalize each sample row-wise, guarding against
            # zero-variance rows to avoid division by zero.
            std_ = self.samples.std(axis=1, keepdims=True)
            std_[std_ == 0] = 1.0
            self.samples = (self.samples -
                            self.samples.mean(axis=1, keepdims=True)) / std_
        else:
            raise FileNotFoundError(
                '%s not found in %s' % (iterationName, archiveRoot))
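
# Usage sketch (an assumption, not from the original source): this
# __init__ reads as a torch.utils.data.Dataset subclass, so given a
# hypothetical class name UcrDataset and the usual __len__/__getitem__
# methods over self.samples and self.labels, it could be consumed through
# a DataLoader. Imports of os, pandas as pd, and torch are assumed.
"""
train_set = UcrDataset(archiveRoot='data/archive',
                       datasetName='my_dataset',
                       labelIndex=0,
                       idIndex=1,
                       datasetType='TRAIN')
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32,
                                           shuffle=True)
for x, y in train_loader:
    pass  # x: z-normalized samples, y: integer class labels
"""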
Example #2
import os
import pickle

import torch

# for sequence only
"""
model_stuff = torch.load(os.path.join(datasets_dir, "results/models/model201300.pkl"))
"""

# for metadata

model_stuff = torch.load(
    os.path.join(datasets_dir,
                 "results/models/MLP_full_with_metadata_model300300.pkl"))

# Load the test or validation data
with open(os.path.join(datasets_dir, 'tts/val_x_no_nan.pkl'), "rb") as f:
    data = pickle.load(f)
data.head()

num_rows = len(data)
# In[4]:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_stuff
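# From its usage in this example, the loaded checkpoint is a dict holding
# at least 'params' (model hyperparameters) and 'state_dict' (learned
# weights); listing the keys shows everything that was saved.
print(model_stuff.keys())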

# In[5]:
# For no metadata, uncomment this block; otherwise use the block below
"""
model_params = model_stuff['params']
print(model_params)
# model_params['my_device'] = torch.device('cpu')
model = mytorch.myLSTM(model_stuff['params'])
model.load_state_dict(model_stuff['state_dict'])
"""
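
# A hedged counterpart for the with-metadata checkpoint loaded above; the
# class name mytorch.myMLP is only a guess inferred from the
# "MLP_full_with_metadata" filename and is not confirmed by the source.
"""
model = mytorch.myMLP(model_stuff['params'])
model.load_state_dict(model_stuff['state_dict'])
model.to(device)
model.eval()  # inference mode: disables dropout, freezes batch-norm stats
"""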
Example #3
import pandas as pd

# data_path and selections are defined in earlier cells that are not shown.
print(data_path)

selections.head()

selections.plot(x='quarter_of_day', y='selected_object')

dummy_fields = ['directing_page', 'selected_object']  # one-hot encode these categorical fields

for each in dummy_fields:
    dummies = pd.get_dummies(selections[each], prefix=each, drop_first=False)
    selections = pd.concat([selections, dummies], axis=1)

fields_to_drop = ['directing_page', 'selected_object']  # drop the originals now that they are one-hot encoded
data = selections.drop(fields_to_drop, axis=1)
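
# A quick toy illustration (not from the source) of what the one-hot step
# above produces: pd.get_dummies turns each category into its own
# indicator column.
_demo = pd.DataFrame({'selected_object': ['a', 'b', 'a']})
print(pd.get_dummies(_demo['selected_object'], prefix='selected_object'))
# columns: selected_object_a, selected_object_b (one flag per category)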

print(data.head())

quant_features = ['quarter_of_day']  # standardize the quantitative features (zero mean, unit variance)
scaled_features = {}
for each in quant_features:
    mean, std = data[each].mean(), data[each].std()
    scaled_features[each] = [mean, std]
    data.loc[:, each] = (data[each] - mean) / std
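
# scaled_features stores each (mean, std) pair so the scaling can be
# undone later, e.g. to report results in original units; a short sketch:
mean, std = scaled_features['quarter_of_day']
unscaled = data['quarter_of_day'] * std + mean  # recovers the raw values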

data.head()
print(data.describe())
# Hold out the test set: here the test data is the last 100 rows
test_data = data[-100:]

# Now remove the test data from the data set
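# The snippet is cut off here; a one-line sketch of the step this comment
# describes, assuming the same 100-row holdout used above:
# data = data[:-100]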