Example #1
def valid(self, epoch):
    """Run one validation pass; track the best model and early stopping."""
    self.dataset.mode = 'valid'
    dataloader = DataLoad(self.dataset, batch_size=self.batch_size, shuffle=False)
    self.model.eval()
    log_softmax = nn.LogSoftmax(dim=1)  # build once instead of per batch
    total_acc = 0
    total_loss = 0
    cnt = 0
    with torch.no_grad():  # no gradients are needed for validation
        for batch_idx, data in enumerate(dataloader):
            contexts, questions, answers = data
            batch_size = len(answers)
            output = self.model(contexts, questions, self.dataset.train_tree_dict)
            predict = self.model.predict(output, questions, self.dataset.train_tree_dict)
            acc = self.metrics(predict, answers)
            loss = self.criteria(log_softmax(output), answers)
            print('|', batch_size, 'acc:', acc, 'loss:', loss.item(), '|')
            total_loss += loss.item()
            total_acc += acc * batch_size
            cnt += batch_size
    total_acc = total_acc / cnt
    if total_acc > self.best_acc:
        self.best_acc = total_acc
        self.best_state = self.model.state_dict()
        self.early_stopping_cnt = 0
    else:
        self.early_stopping_cnt += 1
        if self.early_stopping_cnt >= 256:  # patience threshold
            self.early_stopping_flag = True
    with open(self.LOG_PATH, 'a+') as fp:
        fp.write(f'[Run {self.run}, Task {self.task_id}, Epoch {epoch}] [Validate] '
                 f'Accuracy : {total_acc:5.4} Loss : {total_loss / cnt:5.4}\n')
    return total_acc
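Taken together with the `train` and `test` methods below, `valid` implies an outer loop that stops once `early_stopping_flag` is set. A minimal sketch of that loop, where `trainer` and `max_epochs` are assumptions based on the attributes visible in these snippets:

    # Hypothetical driver loop; `trainer` and `max_epochs` are not shown in the source.
    trainer.best_acc = 0.0
    trainer.early_stopping_cnt = 0
    trainer.early_stopping_flag = False
    for epoch in range(max_epochs):
        trainer.train(epoch)
        trainer.valid(epoch)
        if trainer.early_stopping_flag:  # set after 256 epochs without improvement
            break
    trainer.test(epoch)  # evaluates the best saved state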
Example #2
def train(self, epoch):
    """Run one training epoch."""
    self.dataset.mode = 'train'
    # Each iteration draws a batch; within a batch, samples are grouped by context length.
    dataloader = DataLoad(self.dataset, batch_size=self.batch_size)
    self.model.train()
    log_softmax = nn.LogSoftmax(dim=1)  # build once instead of per batch
    total_acc = 0
    total_loss = 0
    cnt = 0
    # Groups such as 0-6, 7-12, 13-18, ...: one iteration per group, and the
    # contexts inside each `data` batch have similar lengths.
    for batch_idx, data in enumerate(dataloader):
        self.optimizer.zero_grad()
        contexts, questions, answers = data
        batch_size = len(answers)
        '''
        Sample batch:
        batch_idx 0
        contexts ([0, 1, 3, 4, 6, 7, 9, 10, 12, 13], [0, 1, 3, 4, 6, 7], [15, 16, 18, 19, 21, 22], [15, 16, 18, 19, 21, 22, 24, 25], [0, 1, 3, 4, 6, 7, 9, 10])
        questions tensor([[10, 11, 14],
                          [10, 11, 12],
                          [10, 11, 14],
                          [10, 11,  7],
                          [10, 11, 12]])
        answers tensor([ 5,  9,  5, 16, 16])
        '''
        output = self.model(contexts, questions, self.dataset.train_tree_dict)
        predict = self.model.predict(output, questions, self.dataset.train_tree_dict)
        loss = self.criteria(log_softmax(output), answers)
        total_loss += loss.item()
        acc = self.metrics(predict, answers)
        total_acc += acc * batch_size
        cnt += batch_size
        loss.backward()
        self.optimizer.step()
    with open(self.LOG_PATH, 'a+') as fp:
        fp.write(f'[Run {self.run}, Task {self.task_id}, Epoch {epoch}] [Training] '
                 f'loss : {total_loss / cnt:10.8}, acc : {total_acc / cnt:5.4}, batch_idx : {batch_idx}\n')
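If `self.criteria` is `nn.NLLLoss()` (an assumption; the criterion is not shown in these snippets), the LogSoftmax-then-criterion pattern above is numerically equivalent to applying `nn.CrossEntropyLoss` directly to the raw logits:

    import torch
    import torch.nn as nn

    logits = torch.randn(5, 20)                # batch of 5, 20 candidate answers
    targets = torch.tensor([5, 9, 5, 16, 16])  # target answer indices

    nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
    ce = nn.CrossEntropyLoss()(logits, targets)
    assert torch.allclose(nll, ce)             # the two formulations match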
Example #3
def test(self, epoch):  # `epoch` is used below, so it must be a parameter
    """Evaluate the best saved model on the test split and checkpoint it."""
    self.dataset.mode = 'test'
    dataloader = DataLoad(self.dataset,
                          batch_size=self.batch_size,
                          shuffle=False)
    self.model.load_state_dict(self.best_state)
    self.model.eval()
    total_acc = 0
    cnt = 0

    with torch.no_grad():  # no gradients are needed for evaluation
        for batch_idx, data in enumerate(dataloader):
            contexts, questions, answers = data
            batch_size = len(answers)
            output = self.model(contexts, questions,
                                self.dataset.test_tree_dict)
            predict = self.model.predict(output, questions,
                                         self.dataset.test_tree_dict)
            acc = self.metrics(predict, answers)
            total_acc += acc * batch_size
            cnt += batch_size
    with open(self.LOG_PATH, 'a+') as fp:
        fp.write(f'[Run {self.run}, Task {self.task_id}, Epoch {epoch}] [Test] '
                 f'Accuracy : {total_acc / cnt:5.4}\n')
    os.makedirs('models', exist_ok=True)
    with open(
            f'models/task{self.task_id}_epoch{epoch}_run{self.run}_acc{total_acc / cnt:.4f}.pth',
            'wb') as fp:
        torch.save(self.model.state_dict(), fp)
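The checkpoint written above can be restored later with the standard PyTorch pattern; the file name here is illustrative:

    import torch

    # `model` must be an instance of the same architecture that was saved.
    state = torch.load('models/task1_epoch10_run0_acc0.9500.pth', map_location='cpu')
    model.load_state_dict(state)
    model.eval()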
Example #4
        D['X_val'] = np.load(datadirec + 'X_val.npy')
        D['y_val'] = np.load(datadirec + 'y_val.npy')
        print('Read data from .npy files')
    except Exception:  # a bare `except:` would also swallow KeyboardInterrupt
        print('One or more files are missing from ' + datadirec)
else:
    os.makedirs(datadirec)
    names = 3
    filenames = []
    for i in range(names):
        filenames.append(
            ('/home/rob/Dropbox/ml_projects/deepclust/data/male' + str(i + 1) +
             '.wav', '/home/rob/Dropbox/ml_projects/deepclust/data/female' +
             str(i + 1) + '.wav'))

    dl = DataLoad(seq_len, Nn, low_freq=6000)
    dl.read_data(filenames)
    dl.strip_zero()
    D = dl.return_data()

    for key, value in D.items():  # dict.iteritems() was Python 2 only
        np.save(datadirec + key + '.npy', value)
    print('Finished saving data')

# Obtain some sizes
N = D['X_train'].shape[0]
Nval = D['X_val'].shape[0]

# Visualize some data
row = 10
ind = np.random.choice(D['X_train'].shape[0], row)
f, axarr = plt.subplots(row, 1)
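The example opens mid-way through a cache-or-generate pattern: load arrays from `.npy` files if they exist, otherwise build them with `DataLoad` and save them. A self-contained sketch of the same idea, with illustrative names:

    import os
    import numpy as np

    def load_or_build(datadirec, build_fn):
        """Load cached arrays from .npy files, or build and cache them."""
        keys = ['X_train', 'y_train', 'X_val', 'y_val']
        if os.path.isdir(datadirec):
            try:
                return {k: np.load(os.path.join(datadirec, k + '.npy')) for k in keys}
            except OSError:
                print('One or more files are missing from ' + datadirec)
        os.makedirs(datadirec, exist_ok=True)
        D = build_fn()  # e.g. the DataLoad pipeline shown above
        for key, value in D.items():
            np.save(os.path.join(datadirec, key + '.npy'), value)
        return D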
Example #5
def test_file_exists(self):
    # A DataFrame must be returned when the path exists
    print('testing valid file...')
    DL = DataLoad()
    df = DL.load_csv('data/ross_train.csv', show_details=False)
    self.assertIsInstance(df, pd.DataFrame)
Example #6
def test_invalid_format(self):
    print('testing invalid file format - not CSV...')
    DL = DataLoad()
    df = DL.load_csv('data/transportation.json')
    self.assertIsNone(df)
Example #7
def test_dir(self):
    print('testing dir instead of file...')
    DL = DataLoad()
    df = DL.load_csv('data')
    self.assertIsNone(df)
Example #8
def test_corrupted_file(self):
    print('testing corrupted file...')
    DL = DataLoad()
    df = DL.load_csv('data/corrupted.csv')
    self.assertIsNone(df)
Example #9
def test_file_does_not_exist(self):
    print('testing nonexistent file...')
    DL = DataLoad()
    df = DL.load_csv('data/ross_trai.csv')
    self.assertIsNone(df)
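Together, these tests pin down the contract of `load_csv`: a `pandas.DataFrame` for a readable CSV file, `None` for anything else. A hypothetical, simplified implementation consistent with the tests (not the project's actual code, which also accepts `date` and `target` keywords as seen in Example #11):

    import os
    import pandas as pd

    class DataLoad:
        def load_csv(self, file_path, show_details=True):
            """Return a DataFrame for a valid CSV path, or None on any failure."""
            if not os.path.isfile(file_path):  # missing path, or a directory
                return None
            if not file_path.lower().endswith('.csv'):  # wrong format, e.g. .json
                return None
            try:
                df = pd.read_csv(file_path)
            except (pd.errors.ParserError, pd.errors.EmptyDataError,
                    UnicodeDecodeError):  # corrupted or unreadable file
                return None
            if show_details:
                df.info()
            return df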
Example #10
from dataload import DataLoad
from evaluation import EvaluateModel
from preprocessing import DataSplit
from models import Exponential_Smoothing, AutoArima, EnsembleModels, FBProphet
import matplotlib.pyplot as plt
import pandas as pd
import warnings

warnings.simplefilter("ignore")
from datapreparation import DataVisualization

print('Time Series model composition')

# Loading Data
DL = DataLoad()
df = DL.load_csv('data/ross_train.csv')
df = df[df['Store'] == 1]
df = df[['Date', 'Sales']]
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)

print(df)
# Analyzing using different frequencies
x = []
y = []
title = []
# Daily
x.append(df.index)
y.append(df.Sales)
title.append('Daily')
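The `x`, `y`, and `title` lists are presumably consumed by a plotting helper once the other frequencies have been appended (Example #11 shows the resampling step). A minimal sketch with bare matplotlib; the loop below is an assumption, not code from the source:

    import matplotlib.pyplot as plt

    # One panel per collected series; squeeze=False keeps axes 2-D even for one row.
    fig, axes = plt.subplots(len(x), 1, squeeze=False, figsize=(10, 3 * len(x)))
    for ax, xi, yi, t in zip(axes[:, 0], x, y, title):
        ax.plot(xi, yi)
        ax.set_title(t)
    fig.tight_layout()
    plt.show()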
Example #11
from models import Exponential_Smoothing, Arima, EnsembleModels, FBProphet, RandomForest, XGBoost, SupportVectorRegressor
import warnings

from dataload import DataLoad
from analytics import DataVisualization
from evaluation import EvaluateModel
from preprocessing import FeatureEngineering

warnings.simplefilter("ignore")
print('Time Series model composition')

# Loading Data
DL = DataLoad()
df = DL.load_csv(file_path='data/ross_train.csv',
                 date='Date',
                 target='Sales',
                 show_details=True)

print(df)
# Analyzing using different frequencies
x = []
y = []
title = []
# Daily
x.append(df.index)
y.append(df.Sales)
title.append('Daily')
freq = {'W': 'Weekly', 'M': 'Monthly', 'Y': 'Yearly'}
for f in freq.keys():
    sampled_df = df.resample(f).sum()
    x.append(sampled_df.index)