    def __init__(self, mode='train', transform=None, preload=False, name=None, data=None, mask_threshold=0):
        Dataset.__init__(self)
        self.mode = mode
        self.transform = transform
        if name is not None:
            self.name = name
        else:
            self.name = mode

        self.mask_threshold = mask_threshold
        if data is None:
            self.data = load_data(self.name, self.mode, preload, self.mask_threshold)
        else:
            self.data = data
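
    # A minimal, assumed sketch of the companion accessors this __init__ implies
    # (they are not part of the original excerpt); self.transform is applied per item:
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample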
Example #2
    def __init__(self, file, need_prepro, pipeline, max_len, mode, d_type):
        Dataset.__init__(self)
        self.cnt = 0

        # need preprocessing
        if need_prepro:
            with open(file, 'r', encoding='utf-8') as f:
                lines = csv.reader(f, delimiter='\t', quotechar='"')

                # supervised dataset
                if d_type == 'sup':
                    if mode == 'eval':
                        sentences = []
                    data = []

                    for instance in self.get_sup(lines):
                        if mode == 'eval':
                            sentences.append([instance[1]])
                        for proc in pipeline:
                            instance = proc(instance, d_type)
                        data.append(instance)

                    self.tensors = [
                        torch.tensor(x, dtype=torch.long) for x in zip(*data)
                    ]
                    if mode == 'eval':
                        self.tensors.append(sentences)

                # unsupervised dataset
                elif d_type == 'unsup':
                    data = {'ori': [], 'aug': []}
                    for ori, aug in self.get_unsup(lines):
                        for proc in pipeline:
                            ori = proc(ori, d_type)
                            aug = proc(aug, d_type)
                        self.cnt += 1
                        if self.cnt == 10:
                            break
                        data['ori'].append(ori)  # drop label_id
                        data['aug'].append(aug)  # drop label_id
                    ori_tensor = [
                        torch.tensor(x, dtype=torch.long)
                        for x in zip(*data['ori'])
                    ]
                    aug_tensor = [
                        torch.tensor(x, dtype=torch.long)
                        for x in zip(*data['aug'])
                    ]
                    self.tensors = ori_tensor + aug_tensor
        # already preprocessed
        else:
            f = open(file, 'r', encoding='utf-8')
            data = pd.read_csv(f, sep='\t')

            # supervised dataset
            if d_type == 'sup':
                # input_ids, segment_ids(input_type_ids), input_mask, input_label
                input_columns = [
                    'input_ids', 'input_type_ids', 'input_mask', 'label_ids'
                ]
                self.tensors = [
                    torch.tensor(data[c].apply(ast.literal_eval), dtype=torch.long)
                    for c in input_columns[:-1]
                ]
                self.tensors.append(
                    torch.tensor(data[input_columns[-1]], dtype=torch.long))

            # unsupervised dataset
            elif d_type == 'unsup':
                input_columns = [
                    'ori_input_ids', 'ori_input_type_ids', 'ori_input_mask',
                    'aug_input_ids', 'aug_input_type_ids', 'aug_input_mask'
                ]
                self.tensors = [
                    torch.tensor(data[c].apply(ast.literal_eval), dtype=torch.long)
                    for c in input_columns
                ]

            else:
                raise ValueError("d_type error: d_type must be 'sup' or 'unsup'")
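
    # Assumed sketch (not part of the original excerpt) of how self.tensors is
    # typically consumed; each item is a tuple taken across all stored tensors:
    def __getitem__(self, index):
        return tuple(tensor[index] for tensor in self.tensors)

    def __len__(self):
        return self.tensors[0].size(0)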
Example #3
            if self.sort:
                cut_list.append(index_arr[i * real_batchsize:(i + 1) *
                                          real_batchsize])
            else:
                cut_list.append(
                    np.arange(i * real_batchsize, (i + 1) * real_batchsize))

        output = list()
        for i in range(4):
            output.append(self.reprocess(batch, cut_list[i]))

        return output

if __name__ == "__main__":
    # Test
    dataset = Dataset('val.txt')
    training_loader = DataLoader(dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=dataset.collate_fn,
                                 drop_last=True,
                                 num_workers=0)
    total_step = hp.epochs * len(training_loader) * hp.batch_size

    cnt = 0
    for i, batchs in enumerate(training_loader):
        for j, data_of_batch in enumerate(batchs):
            mel_target = torch.from_numpy(
                data_of_batch["mel_target"]).float().to(device)
            D = torch.from_numpy(data_of_batch["D"]).int().to(device)
            if mel_target.shape[1] == D.sum().item():
Example #4
    def __init__(self, x, y=None, transform=None):
        Dataset.__init__(self)  # the base Dataset constructor takes no arguments
        self.x = x
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
Example #5
def sample_data_loader():
    dataset = Dataset()
    return DataLoader(dataset)
Example #6
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)


# In[22]:

dataset = Dataset(x, y)
len(dataset)

# In[24]:

train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

# In[25]:

train_loader

# In[34]:

Example #7
    # Test
    import torch
    import yaml
    from torch.utils.data import DataLoader
    from utils.utils import to_device

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    preprocess_config = yaml.load(open("./config/LJSpeech/preprocess.yaml",
                                       "r"),
                                  Loader=yaml.FullLoader)
    train_config = yaml.load(open("./config/LJSpeech/train.yaml", "r"),
                             Loader=yaml.FullLoader)

    train_dataset = Dataset("train.txt",
                            preprocess_config,
                            train_config,
                            sort=True,
                            drop_last=True)
    val_dataset = Dataset("val.txt",
                          preprocess_config,
                          train_config,
                          sort=False,
                          drop_last=False)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_config["optimizer"]["batch_size"] * 4,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
    )
    val_loader = DataLoader(
        val_dataset,
Example #8

    plt.close('all')
    with open('./image/' + outfile_name + '.json', 'w') as file_object:
        json.dump(chart_data, file_object)


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.get_device_name(0))

    batch_size = 1024
    hidden_size = 128
    drop_pro = 0.1
    learning_rate = 0.001

    torch.manual_seed(3)
    dataset = Dataset('task1_re_train.csv')
    valset = Dataset('task1_re_val.csv', val=False)
    train_loader = DataLoader(dataset, batch_size=batch_size)

    teacher = Teacher(9, hidden_size, len(dataset.label_id))
    model = Model(9 - 1, hidden_size, len(dataset.label_id))

    teacher.load_state_dict(torch.load('task3_moreF7_2.pkl'))
    model.load_state_dict(torch.load('task3_student_4.pkl'))

    teacher = teacher.to(device)
    model = model.to(device)

    # optimizer = optim.Adam(model.parameters(), lr=learning_rate,weight_decay=1e-5)
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=learning_rate,
Example #9
DEVICE = 'cuda' if cuda else 'cpu'


def _collate_fn(data):
    # data.sort(key=lambda x: x.shape[1], reverse=True)
    # x = []
    # lens = []
    # for d in data:
    #     x += [d[0].permute(1, 0)]
    # clean = pad_sequence(x, batch_first=True, padding_value=0)
    return data[0]
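
# Hedged sketch (an assumption, not the author's final collate) of what the
# commented-out padding logic above might look like if it were enabled:
import torch
from torch.nn.utils.rnn import pad_sequence


def _collate_fn_padded(batch):
    # longest clip first, channel axis moved last, then zero-pad to a common length
    batch = sorted(batch, key=lambda d: d[0].shape[1], reverse=True)
    xs = [d[0].permute(1, 0) for d in batch]
    lengths = torch.tensor([x.shape[0] for x in xs])
    clean = pad_sequence(xs, batch_first=True, padding_value=0)
    return clean, lengths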


train_x = np.load("adv-medium-to-long.npy", allow_pickle=True)

train_dataset = Dataset(train_x)
# print(train_dataset)
train_loader_args = dict(shuffle=False,
                         batch_size=1,
                         num_workers=0,
                         pin_memory=True,
                         collate_fn=_collate_fn) if cuda \
    else dict(shuffle=True, batch_size=1, collate_fn=_collate_fn)
train_loader = DataLoader(train_dataset, **train_loader_args)
train_clean = np.load("original-medium.npy", allow_pickle=True)
train_clean_dataset = Dataset(train_clean)
# print(train_clean_dataset)
train_clean_loader = DataLoader(train_clean_dataset, **train_loader_args)


class MLP(nn.Module):
Example #10

def predict(model, loader):  # signature reconstructed from the call to predict() below
    predicted_labels = []
    for X, _ in loader:
        output = model(X)
        _, predicted = torch.max(output.data, 1)
        predicted_labels.extend(predicted)
    return [x.item() for x in predicted_labels]


if __name__ == "__main__":
    trainfilename = "./data/train.data"
    devfilename = "./data/dev.data"
    testfilename = "./data/test.data"
    outputfilename = "./outputs/extensionthree.output"

    train_X, train_y = readInDataFeatureVector(trainfilename)
    train_dataset = Dataset(train_X, train_y.reshape(-1, 1))
    train_loader = DataLoader(train_dataset)

    test_X, test_y = readInDataFeatureVector(testfilename)
    test_dataset = Dataset(test_X, test_X)
    test_loader = DataLoader(test_dataset, shuffle=False)

    model = FNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=LEARNING_RATE)

    train(model, train_loader, criterion, optimizer)

    predictions = predict(model, test_loader)

    with open(outputfilename, 'w', encoding='utf8') as f:
Example #11
import torch
import matplotlib.pyplot as plt
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from DatasetCreation import *

model = torch.load("entire_model.pt")

Image_size = 16
composed = transforms.Compose(
    [transforms.Resize((Image_size, Image_size)),
     transforms.ToTensor()])

val_data = Dataset(train=False, transforms=composed)
val_loader = DataLoader(val_data, batch_size=1)
i = 0
for x, y in val_loader:
    if i > 10:
        break
    z = model(x)
    _, yhat = torch.max(z.data, 1)
    plt.imshow(x.numpy().reshape(Image_size, Image_size), cmap='gray')
    plt.title(f"y = {y}, Predicted = {yhat}")
    plt.show()
    print((yhat == y))
    i += 1
Example #12
    def __init__(self, dataset: Dataset):
        super(MyDataSet, self).__init__()
        dataset.transform = transforms.Compose([transforms.ToTensor()])
        self.dataset = dataset
Example #13
def prepare_data(config):
    # prepare the train/test data for the model
    print('Loading data......')
    with open(config['load_file'], 'r', encoding='utf8') as f:
        data = f.read().splitlines()
    if config['mode'] == 'train':
        sents, labels = get_single_data(data, config)
    elif config['mode'] == 'test':
        sents, labels = split_single_data(data, config)
    elif config['mode'] == 'valid':
        with open(config['valid_file'], 'r', encoding='utf8') as f:
            data = f.read().splitlines()
        sents, labels = split_single_data(data, config)
    elif config['mode'] == 'multi-train':
        sents, labels = get_multi_data(data, config)
    elif config['mode'] == 'multi-test':
        sents, labels = split_multi_data(data, config)
    elif config['mode'] == 'multi-valid':
        with open(config['valid_file'], 'r', encoding='utf8') as f:
            data = f.read().splitlines()
        sents, labels = split_multi_data(data, config)

    print('Load data done!')
    tokenizer = BertTokenizer.from_pretrained(config['model_name'])
    vocab = tokenizer.vocab
    config['vocab_size'] = len(vocab)
    idx = list()
    for sent in sents:
        if type(sent) != list:
            continue
        tokenized_text = copy.deepcopy(sent)
        for i, c in enumerate(tokenized_text):
            if c in config['token_mapping_rule']:
                tokenized_text[i] = config['token_mapping_rule'][c]
            elif c not in vocab:
                tokenized_text[i] = '[UNK]'
        tokenized_text.insert(0, '[CLS]')
        tokenized_text.append('[SEP]')
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        idx.append(indexed_tokens)
    if config['mode'] == 'train' or config['mode'] == 'multi-train':
        train_data = Dataset(idx, labels)
        train_data_loader = DataLoader(dataset=train_data,
                                       batch_size=config['batch_size'],
                                       shuffle=config['shuffle'],
                                       collate_fn=collate_fn)
        return train_data_loader
    elif config['mode'] == 'test' or config['mode'] == 'multi-test' or config[
            'mode'] == 'valid' or config['mode'] == 'multi-valid':
        test_data = Dataset(idx, labels)
        test_data_loader = DataLoader(dataset=test_data,
                                      batch_size=config['batch_size'],
                                      shuffle=False,
                                      collate_fn=collate_fn)
        if 'multi' in config['mode']:
            for sent in sents:
                if type(sent) != list:
                    pass
                else:
                    del sent[-1]
                    del sent[0]
        return sents, test_data_loader
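
# Assumed usage sketch for prepare_data; the keys below are inferred from the
# function body above and the values are placeholders (helpers such as
# get_single_data and collate_fn may require additional config entries):
example_config = {
    'load_file': 'train.txt',
    'mode': 'train',
    'model_name': 'bert-base-chinese',
    'token_mapping_rule': {},
    'batch_size': 32,
    'shuffle': True,
}
train_data_loader = prepare_data(example_config)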
Example #14

    def __init__(self, cfg, root, angle, transforms=None):
        # super(HpatchesV2, self).__init__()
        Dataset.__init__(self)
        self.img_paths = get_img_path(root)
        self.transforms = transforms
        self.angle = angle
Example #15
if __name__ == "__main__":

    # Check for GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Running On:", device)

    # Define Constants
    num_classes = 10
    num_epochs = 50
    learning_rate = 0.01
    batch_size = 200

    # Load data
    train_data = Dataset(file_name_X="data/x_train.npy",
                         file_name_Y="data/y_train.npy")
    test_data = Dataset(file_name_X="data/x_test.npy",
                        file_name_Y="data/y_test.npy")

    # only do batching and shuffling on train_data
    train_data_loader = DataLoader(dataset=train_data,
                                   batch_size=batch_size,
                                   shuffle=True)

    #instance of the Conv Net
    cnn = CNN(num_classes)
    cnn.to(device)

    # Cross Entropy Loss and Gradient Descent
    crossEntropy = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(cnn.parameters(),
Example #16

    def forward(self, x):  # method header reconstructed from context; the class body above is not shown
        #out = F.relu(self.conv1(x))
        #out = F.relu(self.conv2(out))

        out = x.view(x.size(0), -1)

        out = F.sigmoid(self.fc1(out))
        out = F.sigmoid(self.fc2(out))

        return out

# Specify the network architecture
net = Net()

# Specify the training dataset
dataset = Dataset()
dataset_test=Dataset_test()

train_loader = DataLoader(dataset=dataset,
                          batch_size=64,
                          shuffle=True)


test_loader = DataLoader(dataset=dataset_test, batch_size=64, shuffle=True)

# Visualize the dataset
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.title('Visualize the dataset')
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
Example #17

#
for i in image_datasets2['val'].imgs:
    partition['val'].append(i[0])
    labels2[i[0]] = get_class_id(i[0])

# for i in image_datasets2['val_photo'].imgs:
#     partition['val_photo'].append(i[0])
#     labels2[i[0]] = get_class_id(i[0])

batch_size = 10
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}

#train_set = Dataset(partition['train'], labels2, transform = data_transforms['train'])
#train_generator = data2.DataLoader(train_set, **params)

val_set = Dataset(partition['val'], labels2, transform=data_transforms['val'])
val_generator = data2.DataLoader(val_set, **params)

# val_photo_set = Dataset(partition['val_photo'], labels2, transform = data_transforms['val_photo'])
# val_photo_generator = data2.DataLoader(val_photo_set, **params)

dataloaders_3 = {}
#dataloaders_3['train'] = train_generator
#dataloaders_3['val'] = val_generator
# dataloaders_3['val_photo'] = val_photo_generator
test_dataloader = val_generator
# test_dataloader2 = val_photo_generator

print("Finished loading data")
best_acc = 0.0
Example #18
        if args.control:
            word_tokens_train, pos_tokens_train = tasks.make_control(tokenizer, word_tokens_train, pos_tokens_train, args.embsize)

        torch_ids_train, torch_masks_train, torch_token_starts, torch_labels_train = r.prepare_data(tokenizer, word_tokens_train, pos_tokens_train)

        # data for training
        split = int(0.75 * len(torch_ids_train))
        #dataset_train = Dataset(torch_ids_train[:split], torch_masks_train[:split], torch_labels_train[:split])
        #dataset_dev = Dataset(torch_ids_train[split:], torch_masks_train[split:], torch_labels_train[split:])
        config = T5Config.from_pretrained("t5-small", output_hidden_states=True, output_attentions=True)
        model = T5ForConditionalGeneration.from_pretrained("t5-small", config=config)
        model.to(device)
        #train(model, dataset_train, dataset_dev, torch_token_starts[split:], tokenizer)

        # 100 values test
        dataset_train = Dataset(torch_ids_train[:200], torch_masks_train[:200], torch_labels_train[:200])
        dataset_dev = Dataset(torch_ids_train[200:400], torch_masks_train[200:400], torch_labels_train[200:400])

        train(model, dataset_train, dataset_dev, torch_token_starts[200:400], tokenizer)

        print("done!")

    else:
        print("starting to evaluate")
        tokenizer = T5Tokenizer.from_pretrained("t5-small")
        word_tokens_test, pos_tokens_test = tasks.pos("UD_English-EWT/en_ewt-ud-test.conllu")
        if args.control:
            word_tokens_test, pos_tokens_test = tasks.make_control(tokenizer, word_tokens_test, pos_tokens_test, args.embsize)
        torch_ids_test, torch_masks_test, torch_token_starts, torch_labels_test = r.prepare_data(tokenizer, word_tokens_test, pos_tokens_test)

        # data for evaluating
Example #19

        sample = {
            'image': img_file,
            'label': self.inverse_transform[img_name.split('/')[1]]
        }

        return sample


# In[4]:

# Change type to tensor
trans_1 = transforms.ToTensor()
trans_2 = transforms.RandomCrop((111, 111), padding=(0, 0), pad_if_needed=True)
trans = transforms.Compose([trans_1])

transformed_dataset = Dataset(trans)

# In[65]:

dataloader = DataLoader(transformed_dataset, batch_size=16, shuffle=True)

# In[70]:

model = LeNet()
model.cuda()

# In[67]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
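
# Minimal training-loop sketch (assumed; the original notebook cell stops before
# the loop). Each batch is the sample dict produced by the Dataset above:
for epoch in range(5):
    for batch in dataloader:
        images = batch['image'].cuda()
        labels = batch['label'].cuda()
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()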
Example #20

import json
import torch
from torch.utils.data import Dataset
import torch.utils.data
from models import *
from utils import *

train_loader = torch.utils.data.DataLoader(Dataset(),
                                           batch_size=100,
                                           shuffle=True,
                                           pin_memory=True)

d_model = 512
heads = 8
num_layers = 6
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 10

with open('WORDMAP_corpus.json', 'r') as j:
    word_map = json.load(j)

transformer = Transformer(d_model=d_model,
                          heads=heads,
                          num_layers=num_layers,
                          word_map=word_map)
transformer = transformer.to(device)
adam_optimizer = torch.optim.Adam(transformer.parameters(),
                                  lr=0,
                                  betas=(0.9, 0.98),
                                  eps=1e-9)
transformer_optimizer = AdamWarmup(model_size=d_model,
Example #21
    Dataset = CaltechDataset
    collate_fn = default_collate
    csv_file = 'test_pairs_caltech_with_category.csv'
elif args.eval_dataset=='tss':
    Dataset = TSSDataset
    collate_fn = default_collate
    csv_file = 'test_pairs_tss.csv'
elif args.eval_dataset=='pascal-parts':
    Dataset = PascalPartsDataset
    collate_fn = collate_custom
    csv_file = 'test_pairs_pascal_parts.csv'
    
cnn_image_size=(args.image_size,args.image_size)

dataset = Dataset(csv_file=os.path.join(args.eval_dataset_path, csv_file),
                  dataset_path=args.eval_dataset_path,
                  transform=NormalizeImageDict(['source_image','target_image']),
                  output_size=cnn_image_size)

if use_cuda:
    batch_size=8
else:
    batch_size=1

dataloader = DataLoader(dataset, batch_size=batch_size,
                        shuffle=False, num_workers=4,
                        collate_fn=collate_fn)

batch_tnf = BatchTensorToVars(use_cuda=use_cuda)


if args.eval_dataset=='pf' or args.eval_dataset=='pf-pascal':  
Example #22
def make_data(hm_channels=1, channel_size=5, min_seq_len=50, max_seq_len=75, data_size=200, from_file=None, from_obj=None):
    return Dataset(hm_channels, channel_size, min_seq_len, max_seq_len, data_size, from_file, from_obj)
Example #23
print('using Adam optimizer')
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr)

cnn_image_size = (args.image_size, args.image_size)

Dataset = ImagePairDataset
train_csv = 'train_pairs.csv'
test_csv = 'val_pairs.csv'
normalization_tnf = NormalizeImageDict(['source_image', 'target_image'])
batch_preprocessing_fn = BatchTensorToVars(use_cuda=use_cuda)

# Dataset and dataloader
dataset = Dataset(transform=normalization_tnf,
                  dataset_image_path=args.dataset_image_path,
                  dataset_csv_path=args.dataset_csv_path,
                  dataset_csv_file=train_csv,
                  output_size=cnn_image_size)

dataloader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        shuffle=True,
                        num_workers=0)

dataset_test = Dataset(transform=normalization_tnf,
                       dataset_image_path=args.dataset_image_path,
                       dataset_csv_path=args.dataset_csv_path,
                       dataset_csv_file=test_csv,
                       output_size=cnn_image_size)

dataloader_test = DataLoader(dataset_test,
Example #24

def fit(df, df_kfold):
    skf = StratifiedKFold(n_splits=CFG.num_fold,
                          shuffle=True,
                          random_state=CFG.seed)

    KFOLD = [(idxT, idxV) for i, (idxT, idxV) in enumerate(
        skf.split(np.arange(df_kfold.shape[0]), df_kfold['stratify_group']))]

    # for i in range(5):
    #     (idxT, idxV) = folds[i]
    #     print(f'fold {i + 1}')
    #     print('train')
    #     print((df_kfold['source'].iloc[idxT]).value_counts())
    #     print('val')
    #     print((df_kfold['source'].iloc[idxV]).value_counts())

    for fold, (idxT, idxV) in enumerate(KFOLD, 1):
        LOGGER.info(f"Training starts ... KFOLD: {fold}/{CFG.num_fold}")

        train = df.loc[idxT, :].reset_index(drop=True)
        val = df.loc[idxV, :].reset_index(drop=True)

        dataset_train = Dataset(df=train,
                                data_dir=CFG.data_dir,
                                transform=transform_train())
        loader_train = DataLoader(dataset_train,
                                  batch_size=CFG.batch_size,
                                  sampler=RandomSampler(dataset_train),
                                  pin_memory=False,
                                  drop_last=True,
                                  num_workers=CFG.num_workers,
                                  collate_fn=collate_fn)

        dataset_val = Dataset(df=val,
                              data_dir=CFG.data_dir,
                              transform=transform_val())
        loader_val = DataLoader(dataset_val,
                                batch_size=CFG.batch_size,
                                sampler=SequentialSampler(dataset_val),
                                pin_memory=False,
                                drop_last=True,
                                num_workers=CFG.num_workers,
                                collate_fn=collate_fn)

        config = get_efficientdet_config('tf_efficientdet_d5')
        net = EfficientDet(config, pretrained_backbone=False)
        checkpoint = torch.load(
            '/home/kerrykim/jupyter_notebook/008.wheat_detection/efficientdet/pretrained/efficientdet_d5-ef44aea8.pth'
        )

        # the pretrained weights need to be loaded first, before you modify the head;
        # otherwise you have to filter the old head tensors out of the state dict
        # if you want to do it afterwards.
        config.num_classes = 1
        config.image_size = 512
        net.load_state_dict(checkpoint)
        net.class_net = HeadNet(config,
                                num_outputs=config.num_classes,
                                norm_kwargs=dict(eps=.001, momentum=.01))
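        # Alternative sketch (an assumption, not what this script does): had the head
        # been replaced before loading, the old head tensors could be filtered out of
        # the checkpoint and loaded non-strictly, e.g.
        #   backbone_only = {k: v for k, v in checkpoint.items()
        #                    if not k.startswith('class_net')}
        #   net.load_state_dict(backbone_only, strict=False)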

        net = (DetBenchTrain(net, config)).to(device)

        optim = torch.optim.AdamW(net.parameters(), lr=CFG.lr)
        scheduler = ReduceLROnPlateau(optim, **CFG.scheduler_params)

        # default value
        st_epoch = 0
        best_loss = 1e20

        for epoch in range(st_epoch + 1, CFG.num_epoch + 1):
            # if epoch < 5:
            #     continue
            start_time = time.time()

            # train
            avg_train_loss = train_one_epoch(loader_train, net, optim, epoch,
                                             device)

            # val
            avg_val_loss = val_one_epoch(loader_val, net, device)

            scheduler.step(metrics=avg_val_loss)

            # scoring
            elapsed = time.time() - start_time

            LOGGER.info(
                f'Epoch {epoch} - avg_train_loss: {avg_train_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s'
            )

            # save best model
            save_argument = best_loss > avg_val_loss
            best_loss = min(avg_val_loss, best_loss)

            LOGGER.info(
                f'Epoch {epoch} - Save Best Loss: {best_loss:.4f} Model')

            save_model(ckpt_dir=CFG.ckpt_dir,
                       net=net,
                       num_epoch=CFG.num_epoch,
                       fold=fold,
                       epoch=epoch,
                       batch=CFG.batch_size,
                       save_argument=save_argument)
Example #25

    with open('./image/' + outfile_name + '.json', 'w') as file_object:
        json.dump(chart_data, file_object)


drop_pro = 0.01
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.get_device_name(0))

    batch_size = 256
    hidden_size = 128
    drop_pro = 0.01
    learning_rate = 0.001

    torch.manual_seed(3)
    dataset = Dataset('train.csv')
    valset = Dataset('task2_val.csv', val=False)
    train_loader = DataLoader(dataset, batch_size=batch_size)

    model = Model(14, hidden_size, len(dataset.label_id))
    # model.load_state_dict(torch.load('task2_moreF15_2.pkl'))
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=1e-5)
    max_acc = get_acc(valset, model)
    print(max_acc)
    # get_upload(model)
    # print(get_acc(dataset,model))
    # exit()
    chart_data = {
Example #26

    def __init__(self, config: ModelConfigBase) -> None:
        super().__init__()
        self.config = config
        self.train_data: Dataset = Dataset()
        self.val_data: Dataset = Dataset()
        self.test_data: Dataset = Dataset()
Example #27
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2)
        loss_contrastive = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2) + (label) *
            torch.pow(torch.clamp(self.margin -
                                  euclidean_distance, min=0.0), 2))
        return loss_contrastive


if train == True:
    ## Training Data ##
    trainset = Dataset(train=True, split_size=.8)
    trainloader = DataLoader(dataset=trainset,
                             shuffle=True,
                             batch_size=batch_size)

    model = SiameseNetwork().to(device)
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.5, 0.9))
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        avg_loss = []
        for i, (a, b, c) in tqdm(enumerate(trainloader)):
            a, b, c = (a.permute(0, 3, 1, 2).to(device).type(torch.float32),
                       b.permute(0, 3, 1, 2).to(device).type(torch.float32),
Example #28
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torch.autograd import Variable

# Training settings
batch_size = 64

# MNIST Dataset
# MNIST lives in torchvision.datasets; torch.utils.data.Dataset has no MNIST attribute
train_dataset = datasets.MNIST(root='./data/',
                              train=True,
                              transform=transforms.ToTensor(),
                              download=True)

test_dataset = datasets.MNIST(root='./data/',
                             train=False,
                             transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
Example #29

#torchaudio.transforms.Spectrogram(n_fft=400, win_length=None, hop_length=None, pad=0, window_fn=None, power=2, normalized=False, wkwargs=None)
import scipy.signal
import torch
import torchaudio
import matplotlib.pyplot as plt
from scipy.io.wavfile import read

if __name__ == '__main__':
    import numpy as np
    win_length = 128
    window = np.sqrt(scipy.signal.get_window('hann', win_length))
    window = torch.tensor(window).float()
    window_fn = torch.hann_window
    def window_fn(length):
        # square-root Hann window; the original applied torch.sqrt to the function
        # object itself, which would fail when called
        return torch.sqrt(torch.hann_window(length))
    input_dir = '.'
    ds_mix_PT = Dataset(input_dir=input_dir, data_scp='wav.scp', transform=STFT_PT(128, window=window, abs_val=False, log_val=False, transpose=False))
    # batch size
    bs = 4
    dl_mix = DataLoader(ds_mix_PT, batch_size=bs, shuffle=False, num_workers=1, collate_fn=custom_collate_cp)

    # Loop through data
    for i_batch, sample_batched in enumerate(dl_mix):
        print("{} is dtype, {} is data shape".format(sample_batched[0].dtype,sample_batched[0].shape))
        print("Batch index: {} (batch size {})".format(i_batch, bs))
        # Loop throug a batch
        for b in sample_batched:

            stft_b = b.float()
            reconstructed = torchaudio.functional.istft(
                    torch.reshape(stft_b, stft_b.shape), n_fft=win_length,
                    win_length=win_length, hop_length=int(win_length / 2), window=window)
Example #30
    def __init__(self, obj):
        self.len = len(obj)
        self.x = torch.LongTensor(obj[:, 0])
        self.y = torch.LongTensor(obj[:, 1])

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return self.len


###############################################
# train

dataset = Dataset(pair_ids)
data = DataLoader(dataset=dataset,
                  batch_size=int(dataset.len / 3),
                  shuffle=True)

model = SkipGram(len(uniques), 10)
# import ipdb; ipdb.set_trace()
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

trainer = create_supervised_trainer(model, optimizer, criterion)
evaluator = create_supervised_evaluator(model,
                                        metrics={"accuracy": Accuracy()})


@trainer.on(Events.EPOCH_COMPLETED)
Example #31
    def test_dataloader(self):
        dataset = Dataset()
        return DataLoader(dataset,
                          batch_size=self.train_batchsize,
                          shuffle=True,
                          num_workers=self.num_workers)