def __init__(self, mode='train', transform=None, preload=False, name=None,
             data=None, mask_threshold=0):
    Dataset.__init__(self)
    self.mode = mode
    self.transform = transform
    if name is not None:
        self.name = name
    else:
        self.name = mode
    self.mask_threshold = mask_threshold
    if data is None:
        self.data = load_data(self.name, self.mode, preload,
                              self.mask_threshold)
    else:
        self.data = data
def __init__(self, file, need_prepro, pipeline, max_len, mode, d_type):
    Dataset.__init__(self)
    self.cnt = 0

    # needs preprocessing
    if need_prepro:
        with open(file, 'r', encoding='utf-8') as f:
            lines = csv.reader(f, delimiter='\t', quotechar='"')

            # supervised dataset
            if d_type == 'sup':
                if mode == 'eval':
                    sentences = []
                data = []
                for instance in self.get_sup(lines):
                    if mode == 'eval':
                        sentences.append([instance[1]])
                    for proc in pipeline:
                        instance = proc(instance, d_type)
                    data.append(instance)
                self.tensors = [torch.tensor(x, dtype=torch.long)
                                for x in zip(*data)]
                if mode == 'eval':
                    self.tensors.append(sentences)

            # unsupervised dataset
            elif d_type == 'unsup':
                data = {'ori': [], 'aug': []}
                for ori, aug in self.get_unsup(lines):
                    for proc in pipeline:
                        ori = proc(ori, d_type)
                        aug = proc(aug, d_type)
                    self.cnt += 1
                    if self.cnt == 10:  # debug cap: stop after ten pairs
                        break
                    data['ori'].append(ori)  # drop label_id
                    data['aug'].append(aug)  # drop label_id
                ori_tensor = [torch.tensor(x, dtype=torch.long)
                              for x in zip(*data['ori'])]
                aug_tensor = [torch.tensor(x, dtype=torch.long)
                              for x in zip(*data['aug'])]
                self.tensors = ori_tensor + aug_tensor

    # already preprocessed
    else:
        with open(file, 'r', encoding='utf-8') as f:
            data = pd.read_csv(f, sep='\t')

        # supervised dataset:
        # input_ids, segment_ids (input_type_ids), input_mask, label_ids
        if d_type == 'sup':
            input_columns = ['input_ids', 'input_type_ids',
                             'input_mask', 'label_ids']
            # literal_eval turns the stored string "[1, 2, ...]" back into a
            # list; .tolist() is needed because torch.tensor cannot consume a
            # pandas Series of lists directly
            self.tensors = [torch.tensor(data[c].apply(ast.literal_eval).tolist(),
                                         dtype=torch.long)
                            for c in input_columns[:-1]]
            self.tensors.append(
                torch.tensor(data[input_columns[-1]], dtype=torch.long))

        # unsupervised dataset
        elif d_type == 'unsup':
            input_columns = ['ori_input_ids', 'ori_input_type_ids',
                             'ori_input_mask', 'aug_input_ids',
                             'aug_input_type_ids', 'aug_input_mask']
            self.tensors = [torch.tensor(data[c].apply(ast.literal_eval).tolist(),
                                         dtype=torch.long)
                            for c in input_columns]
        else:
            raise ValueError("d_type error: d_type has to be 'sup' or 'unsup'")
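# Each element of `pipeline` above is a callable applied in order to every
# instance. A hypothetical illustration of such a step (the real procs are
# defined elsewhere in the project; this sketch only shows the interface):
def truncate_proc(instance, d_type, max_len=128):
    # keep at most max_len items in every list-valued field of the instance
    return tuple(field[:max_len] if isinstance(field, list) else field
                 for field in instance)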
    if self.sort:
        cut_list.append(index_arr[i * real_batchsize:(i + 1) * real_batchsize])
    else:
        cut_list.append(
            np.arange(i * real_batchsize, (i + 1) * real_batchsize))

output = list()
for i in range(4):
    output.append(self.reprocess(batch, cut_list[i]))

return output


if __name__ == "__main__":
    # Test
    dataset = Dataset('val.txt')
    training_loader = DataLoader(dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=dataset.collate_fn,
                                 drop_last=True,
                                 num_workers=0)
    total_step = hp.epochs * len(training_loader) * hp.batch_size

    cnt = 0
    for i, batchs in enumerate(training_loader):
        for j, data_of_batch in enumerate(batchs):
            mel_target = torch.from_numpy(
                data_of_batch["mel_target"]).float().to(device)
            D = torch.from_numpy(data_of_batch["D"]).int().to(device)
            if mel_target.shape[1] == D.sum().item():
def __init__(self, x, y=None, transform=None):
    Dataset.__init__(self)  # the base Dataset constructor takes no data arguments
    self.x = x
    self.y = torch.LongTensor(y) if y is not None else None
    self.transform = transform
def sample_data_loader():
    dataset = Dataset()
    return DataLoader(dataset)
# Note: this subclass shadows the imported torch.utils.data.Dataset name.
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)


# In[22]:

dataset = Dataset(x, y)
len(dataset)

# In[24]:

train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

# In[25]:

train_loader

# In[34]:
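# Added sanity check (not part of the original notebook): pull one batch
# from the loader and inspect its shapes, which depend on what x and y hold.
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)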
# Test
import torch
import yaml
from torch.utils.data import DataLoader

from utils.utils import to_device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
preprocess_config = yaml.load(open("./config/LJSpeech/preprocess.yaml", "r"),
                              Loader=yaml.FullLoader)
train_config = yaml.load(open("./config/LJSpeech/train.yaml", "r"),
                         Loader=yaml.FullLoader)

train_dataset = Dataset("train.txt", preprocess_config, train_config,
                        sort=True, drop_last=True)
val_dataset = Dataset("val.txt", preprocess_config, train_config,
                      sort=False, drop_last=False)
train_loader = DataLoader(
    train_dataset,
    batch_size=train_config["optimizer"]["batch_size"] * 4,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    plt.close('all')
    with open('./image/' + outfile_name + '.json', 'w') as file_object:
        json.dump(chart_data, file_object)


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.get_device_name(0))
    batch_size = 1024
    hidden_size = 128
    drop_pro = 0.1
    learning_rate = 0.001
    torch.manual_seed(3)

    dataset = Dataset('task1_re_train.csv')
    valset = Dataset('task1_re_val.csv', val=False)
    train_loader = DataLoader(dataset, batch_size=batch_size)

    teacher = Teacher(9, hidden_size, len(dataset.label_id))
    model = Model(9 - 1, hidden_size, len(dataset.label_id))
    teacher.load_state_dict(torch.load('task3_moreF7_2.pkl'))
    model.load_state_dict(torch.load('task3_student_4.pkl'))
    teacher = teacher.to(device)
    model = model.to(device)

    # optimizer = optim.Adam(model.parameters(), lr=learning_rate,
    #                        weight_decay=1e-5)
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=learning_rate,
DEVICE = 'cuda' if cuda else 'cpu'


def _collate_fn(data):
    # data.sort(key=lambda x: x.shape[1], reverse=True)
    # x = []
    # lens = []
    # for d in data:
    #     x += [d[0].permute(1, 0)]
    # clean = pad_sequence(x, batch_first=True, padding_value=0)
    return data[0]


train_x = np.load("adv-medium-to-long.npy", allow_pickle=True)
train_dataset = Dataset(train_x)
# print(train_dataset)
train_loader_args = dict(shuffle=False, batch_size=1, num_workers=0,
                         pin_memory=True, collate_fn=_collate_fn) if cuda \
    else dict(shuffle=True, batch_size=1, collate_fn=_collate_fn)
train_loader = DataLoader(train_dataset, **train_loader_args)

train_clean = np.load("original-medium.npy", allow_pickle=True)
train_clean_dataset = Dataset(train_clean)
# print(train_clean_dataset)
train_clean_loader = DataLoader(train_clean_dataset, **train_loader_args)


class MLP(nn.Module):
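# The commented-out body of _collate_fn above sketches a length-sorted,
# zero-padded batch. A self-contained version of that idea (an illustrative
# sketch, not the collate function this script actually uses):
import torch
from torch.nn.utils.rnn import pad_sequence


def _padding_collate_fn(batch):
    # each element is assumed to be a tuple whose first item is a
    # [channels, time] tensor, as in the commented code above
    batch = sorted(batch, key=lambda d: d[0].shape[1], reverse=True)
    x = [d[0].permute(1, 0) for d in batch]  # -> [time, channels]
    lens = torch.tensor([t.shape[0] for t in x])
    # zero-pad every sequence to the length of the longest one in the batch
    clean = pad_sequence(x, batch_first=True, padding_value=0)
    return clean, lens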
    predicted_labels = []
    for X, _ in loader:
        output = model(X)
        _, predicted = torch.max(output.data, 1)
        predicted_labels.extend(predicted)
    return [x.item() for x in predicted_labels]


if __name__ == "__main__":
    trainfilename = "./data/train.data"
    devfilename = "./data/dev.data"
    testfilename = "./data/test.data"
    outputfilename = "./outputs/extensionthree.output"

    train_X, train_y = readInDataFeatureVector(trainfilename)
    train_dataset = Dataset(train_X, train_y.reshape(-1, 1))
    train_loader = DataLoader(train_dataset)

    test_X, test_y = readInDataFeatureVector(testfilename)
    # labels are ignored by predict(), so test_X doubles as a placeholder target
    test_dataset = Dataset(test_X, test_X)
    test_loader = DataLoader(test_dataset, shuffle=False)

    model = FNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=LEARNING_RATE)
    train(model, train_loader, criterion, optimizer)
    predictions = predict(model, test_loader)
    with open(outputfilename, 'w', encoding='utf8') as f:
import torch
import matplotlib.pyplot as plt
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader

from DatasetCreation import *

model = torch.load("entire_model.pt")

Image_size = 16
composed = transforms.Compose(
    [transforms.Resize((Image_size, Image_size)), transforms.ToTensor()])
val_data = Dataset(train=False, transforms=composed)
val_loader = DataLoader(val_data, batch_size=1)

i = 0
for x, y in val_loader:
    if i > 10:
        break
    z = model(x)
    _, yhat = torch.max(z.data, 1)
    plt.imshow(x.numpy().reshape(Image_size, Image_size), cmap='gray')
    plt.title(f"y = {y}, Predicted = {yhat}")
    plt.show()
    print(yhat == y)
    i += 1
def __init__(self, dataset: Dataset):
    super(MyDataSet, self).__init__()
    dataset.transform = transforms.Compose([transforms.ToTensor()])
    self.dataset = dataset
def prepare_data(config):
    # prepare the train/test data for the model
    print('Loading data......')
    with open(config['load_file'], 'r', encoding='utf8') as f:
        data = f.read().splitlines()

    if config['mode'] == 'train':
        sents, labels = get_single_data(data, config)
    elif config['mode'] == 'test':
        sents, labels = split_single_data(data, config)
    elif config['mode'] == 'valid':
        with open(config['valid_file'], 'r', encoding='utf8') as f:
            data = f.read().splitlines()
        sents, labels = split_single_data(data, config)
    elif config['mode'] == 'multi-train':
        sents, labels = get_multi_data(data, config)
    elif config['mode'] == 'multi-test':
        sents, labels = split_multi_data(data, config)
    elif config['mode'] == 'multi-valid':
        with open(config['valid_file'], 'r', encoding='utf8') as f:
            data = f.read().splitlines()
        sents, labels = split_multi_data(data, config)
    print('Load data done!')

    tokenizer = BertTokenizer.from_pretrained(config['model_name'])
    vocab = tokenizer.vocab
    config['vocab_size'] = len(vocab)

    idx = list()
    for sent in sents:
        if type(sent) != list:
            continue
        tokenized_text = copy.deepcopy(sent)
        for i, c in enumerate(tokenized_text):
            if c in config['token_mapping_rule']:
                tokenized_text[i] = config['token_mapping_rule'][c]
            elif c not in vocab:
                tokenized_text[i] = '[UNK]'
        tokenized_text.insert(0, '[CLS]')
        tokenized_text.append('[SEP]')
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        idx.append(indexed_tokens)

    if config['mode'] == 'train' or config['mode'] == 'multi-train':
        train_data = Dataset(idx, labels)
        train_data_loader = DataLoader(dataset=train_data,
                                       batch_size=config['batch_size'],
                                       shuffle=config['shuffle'],
                                       collate_fn=collate_fn)
        return train_data_loader
    elif config['mode'] in ('test', 'multi-test', 'valid', 'multi-valid'):
        test_data = Dataset(idx, labels)
        test_data_loader = DataLoader(dataset=test_data,
                                      batch_size=config['batch_size'],
                                      shuffle=False,
                                      collate_fn=collate_fn)
        if 'multi' in config['mode']:
            for sent in sents:
                if isinstance(sent, list):
                    del sent[-1]
                    del sent[0]
        return sents, test_data_loader
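# prepare_data hands DataLoader a collate_fn that is defined elsewhere in
# the project. A plausible minimal version (an assumption about its shape,
# not the original) pads each batch of token-id lists to a common length:
import torch


def collate_fn_sketch(batch):
    # batch: list of (indexed_tokens, label) pairs from Dataset.__getitem__
    token_lists, labels = zip(*batch)
    max_len = max(len(t) for t in token_lists)
    padded = [t + [0] * (max_len - len(t)) for t in token_lists]  # 0 == [PAD]
    return torch.tensor(padded, dtype=torch.long), torch.tensor(labels)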
def __init__(self, cfg, root, angle, transforms=None):
    # super(HpatchesV2, self).__init__()
    Dataset.__init__(self)
    self.img_paths = get_img_path(root)
    self.transforms = transforms
    self.angle = angle
'''

if __name__ == "__main__":
    # Check for GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Running On:", device)

    # Define Constants
    num_classes = 10
    num_epochs = 50
    learning_rate = 0.01
    batch_size = 200

    # Load data
    train_data = Dataset(file_name_X="data/x_train.npy",
                         file_name_Y="data/y_train.npy")
    test_data = Dataset(file_name_X="data/x_test.npy",
                        file_name_Y="data/y_test.npy")

    # only do batching and shuffling on train_data
    train_data_loader = DataLoader(dataset=train_data,
                                   batch_size=batch_size,
                                   shuffle=True)

    # instance of the Conv Net
    cnn = CNN(num_classes)
    cnn.to(device)

    # Cross Entropy Loss and Gradient Descent
    crossEntropy = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(cnn.parameters(),
    # out = F.relu(self.conv1(x))
    # out = F.relu(self.conv2(out))
    out = x.view(x.size(0), -1)
    out = torch.sigmoid(self.fc1(out))
    out = torch.sigmoid(self.fc2(out))
    return out


# Specify the network architecture
net = Net()

# Specify the training dataset
dataset = Dataset()
dataset_test = Dataset_test()
train_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=dataset_test, batch_size=64, shuffle=True)


# Visualize the dataset
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.title('Visualize the dataset')
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
for i in image_datasets2['val'].imgs:
    partition['val'].append(i[0])
    labels2[i[0]] = get_class_id(i[0])

# for i in image_datasets2['val_photo'].imgs:
#     partition['val_photo'].append(i[0])
#     labels2[i[0]] = get_class_id(i[0])

batch_size = 10
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}

# train_set = Dataset(partition['train'], labels2,
#                     transform=data_transforms['train'])
# train_generator = data2.DataLoader(train_set, **params)
val_set = Dataset(partition['val'], labels2, transform=data_transforms['val'])
val_generator = data2.DataLoader(val_set, **params)
# val_photo_set = Dataset(partition['val_photo'], labels2,
#                         transform=data_transforms['val_photo'])
# val_photo_generator = data2.DataLoader(val_photo_set, **params)

dataloaders_3 = {}
# dataloaders_3['train'] = train_generator
# dataloaders_3['val'] = val_generator
# dataloaders_3['val_photo'] = val_photo_generator

test_dataloader = val_generator
# test_dataloader2 = val_photo_generator
print("Finished loading data")

"""
best_acc = 0.0
    if args.control:
        word_tokens_train, pos_tokens_train = tasks.make_control(
            tokenizer, word_tokens_train, pos_tokens_train, args.embsize)
    torch_ids_train, torch_masks_train, torch_token_starts, torch_labels_train = \
        r.prepare_data(tokenizer, word_tokens_train, pos_tokens_train)

    # data for training
    split = int(0.75 * len(torch_ids_train))
    # dataset_train = Dataset(torch_ids_train[:split], torch_masks_train[:split],
    #                         torch_labels_train[:split])
    # dataset_dev = Dataset(torch_ids_train[split:], torch_masks_train[split:],
    #                       torch_labels_train[split:])

    config = T5Config.from_pretrained("t5-small",
                                      output_hidden_states=True,
                                      output_attentions=True)
    model = T5ForConditionalGeneration.from_pretrained("t5-small", config=config)
    model.to(device)
    # train(model, dataset_train, dataset_dev, torch_token_starts[split:], tokenizer)

    # quick test on a small slice of the data
    dataset_train = Dataset(torch_ids_train[:200], torch_masks_train[:200],
                            torch_labels_train[:200])
    dataset_dev = Dataset(torch_ids_train[200:400], torch_masks_train[200:400],
                          torch_labels_train[200:400])
    train(model, dataset_train, dataset_dev, torch_token_starts[200:400],
          tokenizer)
    print("done!")
else:
    print("starting to evaluate")
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    word_tokens_test, pos_tokens_test = tasks.pos(
        "UD_English-EWT/en_ewt-ud-test.conllu")
    if args.control:
        word_tokens_test, pos_tokens_test = tasks.make_control(
            tokenizer, word_tokens_test, pos_tokens_test, args.embsize)
    torch_ids_test, torch_masks_test, torch_token_starts, torch_labels_test = \
        r.prepare_data(tokenizer, word_tokens_test, pos_tokens_test)

    # data for evaluating
    sample = {
        'image': img_file,
        'label': self.inverse_transform[img_name.split('/')[1]]
    }
    return sample


# In[4]:

# Change type to tensor
trans_1 = transforms.ToTensor()
trans_2 = transforms.RandomCrop((111, 111), padding=(0, 0), pad_if_needed=True)
trans = transforms.Compose([trans_1])
transformed_dataset = Dataset(trans)

# In[65]:

dataloader = DataLoader(transformed_dataset, batch_size=16, shuffle=True)

# In[70]:

model = LeNet()
model.cuda()

# In[67]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
import json

import torch
import torch.utils.data
from torch.utils.data import Dataset

from models import *
from utils import *

train_loader = torch.utils.data.DataLoader(Dataset(),
                                           batch_size=100,
                                           shuffle=True,
                                           pin_memory=True)

d_model = 512
heads = 8
num_layers = 6
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 10

with open('WORDMAP_corpus.json', 'r') as j:
    word_map = json.load(j)

transformer = Transformer(d_model=d_model,
                          heads=heads,
                          num_layers=num_layers,
                          word_map=word_map)
transformer = transformer.to(device)
adam_optimizer = torch.optim.Adam(transformer.parameters(),
                                  lr=0,
                                  betas=(0.9, 0.98),
                                  eps=1e-9)
transformer_optimizer = AdamWarmup(model_size=d_model,
    Dataset = CaltechDataset
    collate_fn = default_collate
    csv_file = 'test_pairs_caltech_with_category.csv'
elif args.eval_dataset == 'tss':
    Dataset = TSSDataset
    collate_fn = default_collate
    csv_file = 'test_pairs_tss.csv'
elif args.eval_dataset == 'pascal-parts':
    Dataset = PascalPartsDataset
    collate_fn = collate_custom
    csv_file = 'test_pairs_pascal_parts.csv'

cnn_image_size = (args.image_size, args.image_size)

dataset = Dataset(csv_file=os.path.join(args.eval_dataset_path, csv_file),
                  dataset_path=args.eval_dataset_path,
                  transform=NormalizeImageDict(['source_image', 'target_image']),
                  output_size=cnn_image_size)

if use_cuda:
    batch_size = 8
else:
    batch_size = 1

dataloader = DataLoader(dataset, batch_size=batch_size,
                        shuffle=False, num_workers=4,
                        collate_fn=collate_fn)

batch_tnf = BatchTensorToVars(use_cuda=use_cuda)

if args.eval_dataset == 'pf' or args.eval_dataset == 'pf-pascal':
def make_data(hm_channels=1, channel_size=5, min_seq_len=50, max_seq_len=75,
              data_size=200, from_file=None, from_obj=None):
    return Dataset(hm_channels, channel_size, min_seq_len, max_seq_len,
                   data_size, from_file, from_obj)
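# Illustrative use of the factory above (argument values are its declared
# defaults; the loader construction is an assumption, not original code):
from torch.utils.data import DataLoader

train_set = make_data(hm_channels=1, channel_size=5, data_size=200)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)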
print('using Adam optimizer')
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr)

cnn_image_size = (args.image_size, args.image_size)

Dataset = ImagePairDataset
train_csv = 'train_pairs.csv'
test_csv = 'val_pairs.csv'

normalization_tnf = NormalizeImageDict(['source_image', 'target_image'])
batch_preprocessing_fn = BatchTensorToVars(use_cuda=use_cuda)

# Dataset and dataloader
dataset = Dataset(transform=normalization_tnf,
                  dataset_image_path=args.dataset_image_path,
                  dataset_csv_path=args.dataset_csv_path,
                  dataset_csv_file=train_csv,
                  output_size=cnn_image_size)
dataloader = DataLoader(dataset, batch_size=args.batch_size,
                        shuffle=True, num_workers=0)

dataset_test = Dataset(transform=normalization_tnf,
                       dataset_image_path=args.dataset_image_path,
                       dataset_csv_path=args.dataset_csv_path,
                       dataset_csv_file=test_csv,
                       output_size=cnn_image_size)
dataloader_test = DataLoader(dataset_test,
def fit(df, df_kfold):
    skf = StratifiedKFold(n_splits=CFG.num_fold, shuffle=True,
                          random_state=CFG.seed)
    KFOLD = [(idxT, idxV) for i, (idxT, idxV) in enumerate(
        skf.split(np.arange(df_kfold.shape[0]), df_kfold['stratify_group']))]

    # for i in range(5):
    #     (idxT, idxV) = folds[i]
    #     print(f'fold {i + 1}')
    #     print('train')
    #     print((df_kfold['source'].iloc[idxT]).value_counts())
    #     print('val')
    #     print((df_kfold['source'].iloc[idxV]).value_counts())

    for fold, (idxT, idxV) in enumerate(KFOLD, 1):
        LOGGER.info(f"Training starts ... KFOLD: {fold}/{CFG.num_fold}")

        train = df.loc[idxT, :].reset_index(drop=True)
        val = df.loc[idxV, :].reset_index(drop=True)

        dataset_train = Dataset(df=train, data_dir=CFG.data_dir,
                                transform=transform_train())
        loader_train = DataLoader(dataset_train,
                                  batch_size=CFG.batch_size,
                                  sampler=RandomSampler(dataset_train),
                                  pin_memory=False,
                                  drop_last=True,
                                  num_workers=CFG.num_workers,
                                  collate_fn=collate_fn)

        dataset_val = Dataset(df=val, data_dir=CFG.data_dir,
                              transform=transform_val())
        loader_val = DataLoader(dataset_val,
                                batch_size=CFG.batch_size,
                                sampler=SequentialSampler(dataset_val),
                                pin_memory=False,
                                drop_last=True,
                                num_workers=CFG.num_workers,
                                collate_fn=collate_fn)

        config = get_efficientdet_config('tf_efficientdet_d5')
        net = EfficientDet(config, pretrained_backbone=False)
        checkpoint = torch.load(
            '/home/kerrykim/jupyter_notebook/008.wheat_detection/efficientdet/pretrained/efficientdet_d5-ef44aea8.pth'
        )
        # The pretrained weights have to be loaded before the head is
        # modified; to load them afterwards you would need to strip the old
        # head tensors out of the state dict first.
        config.num_classes = 1
        config.image_size = 512
        net.load_state_dict(checkpoint)
        net.class_net = HeadNet(config,
                                num_outputs=config.num_classes,
                                norm_kwargs=dict(eps=.001, momentum=.01))
        net = (DetBenchTrain(net, config)).to(device)

        optim = torch.optim.AdamW(net.parameters(), lr=CFG.lr)
        scheduler = ReduceLROnPlateau(optim, **CFG.scheduler_params)

        # default values
        st_epoch = 0
        best_loss = 1e20

        for epoch in range(st_epoch + 1, CFG.num_epoch + 1):
            # if epoch < 5:
            #     continue
            start_time = time.time()

            # train
            avg_train_loss = train_one_epoch(loader_train, net, optim,
                                             epoch, device)

            # val
            avg_val_loss = val_one_epoch(loader_val, net, device)
            scheduler.step(metrics=avg_val_loss)

            # scoring
            elapsed = time.time() - start_time
            LOGGER.info(
                f'Epoch {epoch} - avg_train_loss: {avg_train_loss:.4f} '
                f'avg_val_loss: {avg_val_loss:.4f} time: {elapsed:.0f}s'
            )

            # save best model
            save_argument = best_loss > avg_val_loss
            best_loss = min(avg_val_loss, best_loss)
            LOGGER.info(f'Epoch {epoch} - Save Best Loss: {best_loss:.4f} Model')
            save_model(ckpt_dir=CFG.ckpt_dir, net=net, num_epoch=CFG.num_epoch,
                       fold=fold, epoch=epoch, batch=CFG.batch_size,
                       save_argument=save_argument)
    with open('./image/' + outfile_name + '.json', 'w') as file_object:
        json.dump(chart_data, file_object)


drop_pro = 0.01

if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(torch.cuda.get_device_name(0))
    batch_size = 256
    hidden_size = 128
    drop_pro = 0.01
    learning_rate = 0.001
    torch.manual_seed(3)

    dataset = Dataset('train.csv')
    valset = Dataset('task2_val.csv', val=False)
    train_loader = DataLoader(dataset, batch_size=batch_size)

    model = Model(14, hidden_size, len(dataset.label_id))
    # model.load_state_dict(torch.load('task2_moreF15_2.pkl'))
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           weight_decay=1e-5)

    max_acc = get_acc(valset, model)
    print(max_acc)
    # get_upload(model)
    # print(get_acc(dataset, model))
    # exit()

    chart_data = {
def __init__(self, config: ModelConfigBase) -> None:
    super().__init__()
    self.config = config
    self.train_data: Dataset = Dataset()
    self.val_data: Dataset = Dataset()
    self.test_data: Dataset = Dataset()
def __init__(self, margin=2.0):
    super(ContrastiveLoss, self).__init__()
    self.margin = margin

def forward(self, output1, output2, label):
    euclidean_distance = F.pairwise_distance(output1, output2)
    loss_contrastive = torch.mean(
        (1 - label) * torch.pow(euclidean_distance, 2) +
        label * torch.pow(
            torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
    return loss_contrastive


if train:
    ## Training Data ##
    trainset = Dataset(train=True, split_size=.8)
    trainloader = DataLoader(dataset=trainset, shuffle=True,
                             batch_size=batch_size)

    model = SiameseNetwork().to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.5, 0.9))
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        avg_loss = []
        for i, (a, b, c) in tqdm(enumerate(trainloader)):
            a, b, c = (a.permute(0, 3, 1, 2).to(device).type(torch.float32),
                       b.permute(0, 3, 1, 2).to(device).type(torch.float32),
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torch.autograd import Variable

# Training settings
batch_size = 64

# MNIST Dataset (torchvision's datasets.MNIST, not torch.utils.data.Dataset)
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
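# Each batch from these loaders is an (images, labels) pair; with ToTensor,
# MNIST images come out as [batch_size, 1, 28, 28] floats in [0, 1].
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])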
# torchaudio.transforms.Spectrogram(n_fft=400, win_length=None, hop_length=None,
#     pad=0, window_fn=None, power=2, normalized=False, wkwargs=None)
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
import torch
import torchaudio
from scipy.io.wavfile import read

if __name__ == '__main__':
    win_length = 128
    window = np.sqrt(scipy.signal.get_window('hann', win_length))
    window = torch.tensor(window).float()

    def window_fn(n):
        # square-root Hann window of length n
        return torch.sqrt(torch.hann_window(n))

    input_dir = '.'
    ds_mix_PT = Dataset(input_dir=input_dir,
                        data_scp='wav.scp',
                        transform=STFT_PT(128, window=window, abs_val=False,
                                          log_val=False, transpose=False))

    # batch size
    bs = 4
    dl_mix = DataLoader(ds_mix_PT, batch_size=bs, shuffle=False,
                        num_workers=1, collate_fn=custom_collate_cp)

    # Loop through data
    for i_batch, sample_batched in enumerate(dl_mix):
        print("{} is dtype, {} is data shape".format(sample_batched[0].dtype,
                                                     sample_batched[0].shape))
        print("Batch index: {} (batch size {})".format(i_batch, bs))
        # Loop through a batch
        for b in sample_batched:
            stft_b = b.float()
            # note: torchaudio.functional.istft is deprecated in newer
            # torchaudio releases in favor of torch.istft
            reconstructed = torchaudio.functional.istft(
                torch.reshape(stft_b, stft_b.shape),
                n_fft=win_length,
                win_length=win_length,
                hop_length=int(win_length / 2),
                window=window)
def __init__(self, obj):
    self.len = len(obj)
    self.x = torch.LongTensor(obj[:, 0])
    self.y = torch.LongTensor(obj[:, 1])

def __getitem__(self, index):
    return (self.x[index], self.y[index])

def __len__(self):
    return self.len


###############################################
# train
dataset = Dataset(pair_ids)
data = DataLoader(dataset=dataset, batch_size=int(dataset.len / 3),
                  shuffle=True)
model = SkipGram(len(uniques), 10)
# import ipdb; ipdb.set_trace()
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
trainer = create_supervised_trainer(model, optimizer, criterion)
evaluator = create_supervised_evaluator(model,
                                        metrics={"accuracy": Accuracy()})


@trainer.on(Events.EPOCH_COMPLETED)
def test_dataloader(self):
    dataset = Dataset()
    return DataLoader(dataset,
                      batch_size=self.train_batchsize,
                      shuffle=True,
                      num_workers=self.num_workers)