def __init__(self, batch=1):
    """Build shuffled train/test DataLoaders over the Russian CommonVoice data.

    Parameters:
        batch: batch size used for both the train and the test loader.
    """
    self.batch = batch
    # Both splits live under the same dataset root.
    data_root = '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru'
    # Training split.
    self.trainloader = DL(
        CommonVoice(root_dir=data_root, type='train'),
        batch_size=self.batch,
        shuffle=True,
        num_workers=2)
    # Test split — shuffled as well, mirroring the training configuration.
    self.testloader = DL(
        CommonVoice(root_dir=data_root, type='test'),
        batch_size=self.batch,
        shuffle=True,
        num_workers=2)
def get_data_loaders(args):
    """Create train/val/test DataLoaders from the prepared ImageFolder trees.

    Training data gets augmentation (random resized crop + horizontal flip);
    val and test are only resized, center-cropped and normalized.

    Parameters:
        args: namespace with input_size, batch_size, device, display_images.
    Returns:
        dict mapping each phase in cfg.PHASES to its DataLoader.
    """
    def _eval_transform():
        # Deterministic pipeline shared by the 'val' and 'test' phases.
        return transforms.Compose([
            transforms.Resize(args.input_size),
            transforms.CenterCrop(args.input_size),
            transforms.ToTensor(),
            cfg.NORMALIZE
        ])

    trans_dict = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(args.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            cfg.NORMALIZE
        ]),
        'val': _eval_transform(),
        'test': _eval_transform(),
    }

    # One ImageFolder per phase, rooted at <DATA_DIR>/prepared/<phase>.
    img_folders = {}
    for phase in cfg.PHASES:
        img_folders[phase] = IF(
            os.path.join(cfg.DATA_DIR, 'prepared', phase), trans_dict[phase])

    # When using CUDA, set num_workers=1 and pin_memory=True.
    on_cuda = args.device == 'cuda'
    data_loaders = {}
    for phase in cfg.PHASES:
        data_loaders[phase] = DL(img_folders[phase],
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=int(on_cuda),
                                 pin_memory=on_cuda)

    if args.display_images:
        display_single(args, img_folders, img_id=153)  # 153 is arbitrarily chosen
        display_multiple(args, data_loaders, N=4)  # display 4 images
    return data_loaders
def renew(self, resl):
    """Reconfigure batch size, image size, transforms and the DataLoader
    for the given resolution level (image side = 2**resl).
    """
    print(
        '[*] Renew data_loader configuration, load image from {}.'.format(
            self.root))
    side = int(pow(2, resl))
    # Batch size is looked up per resolution in the batch table.
    self.batch_size = int(self.batch_table[pow(2, resl)])
    self.im_size = side
    print('[*] batch_size = {} , im_size = ({},{})'.format(
        self.batch_size, self.im_size, self.im_size))
    # Rebuild the transform pipeline for the new resolution; NEAREST
    # interpolation is kept exactly as configured originally.
    self.dataset.transforms = transforms.Compose([
        transforms.Resize(size=(side, side), interpolation=Image.NEAREST),
        transforms.ToTensor(),
    ])
    print('[*] transforms = {})'.format(self.dataset.transforms))
    self.data_loader = DL(dataset=self.dataset,
                          batch_size=self.batch_size,
                          shuffle=True,
                          num_workers=self.num_workers)
# NOTE(review): this class shadows the (presumably torch) Dataset name it
# inherits from — consider renaming in a follow-up.
class Dataset(Dataset):
    """Minimal in-memory dataset over parallel feature/label sequences."""

    def __init__(self, x, y):
        # x: features, y: labels; assumed equal length — TODO confirm.
        self.x = x
        self.y = y

    def __getitem__(self, index):
        # Return one (feature, label) pair.
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)


# X and y are defined elsewhere in the file (not visible in this chunk).
dataset = Dataset(X, y)

# Loading Data:
train_loader = DL(dataset = dataset, batch_size = 32, shuffle = True)


# Neural Network:
class NeuralNet(nn.Module):
    """Four-layer fully-connected network.

    NOTE(review): only __init__ is visible in this chunk; the forward pass
    presumably follows later in the file.
    """

    def __init__(self, input_features, hidden_layer1, hidden_layer2,
                 hidden_layer3, output_features):
        super(NeuralNet, self).__init__()  # inheriting from the parent class
        # layers:
        self.input_l = nn.Linear(input_features, hidden_layer1)
        self.hidden_l1 = nn.Linear(hidden_layer1, hidden_layer2)
        self.hidden_l2 = nn.Linear(hidden_layer2, hidden_layer3)
        self.hidden_l3 = nn.Linear(hidden_layer3, output_features)
        # activation functions:
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
def accuracy_check(classifiers,
                   logdir,
                   as_bit=True,
                   as_half=False,
                   iscxvpn2016_pcap="D://Datasets/ISCXVPN2016/",
                   iscxvpn2016_h5="D://Datasets/packets-50k/",
                   ustctfc2016_pcap="D://Datasets/USTC-TFC2016/",
                   ustctfc2016_h5="D://Datasets/USTC-TFC2016-packets-50k/",
                   max_packets_on_cache=50000,
                   random_seed=2020,
                   batch_size=20000,
                   n_support=10):
    """Evaluate each classifier over several dataset collections and save
    raw per-class prediction counts.

    For every collection ("seen", "unseen", "vpntest", "maltest") and every
    classifier, restore the latest checkpoint from *logdir*, run few-shot
    classification over the test loaders using prototypes drawn from the
    validation loaders, and write the confusion-count matrix to
    ./raw_predictions/<collection>/<rootname>.pt plus the class-name list
    to a matching .txt file.

    Parameters:
        classifiers: mapping of name -> model (nn.Module or LightningModule).
        logdir: root directory containing per-classifier checkpoints.
        as_bit / as_half: flags encoded into the output file name.
        *_pcap / *_h5: dataset locations for the two traffic corpora.
        max_packets_on_cache: cache cap passed to the dataset loaders.
        random_seed: unused here — kept for interface compatibility.
        batch_size: test batch size (also n_queries for the manager).
        n_support: prototypes sampled per class.
    """
    iscxvpn2016_loader = iscxvpn2016(pcap_dir=iscxvpn2016_pcap,
                                     h5_dir=iscxvpn2016_h5,
                                     max_packets_on_cache=max_packets_on_cache,
                                     as_bit=as_bit,
                                     verbose=True)
    ustctfc2016_loader = ustctfc2016(pcap_dir=ustctfc2016_pcap,
                                     h5_dir=ustctfc2016_h5,
                                     max_packets_on_cache=max_packets_on_cache,
                                     as_bit=as_bit,
                                     verbose=True)
    dataset_collections = {
        "seen": {
            "aim": iscxvpn2016_loader("aim"),
            "email": iscxvpn2016_loader("email", "gmail"),
            "facebook": iscxvpn2016_loader("facebook"),
            "ftps": iscxvpn2016_loader("ftps"),
            "gmail": iscxvpn2016_loader("gmail"),
            "hangouts": iscxvpn2016_loader("hangouts"),
            "icq": iscxvpn2016_loader("icq"),
            "youtube": iscxvpn2016_loader("youtube"),
            "netflix": iscxvpn2016_loader("netflix"),
            "scp": iscxvpn2016_loader("scp"),
            "sftp": iscxvpn2016_loader("sftp"),
            "skype": iscxvpn2016_loader("skype"),
            "spotify": iscxvpn2016_loader("spotify"),
            "vimeo": iscxvpn2016_loader("vimeo"),
            "torrent": iscxvpn2016_loader("torrent"),
        },
        "unseen": {
            k: ustctfc2016_loader(k)
            for k in ustctfc2016_loader.metadata.names()
        },
        "vpntest": {
            "vpn": iscxvpn2016_loader("vpn"),
            "novpn": iscxvpn2016_loader(None, "vpn"),
        },
        "maltest": {
            "benign": ustctfc2016_loader(
                filter(ustctfc2016_loader.metadata.is_benign,
                       ustctfc2016_loader.metadata.names())),
            "malware": ustctfc2016_loader(
                filter(ustctfc2016_loader.metadata.is_malware,
                       ustctfc2016_loader.metadata.names())),
        }
    }
    for dataset_name, seen_dic in dataset_collections.items():
        dataset_manager = FewshotDatasetManager(
            seen_classes=seen_dic,
            # unseen_classes={k: ustctfc2016_loader(k) for k in ustctfc2016_loader.metadata.names()},
            n_classes=5,
            n_support=10,
            n_queries=batch_size,
            all_classes_val=True)
        # datasets_alias = {
        #     seen_datasets_name: "seen",
        #     unseen_datasets_name: "unseen",
        #     "vpn,novpn": "vpntest",
        #     "benign,malware": "malwaretest"
        # }
        for classifier_name, classifier in classifiers.items():
            print(
                "----------------------------------------------------------------------------------"
            )
            print(classifier_name)
            # Most recent checkpoint for this classifier, if any.
            ckpt_file = glob(f"{logdir}/" + classifier_name +
                             "/default/**/checkpoints/*.ckpt")
            if len(ckpt_file) == 0:
                print("CHECKPOINT not found:", ckpt_file)
                # A parameter-free classifier needs no checkpoint.
                if len(list(classifier.parameters())) == 0:
                    print("BUT it has no parameters so ITS OKAY")
                else:
                    continue
            else:
                ckpt_file = ckpt_file[-1]
            if isinstance(classifier, pl.LightningModule):
                solver = classifier
            else:
                solver = FewshotSolver(classifier)
            # ckpt_file is a (possibly empty) list when no checkpoint exists.
            if isinstance(ckpt_file, str):
                solver.load_state_dict(torch.load(ckpt_file)["state_dict"])
            solver = solver.cuda()
            test_loader_list = []
            proto_loader_list = []
            print("Seen Classes")
            for key in dataset_manager.datasets_test_seen.keys():
                test_set = dataset_manager.datasets_test_seen[key]
                test_loader_list.append(DL(test_set, batch_size=batch_size))
                print(key)
            print("----------------------------------------------------")
            print("Val Classes")
            # Infinite per-class samplers supplying support (prototype) batches.
            for key in dataset_manager.datasets_val_seen.keys():
                val_set = dataset_manager.datasets_val_seen[key]
                proto_loader_list.append(
                    iter(
                        DL(val_set,
                           batch_size=n_support,
                           sampler=InfiniteSampler(len(val_set)))))
            num_classes = len(proto_loader_list)
            # predictions[i, j] = count of class-i test samples predicted as j.
            predictions = torch.zeros(num_classes, num_classes)
            rootname = f"{'bit' if as_bit else 'byte'}-{'f16' if as_half else 'f32'}-{classifier_name}"
            with torch.no_grad():
                for d_idx in tqdm(range(len(test_loader_list))):
                    for d in tqdm(test_loader_list[d_idx]):
                        d = d.cuda()
                        support = []
                        for p_list in proto_loader_list:
                            # BUG FIX: iterator.next() is the Python 2 idiom
                            # and raises AttributeError on Python 3 iterators;
                            # use the built-in next().
                            support.append(next(p_list).cuda())
                        scores = solver(d, *support)
                        winners = torch.argmax(scores, dim=1)
                        counts = torch.bincount(winners,
                                                minlength=num_classes).cpu()
                        predictions[d_idx] = predictions[d_idx] + counts
            dirpath = f"./raw_predictions/{dataset_name}"
            os.makedirs(dirpath, exist_ok=True)
            torch.save(predictions, f"{dirpath}/{rootname}.pt")
            with open(f"{dirpath}/{rootname}.txt", "w+") as f:
                f.write("\n".join(
                    list(dataset_manager.datasets_test_seen.keys())))
    # NOTE(review): tail of the training transform — the opening
    # `training_data = c([ ... ` lies before this chunk.
    t.ToTensor(),
    t.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Deterministic test pipeline: resize, center-crop, normalize.
test_data = c([
    t.Resize(255),
    t.CenterCrop(224),
    t.ToTensor(),
    t.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

training_dataset = d.ImageFolder(train_dir, transform=training_data)
valid_dataset = d.ImageFolder(valid_dir, transform=valid_data)
test_dataset = d.ImageFolder(test_dir, transform=test_data)

# Only the training loader shuffles; validation/test keep file order.
train_loader = DL(training_dataset, batch_size=32, shuffle=True)
validation_loader = DL(valid_dataset, batch_size=32)
test_loader = DL(test_dataset, batch_size=32)

import json

# Mapping from category id to human-readable flower name.
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

# TODO: Build and train your network
# Load a pre-trained network
# (If you need a starting point, the VGG networks work great and are straightforward to use)
from torchvision import models
def load_data(data_dir):
    '''
    Load data set with torchvision's ImageFolder

    Parameters:
        data_dir: path to the image folder. Required subdirectories are
                  "train", "valid", and "test"
    Returns:
        (training dataset, training dataloader, validation dataloader)
    '''
    # BUG FIX: the original reassigned data_dir = 'flowers' here, silently
    # ignoring the caller's argument; the parameter is now honoured.
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # ImageNet normalisation statistics, shared by all three pipelines.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    # Deterministic pipeline used for both validation and testing.
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize
    ])
    data_transforms = {
        'training':
        transforms.Compose([
            transforms.RandomRotation(45),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]),
        'validation': eval_transform,
        'testing': eval_transform,
    }

    # Image datasets
    image_datasets = {
        'training':
        datasets.ImageFolder(train_dir, transform=data_transforms['training']),
        'validation':
        datasets.ImageFolder(valid_dir,
                             transform=data_transforms['validation']),
        'testing':
        datasets.ImageFolder(test_dir, transform=data_transforms['testing']),
    }

    # Dataloaders — only the training loader shuffles.
    dataloaders = {
        'training':
        DL(image_datasets['training'], batch_size=32, shuffle=True),
        'validation':
        DL(image_datasets['validation'], batch_size=16, shuffle=False),
        'testing':
        DL(image_datasets['testing'], batch_size=16, shuffle=False)
    }
    return image_datasets['training'], dataloaders['training'], dataloaders[
        'validation']
# Rank 0 (or single-process, local_rank == -1) prepares the output directory
# and the TensorBoard writer.
if args.local_rank in [-1, 0]:
    if not os.path.exists(args.id):
        os.mkdir(args.id)
    tb_writer = SummaryWriter(
        log_dir='tensorboard/GPT2-{}'.format(args.model))

dataset = GPTTableDataset2('data/train_lm_preprocessed.json', tokenizer,
                           args.max_len)

# Random sampling in single-process mode; a DistributedSampler otherwise so
# each process sees a disjoint shard.
if args.local_rank == -1:
    sampler = RandomSampler(dataset)
else:
    sampler = DistributedSampler(dataset)

train_dataloader = DL(dataset,
                      sampler=sampler,
                      batch_size=args.batch_size,
                      num_workers=0)

model.train()
optimizer = optim.Adam(model.parameters(), args.learning_rate)
avg_loss = 0
global_step = 0

# Wrap the model with DDP when running distributed.
if args.local_rank != -1:
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[args.local_rank],
        output_device=args.local_rank,
        find_unused_parameters=True)
else:
    # NOTE(review): the body of this else-branch continues beyond this chunk.
# NOTE(review): 'tarnsform' (sic) is never used in this chunk.
tarnsform = transforms.ToTensor()
noise = np.load(noise_dir)
# Audio effect chain: high shelf -> reverb -> phaser -> low shelf.
fx = (
    AudioEffectsChain().highshelf().reverb().phaser()
    # .delay()
    .lowshelf())

# Restore the UNet from a saved checkpoint and switch to eval mode.
model = UNet(config)
checkpoint = torch.load('Exp/19250_2_checkpoint.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
model.eval()
# model.eval()

dataset = CommonVoice()
trainLoader = DL(dataset, batch_size=1, shuffle=False, num_workers=2)
# print(model)

# Run the model sample-by-sample on GPU, pulling results back to numpy.
# NOTE(review): the loop body may continue beyond this chunk.
for no, data in enumerate(trainLoader):
    print(data.size())
    data = data.to('cuda')
    model.cuda()
    output = model(data)
    output = output.detach().cpu().numpy()
    data = data.detach().cpu().numpy()
batch_size = 32

#### Prepare the dataset
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])
# Wrap in a DataLoader (no shuffling — keep file order for prediction).
test_data_path = os.path.join(data_dir, test_dir)
test_data = IF(test_data_path, transform=transform_test)
test_iter = DL(test_data, batch_size, shuffle=False)

#### Load the model
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, 2)  # two-class output head
model.load_state_dict(torch.load(PATH))
# print(model.state_dict())
# BUG FIX: switch to eval mode so BatchNorm uses its running statistics
# (the original ran inference in training mode).
model.eval()

#### Predict
preds = []
with torch.no_grad():  # no gradients needed for inference
    for X, _ in test_iter:
        y_hat = model(X)
        # Collect the prediction for every sample, e.g. 0 / 1.
        preds.extend(y_hat.cpu())
print(preds[0])
# #### Map predictions to concrete classes