def __init__(self, batch=1):
        self.batch = batch

        trainDataset = CommonVoice(
            root_dir=
            '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru',
            type='train')
        self.trainloader = DL(trainDataset,
                              batch_size=self.batch,
                              shuffle=True,
                              num_workers=2)

        testDataset = CommonVoice(
            root_dir=
            '/media/pranjal/Seagate Backup Plus Drive/DataASR/Russian/ru',
            type='test')
        self.testloader = DL(testDataset,
                             batch_size=self.batch,
                             shuffle=True,
                             num_workers=2)
Example #2
def get_data_loaders(args):
    # Data augmentation and normalization for training
    # Just resizing and normalization for val and test
    trans_dict = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop(args.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), cfg.NORMALIZE
        ]),
        'val':
        transforms.Compose([
            transforms.Resize(args.input_size),
            transforms.CenterCrop(args.input_size),
            transforms.ToTensor(), cfg.NORMALIZE
        ]),
        'test':
        transforms.Compose([
            transforms.Resize(args.input_size),
            transforms.CenterCrop(args.input_size),
            transforms.ToTensor(), cfg.NORMALIZE
        ])
    }

    # Create training, validation and test datasets
    img_folders = {
        x: IF(os.path.join(cfg.DATA_DIR, 'prepared', x), trans_dict[x])
        for x in cfg.PHASES
    }

    # Create training, validation and test dataloaders
    # When using CUDA, set num_workers=1 and pin_memory=True
    data_loaders = {
        x: DL(img_folders[x],
              batch_size=args.batch_size,
              shuffle=True,
              num_workers=int(args.device == 'cuda'),
              pin_memory=(args.device == 'cuda'))
        for x in cfg.PHASES
    }

    if args.display_images:
        display_single(args, img_folders,
                       img_id=153)  # 153 is arbitrarily chosen
        display_multiple(args, data_loaders, N=4)  # display 4 images

    return data_loaders
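
# Hypothetical usage sketch (not part of the original example); the argparse
# field names below are inferred from the attributes get_data_loaders reads.
import argparse

args = argparse.Namespace(input_size=224,
                          batch_size=32,
                          device='cuda',
                          display_images=False)
loaders = get_data_loaders(args)
for inputs, labels in loaders['train']:
    print(inputs.shape, labels.shape)  # e.g. torch.Size([32, 3, 224, 224]) torch.Size([32])
    break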
Example #3
def renew(self, resl):
    print(
        '[*] Renew data_loader configuration, load image from {}.'.format(
            self.root))
    self.batch_size = int(self.batch_table[pow(2, resl)])
    self.im_size = int(pow(2, resl))
    print('[*] batch_size = {} , im_size = ({},{})'.format(
        self.batch_size, self.im_size, self.im_size))
    self.dataset.transforms = transforms.Compose([
        transforms.Resize(size=(self.im_size, self.im_size),
                          interpolation=Image.NEAREST),
        transforms.ToTensor(),
    ])
    print('[*] transforms = {}'.format(self.dataset.transforms))
    self.data_loader = DL(dataset=self.dataset,
                          batch_size=self.batch_size,
                          shuffle=True,
                          num_workers=self.num_workers)
Example #4
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)
    
dataset = Dataset(X, y)

# Loading Data:
train_loader = DL(dataset = dataset,
                  batch_size = 32,
                  shuffle = True)

# Neural Network:

class NeuralNet(nn.Module):
    def __init__(self, input_features, hidden_layer1, hidden_layer2, hidden_layer3, output_features):
        super(NeuralNet, self).__init__()     # initialize the parent nn.Module
    # layers:
        self.input_l = nn.Linear(input_features, hidden_layer1)
        self.hidden_l1 = nn.Linear(hidden_layer1, hidden_layer2)
        self.hidden_l2 = nn.Linear(hidden_layer2, hidden_layer3)
        self.hidden_l3 = nn.Linear(hidden_layer3, output_features)
    # activation functions:
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
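
    # A plausible forward pass for the layers defined above (an assumption;
    # the original example is truncated before it): chain the linear layers
    # with tanh activations and squash the output with sigmoid.
    def forward(self, x):
        x = self.tanh(self.input_l(x))
        x = self.tanh(self.hidden_l1(x))
        x = self.tanh(self.hidden_l2(x))
        return self.sigmoid(self.hidden_l3(x))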
Example #5
def accuracy_check(classifiers,
                   logdir,
                   as_bit=True,
                   as_half=False,
                   iscxvpn2016_pcap="D://Datasets/ISCXVPN2016/",
                   iscxvpn2016_h5="D://Datasets/packets-50k/",
                   ustctfc2016_pcap="D://Datasets/USTC-TFC2016/",
                   ustctfc2016_h5="D://Datasets/USTC-TFC2016-packets-50k/",
                   max_packets_on_cache=50000,
                   random_seed=2020,
                   batch_size=20000,
                   n_support=10):

    iscxvpn2016_loader = iscxvpn2016(pcap_dir=iscxvpn2016_pcap,
                                     h5_dir=iscxvpn2016_h5,
                                     max_packets_on_cache=max_packets_on_cache,
                                     as_bit=as_bit,
                                     verbose=True)
    ustctfc2016_loader = ustctfc2016(pcap_dir=ustctfc2016_pcap,
                                     h5_dir=ustctfc2016_h5,
                                     max_packets_on_cache=max_packets_on_cache,
                                     as_bit=as_bit,
                                     verbose=True)

    dataset_collections = {
        "seen": {
            "aim": iscxvpn2016_loader("aim"),
            "email": iscxvpn2016_loader("email", "gmail"),
            "facebook": iscxvpn2016_loader("facebook"),
            "ftps": iscxvpn2016_loader("ftps"),
            "gmail": iscxvpn2016_loader("gmail"),
            "hangouts": iscxvpn2016_loader("hangouts"),
            "icq": iscxvpn2016_loader("icq"),
            "youtube": iscxvpn2016_loader("youtube"),
            "netflix": iscxvpn2016_loader("netflix"),
            "scp": iscxvpn2016_loader("scp"),
            "sftp": iscxvpn2016_loader("sftp"),
            "skype": iscxvpn2016_loader("skype"),
            "spotify": iscxvpn2016_loader("spotify"),
            "vimeo": iscxvpn2016_loader("vimeo"),
            "torrent": iscxvpn2016_loader("torrent"),
        },
        "unseen": {
            k: ustctfc2016_loader(k)
            for k in ustctfc2016_loader.metadata.names()
        },
        "vpntest": {
            "vpn": iscxvpn2016_loader("vpn"),
            "novpn": iscxvpn2016_loader(None, "vpn"),
        },
        "maltest": {
            "benign":
            ustctfc2016_loader(
                filter(ustctfc2016_loader.metadata.is_benign,
                       ustctfc2016_loader.metadata.names())),
            "malware":
            ustctfc2016_loader(
                filter(ustctfc2016_loader.metadata.is_malware,
                       ustctfc2016_loader.metadata.names())),
        }
    }

    for dataset_name, seen_dic in dataset_collections.items():
        dataset_manager = FewshotDatasetManager(
            seen_classes=seen_dic,
            # unseen_classes={k: ustctfc2016_loader(k) for k in ustctfc2016_loader.metadata.names()},
            n_classes=5,
            n_support=10,
            n_queries=batch_size,
            all_classes_val=True)

        # datasets_alias = {
        #     seen_datasets_name: "seen",
        #     unseen_datasets_name: "unseen",
        #     "vpn,novpn": "vpntest",
        #     "benign,malware": "malwaretest"
        # }

        for classifier_name, classifier in classifiers.items():

            print(
                "----------------------------------------------------------------------------------"
            )
            print(classifier_name)

            ckpt_file = glob(
                f"{logdir}/{classifier_name}/default/**/checkpoints/*.ckpt")
            if len(ckpt_file) == 0:
                print("CHECKPOINT not found:", ckpt_file)
                if len(list(classifier.parameters())) == 0:
                    print("BUT it has no paraeters so ITS OKAY")
                else:
                    continue
            else:
                ckpt_file = ckpt_file[-1]

            if isinstance(classifier, pl.LightningModule):
                solver = classifier
            else:
                solver = FewshotSolver(classifier)

            if isinstance(ckpt_file, str):
                solver.load_state_dict(torch.load(ckpt_file)["state_dict"])
            solver = solver.cuda()

            test_loader_list = []
            proto_loader_list = []

            print("Seen Classes")
            for key in dataset_manager.datasets_test_seen.keys():
                a = dataset_manager.datasets_test_seen[key]
                dataload = DL(a, batch_size=batch_size)
                test_loader_list.append(dataload)
                print(key)
            print("----------------------------------------------------")

            print("Val Classes")
            for key in dataset_manager.datasets_val_seen.keys():
                b = dataset_manager.datasets_val_seen[key]
                diff_loader = iter(
                    DL(b,
                       batch_size=n_support,
                       sampler=InfiniteSampler(len(b))))
                proto_loader_list.append(diff_loader)

            num_classes = len(proto_loader_list)
            predictions = torch.zeros(num_classes, num_classes)

            rootname = f"{'bit' if as_bit else 'byte'}-{'f16' if as_half else 'f32'}-{classifier_name}"
            with torch.no_grad():
                for d_idx in tqdm(range(len(test_loader_list))):
                    for i, d in enumerate(tqdm(test_loader_list[d_idx])):
                        d = d.cuda()
                        support = []
                        for p_list in proto_loader_list:
                            proto_batch = next(p_list)  # Python 3: use next() on the iterator
                            support.append(proto_batch.cuda())

                        s = solver(d, *support)
                        s = torch.argmax(s, dim=1)
                        s = torch.bincount(s, minlength=num_classes).cpu()
                        predictions[d_idx] = predictions[d_idx] + s

                dirpath = f"./raw_predictions/{dataset_name}"
                os.makedirs(dirpath, exist_ok=True)
                torch.save(predictions, f"{dirpath}/{rootname}.pt")
                with open(f"{dirpath}/{rootname}.txt", "w+") as f:
                    f.write("\n".join(
                        list(dataset_manager.datasets_test_seen.keys())))
Example #6
    t.ToTensor(),
    t.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_data = c([
    t.Resize(255),
    t.CenterCrop(224),
    t.ToTensor(),
    t.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

training_dataset = d.ImageFolder(train_dir, transform=training_data)
valid_dataset = d.ImageFolder(valid_dir, transform=valid_data)
test_dataset = d.ImageFolder(test_dir, transform=test_data)

train_loader = DL(training_dataset, batch_size=32, shuffle=True)
validation_loader = DL(valid_dataset, batch_size=32)
test_loader = DL(test_dataset, batch_size=32)

import json

with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

# TODO: Build and train your network
# Load a pre-trained network
# (If you need a starting point, the VGG networks work great and are straightforward to use)

from torchvision import models
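
# A minimal sketch of the TODO above, assuming a VGG16 backbone as suggested;
# the classifier head and its layer sizes are illustrative choices, not the
# author's final architecture.
from torch import nn

model = models.vgg16(pretrained=True)
for param in model.features.parameters():
    param.requires_grad = False  # freeze the pre-trained feature extractor

model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),             # 25088 = 512 * 7 * 7 feature map from VGG16
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, len(cat_to_name)),  # one output per flower category
    nn.LogSoftmax(dim=1),
)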

Example #7
def load_data(data_dir):
    '''
    Load the datasets with torchvision's ImageFolder.
    Parameters:
        data_dir: path to the image folder. Required subdirectories are "train", "valid", and "test"
    Returns:
        the training dataset, the training dataloader, and the validation dataloader
    '''

    # Paths
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    data_transforms = {
        'training':
        transforms.Compose([
            transforms.RandomRotation(45),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'testing':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    #Image datasets
    image_datasets = {
        'training':
        datasets.ImageFolder(train_dir, transform=data_transforms['training']),
        'validation':
        datasets.ImageFolder(valid_dir,
                             transform=data_transforms['validation']),
        'testing':
        datasets.ImageFolder(test_dir, transform=data_transforms['testing']),
    }

    #Dataloaders
    dataloaders = {
        'training':
        DL(image_datasets['training'], batch_size=32, shuffle=True),
        'validation':
        DL(image_datasets['validation'], batch_size=16, shuffle=False),
        'testing':
        DL(image_datasets['testing'], batch_size=16, shuffle=False)
    }

    return (image_datasets['training'], dataloaders['training'],
            dataloaders['validation'])
Example #8
        if args.local_rank in [-1, 0]:
            if not os.path.exists(args.id):
                os.mkdir(args.id)
            tb_writer = SummaryWriter(
                log_dir='tensorboard/GPT2-{}'.format(args.model))

        dataset = GPTTableDataset2('data/train_lm_preprocessed.json',
                                   tokenizer, args.max_len)

        if args.local_rank == -1:
            sampler = RandomSampler(dataset)
        else:
            sampler = DistributedSampler(dataset)

        train_dataloader = DL(dataset,
                              sampler=sampler,
                              batch_size=args.batch_size,
                              num_workers=0)

        model.train()
        optimizer = optim.Adam(model.parameters(), args.learning_rate)

        avg_loss = 0
        global_step = 0

        if args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                find_unused_parameters=True)
        else:
Example #9
transform = transforms.ToTensor()

noise = np.load(noise_dir)
fx = (
    AudioEffectsChain().highshelf().reverb().phaser()
    # .delay()
    .lowshelf())

model = UNet(config)
checkpoint = torch.load('Exp/19250_2_checkpoint.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
model.eval()

dataset = CommonVoice()
trainLoader = DL(dataset, batch_size=1, shuffle=False, num_workers=2)

# print(model)

model.cuda()  # move the model to the GPU once, before iterating

for no, data in enumerate(trainLoader):

    print(data.size())

    data = data.to('cuda')

    output = model(data)

    output = output.detach().cpu().numpy()
    data = data.detach().cpu().numpy()
Example #10
batch_size = 32

#### Prepare the dataset
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])
# Wrap it in a DataLoader
test_data_path = os.path.join(data_dir, test_dir)
test_data = IF(test_data_path, transform=transform_test)
test_iter = DL(test_data, batch_size, shuffle=False)

#### Load the model
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, 2)
model.load_state_dict(torch.load(PATH))
model.eval()  # switch to inference mode (matters for the BatchNorm layers in resnet18)
# print(model.state_dict())

#### Predictions
preds = []
with torch.no_grad():
    for X, _ in test_iter:
        y_hat = model(X)
        preds.extend(y_hat.argmax(dim=1).cpu())  # predicted class index for each sample, e.g. 0 or 1
print(preds[0])

# #### Map the predictions to concrete class names
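# A minimal sketch of the mapping step named above, assuming the class order
# reported by ImageFolder (test_data.classes) is the mapping we want:
class_names = test_data.classes            # e.g. ['cat', 'dog'], in folder order
pred_labels = [class_names[int(p)] for p in preds]
print(pred_labels[:5])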