def get_coco(root, image_set, transforms, mode='instances'):
    """Build a COCO-2017 detection dataset for one split.

    Args:
        root: dataset root holding the split image folders and an
            ``annotations`` subdirectory.
        image_set: ``"train"`` or ``"val"``.
        transforms: optional extra transform applied after the
            polygon-to-mask conversion.
        mode: annotation-file prefix, e.g. ``'instances'``.

    Returns:
        A ``CocoDetection`` dataset; the train split additionally has
        images without annotations filtered out.
    """
    anno_file_template = "{}_{}2017.json"
    PATHS = {
        "train": ("train2017",
                  os.path.join("annotations",
                               anno_file_template.format(mode, "train"))),
        "val": ("val2017",
                os.path.join("annotations",
                             anno_file_template.format(mode, "val"))),
        # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
    }

    # Mask conversion always runs first; caller-supplied transforms follow.
    pipeline = [ConvertCocoPolysToMask()]
    if transforms is not None:
        pipeline.append(transforms)
    transforms = T.Compose(pipeline)

    img_folder, ann_file = PATHS[image_set]
    img_folder = os.path.join(root, img_folder)
    ann_file = os.path.join(root, ann_file)

    dataset = CocoDetection(img_folder, ann_file, transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])

    return dataset
def BOT_recognition(model_BOT, img):
    """Run the BOT attribute model on a single person crop.

    Returns seven 0/1 flags:
    ``(staff, customer, stand, sit, play_with_phone, male, female)``.
    Returns ``None`` implicitly if the loader yields no batch.
    """
    preprocess = T.Compose([
        T.Resize((256, 128)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    loader = DataLoader(
        Load_person(img, transform=preprocess),
        batch_size=1,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )

    model_BOT.eval()
    with torch.no_grad():
        for batch in loader:
            if torch.cuda.is_available():
                batch = batch.cuda()
            # score = model_BOT(batch)
            (gender_outputs, staff_outputs, customer_outputs,
             stand_outputs, sit_outputs, phone_outputs) = model_BOT(batch)

            def _flag(outputs, idx, threshold=0.5):
                # Binarize one attribute head's confidence.
                return 1 if float(Confidence(outputs, idx)) > threshold else 0

            staff_con = _flag(staff_outputs, 1)
            customer_con = _flag(customer_outputs, 1)
            stand_con = _flag(stand_outputs, 1)
            sit_con = _flag(sit_outputs, 1)
            # Phone use fires at a much lower confidence threshold (0.1).
            play_with_phone_con = _flag(phone_outputs, 1, threshold=0.1)
            male_con = _flag(gender_outputs, 0)
            female_con = _flag(gender_outputs, 1)

            # Only the first (and only) batch is evaluated.
            return (staff_con, customer_con, stand_con, sit_con,
                    play_with_phone_con, male_con, female_con)
def get_transform(args):
    """Build audio feature-extraction transforms for train/val/test.

    Supports two python_speech_features-style front ends
    (``'logfbes'`` / ``'mfccs'``) and two torchaudio ones
    (``'ta.mfccs'`` / ``'ta.logfbes'``). Mutates ``args`` in place,
    setting ``signal_samples``, ``signal_width``, ``nfeature`` and
    ``bkg_noise_path``.

    Returns:
        A tuple ``(transform_dict, silence_transform)`` where
        ``transform_dict`` maps ``'train'``/``'val'``/``'test'`` to
        their pipelines.

    Raises:
        ValueError: if ``args.features_name`` is not one of the four
            supported names.
    """
    melkwargs = {
        'n_mels': args.nfilt,
        'n_fft': args.nfft,
        'win_length': int(args.winlen * args.signal_sr),
        'hop_length': int(args.winstep * args.signal_sr),
    }
    args.signal_samples = int(args.signal_sr * args.signal_len)
    # Frame count for the python_speech_features-style front ends;
    # the 'ta.*' branches override it with torchaudio's framing.
    args.signal_width = int(
        np.ceil((args.signal_len - args.winlen) / args.winstep) + 1)

    features_name = args.features_name.lower()
    if features_name == 'logfbes':
        features = transforms.Compose([
            transforms.LogFBEs(args.signal_sr, args.winlen, args.winstep,
                               args.nfilt, args.nfft, args.preemph),
            transforms.ToTensor(),
        ])
        args.nfeature = args.nfilt
    elif features_name == 'mfccs':
        features = transforms.Compose([
            transforms.MFCCs(args.signal_sr, args.winlen, args.winstep,
                             args.numcep, args.nfilt, args.nfft,
                             args.preemph, args.ceplifter),
            transforms.ToTensor(),
        ])
        args.nfeature = args.numcep
    elif features_name == 'ta.mfccs':
        features = transforms.Compose([
            # transforms.ToTensor(),
            torchaudio.transforms.MFCC(sample_rate=args.signal_sr,
                                       n_mfcc=args.numcep,
                                       melkwargs=melkwargs),
        ])
        args.nfeature = args.numcep
        # torchaudio framing: one frame per hop, plus one.
        args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
        # args.signal_width = 81;
    elif features_name == 'ta.logfbes':
        log_offset = 1e-6  # avoid log(0) on silent frames
        features = transforms.Compose([
            # transforms.ToTensor(),
            torchaudio.transforms.MelSpectrogram(sample_rate=args.signal_sr,
                                                 **melkwargs),
            transforms.Lambda(lambda t: torch.log(t + log_offset)),
        ])
        args.nfeature = args.nfilt
        args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
        # args.signal_width = 81;
    else:
        raise ValueError(
            '--features_name should be one of {LogFBEs | MFCCs | ta.MFCCs | ta.LogFBEs}'
        )

    # Composing transforms
    test_transform = transforms.Compose([
        features,
        transforms.Lambda(lambda x: x.unsqueeze(0)),  # add channel dim
    ])
    silence_transform = transforms.Compose([
        # Silence samples are synthesised by attenuating a signal by a
        # random factor in [0, silence_vol].
        transforms.Lambda(
            lambda x: x * random.uniform(0.0, args.silence_vol)),
        test_transform,
    ])
    args.bkg_noise_path = 'background_noise'
    # Augmentations are currently disabled, so training uses the same
    # pipeline as evaluation.
    train_transform = transforms.Compose([
        # transforms.TimeShifting2(shift_min=args.shift_min, shift_max=args.shift_max),
        # transforms.RandomApplyTransform(p=args.p_transform, transform=transforms.AddNoise3(
        #     os.path.join(args.data_root, args.bkg_noise_path),
        #     args.noise_vol, args.signal_samples, args.signal_sr)),
        test_transform,
        # torchaudio.transforms.TimeMasking(args.mask_time),
        # torchaudio.transforms.TimeMasking(args.mask_time),
        # torchaudio.transforms.FrequencyMasking(args.mask_freq),
    ])
    return {
        'train': train_transform,
        'val': test_transform,
        'test': test_transform
    }, silence_transform
def get_transform(train):
    """Return the detection transform pipeline; flips are train-only."""
    ops = [T.ToTensor()]
    if train:
        # Random horizontal flip augmentation during training only.
        ops.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(ops)
def get_transform(args):
    """Build torchaudio feature transforms plus augmentation pipelines.

    Supports ``'mfccs'`` (energy coefficient dropped) and ``'logfbes'``
    front ends. Mutates ``args`` in place, setting ``signal_samples``,
    ``signal_width``, ``nfeature`` and ``bkg_noise_path``.

    Returns:
        A tuple ``(transform_dict, silence_transform)`` where
        ``transform_dict`` maps ``'train'``/``'val'``/``'test'`` to
        their pipelines; when ``args.no_augmentations`` is set the
        train pipeline equals the test pipeline.

    Raises:
        ValueError: if ``args.features_name`` is not recognised.
    """
    melkwargs = {
        'n_mels': args.nfilt,
        'n_fft': args.nfft,
        'win_length': int(args.winlen * args.signal_sr),
        'hop_length': int(args.winstep * args.signal_sr),
    }
    args.signal_samples = int(args.signal_sr * args.signal_len)
    # torchaudio framing: one frame per hop, plus one.
    args.signal_width = 1 + args.signal_samples // melkwargs['hop_length']
    args.bkg_noise_path = 'background_noise'

    features_name = args.features_name.lower()
    if features_name == 'mfccs':
        features = transforms.Compose([
            torchaudio.transforms.MFCC(sample_rate=args.signal_sr,
                                       n_mfcc=args.numcep,
                                       log_mels=True,
                                       melkwargs=melkwargs),
            # Drop the 0th (energy) coefficient.
            transforms.Lambda(lambda x: x[1:]),
        ])
        args.nfeature = args.numcep - 1
    elif features_name == 'logfbes':
        log_offset = 1e-6  # avoid log(0) on silent frames
        features = transforms.Compose([
            torchaudio.transforms.MelSpectrogram(sample_rate=args.signal_sr,
                                                 **melkwargs),
            transforms.Lambda(lambda t: torch.log(t + log_offset)),
        ])
        args.nfeature = args.nfilt
    else:
        raise ValueError('--features_name should be one of {LogFBEs | MFCCs}')

    # Composing transforms
    test_transform = transforms.Compose([
        features,
        transforms.Lambda(lambda x: x.unsqueeze(0)),  # add channel dim
    ])
    silence_transform = transforms.Compose([
        # Silence samples are synthesised by attenuating a signal by a
        # random factor in [0, silence_vol].
        transforms.Lambda(
            lambda x: x * random.uniform(0.0, args.silence_vol)),
        test_transform,
    ])
    if args.no_augmentations:
        train_transform = test_transform
    else:
        train_transform = transforms.Compose([
            transforms.TimeShifting2(shift_min=args.shift_min,
                                     shift_max=args.shift_max),
            transforms.AddNoise3(
                os.path.join(args.data_root, args.bkg_noise_path),
                args.noise_vol, args.signal_samples, args.signal_sr),
            test_transform,
            # SpecAugment-style masking: two time masks, one freq mask.
            torchaudio.transforms.TimeMasking(args.mask_time),
            torchaudio.transforms.TimeMasking(args.mask_time),
            torchaudio.transforms.FrequencyMasking(args.mask_freq),
        ])
    return {'train': train_transform,
            'val': test_transform,
            'test': test_transform}, silence_transform
handlers=[ logging.FileHandler( os.path.join( log_dir, f'{datetime.now().strftime("%Y%m%d-%H%M%S")}_{args.model}.log') ), logging.StreamHandler() ]) ####### BASE TRANSFORMS ########## base_transforms = T.Compose([ T.DataCreator( ), # convert value-dictionary to instance of MultiNodeData-class T.DataLabeler(min_occurrence=0.5), # label nodes based on tag occurrences T.NodeSelector(include_nodes=args.include_nodes ), # only use subset of nodes if required T.MetricSelector(include_metrics=args.include_metrics ), # only use subset of metrics if required T.AnomalySelector(exclude_anomalies=args.exclude_anomalies ), # filter anomalies if exclusions are specified T.NodeConnector( use_synthetic_data=use_synthetic_data) # add adj-matrix (sparse) ]) logging.info(f"Base transforms: {base_transforms}") ########## PREPARE DATA ########## logging.info("Read dataset...") data_root = args.data_root dataset = MultiNodeDataset(data_root, args.input_dimension, args.stride, use_synthetic_data=use_synthetic_data, start_time=args.start_time,