def train_classifier(device, args):
    # load the pretrained feature extractor and keep it in eval mode
    encoder = FeatureExtractor()
    encoder.load_state_dict(torch.load(args.encoder_path))
    encoder.eval()

    classifier = Classifier(encoder)
    classifier.to(device)

    # collect chunk paths and derive each chunk's label from its directory name
    all_chunks = []
    all_labels = []
    for label in filesystem.listdir_complete(filesystem.train_audio_chunks_dir):
        chunks = filesystem.listdir_complete(label)
        all_chunks = all_chunks + chunks
        all_labels = all_labels + [label.split('/')[-1]] * len(chunks)
    train_chunks, eval_chunks, train_labels, eval_labels = train_test_split(
        all_chunks, all_labels, test_size=args.eval_size)

    # transforms and dataset
    trf = normalize

    # dataset generation
    labels_encoder = LabelsEncoder(pd.read_csv(filesystem.labels_encoding_file))
    train_dataset = DiscriminativeDataset(train_chunks, train_labels, labels_encoder, transforms=trf)
    eval_dataset = DiscriminativeDataset(eval_chunks, eval_labels, labels_encoder, transforms=trf)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                  num_workers=4, collate_fn=None, pin_memory=True)
    eval_dataloader = DataLoader(eval_dataset, batch_size=1, shuffle=True,
                                 num_workers=4, collate_fn=None, pin_memory=True)

    optimizer = optim.Adam(classifier.parameters(), lr=args.lr)
    loss_criterion = nn.CrossEntropyLoss()

    # alternate train and eval steps, checkpointing after every training pass
    train_count = 0
    eval_count = 0
    for epoch in range(args.n_epochs):
        print('Epoch:', epoch, '/', args.n_epochs)
        train_count = train_step_classification(classifier, train_dataloader, optimizer,
                                                loss_criterion, args.verbose_epochs, device, train_count)
        torch.save(classifier.state_dict(), os.path.join(wandb.run.dir, 'model_checkpoint.pt'))
        eval_count = eval_step_classification(classifier, eval_dataloader, loss_criterion,
                                              args.verbose_epochs, device, eval_count)
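# train_step_classification and eval_step_classification are defined elsewhere in the
# repo. A minimal sketch of the training step, assuming it runs one full epoch and
# logs to wandb every `verbose_epochs` batches — only the signature is taken from the
# call above; the body is an assumption:
def train_step_classification(model, dataloader, optimizer, loss_criterion,
                              verbose_epochs, device, count):
    model.train()
    for batch_idx, (data, labels) in enumerate(dataloader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(data)
        loss = loss_criterion(logits, labels)
        loss.backward()
        optimizer.step()
        if batch_idx % verbose_epochs == 0:
            wandb.log({'train loss': loss.item()}, step=count)
        count += 1
    return count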
def main(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"

    dataset = CocoDataset(json_path=json_path, root_dir=root_dir, transform=transform)
    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models
    encoder = FeatureExtractor(args.embed_size).eval()  # eval mode (batchnorm uses moving mean/variance)
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length)

    # Convert word_ids to words, stopping at the end-of-sentence token
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
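# load_image is assumed to be the usual helper for this kind of captioning script:
# open the image, resize it to the encoder's expected input, apply the normalization
# transform, and add a batch dimension. A sketch under those assumptions:
def load_image(image_path, transform=None):
    image = Image.open(image_path).convert('RGB')
    image = image.resize((224, 224), Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)  # (3, 224, 224) -> (1, 3, 224, 224)
    return image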
def main():
    parser = get_parser()
    args = parser.parse_args()
    model_path = args.model
    input_path = args.input
    sound_path = args.output

    model = FeatureExtractor()
    model.load_state_dict(torch.load(model_path))
    device = torch.device('cuda')
    cpu_device = torch.device('cpu')
    model.to(device)

    # data = normalize(torchaudio.load(input_path)[0][0].reshape(1, -1))
    data = torch.from_numpy(normalize(torch.randn(1, 132480))).float().to(device)
    data = data.reshape(1, 1, -1)

    model.eval()
    with torch.no_grad():  # inference only; avoids tracking gradients through the forward pass
        sound = model(data)
    print(functional.mse_loss(sound, data).item())
    sound = sound.to(cpu_device)
    # torchaudio.save expects a (channels, frames) tensor
    torchaudio.save(sound_path, sound.reshape(1, -1), 44100)
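# normalize is assumed to return a NumPy array (the call above wraps its result in
# torch.from_numpy); a plausible peak-normalization sketch, not the original helper:
def normalize(tensor):
    data = tensor.numpy() if torch.is_tensor(tensor) else np.asarray(tensor)
    return data / np.abs(data).max()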
# for subject_index in range(11):
#     total_user_data.extend(list_total_user_data[subject_index])
#     total_user_labels.extend(list_total_user_labels[subject_index])
total_user_data = np.array(list_total_user_data, dtype=np.float32)    # <np.ndarray> (11, 180, 400, 8)
total_user_labels = np.array(list_total_user_labels, dtype=np.int64)  # <np.ndarray> (11, 180)

# ----------------------------------------- Init Network ---------------------------------------------------------- #
feature_extractor = FeatureExtractor().cuda()
domain_classifier = DomainClassifier().cuda()
label_predictor = LabelPredictor().cuda()
feature_extractor.load_state_dict(
    torch.load(r'saved_model\feature_extractor_CE_8_subjects.pkl'))
domain_classifier.load_state_dict(
    torch.load(r'saved_model\domain_classifier_CE_8_subjects.pkl'))
label_predictor.load_state_dict(
    torch.load(r'saved_model\label_predictor_CE_8_subjects.pkl'))

# ------------------------------------------ Testing Stage -------------------------------------------------------- #
window_size = 52
stride = 1
max_fit = 30
jump = 1
threshold = 60 / 128

feature_extractor.eval()
label_predictor.eval()
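# The constants above suggest a sliding-window vote over each 400-sample trial.
# A hypothetical version of the loop they set up — window shapes and the voting rule
# are assumptions, not the original testing code:
with torch.no_grad():
    trial = torch.from_numpy(total_user_data[0, 0]).cuda()        # one trial, (400, 8)
    votes = []
    for start in range(0, trial.shape[0] - window_size + 1, stride):
        window = trial[start:start + window_size].unsqueeze(0)    # (1, 52, 8)
        logits = label_predictor(feature_extractor(window))
        votes.append(logits.argmax(dim=1).item())
    majority = max(set(votes), key=votes.count)
    vote_share = votes.count(majority) / len(votes)
    if vote_share >= threshold:                                   # threshold = 60 / 128
        print('predicted gesture:', majority, 'vote share:', vote_share)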
    # Random rotation within 15 degrees (augmentation); empty areas after rotation are filled with 0
    transforms.RandomRotation(15),  # , fill=(0,)),
    # Finally, convert to a Tensor for the model
    transforms.ToTensor(),
])

source_dataset = ImageFolder('real_or_drawing/train_data', transform=source_transform)
target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)

source_dataloader = DataLoader(source_dataset, batch_size=128, shuffle=False)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

feature_extractor = FeatureExtractor().cuda()
feature_extractor.load_state_dict(torch.load('noDANN_extractor_1000.bin'))
feature_extractor.eval()

# extract features for every test batch, then stack them into one matrix
result_test = []
result_train = []
for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()
    feature = feature_extractor(test_data).cpu().detach().numpy()
    # x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result_test.append(feature)

result_test = np.concatenate(result_test)
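# The concatenated feature matrix is typically visualized next to see how the source
# and target domains overlap; a minimal t-SNE sketch (the plotting choices here are
# assumptions, not from the original):
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

embedded = TSNE(n_components=2).fit_transform(result_test)
plt.scatter(embedded[:, 0], embedded[:, 1], s=1)
plt.title('t-SNE of test features (noDANN extractor)')
plt.savefig('tsne_noDANN.png')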
    transforms.Grayscale(),
    # Resize: the source data is 32x32, so we scale the 28x28 target data up to 32x32
    transforms.Resize((32, 32)),
    # Horizontal flip (augmentation)
    transforms.RandomHorizontalFlip(),
    # Random rotation within 15 degrees (augmentation); empty areas after rotation are filled with 0
    transforms.RandomRotation(15),  # , fill=(0,)),
    # Finally, convert to a Tensor for the model
    transforms.ToTensor(),
])

target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

feature_extractor = FeatureExtractor().cuda()
feature_extractor.load_state_dict(torch.load('strong1_extractor_model_1000.bin'))

label_predictor = LabelPredictor().cuda()
label_predictor.load_state_dict(torch.load('strong1_predictor_model_1000.bin'))

domain_classifier = DomainClassifier().cuda()
# domain_classifier.load_state_dict(torch.load('extractor_model_300.bin'))

feature_extractor.eval()
label_predictor.eval()

label_dict = {}
for i in range(10):
    label_dict[i] = []

for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()
    class_logits = label_predictor(feature_extractor(test_data))
    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
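    # Continuation sketch (not in the original): one plausible use of the label_dict
    # prepared above is to bucket each test sample's global index by predicted class;
    # 128 is the DataLoader batch size.
    for j, pred in enumerate(x):
        label_dict[pred].append(i * 128 + j)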