Example #1
def train_classifier(device, args):
    # Load the pretrained feature extractor and put it in eval mode.
    encoder = FeatureExtractor()
    encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
    encoder.eval()
    classifier = Classifier(encoder)
    classifier.to(device)
    # Each subdirectory of the training chunks dir is one class; collect its
    # chunk paths and use the directory name as the label.
    all_chunks = []
    all_labels = []
    for label in filesystem.listdir_complete(filesystem.train_audio_chunks_dir):
        chunks = filesystem.listdir_complete(label)
        all_chunks.extend(chunks)
        all_labels.extend([label.split('/')[-1]] * len(chunks))
    train_chunks, eval_chunks, train_labels, eval_labels = train_test_split(all_chunks, all_labels, test_size=args.eval_size)

    # Transforms and dataset generation
    trf = normalize
    labels_encoder = LabelsEncoder(pd.read_csv(filesystem.labels_encoding_file))
    train_dataset = DiscriminativeDataset(train_chunks, train_labels, labels_encoder, transforms=trf)
    eval_dataset = DiscriminativeDataset(eval_chunks, eval_labels, labels_encoder, transforms=trf)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                  num_workers=4, collate_fn=None, pin_memory=True)
    eval_dataloader = DataLoader(eval_dataset, batch_size=1, shuffle=True,
                                 num_workers=4, collate_fn=None, pin_memory=True)

    optimizer = optim.Adam(classifier.parameters(), lr=args.lr)
    loss_criterion = nn.CrossEntropyLoss()
    train_count = 0
    eval_count = 0
    for epoch in range(args.n_epochs):
        print('Epoch:', epoch, '/', args.n_epochs)
        train_count = train_step_classification(classifier, train_dataloader, optimizer, loss_criterion, args.verbose_epochs, device, train_count)
        torch.save(classifier.state_dict(), os.path.join(wandb.run.dir, 'model_checkpoint.pt'))
        eval_count = eval_step_classification(classifier, eval_dataloader, loss_criterion, args.verbose_epochs, device, eval_count)
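The helpers train_step_classification and eval_step_classification are called above but not shown. A minimal sketch of what the training step is assumed to do (hypothetical; the repository's actual implementation may log metrics to wandb or differ in details):

def train_step_classification(model, dataloader, optimizer, criterion,
                              verbose_epochs, device, step_count):
    # One pass over the training data; print the loss every `verbose_epochs` steps.
    model.train()
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        if step_count % verbose_epochs == 0:
            print('train step', step_count, 'loss', loss.item())
        step_count += 1
    return step_count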
Example #2
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    image_dir = "data/"
    json_path = image_dir + "annotations/captions_train2014.json"
    root_dir = image_dir + "train2014"

    dataset = CocoDataset(json_path=json_path,
                          root_dir=root_dir,
                          transform=transform)

    data_loader = get_data_loader(dataset, batch_size=32)

    # Build models
    encoder = FeatureExtractor(args.embed_size).eval()  # eval mode (batchnorm uses moving mean/variance)
    decoder = CaptionGenerator(args.embed_size, args.hidden_size,
                               len(dataset.vocabulary), args.num_layers)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
    decoder.load_state_dict(torch.load(args.decoder_path, map_location=device))

    # Prepare an image
    image = load_image(args.image, transform)
    image_tensor = image.to(device)

    # Generate a caption from the image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature)
    sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length,)

    # Convert word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = data_loader.dataset.id_to_word[word_id]
        sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the image and the generated caption
    print(sentence)
    image = Image.open(args.image)
    plt.imshow(np.asarray(image))
    plt.show()
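load_image is used above but not defined in this snippet. A plausible minimal version, assuming the encoder expects 224x224 RGB input (the size is an assumption, not taken from the source):

from PIL import Image

def load_image(image_path, transform=None):
    # Open the image, resize it to the assumed encoder input size,
    # and add a batch dimension so it can be fed to the model directly.
    image = Image.open(image_path).convert('RGB')
    image = image.resize((224, 224), Image.LANCZOS)
    if transform is not None:
        image = transform(image).unsqueeze(0)  # (C, H, W) -> (1, C, H, W)
    return image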
Example #3
def main():
    parser = get_parser()
    args = parser.parse_args()
    model_path = args.model
    input_path = args.input
    sound_path = args.output
    model = FeatureExtractor()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cpu_device = torch.device('cpu')
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    #data = normalize(torchaudio.load(input_path)[0][0].reshape(1, -1))
    data = torch.from_numpy(normalize(torch.randn(1, 132480))).float().to(device)
    data = data.reshape(1, 1, -1)
    model.eval()
    with torch.no_grad():  # no gradients needed; also lets torchaudio.save call .numpy()
        sound = model(data)
    print(functional.mse_loss(sound, data).item())
    sound = sound.to(cpu_device)
    torchaudio.save(sound_path, sound.reshape(1, -1), 44100)  # (channels, frames)
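normalize is assumed here to take a waveform tensor and return a NumPy array, since its result is passed to torch.from_numpy. A minimal peak-normalization sketch consistent with that usage (hypothetical):

import numpy as np

def normalize(waveform):
    # Scale the waveform into [-1, 1] by its peak amplitude and return
    # a NumPy array (the caller wraps it back into a tensor).
    x = waveform.numpy() if hasattr(waveform, 'numpy') else np.asarray(waveform)
    return x / (np.abs(x).max() + 1e-9)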
Example #4
# for subject_index in range(11):
#     total_user_data.extend(list_total_user_data[subject_index])
#     total_user_labels.extend(list_total_user_labels[subject_index])

total_user_data = np.array(list_total_user_data,
                           dtype=np.float32)  # <np.ndarray> (11, 180, 400, 8)
total_user_labels = np.array(list_total_user_labels,
                             dtype=np.int64)  # <np.ndarray> (11, 180)

# ----------------------------------------- Init Network -------------------------------------------------------- #

feature_extractor = FeatureExtractor().cuda()
domain_classifier = DomainClassifier().cuda()
label_predictor = LabelPredictor().cuda()

feature_extractor.load_state_dict(
    torch.load(r'saved_model\feature_extractor_CE_8_subjects.pkl'))
domain_classifier.load_state_dict(
    torch.load(r'saved_model\domain_classifier_CE_8_subjects.pkl'))
label_predictor.load_state_dict(
    torch.load(r'saved_model\label_predictor_CE_8_subjects.pkl'))

# ------------------------------------------ Testing Stage -------------------------------------------------------- #

# Sliding-window inference parameters
window_size = 52
stride = 1
max_fit = 30
jump = 1
threshold = 60 / 128

feature_extractor.eval()
label_predictor.eval()
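The snippet ends before the testing loop that consumes these parameters. Below is a sketch of sliding-window inference over one trial's time axis, assuming the models accept a (batch, window_size, channels) layout; max_fit, jump, and threshold are repository-specific post-processing knobs and are not reproduced here:

import torch

with torch.no_grad():
    trial = torch.from_numpy(total_user_data[0, 0])        # (400, 8): one trial
    for start in range(0, trial.shape[0] - window_size + 1, stride):
        window = trial[start:start + window_size]          # (52, 8)
        batch = window.unsqueeze(0).cuda()                 # (1, 52, 8), layout assumed
        logits = label_predictor(feature_extractor(batch))
        pred = torch.argmax(logits, dim=1).item()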
Example #5
    # Rotate by up to 15 degrees (augmentation); areas left empty by the rotation are filled with 0.
    transforms.RandomRotation(15),  #, fill=(0,)),
    # Finally, convert to a Tensor for the model.
    transforms.ToTensor(),
])

source_dataset = ImageFolder('real_or_drawing/train_data',
                             transform=source_transform)
target_dataset = ImageFolder('real_or_drawing/test_data',
                             transform=target_transform)

source_dataloader = DataLoader(source_dataset, batch_size=128, shuffle=False)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

feature_extractor = FeatureExtractor().cuda()
feature_extractor.load_state_dict(torch.load('noDANN_extractor_1000.bin'))

feature_extractor.eval()

result_test = []
result_train = []

for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()

    feature = feature_extractor(test_data).cpu().detach().numpy()

    #x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
    result_test.append(feature)

result_test = np.concatenate(result_test)
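The collected result_test features are typically projected to 2-D for visual inspection. A sketch using scikit-learn's t-SNE (the visualization itself is not part of the snippet above):

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Embed the (N, D) feature matrix into 2-D and scatter-plot it.
embedded = TSNE(n_components=2).fit_transform(result_test)
plt.scatter(embedded[:, 0], embedded[:, 1], s=2)
plt.title('t-SNE of test-set features')
plt.show()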
Example #6
    transforms.Grayscale(),
    # Resize: the source data is 32x32, so we upscale the 28x28 target data to 32x32.
    transforms.Resize((32, 32)),
    # Horizontal flip (augmentation).
    transforms.RandomHorizontalFlip(),
    # Rotate by up to 15 degrees (augmentation); areas left empty by the rotation are filled with 0.
    transforms.RandomRotation(15),  #, fill=(0,)),
    # Finally, convert to a Tensor for the model.
    transforms.ToTensor(),
])

target_dataset = ImageFolder('real_or_drawing/test_data', transform=target_transform)
test_dataloader = DataLoader(target_dataset, batch_size=128, shuffle=False)

feature_extractor = FeatureExtractor().cuda()
feature_extractor.load_state_dict(torch.load('strong1_extractor_model_1000.bin'))
label_predictor = LabelPredictor().cuda()
label_predictor.load_state_dict(torch.load('strong1_predictor_model_1000.bin'))
domain_classifier = DomainClassifier().cuda()
#domain_classifier.load_state_dict(torch.load('extractor_model_300.bin'))

feature_extractor.eval()
label_predictor.eval()
label_dict = {}
for i in range(10):
    label_dict[i] = []

for i, (test_data, _) in enumerate(test_dataloader):
    test_data = test_data.cuda()
    class_logits = label_predictor(feature_extractor(test_data))
    x = torch.argmax(class_logits, dim=1).cpu().detach().numpy()
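    # The snippet is cut off here. A plausible continuation (hypothetical,
    # inferred from how label_dict is initialized above): group each test
    # image by its predicted class.
    for img, pred in zip(test_data.cpu(), x):
        label_dict[int(pred)].append(img)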