# Resume GAN training from the newest checkpoint in `opath`, then build the
# audio ("gan" mel) and image dataloaders.
# NOTE(review): relies on module-level `states`, `opath`, `ipath`, `opt`,
# `discriminator`, `generator` defined earlier in the file — confirm.
if len(states) >= 1:
    load_state = os.path.join(opath, states[-1])
    if os.path.isfile(load_state):
        tmp_load = torch.load(load_state)
        # checkpoint dict stores both network state dicts plus loss histories
        discriminator.load_state_dict(tmp_load["idis"])
        generator.load_state_dict(tmp_load["igen"])
        lossD = tmp_load["lossD"]
        lossG = tmp_load["lossG"]
        lossI = tmp_load["lossI"]
        print("successfully loaded {}".format(load_state))
        # checkpoint filename encodes a zero-padded epoch number in chars [-6:-3]
        starting_epoch = int(states[-1][-6:-3]) + 1
        print("continueing with epoch {}".format(starting_epoch))
        del tmp_load  # release the checkpoint dict before building loaders

# Configure data loaders
Mset = SoundfileDataset(ipath=ipath, out_type="gan")
assert Mset
Mloader = torch.utils.data.DataLoader(Mset, batch_size=opt.batch_size,
                                      shuffle=True, num_workers=int(opt.workers))
# Image dataset: resize to square, normalize each RGB channel to [-1, 1]
Iset = DatasetCust(opt.dataroot,
                   transform=transforms.Compose([
                       transforms.ToPILImage(),
                       transforms.Resize((opt.image_size, opt.image_size)),
                       transforms.ToTensor(),
                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                   ]))
assert Iset
from dataset import SoundfileDataset

# Entropy-analysis scratch script: windowing parameters and training constants.
fsize = 1024          # analysis frame size (samples)
ssize = 512           # hop/step size (samples)
num_epochs = 15
batch_s = 2
seg_s = 2             # segment size passed to SoundfileDataset
learn_r = 0.001
log_percent = 0.25
CUDA_ON = True
SHUFFLE_ON = False
DATA_PATH = "./all_metadata.p"

# Hard-coded local test file — TODO(review): make configurable
y, sr = librosa.load("/home/flo/IAML/fma_small/099/099214.mp3", duration=30.0, mono=True)  # id 4470

dataset = SoundfileDataset(path=DATA_PATH, seg_size=seg_s, hotvec=False,
                           cut_data=True, verbose=False, out_type='entr')
print(dataset.data[6964])
print(dataset.data[6965])
print(y.shape)

def calc_entropy(song):
    # NOTE(review): this definition continues past the visible chunk; body is truncated here.
    fsize = 1024  # shadows the module-level fsize — presumably intentional, verify
    ssize = 512
    lenY = song.shape[0]
    lenCut = lenY - (lenY % ssize)  # drop the tail that doesn't fill a whole hop
    if(lenY < fsize):
        print("WTF DUDE!")
# ANSI escape codes used to highlight log output (cyan-bold / reset)
_CB = "\033[36;1m"
_XX = "\033[0m"

# CONST:
IPATH = "../melsset.ln"
BATCH_SIZE = 64
N_PROC = 16          # dataloader worker processes
CUDA_DEVICE = 0 #NOCUDA
MEL_SEG_SIZE = 512 # ~25sec
LOG_COUNT = 100

print("### creating dataset ###")
dset = SoundfileDataset("../all_metadata.p", IPATH, seg_size=30, out_type='pre_mel',
                        mel_seg_size=MEL_SEG_SIZE, verbose=True)

print("### splitting dataset ###")
tset, vset = dset.get_split(sampler=False)

print("### initializing dataloader ###")
# drop_last=True keeps every batch exactly BATCH_SIZE; only training shuffles
tloader = DataLoader(tset, batch_size=BATCH_SIZE, shuffle=True, num_workers=N_PROC, drop_last=True)
vloader = DataLoader(vset, batch_size=BATCH_SIZE, shuffle=False, num_workers=N_PROC, drop_last=True)
# LSTM evaluation setup: mel-spectrogram dimensions and loader configuration.
n_mels = 128          # mel bands per frame
n_time_steps = 1800   # frames per example
NORMALIZE = True
batch_size = 1
num_workers = 1
n_layers = 2

# NOTE(review): n_fft and hop_length must be defined earlier in the file — confirm
datapath = "./mels_set_f{}_h{}_b{}".format(n_fft, hop_length, n_mels)
modelpath = "./lstm_f{}_h{}_b{}".format(n_fft, hop_length, n_mels)
modelName = "lstm_99.nn"
device = "cuda"

dset = SoundfileDataset("./all_metadata.p", ipath=datapath, out_type="mel",
                        normalize=NORMALIZE, n_time_steps=n_time_steps)
tset, vset = dset.get_split(sampler=False)

# no shuffling / no dropping: this configuration evaluates every sample once, in order
TLoader = DataLoader(tset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=num_workers)
VLoader = DataLoader(vset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=num_workers)
def main():
    """Train `Model` on a SoundfileDataset and checkpoint after every epoch.

    Side effects: redirects stdout to ``output.txt`` and writes the whole
    model to ``./model_E{epoch}_P{...}.t`` after each validation pass.
    Depends on module-level names: find_device, Model, SoundfileDataset,
    METADATA_PATH, DATASET_PATH, SEGMENTS, SAMPLES, learn_r, num_epochs,
    BATCH_SIZE, N_PROC.
    """
    sys.stdout = open('output.txt', 'w')  # NOTE(review): never restored or closed
    device = find_device()

    print('=> loading dataset <=')
    dataset = SoundfileDataset(METADATA_PATH, DATASET_PATH, seg_size=3,
                               cut_data=True, out_type='sample')
    print('=> dataset loaded <=')

    model = Model(SEGMENTS, SAMPLES, dataset.n_classes)
    model = model.to(device)
    print(model)

    optimizer = optim.SGD(model.parameters(), lr=learn_r, momentum=0.9, nesterov=True)
    # plateau scheduler keyed on the per-epoch validation loss (see step() below)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.2,
                                                     patience=3, cooldown=0, verbose=True)
    criterion = nn.CrossEntropyLoss()

    train_sampler, valid_sampler = dataset.get_split()
    # drop_last=True: every batch has exactly BATCH_SIZE samples, which the
    # precision denominators below rely on
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                               sampler=train_sampler,
                                               num_workers=N_PROC, drop_last=True)
    validation_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                                    sampler=valid_sampler,
                                                    num_workers=N_PROC, drop_last=True)

    print('=> begin training <=')
    for epoch in range(0, num_epochs):
        print(f'training epoch {epoch}')

        # ---- train ----
        running_loss = 0.0
        abs_prec = 0
        model.train(True)
        with torch.set_grad_enabled(True):
            for X, y in tqdm(train_loader, desc=f'training epoch {epoch}'):
                X = X.to(device)
                y = y.to(device)
                pred = model(X)
                loss = criterion(pred, y.long())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # .item() detaches to a Python float; accumulating loss.data
                # (the original code) keeps tensors alive across iterations
                running_loss += loss.item()
                abs_prec += (pred.max(1)[1] == y).sum().item()
        prec = abs_prec / (len(train_loader) * BATCH_SIZE)
        tqdm.write(f"train precision: {prec*100:.2f}%")

        # ---- validate ----
        running_loss = 0.0
        abs_prec = 0
        model.train(False)
        with torch.set_grad_enabled(False):
            for X, y in tqdm(validation_loader, desc=f'validation epoch {epoch}'):
                X = X.to(device)
                y = y.to(device)
                pred = model(X)
                loss = criterion(pred, y.long())
                # BUGFIX: removed the optimizer.zero_grad() that was here —
                # no gradients exist under set_grad_enabled(False), and the
                # optimizer must not be touched during validation
                running_loss += loss.item()
                abs_prec += (pred.max(1)[1] == y).sum().item()
        prec = abs_prec / (len(validation_loader) * BATCH_SIZE)
        tqdm.write(f"validation precision: {prec*100:.2f}%")

        scheduler.step(running_loss)
        torch.save(model, f"./model_E{epoch}_P{int(prec * 1000)}.t")
# Seed all RNGs from the CLI-supplied seed so runs are reproducible,
# then construct the audio and image datasets for the GAN.
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
cudnn.benchmark = True  # autotune conv kernels for fixed input sizes

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# dataloaders
Mset = None
if opt.conv:
    # conditional/conv variant needs fixed-length mel input (n_time_steps)
    Mset = SoundfileDataset(ipath=ipath, out_type='cgan', n_time_steps=n_time_steps)
else:
    Mset = SoundfileDataset(ipath=ipath, out_type="gan")
assert Mset

# Image dataset: resize to square, normalize each RGB channel to [-1, 1]
dataset = DatasetCust(opt.dataroot,
                      transform=transforms.Compose([
                          transforms.ToPILImage(),
                          transforms.Resize((opt.image_size, opt.image_size)),
                          transforms.ToTensor(),
                          transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                      ]))
nc = 3  # image channel count (RGB)
log_intervall = 200 # ipath = "./mels_set_f8820_h735_b256" ipath = "./mels_set_f{}_h{}_b{}".format(n_fft, hop_length, n_mels) statepath = os.path.join( os.path.join("./out", opt.opath), "ae_n{}_b{}_{}".format(opt.n_fft, opt.n_mels, middle_size)) print('final output-path: {}'.format(statepath)) os.makedirs(statepath, exist_ok=True) # log parameters log_file = open(os.path.join(statepath, "params.txt"), "w") log_file.write(str(opt)) log_file.close() dset = SoundfileDataset(ipath=ipath, out_type="ae", normalize=True) if DEBUG: print('warning, debugging turnned on!') dset.data = dset.data[:100] tset, vset = dset.get_split(sampler=False, split_size=0.2) TLoader = DataLoader(tset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=num_workers) VLoader = DataLoader(vset, batch_size=batch_size, shuffle=False,
import pickle
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from hashlib import md5
from dataset import SoundfileDataset

# Analyze how the genre classes are distributed over the train split.
FILTER = False  # when True, drop entries with an empty genre_top

path = './all_metadata.p'
data = SoundfileDataset(path=path)
train, valid = data.get_indices()
d = pickle.load(open(path, 'rb'))

# Collect the set of genre labels appearing in the metadata.
classes = set()
for val in d.values():
    if FILTER and val['track']['genre_top'] == "":
        continue
    classes.add(val['track']['genre_top'])

# Build both directions of the label <-> index mapping.
idx2lbl = dict(enumerate(classes))
lbl2idx = {v: k for k, v in idx2lbl.items()}
n_classes = len(classes)
print(n_classes)

# Count label occurrences over the training indices.
absolute = np.zeros(n_classes)
train_occ = np.zeros(n_classes)
val_occ = np.zeros(n_classes)
for idx in train:
    train_occ[data.data[idx].label] += 1

# Percentage distribution of labels in the train split.
train_dis = (train_occ / train_occ.sum()) * 100
print(train_occ.sum())
DEBUG = False
LOG = False
log_intervall = 50  # [sic] batches between log outputs

#datapath = "./mels_set_db"
datapath = "./mels_set_f{}_h{}_b{}".format(n_fft, hop_length, n_mels)
statepath = "./lstm_f{}_h{}_b{}_no_max".format(n_fft, hop_length, n_mels)
#statepath = "conv_small_b128"
device = "cuda"

filt_genre = None  # optionally restrict training to a subset of genres
#filt_genre = ['Experimental', 'Instrumental', 'International', 'Pop']

dset = SoundfileDataset("./all_metadata.p", ipath=datapath, out_type="mel",
                        normalize=NORMALIZE, n_time_steps=n_time_steps,
                        filter_list=filt_genre)
if DEBUG:
    dset.data = dset.data[:2000]  # small subset for quick debugging

tset, vset = dset.get_split(sampler=False)
TLoader = DataLoader(tset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=num_workers)
# NOTE(review): statement truncated at the chunk boundary — continues past this view
VLoader = DataLoader(vset, batch_size=batch_size, shuffle=False,