Example #1
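The examples below assume the following imports; get_arguments, load_model, save_model, get_optimizer, audio_data_loader, check_grad, and the model classes (wavenet_autoencoder, WavenetAutoencoder, wavenet) are project-local helpers whose import paths depend on the surrounding repository, so they are not shown here.

import os
import glob
import warnings
from datetime import datetime
from functools import cmp_to_key

import torch
import torch.nn as nn
from torch.autograd import Variable  # deprecated no-op wrapper since PyTorch 0.4
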
def train():
	cuda_available = torch.cuda.is_available()
	train_params, model_params, dataset_params = get_arguments()
	net = wavenet_autoencoder(**model_params)
	epoch_trained = 0
	if train_params['restore_model']:
		net = load_model(net, train_params['restore_dir'], train_params['restore_model'])
		if net is None:
			print("Initialize network and train from scratch.")
			net = wavenet_autoencoder(**model_params)
		else:
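			# Recover the trained-epoch count from the checkpoint filename,
			# assumed to be of the form 'wavenet<epoch>.model' (7-char prefix).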
			epoch_trained = train_params["restore_model"].split('.')[0]
			epoch_trained = int(epoch_trained[7:])
	dataloader = audio_data_loader(**dataset_params)

	if not cuda_available:
		warnings.warn("CUDA is not available, cannot train the model on multiple GPUs.")
	if cuda_available:
		if train_params["device_ids"]:
			batch_size = dataset_params["batch_size"]
			num_gpu = len(train_params["device_ids"])
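			# DataParallel splits each batch across the listed GPUs,
			# so the batch size must divide evenly among them.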
			assert batch_size % num_gpu == 0
			net = nn.DataParallel(net, device_ids=train_params['device_ids'])
		torch.backends.cudnn.benchmark = True
		net = net.cuda()



	optimizer = get_optimizer(net, train_params['optimizer_type'], train_params['learning_rate'], train_params['momentum'])

	loss_func = nn.CrossEntropyLoss()
	if cuda_available:
		loss_func = loss_func.cuda()
	if not os.path.exists(train_params['log_dir']):
		os.makedirs(train_params['log_dir'])
	if not os.path.exists(train_params['restore_dir']):
		os.makedirs(train_params['restore_dir'])
	loss_log_file = open(train_params['log_dir'] + 'loss_log.log', 'a')
	store_log_file = open(train_params['log_dir'] + 'store_log.log', 'a')


	total_loss = 0
	# Resume the running sample count from the last line of the loss log,
	# which has the form 'Trained over <N> pieces, average loss is <loss>'.
	with open(train_params['log_dir'] + 'loss_log.log', 'r') as f:
		lines = f.readlines()
		if len(lines) > 0:
			num_trained = int(lines[-1].split(' ')[2])
		else:
			num_trained = 0

	for epoch in range(train_params['num_epochs']):
		for i_batch,sample_batch in enumerate(dataloader):
			print(i_batch)
			optimizer.zero_grad()
			music_piece = sample_batch['audio_piece']
			target_piece = sample_batch['audio_target']
			if cuda_available:
				# `async` was renamed to `non_blocking` in PyTorch 0.4
				# (async is a reserved word in Python 3.7+).
				music_piece = music_piece.cuda(non_blocking=True)
				target_piece = target_piece.cuda(non_blocking=True)
			music_piece = Variable(music_piece)
			# Flatten targets to shape (N,): CrossEntropyLoss expects one
			# class index per row of output logits.
			target_piece = Variable(target_piece.view(-1))
			outputs = net(music_piece)
			loss = loss_func(outputs,target_piece)
			total_loss += loss.item()  # loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4
			loss.backward()
			optimizer.step()

			num_trained += 1

			if num_trained % train_params['print_every'] == 0:
				avg_loss = total_loss / train_params['print_every']
				line = 'Trained over ' + str(num_trained) + ' pieces, average loss is ' + str(avg_loss) + '\n'
				loss_log_file.writelines(line)
				loss_log_file.flush()
				total_loss = 0

		if (epoch + 1) % train_params['check_point_every'] == 0:
			print(epoch_trained)
			save_model(net, epoch_trained + epoch + 1, train_params['restore_dir'])
			line = 'Epoch ' + str(epoch_trained + epoch + 1) + ' model saved!\n'
			store_log_file.writelines(line)
			store_log_file.flush()
	loss_log_file.close()
	store_log_file.close()
Example #2
def train():

    cuda_available = torch.cuda.is_available()
    train_params, model_params, dataset_params = get_arguments()
    net = WavenetAutoencoder(**model_params)
    epoch_trained = 0
    if train_params['restore_model']:
        net = load_model(net, train_params['restore_dir'],
                         train_params['restore_model'])
        if net is None:
            print("Initialize network and train from scratch.")
            net = WavenetAutoencoder(**model_params)
        else:
            #epoch_trained = train_params["restore_model"].split('.')[0]
            #epoch_trained = int(epoch_trained[7:])
            epoch_trained = 0
    dataloader = audio_data_loader(**dataset_params)

    if not cuda_available:
        warnings.warn(
            "CUDA is not available, cannot train the model on multiple GPUs.")
    if cuda_available:
        # Remove train_params "device_ids" for single GPU
        if train_params["device_ids"]:
            batch_size = dataset_params["batch_size"]
            num_gpu = len(train_params["device_ids"])
            assert batch_size % num_gpu == 0
            net = nn.DataParallel(net, device_ids=train_params['device_ids'])
        torch.backends.cudnn.benchmark = True
        net = net.cuda()

    optimizer = get_optimizer(net, train_params['optimizer'],
                              train_params['learning_rate'],
                              train_params['momentum'])

    loss_func = nn.CrossEntropyLoss()
    if cuda_available:
        loss_func = loss_func.cuda()
    if not os.path.exists(train_params['log_dir']):
        os.makedirs(train_params['log_dir'])
    if not os.path.exists(train_params['restore_dir']):
        os.makedirs(train_params['restore_dir'])
    loss_log_file = open(train_params['log_dir'] + 'loss_log.log', 'a')
    store_log_file = open(train_params['log_dir'] + 'store_log.log', 'a')

    total_loss = 0
    # Resume the running sample count from the most recent
    # 'Trained over <N> pieces, ...' line in the loss log.
    num_trained = 0
    with open(train_params['log_dir'] + 'loss_log.log', 'r') as f:
        for logged in f:
            if logged.startswith('Trained over'):
                num_trained = int(logged.split(' ')[2])

    # Log the training start time.
    time = str(datetime.now())
    line = 'Training started at ' + time + '\n'
    loss_log_file.writelines(line)
    loss_log_file.flush()

    for epoch in range(train_params['num_epochs']):
        net.train()
        for i_batch, sample_batch in enumerate(dataloader):

            optimizer.zero_grad()
            music_piece = sample_batch['audio_piece']
            target_piece = sample_batch['audio_target']
            if cuda_available:
                # `async` was renamed to `non_blocking` in PyTorch 0.4
                # (async is a reserved word in Python 3.7+).
                music_piece = music_piece.cuda(non_blocking=True)
                target_piece = target_piece.cuda(non_blocking=True)
            print("music_piece size = ", music_piece.size())
            music_piece = Variable(music_piece)
            target_piece = Variable(target_piece.view(-1))
            outputs = net(music_piece)

            print('target size = ', target_piece.data.size())
            print('outputs size = ', outputs.data.size())

            loss = loss_func(outputs, target_piece)
            print("loss is ", loss)

            loss.backward()
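            # check_grad is a project-local helper; the branch below skips the
            # parameter update when gradients are non-finite or too large.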
            if check_grad(net.parameters(), train_params['clip_grad'],
                          train_params['ignore_grad']):
                print('Not a finite gradient or too big, ignoring.')
                optimizer.zero_grad()
                continue

            optimizer.step()
            total_loss += loss.item()  # loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4

            print(num_trained)
            num_trained += 1

            if num_trained % train_params['print_every'] == 0:
                avg_loss = total_loss / train_params['print_every']
                line = ('Trained over ' + str(num_trained) +
                        ' pieces, average loss is ' + str(avg_loss) + '\n')
                loss_log_file.writelines(line)
                loss_log_file.flush()
                total_loss = 0

        if (epoch + 1) % train_params['check_point_every'] == 0:
            stored_models = glob.glob(train_params['restore_dir'] + '*.model')
            # Rotate checkpoints: once max_check_points files exist, delete
            # the oldest one. Epoch numbers are assumed to follow a
            # 7-character filename prefix (e.g. 'wavenet<epoch>.model').
            if len(stored_models) == train_params['max_check_points']:

                def cmp(x, y):
                    x = os.path.splitext(x)[0]
                    x = os.path.split(x)[-1]
                    y = os.path.splitext(y)[0]
                    y = os.path.split(y)[-1]
                    x = int(x[7:])
                    y = int(y[7:])
                    return x - y

                sorted_models = sorted(stored_models, key=cmp_to_key(cmp))
                os.remove(sorted_models[0])
            print(epoch_trained)
            save_model(net, epoch_trained + epoch + 1,
                       train_params['restore_dir'])
            line = 'Epoch ' + str(epoch_trained + epoch + 1) + ' model saved!\n'
            store_log_file.writelines(line)
            store_log_file.flush()

    # Log the training end time.
    time = str(datetime.now())
    line = 'Training ended at ' + time + '\n'
    loss_log_file.writelines(line)
    loss_log_file.flush()
    loss_log_file.close()
    store_log_file.close()
Example #3
def train():
    '''
    Check whether cuda is available.
    '''
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.backends.cudnn.benchmark = True
    '''
    Get all needed parameters.
    All parameters are stored in JSON files in the directory './params'.
    To change the settings, simply modify the JSON files in './params/'.
    '''
    train_params, wavenet_params, dataset_params = get_arguments()
    '''
    Launch instances of wavenet model and dataloader.
    '''
    net = wavenet(**wavenet_params)
    epoch_trained = 0
    if train_params["restore_model"]:
        net = load_model(net, train_params["restore_dir"],
                         train_params["restore_model"])
        if net is None:
            print("Initialize network and train from scratch.")
            net = wavenet(**wavenet_params)
        else:
            epoch_trained = train_params["restore_model"].split('.')[0]
            epoch_trained = int(epoch_trained[7:])
    dataloader = audio_data_loader(**dataset_params)
    '''
    Decide whether to train the network on a GPU,
    and whether to train it on multiple GPUs.
    '''
    if not cuda_available and train_params["device_ids"] is not None:
        raise ValueError("CUDA is not available, "
                         "cannot train the model on multiple GPUs.")
    if cuda_available:
        if train_params["device_ids"]:
            batch_size = dataset_params["batch_size"]
            num_gpu = len(train_params["device_ids"])
            assert batch_size % num_gpu == 0
            net = nn.DataParallel(net, device_ids=train_params["device_ids"])
        net = net.cuda()
    '''
    Start training.
    Save the model every train_params["check_point_every"] epochs
    to train_params["restore_dir"], keeping at most
    train_params["max_check_points"] models; once that limit is reached,
    the oldest model is replaced by the newest one.
    Logging information, including the average loss over every
    train_params["print_every"] training pieces, is written to
    train_params["log_dir"].
    '''
    print("Start training.")
    print("Writing logging information to ",
          "{}".format(train_params["log_dir"]))
    print("Models are saved in {}".format(train_params["restore_dir"]))
    '''
    Define optimizer and loss function.
    '''
    optimizer = get_optimizer(net, train_params["optimizer"],
                              train_params["learning_rate"],
                              train_params["momentum"])
    loss_func = nn.CrossEntropyLoss()
    if cuda_available:
        loss_func = loss_func.cuda()
    if not os.path.exists(train_params["log_dir"]):
        os.makedirs(train_params["log_dir"])
    if not os.path.exists(train_params["restore_dir"]):
        os.makedirs(train_params["restore_dir"])
    loss_log_file = open(train_params["log_dir"] + 'loss_log.log', 'a')
    store_log_file = open(train_params["log_dir"] + 'store_log.log', 'a')
    '''
    Train in epochs
    '''
    total_loss = 0.0
    # Resume the running sample count from the last line of the loss log,
    # which has the form 'Trained over <N> pieces, average loss is <loss>'.
    with open(train_params["log_dir"] + 'loss_log.log', 'r') as f:
        lines = f.readlines()
        if len(lines) > 0:
            num_trained = int(lines[-1].split(' ')[2])
        else:
            num_trained = 0

    for epoch in range(train_params["num_epochs"]):
        for i_batch, sampled_batch in enumerate(dataloader):
            optimizer.zero_grad()
            piece = sampled_batch["audio_piece"]
            target = sampled_batch["audio_target"]
            if cuda_available:
                # `async` was renamed to `non_blocking` in PyTorch 0.4
                # (async is a reserved word in Python 3.7+).
                piece = piece.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
            piece, target = Variable(piece), Variable(target.view(-1))
            logits = net(piece)
            loss = loss_func(logits, target)
            total_loss += loss.item()  # loss.data[0] fails on 0-dim tensors in PyTorch >= 0.4
            loss.backward()
            optimizer.step()
            '''
            Check whether to write loss information to the log file.
            '''
            num_trained += 1
            if num_trained % train_params["print_every"] == 0:
                avg_loss = total_loss / train_params["print_every"]
                line = "Trained over " + str(num_trained) + " pieces,"
                line += "Average loss is " + str(avg_loss) + "\n"
                loss_log_file.writelines(line)
                loss_log_file.flush()
                total_loss = 0.0
        '''
        Store model per check_point_every epochs.
        '''
        if (epoch + 1) % train_params["check_point_every"] == 0:
            stored_models = glob.glob(train_params["restore_dir"] + "*.model")
            # First, decide whether to delete the oldest model; epoch numbers
            # are assumed to follow a 7-character filename prefix
            # (e.g. 'wavenet<epoch>.model').
            if len(stored_models) == train_params["max_check_points"]:

                def cmp(x, y):
                    x = x.split('/')[-1]
                    y = y.split('/')[-1]
                    x = x.split('.')[0]
                    y = y.split('.')[0]
                    x = int(x[7:])
                    y = int(y[7:])
                    return x - y

                stored_models = sorted(stored_models, key=cmp_to_key(cmp))
                os.remove(stored_models[0])
            # Then store the newest model
            save_model(net, epoch_trained + epoch + 1,
                       train_params["restore_dir"])
            line = "Epoch " + str(epoch_trained + epoch + 1) + \
                   ", model saved!\n"
            store_log_file.writelines(line)
            store_log_file.flush()
    loss_log_file.close()
    store_log_file.close()
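
For reference, here is a minimal sketch of the train_params dictionary these examples read. Every key below appears in the code above, but the values are purely illustrative; the real settings live in the JSON files under './params/'. Note that Example #1 reads 'optimizer_type' where Examples #2 and #3 read 'optimizer'.

train_params = {
    'num_epochs': 100,          # epochs per run
    'learning_rate': 1e-3,
    'momentum': 0.9,
    'optimizer': 'sgd',         # consumed by get_optimizer
    'device_ids': [0, 1],       # GPUs for nn.DataParallel; None disables multi-GPU
    'restore_model': None,      # e.g. 'wavenet10.model' to resume training
    'restore_dir': 'restore/',
    'log_dir': 'log/',
    'print_every': 100,         # log the average loss every N pieces
    'check_point_every': 10,    # save a checkpoint every N epochs
    'max_check_points': 5,      # oldest checkpoint is rotated out (Examples #2, #3)
    'clip_grad': 1.0,           # gradient checks in Example #2
    'ignore_grad': 1e5,
}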