Code Example #1
File: train.py  Project: Flinst0n/tacotron2
def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    print("Saving model and optimizer state at iteration {} to {}".format(
          iteration, filepath))
    model_for_saving = WaveGlow(**waveglow_config).cpu()
    model_for_saving.load_state_dict(model.state_dict())
    torch.save({'model': model_for_saving,
                'iteration': iteration,
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate}, filepath)
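
The checkpoint above stores the full WaveGlow module together with the optimizer state and iteration count. A matching loader, shaped the way Code Example #9 calls load_checkpoint, might look like the following sketch (an illustration, not this project's own implementation):

def load_checkpoint(checkpoint_path, model, optimizer):
    # Sketch of a counterpart loader for the checkpoint format saved above.
    checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint_dict['model'].state_dict())
    optimizer.load_state_dict(checkpoint_dict['optimizer'])
    iteration = checkpoint_dict['iteration']
    print("Loaded checkpoint '{}' (iteration {})".format(checkpoint_path, iteration))
    return model, optimizer, iteration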
Code Example #2
def save_checkpoint(model, optimizer, epoch, filepath):
    print(f'Saving model and optimizer state at epoch {epoch} to {filepath}')
    model_for_saving = WaveGlow(**waveglow_config).cuda()
    model_for_saving.load_state_dict(model.state_dict())
    torch.save(
        {
            'model': model_for_saving,
            'epoch': epoch,
            'optimizer': optimizer.state_dict()
        }, filepath)
Code Example #3
def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    model_for_saving = WaveGlow(**waveglow_config).cuda()
    model_for_saving.load_state_dict(model.state_dict())
    torch.save(
        {
            "model": model_for_saving,
            "iteration": iteration,
            "optimizer": optimizer.state_dict(),
            "learning_rate": learning_rate,
        },
        filepath,
    )
Code Example #4
def save_checkpoint(model, optimizer, amp, iteration, filepath):
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    model_for_saving = WaveGlow(**waveglow_config).cuda()
    model_for_saving.load_state_dict(model.state_dict())
    checkpoint = {
        'model': model_for_saving,
        'iteration': iteration,
        'optimizer': optimizer.state_dict(),
        'cuda_rng_state_all': torch.cuda.get_rng_state_all(),
        'random_rng_state': torch.random.get_rng_state()
    }

    if amp is not None:
        checkpoint['amp'] = amp.state_dict()

    torch.save(checkpoint, filepath)
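
This variant additionally saves the CPU and CUDA RNG states and, when Apex amp is in use, the amp state. A hedged sketch of a loader that restores them (illustrative names, not code from this project):

def restore_checkpoint(checkpoint_path, model, optimizer, amp=None):
    # Sketch: restore model/optimizer plus the extra state saved above.
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(ckpt['model'].state_dict())
    optimizer.load_state_dict(ckpt['optimizer'])
    torch.random.set_rng_state(ckpt['random_rng_state'])
    torch.cuda.set_rng_state_all(ckpt['cuda_rng_state_all'])
    if amp is not None and 'amp' in ckpt:
        amp.load_state_dict(ckpt['amp'])
    return model, optimizer, ckpt['iteration']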
Code Example #5
        os.mkdir("results")
    audio.save_wav(wav[0].data.cpu().numpy(),
                   os.path.join("results",
                                str(num) + ".wav"))


if __name__ == "__main__":
    # Test

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    torch.manual_seed(hp.seed)
    torch.cuda.manual_seed(hp.seed)
    model = WaveGlow().cuda()
    checkpoint = torch.load('test/TTSglow_130000')
    model.load_state_dict(checkpoint['model'].state_dict())

    dataset = FastSpeechDataset()
    testing_loader = DataLoader(dataset,
                                batch_size=1,
                                shuffle=False,
                                collate_fn=collate_fn,
                                drop_last=True,
                                num_workers=4)
    model = model.eval()

    for i, data_of_batch in enumerate(testing_loader):
        src_seq = data_of_batch["texts"]
        src_pos = data_of_batch["pos"]

        src_seq = torch.from_numpy(src_seq).long().to(device)
Code Example #6
def main(files, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength, args):
    #mel_files = files_to_list(mel_files)
    #print(mel_files)
    files = ['/local-scratch/fuyang/cmpt726/final_project/cremad/1091_WSI_SAD_XX.wav']
    #files = ['/local-scratch/fuyang/cmpt726/waveglow/data/LJSpeech-1.1/LJ001-0001.wav']
    with open('config.json') as f:
        data = f.read()
    config = json.loads(data)
    waveglow_config = config["waveglow_config"]
    model = WaveGlow(**waveglow_config)
    checkpoint_dict = torch.load('waveglow_256channels_universal_v5.pt', map_location='cpu')
    model_for_loading = checkpoint_dict['model']
    model.load_state_dict(model_for_loading.state_dict())
    model.cuda()
    #waveglow = torch.load(waveglow_path)['model']
    #waveglow = waveglow.remove_weightnorm(waveglow)
    #waveglow.cuda()
    waveglow = model
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O1")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    mel_extractor = Get_mel(1024, 256, 1024, args.sampling_rate, 0.0, 8000.0)

    for i, file_path in enumerate(files):
        audio, rate = load_wav_to_torch(file_path)
        if rate != sampling_rate:
            audio = resampy.resample(audio.numpy(), rate, sampling_rate)
            audio = torch.from_numpy(audio).float()
        #if audio.size(0) >= args.segment_length:
        #    max_audio_start = audio.size(0) - args.segment_length
        #    audio_start = random.randint(0, max_audio_start)
        #    audio = audio[audio_start:audio_start+args.segment_length]
        #else:
        #    audio = torch.nn.functional.pad(audio, (0, args.segment_length-audio.size(0)), 'constant').data
        mel = mel_extractor.get_mel(audio)
        audio = audio / MAX_WAV_VALUE

        mel = torch.autograd.Variable(mel.cuda().unsqueeze(0))
        audio = torch.autograd.Variable(audio.cuda().unsqueeze(0))
        audio = audio.half() if is_fp16 else audio
        mel = mel.half() if is_fp16 else mel
        outputs = waveglow((mel, audio))
        z = outputs[0][:,4:]
        print(outputs)
        mel_up = waveglow.upsample(mel)
        time_cutoff = waveglow.upsample.kernel_size[0]-waveglow.upsample.stride[0]
        mel_up = mel_up[:,:,:-time_cutoff]
        #mel_up = mel_up[:,:,:-(time_cutoff+128)]

        mel_up = mel_up.unfold(2, waveglow.n_group, waveglow.n_group).permute(0,2,1,3)
        mel_up = mel_up.contiguous().view(mel_up.size(0), mel_up.size(1), -1).permute(0, 2, 1)
        audio = z
        mel_up = mel_up[:,:,:audio.size(2)]

        sigma = 0.7
        z_i = 0
        for k in reversed(range(waveglow.n_flows)):
            n_half = int(audio.size(1)/2)
            audio_0 = audio[:,:n_half, :]
            audio_1 = audio[:, n_half:, :]

            output = waveglow.WN[k]((audio_0, mel_up))

            s = output[:,n_half:, :]
            b = output[:, :n_half, :]
            audio_1 = (audio_1-b)/torch.exp(s)
            audio = torch.cat([audio_0, audio_1],1)

            audio = waveglow.convinv[k](audio, reverse=True)

            if k % waveglow.n_early_every == 0 and k > 0:
                z = outputs[0][:, 2-z_i:4-z_i]
                #if mel_up.type() == 'torch.cuda.HalfTensor':
                #    z = torch.cuda.HalfTensor(mel_up.size(0), waveglow.n_early_size, mel_up.size(2)).normal_()
                #else:
                #    z = torch.cuda.FloatTensor(mel_up.size(0), waveglow.n_early_size, mel_up.size(2)).normal_()
                audio = torch.cat((sigma*z, audio),1)
        audio = audio.permute(0,2,1).contiguous().view(audio.size(0), -1).data
        audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        audio = audio.cpu().numpy()
        audio = audio.astype('int16')
        audio_path = os.path.join(
            output_dir, "{}_synthesis.wav".format('fuyangz'))
        write(audio_path, sampling_rate, audio)
        print(audio_path)
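
The loop above inverts WaveGlow's affine coupling layers by hand, computing audio_1 = (audio_1 - b) / exp(s) and undoing the invertible 1x1 convolutions, so that the latent z extracted from a reference recording can be pushed back through the flow. For ordinary mel-to-audio synthesis, where z is instead drawn from the Gaussian prior, the model's infer method (as used in Code Example #8) performs the same inverse pass; a minimal sketch reusing the variables above:

with torch.no_grad():
    audio = waveglow.infer(mel, sigma=0.7)  # inverse flow with prior z ~ N(0, sigma^2)
audio = (audio * MAX_WAV_VALUE).squeeze().cpu().numpy().astype('int16')
write(os.path.join(output_dir, 'prior_synthesis.wav'), sampling_rate, audio)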
Code Example #7
def main(style, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength, args):
    #mel_files = files_to_list(mel_files)
    #print(mel_files)
    dataset = voice_dataset(dataBase={
        'ravdess': './our_data/ravdess',
        'cremad': './our_data/cremad'
    },
                            style=('happy', 'sad', 'angry'))
    #print(len(dataset.final_data['happy']))

    #sample = dataset.pick_one_random_sample('happy')
    files = dataset.final_data[style]
    #files = ['/local-scratch/fuyang/cmpt726/waveglow/data/LJSpeech-1.1/LJ001-0001.wav']
    with open('config.json') as f:
        data = f.read()
    config = json.loads(data)
    waveglow_config = config["waveglow_config"]
    model = WaveGlow(**waveglow_config)
    checkpoint_dict = torch.load('waveglow_256channels_universal_v5.pt',
                                 map_location='cpu')
    model_for_loading = checkpoint_dict['model']
    model.load_state_dict(model_for_loading.state_dict())
    model.cuda()
    waveglow = model
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O1")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    mel_extractor = Get_mel(1024, 256, 1024, args.sampling_rate, 0.0, 8000.0)
    avg_z = np.zeros(8)
    _count = 0
    for i, (_, file_path) in enumerate(files):
        if i > 50:
            break
        try:
            audio, rate = load_wav_to_torch(file_path)
            if rate != sampling_rate:
                audio = resampy.resample(audio.numpy(), rate, sampling_rate)
                audio = torch.from_numpy(audio).float()
            #if audio.size(0) >= args.segment_length:
            #    max_audio_start = audio.size(0) - args.segment_length
            #    audio_start = random.randint(0, max_audio_start)
            #    audio = audio[audio_start:audio_start+args.segment_length]
            #else:
            #    audio = torch.nn.functional.pad(audio, (0, args.segment_length-audio.size(0)), 'constant').data
            mel = mel_extractor.get_mel(audio)
            audio = audio / MAX_WAV_VALUE

            mel = torch.autograd.Variable(mel.cuda().unsqueeze(0))
            audio = torch.autograd.Variable(audio.cuda().unsqueeze(0))
            audio = audio.half() if is_fp16 else audio
            mel = mel.half() if is_fp16 else mel
            outputs = waveglow((mel, audio))
            avg_z += outputs[0].squeeze(0).mean(1).detach().cpu().numpy()
            _count += 1
            z = outputs[0][:, 4:]

            #print(outputs)
            mel_up = waveglow.upsample(mel)
            time_cutoff = waveglow.upsample.kernel_size[
                0] - waveglow.upsample.stride[0]
            mel_up = mel_up[:, :, :-time_cutoff]
            #mel_up = mel_up[:,:,:-(time_cutoff+128)]

            mel_up = mel_up.unfold(2, waveglow.n_group,
                                   waveglow.n_group).permute(0, 2, 1, 3)
            mel_up = mel_up.contiguous().view(mel_up.size(0), mel_up.size(1),
                                              -1).permute(0, 2, 1)
            audio = z
            mel_up = mel_up[:, :, :audio.size(2)]

            sigma = 0.7
            z_i = 0
            for k in reversed(range(waveglow.n_flows)):
                n_half = int(audio.size(1) / 2)
                audio_0 = audio[:, :n_half, :]
                audio_1 = audio[:, n_half:, :]

                output = waveglow.WN[k]((audio_0, mel_up))

                s = output[:, n_half:, :]
                b = output[:, :n_half, :]
                audio_1 = (audio_1 - b) / torch.exp(s)
                audio = torch.cat([audio_0, audio_1], 1)

                audio = waveglow.convinv[k](audio, reverse=True)

                if k % waveglow.n_early_every == 0 and k > 0:
                    z = outputs[0][:, 2 - z_i:4 - z_i]
                    #if mel_up.type() == 'torch.cuda.HalfTensor':
                    #    z = torch.cuda.HalfTensor(mel_up.size(0), waveglow.n_early_size, mel_up.size(2)).normal_()
                    #else:
                    #    z = torch.cuda.FloatTensor(mel_up.size(0), waveglow.n_early_size, mel_up.size(2)).normal_()
                    audio = torch.cat((sigma * z, audio), 1)
            audio = audio.permute(0, 2,
                                  1).contiguous().view(audio.size(0), -1).data
            audio = audio * MAX_WAV_VALUE
            audio = audio.squeeze()
            audio = audio.cpu().numpy()
            audio = audio.astype('int16')
            audio_path = os.path.join(
                output_dir, "{}_synthesis.wav".format(file_path[:-4]))
            os.makedirs(os.path.dirname(audio_path), exist_ok=True)
            write(audio_path, sampling_rate, audio)
            print(audio_path)
        except Exception:
            continue

    avg_z = avg_z / _count
    np.save(style, avg_z)
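
Each run writes <style>.npy holding the latent vector averaged over the processed files. A hedged sketch of comparing the saved style vectors afterwards (file names follow the np.save call above):

import numpy as np

happy_z = np.load('happy.npy')
sad_z = np.load('sad.npy')
angry_z = np.load('angry.npy')
print(np.linalg.norm(happy_z - sad_z), np.linalg.norm(happy_z - angry_z))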
Code Example #8
File: tts.py  Project: malarinv/tacotron2
class TTSModel(object):
    """docstring for TTSModel."""
    def __init__(self, tacotron2_path, waveglow_path, **kwargs):
        super(TTSModel, self).__init__()
        hparams = HParams(**kwargs)
        self.hparams = hparams
        self.model = Tacotron2(hparams)
        if torch.cuda.is_available():
            self.model.load_state_dict(
                torch.load(tacotron2_path)["state_dict"])
            self.model.cuda().eval()
        else:
            self.model.load_state_dict(
                torch.load(tacotron2_path, map_location="cpu")["state_dict"])
            self.model.eval()
        self.k_cache = klepto.archives.file_archive(cached=False)
        if waveglow_path:
            if torch.cuda.is_available():
                wave_params = torch.load(waveglow_path)
            else:
                wave_params = torch.load(waveglow_path, map_location="cpu")
            try:
                self.waveglow = WaveGlow(**WAVEGLOW_CONFIG)
                self.waveglow.load_state_dict(wave_params)
            except Exception:
                self.waveglow = wave_params["model"]
                self.waveglow = self.waveglow.remove_weightnorm(self.waveglow)
            if torch.cuda.is_available():
                self.waveglow.cuda().eval()
            else:
                self.waveglow.eval()
            # workaround from
            # https://github.com/NVIDIA/waveglow/issues/127
            for m in self.waveglow.modules():
                if "Conv" in str(type(m)):
                    setattr(m, "padding_mode", "zeros")
            for k in self.waveglow.convinv:
                k.float().half()
            self.denoiser = Denoiser(self.waveglow,
                                     n_mel_channels=hparams.n_mel_channels)
            self.synth_speech = klepto.safe.inf_cache(cache=self.k_cache)(
                self._synth_speech)
        else:
            self.synth_speech = klepto.safe.inf_cache(cache=self.k_cache)(
                self._synth_speech_fast)
        self.taco_stft = TacotronSTFT(
            hparams.filter_length,
            hparams.hop_length,
            hparams.win_length,
            n_mel_channels=hparams.n_mel_channels,
            sampling_rate=hparams.sampling_rate,
            mel_fmax=4000,
        )

    def _generate_mel_postnet(self, text):
        sequence = np.array(text_to_sequence(text,
                                             ["english_cleaners"]))[None, :]
        if torch.cuda.is_available():
            sequence = torch.autograd.Variable(
                torch.from_numpy(sequence)).cuda().long()
        else:
            sequence = torch.autograd.Variable(
                torch.from_numpy(sequence)).long()
        with torch.no_grad():
            mel_outputs, mel_outputs_postnet, _, alignments = self.model.inference(
                sequence)
        return mel_outputs_postnet

    def synth_speech_array(self, text, vocoder):
        mel_outputs_postnet = self._generate_mel_postnet(text)

        if vocoder == VOCODER_WAVEGLOW:
            with torch.no_grad():
                audio_t = self.waveglow.infer(mel_outputs_postnet, sigma=0.666)
                audio_t = self.denoiser(audio_t, 0.1)[0]
            audio = audio_t[0].data
        elif vocoder == VOCODER_GL:
            mel_decompress = self.taco_stft.spectral_de_normalize(
                mel_outputs_postnet)
            mel_decompress = mel_decompress.transpose(1, 2).data.cpu()
            spec_from_mel_scaling = 1000
            spec_from_mel = torch.mm(mel_decompress[0],
                                     self.taco_stft.mel_basis)
            spec_from_mel = spec_from_mel.transpose(0, 1).unsqueeze(0)
            spec_from_mel = spec_from_mel * spec_from_mel_scaling
            spec_from_mel = (spec_from_mel.cuda()
                             if torch.cuda.is_available() else spec_from_mel)
            audio = griffin_lim(
                torch.autograd.Variable(spec_from_mel[:, :, :-1]),
                self.taco_stft.stft_fn,
                GL_ITERS,
            )
            audio = audio.squeeze()
        else:
            raise ValueError("vocoder arg should be one of [waveglow|gl]")
        audio = audio.cpu().numpy()
        return audio

    def _synth_speech(self,
                      text,
                      speed: float = 1.0,
                      sample_rate: int = OUTPUT_SAMPLE_RATE):
        audio = self.synth_speech_array(text, VOCODER_WAVEGLOW)

        return postprocess_audio(
            audio,
            src_rate=self.hparams.sampling_rate,
            dst_rate=sample_rate,
            tempo=speed,
        )

    def _synth_speech_fast(self,
                           text,
                           speed: float = 1.0,
                           sample_rate: int = OUTPUT_SAMPLE_RATE):
        audio = self.synth_speech_array(text, VOCODER_GL)

        return postprocess_audio(
            audio,
            tempo=speed,
            src_rate=self.hparams.sampling_rate,
            dst_rate=sample_rate,
        )
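
A hedged usage sketch of the class (checkpoint paths are placeholders; any extra keyword arguments are forwarded to HParams):

tts = TTSModel("tacotron2_statedict.pt", "waveglow_256channels_universal_v5.pt")
audio = tts.synth_speech("Hello world.")  # WaveGlow path, memoized via klepto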
Code Example #9
def train(num_gpus, rank, group_name, output_directory, epochs, learning_rate,
          sigma, iters_per_checkpoint, batch_size, seed, fp16_run,
          checkpoint_path, with_tensorboard):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)
    #=====END:   ADDED FOR DISTRIBUTED======

    criterion = WaveGlowLoss(sigma)
    model = WaveGlow(**waveglow_config).cuda()

    #=====START: ADDED FOR DISTRIBUTED======
    if num_gpus > 1:
        model = apply_gradient_allreduce(model)
    #=====END:   ADDED FOR DISTRIBUTED======

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    if fp16_run:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Load checkpoint if one exists
    iteration = 0
    if checkpoint_path == "waveglow_256channels_.pt":
        checkpoint_dict = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint_dict['model'].state_dict())
        iteration += 1  # next iteration is iteration + 1
    elif checkpoint_path != "":
        model, optimizer, iteration = load_checkpoint(checkpoint_path, model,
                                                      optimizer)
        iteration += 1  # next iteration is iteration + 1

    trainset = Mel2Samp(**data_config)
    # =====START: ADDED FOR DISTRIBUTED======
    train_sampler = DistributedSampler(trainset) if num_gpus > 1 else None
    # =====END:   ADDED FOR DISTRIBUTED======
    train_loader = DataLoader(trainset, num_workers=1, shuffle=False,
                              sampler=train_sampler,
                              batch_size=batch_size,
                              pin_memory=False,
                              drop_last=True)

    # Get shared output_directory ready
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory)

    if with_tensorboard and rank == 0:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter(os.path.join(output_directory, 'logs'))

    model.train()
    epoch_offset = max(0, int(iteration / len(train_loader)))
    # ================ MAIN TRAINING LOOP! ===================
    for epoch in range(epoch_offset, epochs):
        print("Epoch: {}".format(epoch))
        for i, batch in enumerate(train_loader):
            model.zero_grad()

            mel, audio = batch
            mel = torch.autograd.Variable(mel.cuda())
            audio = torch.autograd.Variable(audio.cuda())
            outputs = model((mel, audio))

            loss = criterion(outputs)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()

            if fp16_run:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            optimizer.step()

            print("{}:\t{:.9f}".format(iteration, reduced_loss))
            if with_tensorboard and rank == 0:
                logger.add_scalar('training_loss', reduced_loss, i + len(train_loader) * epoch)

            if (iteration % iters_per_checkpoint == 0):
                if rank == 0:
                    checkpoint_path = "{}/waveglow_{}".format(
                        output_directory, iteration)
                    save_checkpoint(model, optimizer, learning_rate, iteration,
                                    checkpoint_path)

            iteration += 1
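
A hedged sketch of a typical invocation: in the reference layout the remaining arguments come from a train_config section of config.json (the key name, and the fact that waveglow_config/data_config/dist_config are loaded into module globals beforehand, are assumptions about the surrounding script):

with open('config.json') as f:
    config = json.loads(f.read())
train_config = config["train_config"]  # assumed to mirror train()'s keyword parameters
train(num_gpus=1, rank=0, group_name='', **train_config)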
Code Example #10
def main(style, waveglow_path, sigma, output_dir, sampling_rate, is_fp16,
         denoiser_strength, args):
    #mel_files = files_to_list(mel_files)
    #print(mel_files)
    dataset = voice_dataset(dataBase={
        'ravdess': './our_data/ravdess',
        'cremad': './our_data/cremad'
    },
                            style=('happy', 'sad', 'angry'))
    #print(len(dataset.final_data['happy']))

    #sample = dataset.pick_one_random_sample('happy')
    styles = ['happy', 'sad', 'angry']
    with open('config.json') as f:
        data = f.read()
    config = json.loads(data)
    waveglow_config = config["waveglow_config"]
    model = WaveGlow(**waveglow_config)
    checkpoint_dict = torch.load('waveglow_256channels_universal_v5.pt',
                                 map_location='cpu')
    model_for_loading = checkpoint_dict['model']
    model.load_state_dict(model_for_loading.state_dict())
    model.cuda()
    waveglow = model
    if is_fp16:
        from apex import amp
        waveglow, _ = amp.initialize(waveglow, [], opt_level="O1")

    if denoiser_strength > 0:
        denoiser = Denoiser(waveglow).cuda()

    mel_extractor = Get_mel(1024, 256, 1024, args.sampling_rate, 0.0, 8000.0)

    vector_all = {}
    for style in styles:
        files = dataset.final_data[style].copy()
        random.shuffle(files)

        vectors = []
        for i, (_, file_path) in enumerate(files):
            if i > 200:
                break
            try:
                audio, rate = load_wav_to_torch(file_path)
                if rate != sampling_rate:
                    audio = resampy.resample(audio.numpy(), rate,
                                             sampling_rate)
                    audio = torch.from_numpy(audio).float()
                #if audio.size(0) >= args.segment_length:
                #    max_audio_start = audio.size(0) - args.segment_length
                #    audio_start = random.randint(0, max_audio_start)
                #    audio = audio[audio_start:audio_start+args.segment_length]
                #else:
                #    audio = torch.nn.functional.pad(audio, (0, args.segment_length-audio.size(0)), 'constant').data
                mel = mel_extractor.get_mel(audio)
                audio = audio / MAX_WAV_VALUE

                mel = torch.autograd.Variable(mel.cuda().unsqueeze(0))
                audio = torch.autograd.Variable(audio.cuda().unsqueeze(0))
                audio = audio.half() if is_fp16 else audio
                mel = mel.half() if is_fp16 else mel
                outputs = waveglow((mel, audio))
                vectors.append(
                    outputs[0].squeeze(0).mean(1).detach().cpu().numpy())
                print(style, i)
            except Exception:
                continue

        vector_all[style] = vectors

    np.save('all_style_vector', vector_all)
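
np.save serializes the dict of per-style latent vectors by pickling it into all_style_vector.npy; a hedged sketch of reading it back:

import numpy as np

# allow_pickle is required because the saved object is a Python dict, and
# .item() unwraps the 0-d object array that np.save produced.
vector_all = np.load('all_style_vector.npy', allow_pickle=True).item()
happy_mean = np.stack(vector_all['happy']).mean(axis=0)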
Code Example #11
File: train.py  Project: Mr-DDDAlKilanny/waveglow
def save_checkpoint(model, optimizer, learning_rate, iteration, filepath,
                    drive_fid):
    print("Saving model and optimizer state at iteration {} to {}".format(
        iteration, filepath))
    model_for_saving = WaveGlow(**waveglow_config).cuda()
    model_for_saving.load_state_dict(model.state_dict())
    torch.save(
        {
            'model': model_for_saving,
            'iteration': iteration,
            'optimizer': optimizer.state_dict(),
            'learning_rate': learning_rate
        }, filepath)
    uploaded = False
    attempt = 0
    file_title = filepath[filepath.find("/") + 1:]
    while not uploaded and attempt < 10:
        attempt += 1
        try:
            if gauth.credentials is None:
                # Authenticate if they're not there
                gauth.LocalWebserverAuth()
            elif gauth.access_token_expired:
                # Refresh them if expired
                print("Google Drive Token Expired, Refreshing")
                gauth.Refresh()
            else:
                # Initialize the saved creds
                gauth.Authorize()
            # Save the current credentials to a file
            # gauth.SaveCredentialsFile("GoogleDriveCredentials.txt")
            f = drive.CreateFile({
                'title':
                file_title,
                "parents": [{
                    "kind": "drive#fileLink",
                    "id": drive_fid
                }]
            })
            f.SetContentFile(filepath)
            f.Upload()
            uploaded = True
            # no break here: fall through to the verification step below,
            # and let the while condition end the loop once uploaded stays True
        except Exception:
            print("Failed uploading to drive at attempt #{}".format(attempt))
            sleep(30)
        if uploaded:
            try:
                ok = False
                for file in drive.ListFile({
                        'q':
                        "'" + drive_fid + "' in parents"
                }).GetList():
                    if file['title'] == file_title:
                        if file["fileSize"] > 4000000:
                            ok = True
                            print("File was successfully uploaded")
                        else:
                            file.Delete()
                            uploaded = False
                            print("File was not uploaded normally. Deleting")
                            sleep(30)
                        break
                if ok:
                    for file in drive.ListFile({
                            'q':
                            "'" + drive_fid + "' in parents"
                    }).GetList():
                        if file['title'] != file_title:
                            file.Delete()
                            sleep(30)  # make sure the file is deleted from drive first
            except Exception:
                pass
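
The save_checkpoint above relies on module-level gauth and drive objects; a hedged sketch of the PyDrive setup it assumes (not shown in the file):

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

# Module-level objects referenced by save_checkpoint above (assumed setup).
gauth = GoogleAuth()
drive = GoogleDrive(gauth)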