Example #1
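All of the examples on this page come from audio-reactive StyleGAN interpolation scripts and lean on a shared helper module. They appear to assume roughly the following setup; the module paths and the BPM value are our guesses, not confirmed by the source:

# assumed imports for the examples below; exact module paths may differ
from functools import partial

import numpy as np
import torch as th
import librosa as rosa
import kornia.augmentation as kA
import kornia.geometry.transform as kT

import audioreactive as ar  # helper module providing onsets, chroma, gaussian_filter, etc.
from audioreactive import NetworkBend, AddNoise

BPM = 130  # placeholder tempo; Example #2 reads this global constant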
def get_noise(height, width, scale, num_scales, args):
    if width > 256:  # larger sizes don't fit in VRAM; return None to fall back to default or random noise
        return None

    # expand onsets to noise shape
    # send to the GPU, because gaussian_filter is slow on large noise tensors with a high standard deviation
    lo_onsets = args.lo_onsets[:, None, None, None].cuda()
    hi_onsets = args.hi_onsets[:, None, None, None].cuda()

    # 1s inside a circle of the given radius, 0s outside
    mask = circular_mask(height, width, radius=int(width / 2),
                         soft=2)[None, None, ...].float().cuda()

    # create noise which changes quickly (small standard deviation smoothing)
    noise_noisy = ar.gaussian_filter(
        th.randn((args.n_frames, 1, height, width), device="cuda"), 5)

    # create noise which changes slowly (large standard deviation smoothing)
    noise = ar.gaussian_filter(
        th.randn((args.n_frames, 1, height, width), device="cuda"), 128)

    # in lower-resolution layers, noise inside the circle is driven by low onsets
    if width < 128:
        noise = 2 * mask * lo_onsets * noise_noisy + (1 - mask) * (
            1 - lo_onsets) * noise
    # in higher-resolution layers, noise outside the circle is driven by high onsets
    if width > 32:
        noise = 0.75 * (1 - mask) * hi_onsets * noise_noisy + mask * (
            1 - 0.75 * hi_onsets) * noise

    # keep the noise amplitude close to a standard normal distribution (dividing by the std. dev. alone would normalize it exactly; the extra factor of 2 reduces the amplitude)
    noise /= noise.std() * 2

    return noise.cpu()
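Example #1 calls a circular_mask helper that isn't shown. A minimal sketch of plausible behavior, assuming soft controls the width of the edge falloff (the real helper may differ):

def circular_mask(height, width, radius, soft=0):
    # 1s inside a circle of the given radius, 0s outside
    y = th.arange(height).float()[:, None] - height / 2
    x = th.arange(width).float()[None, :] - width / 2
    dist = th.sqrt(x ** 2 + y ** 2)
    if soft == 0:
        return dist < radius
    # smooth sigmoid falloff: ~1 well inside the radius, ~0 well outside
    return th.sigmoid((radius - dist) / soft)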
Example #2
def get_latents(selection, args):
    # expand envelopes to latent shape
    rms = args.rms[:, None, None]
    low_onsets = args.kick_onsets[:, None, None]
    high_onsets = args.snare_onsets[:, None, None]

    # get timestamps and labels with laplacian segmentation
    # k is the number of labels the algorithm may use
    # try multiple values with plot=True to see which value correlates best with the sections of the song
    timestamps, labels = ar.laplacian_segmentation(args.audio, args.sr, k=7)

    # a second set of latents for the drop sections; the 'selection' argument holds the set used in the intro
    drop_selection = ar.load_latents("workspace/cyphept_kelp_drop_latents.npy")
    color_layer = 9

    latents = []
    for (start, stop), l in zip(zip(timestamps, timestamps[1:]), labels):
        start_frame = int(round(start / args.duration * args.n_frames))
        stop_frame = int(round(stop / args.duration * args.n_frames))
        section_frames = stop_frame - start_frame
        section_bars = (stop - start) * (BPM / 60) / 4  # BPM is a global constant holding the track's tempo

        # get portion of latent selection (wrapping around to start)
        latent_selection_slice = ar.wrapping_slice(selection, l, 4)
        # spline interpolation loops through selection slice
        latent_section = ar.spline_loops(latent_selection_slice,
                                         n_frames=section_frames,
                                         n_loops=section_bars / 4)
        # set the color using the laplacian segmentation label (one latent repeated across the entire section in the upper layers)
        latent_section[:, color_layer:] = th.cat(
            [selection[[l], color_layer:]] * section_frames)

        # same as above but for the drop latents (with faster loops)
        drop_selection_slice = ar.wrapping_slice(drop_selection, l, 4)
        drop_section = ar.spline_loops(drop_selection_slice,
                                       n_frames=section_frames,
                                       n_loops=section_bars / 2)
        drop_section[:, color_layer:] = th.cat(
            [drop_selection[[l], color_layer:]] * section_frames)

        # blend the two sequences based on RMS (high RMS selects the drop latents)
        latents.append((1 - rms[start_frame:stop_frame]) * latent_section +
                       rms[start_frame:stop_frame] * drop_section)

    # concatenate latents to the correct length & smooth over the junctions
    len_latents = sum(len(l) for l in latents)
    if len_latents < args.n_frames:  # pad with copies of the final latent if the sections fall short
        latents.append(
            th.cat([latents[-1][[-1]]] * (args.n_frames - len_latents)))
    latents = th.cat(latents).float()[: args.n_frames]
    latents = ar.gaussian_filter(latents, 3)

    # use onsets to modulate the sequence toward specific latent vectors
    latents = 0.666 * low_onsets * selection[[2]] + (
        1 - 0.666 * low_onsets) * latents
    latents = 0.666 * high_onsets * selection[[1]] + (
        1 - 0.666 * high_onsets) * latents

    latents = ar.gaussian_filter(latents, 1, causal=0.2)
    return latents
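Example #2 leans on two helpers, ar.wrapping_slice and ar.spline_loops. Minimal sketches of the behavior the calls above seem to rely on, assuming latents are indexed along their first axis (these are guesses, not the library's actual code):

def wrapping_slice(tensor, start, length):
    # take `length` consecutive entries starting at `start`, wrapping
    # around to the beginning when the end of the tensor is reached
    return tensor[[(start + i) % len(tensor) for i in range(length)]]

def spline_loops(latents, n_frames, n_loops):
    # loop smoothly through the given latents by fitting a periodic spline
    # and sampling it `n_loops` times over `n_frames` frames
    from scipy.interpolate import CubicSpline
    y = th.cat([latents, latents[[0]]]).numpy()  # close the loop
    spline = CubicSpline(np.linspace(0, 1, len(y)), y, bc_type="periodic", axis=0)
    return th.from_numpy(spline(np.linspace(0, n_loops, n_frames) % 1.0)).float()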
Example #3
def initialize(args):
    # RMS can be used to distinguish between the drop sections and intro/outros
    rms = ar.rms(args.audio,
                 args.sr,
                 args.n_frames,
                 smooth=10,
                 clip=60,
                 power=1)
    rms = ar.expand(rms, threshold=0.8, ratio=10)
    rms = ar.gaussian_filter(rms, 4)
    rms = ar.normalize(rms)
    args.rms = rms

    # cheating a little here: this is my song, so I have the multitracks
    # this is much easier than fiddling with onsets until you have envelopes that dance nicely to the drums
    audio, sr = rosa.load("workspace/kelpkick.wav",
                          offset=args.offset,
                          duration=args.duration)
    args.kick_onsets = ar.onsets(audio, sr, args.n_frames, margin=1, smooth=4)
    audio, sr = rosa.load("workspace/kelpsnare.wav",
                          offset=args.offset,
                          duration=args.duration)
    args.snare_onsets = ar.onsets(audio, sr, args.n_frames, margin=1, smooth=4)

    ar.plot_signals([args.rms, args.kick_onsets, args.snare_onsets])

    return args
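The ar.expand call above reads like a dynamics expander applied to the RMS envelope: values above the threshold get pushed further up by the ratio, exaggerating the difference between the drops and the quiet sections before normalization. A rough sketch under that assumption (the library's actual implementation may differ):

def expand(signal, threshold=0.5, ratio=2):
    # upward expander: values above the threshold are pushed further
    # above it by `ratio`; values below it are left untouched
    return th.where(signal > threshold,
                    threshold + (signal - threshold) * ratio,
                    signal)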
Example #4
def get_noise(height, width, scale, num_scales, args):
    if width > 256:  # larger sizes don't fit in VRAM; return None to fall back to default or random noise
        return None

    # expand onsets to noise shape and send to the GPU, boosted slightly for a stronger reaction
    lo_onsets = 1.25 * args.low_onsets[:, None, None, None].cuda()
    hi_onsets = 1.25 * args.high_onsets[:, None, None, None].cuda()

    # noise which changes quickly (small standard deviation smoothing)
    noise_noisy = ar.gaussian_filter(th.randn((args.n_frames, 1, height, width), device="cuda"), 5)

    # noise which changes slowly (large standard deviation smoothing)
    noise = ar.gaussian_filter(th.randn((args.n_frames, 1, height, width), device="cuda"), 128)

    # in all but the lowest-resolution layers, pull toward the fast noise on both low and high onsets
    if width > 8:
        noise = lo_onsets * noise_noisy + (1 - lo_onsets) * noise
        noise = hi_onsets * noise_noisy + (1 - hi_onsets) * noise

    # scale the noise amplitude down toward a standard normal distribution
    noise /= noise.std() * 2.5

    return noise.cpu()
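Every example smooths its envelopes, latents, and noise over time with ar.gaussian_filter. A sketch of a temporal gaussian filter consistent with how it is called above; the causal handling (down-weighting the future half of the kernel so reactions lag the audio less) is our guess at the semantics:

import torch.nn.functional as F

def gaussian_filter(x, sigma, causal=None):
    # smooth a [n_frames, ...] tensor along its first (time) axis
    radius = max(int(4 * sigma), 1)
    t = th.arange(-radius, radius + 1, dtype=th.float32, device=x.device)
    kernel = th.exp(-0.5 * (t / sigma) ** 2)
    if causal is not None:
        kernel[radius + 1:] *= causal  # down-weight future frames
    kernel /= kernel.sum()
    shape = x.shape
    flat = x.reshape(shape[0], -1).T[:, None]   # [channels, 1, n_frames]
    flat = F.pad(flat, (radius, radius), mode="reflect")
    return F.conv1d(flat, kernel[None, None])[:, 0].T.reshape(shape)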
Example #5
def get_latents(selection, args):
    # create chromagram weighted sequence
    chroma = ar.chroma(args.audio, args.sr, args.n_frames)
    chroma_latents = ar.chroma_weight_latents(chroma, selection)
    latents = ar.gaussian_filter(chroma_latents, 4)

    # expand onsets to latent shape
    lo_onsets = args.lo_onsets[:, None, None]
    hi_onsets = args.hi_onsets[:, None, None]

    # modulate the latents toward specific latent vectors on onsets
    latents = hi_onsets * selection[[-4]] + (1 - hi_onsets) * latents
    latents = lo_onsets * selection[[-7]] + (1 - lo_onsets) * latents

    latents = ar.gaussian_filter(latents, 2, causal=0.2)

    return latents
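ar.chroma_weight_latents pairs each of the 12 chroma bins with one latent and mixes them by the bins' per-frame energy, so the track's harmony steers the visual content. A sketch assuming chroma has shape [n_frames, 12] and latents [12, n_layers, 512] (the real function may differ):

def chroma_weight_latents(chroma, latents):
    # normalize each frame's chroma bins into a convex combination,
    # then blend the 12 latents with those weights
    weights = chroma / chroma.sum(dim=1, keepdim=True)
    return th.einsum("fc,cls->fls", weights, latents)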
Example #6
def get_latents(selection, args):
    chroma = ar.chroma(args.audio, args.sr, args.n_frames)
    chroma_latents = ar.chroma_weight_latents(chroma, selection[:12])  # shape [n_frames, 18, 512]
    latents = ar.gaussian_filter(chroma_latents, 5)

    lo_onsets = args.low_onsets[:, None, None]  # expand to same shape as latents [n_frames, 1, 1]
    hi_onsets = args.high_onsets[:, None, None]

    latents = hi_onsets * selection[[-4]] + (1 - hi_onsets) * latents
    latents = lo_onsets * selection[[-7]] + (1 - lo_onsets) * latents

    latents = ar.gaussian_filter(latents, 5, causal=0)

    # cheating a little, you could probably do this with laplacian segmentation, but is it worth the effort?
    # frame indices of the drop boundaries at 45 s and 135 s (5591 is presumably the hard-coded total frame count)
    drop_start = int(5591 * (45 / args.duration))
    drop_end = int(5591 * (135 / args.duration))

    # selection of latents with different colors (chosen with select_latents.py)
    color_latent_selection = th.from_numpy(np.load("workspace/cyphept-multicolor-latents.npy"))

    # build sequence of latents for just the upper layers
    color_layer = 9
    color_latents = [latents[:drop_start, color_layer:]]

    # for each of the 4 sections in the drop, use a different color latent
    drop_length = drop_end - drop_start
    section_length = drop_length // 4
    for i in range(4):
        color_latents.append(th.cat([color_latent_selection[[i], color_layer:]] * section_length))

    # pad any rounding remainder with the final section's color, then append the post-drop latents
    remainder = drop_length - 4 * section_length
    if remainder > 0:
        color_latents.append(th.cat([color_latent_selection[[3], color_layer:]] * remainder))
    color_latents.append(latents[drop_end:, color_layer:])
    color_latents = th.cat(color_latents, dim=0)

    color_latents = ar.gaussian_filter(color_latents, 5)

    # set upper layers of latent sequence to the colored sequence
    latents[:, color_layer:] = color_latents

    return latents
Example #7
def get_bends(args):
    # repeat the intermediate features outwards on both sides (2:1 aspect ratio)
    # + add some noise to give the whole thing a little variation (disguises the repetition)
    transform = th.nn.Sequential(
        th.nn.ReplicationPad2d((2, 2, 0, 0)), ar.AddNoise(0.025 * th.randn(size=(1, 1, 4, 8), device="cuda")),
    )
    bends = [{"layer": 0, "transform": transform}]

    # during the drop, create scrolling effect
    drop_start = int(5591 * (45 / args.duration))
    drop_end = int(5591 * (135 / args.duration))

    # calculate length of loops, number of loops, and remainder at end of drop
    scroll_loop_length = int(6 * args.fps)
    scroll_loop_num = int((drop_end - drop_start) / scroll_loop_length)
    scroll_trunc = (drop_end - drop_start) - scroll_loop_num * scroll_loop_length

    # apply network bending to 4th layer in StyleGAN
    # lower layer network bends have more fluid outcomes
    tl = 4
    h = 2 ** tl
    w = 2 * h

    # create values between 0 and w corresponding to the number of pixels scrolled from left to right
    # all 0s during intro
    intro_tl8 = np.zeros(drop_start)
    # repeating linear interpolation from 0 to w during the drop
    loops_tl8 = np.concatenate([np.linspace(0, w, scroll_loop_length)] * scroll_loop_num)
    # truncated final loop
    last_loop_tl8 = np.linspace(0, w, scroll_loop_length)[:scroll_trunc]
    # hold the final truncated value during the outro
    outro_tl8 = np.full(args.n_frames - drop_end, last_loop_tl8[-1] if scroll_trunc > 0 else 0.0)

    # create 2D array of translations in x and y directions
    x_tl8 = np.concatenate([intro_tl8, loops_tl8, last_loop_tl8, outro_tl8])
    y_tl8 = np.zeros(args.n_frames)
    translation = (th.tensor([x_tl8, y_tl8]).float().T)[: args.n_frames]

    # smooth the transition from intro to drop to prevent jerk
    translation.T[0, drop_start - args.fps : drop_start + args.fps] = ar.gaussian_filter(
        translation.T[0, drop_start - 5 * args.fps : drop_start + 5 * args.fps], 5
    )[4 * args.fps : -4 * args.fps]

    class Translate(NetworkBend):
        """From audioreactive/examples/bend.py"""

        def __init__(self, modulation, h, w, noise):
            sequential_fn = lambda b: th.nn.Sequential(
                th.nn.ReflectionPad2d((int(w / 2), int(w / 2), 0, 0)),  #  < Reflect out to 5x width (so that after
                th.nn.ReflectionPad2d((w, w, 0, 0)),  #                    < translating w pixels, center crop gives
                th.nn.ReflectionPad2d((w, 0, 0, 0)),  #                    < same features as translating 0 pixels)
                AddNoise(noise),  # add some noise to disguise reflections
                kT.Translate(b),
                kA.CenterCrop((h, w)),
            )
            super(Translate, self).__init__(sequential_fn, modulation)

    # create static noise for translate bend
    noise = 0.2 * th.randn((1, 1, h, 5 * w), device="cuda")
    # create function which returns an initialized Translate object when fed a batch of modulation
    # this is so that creation of the object is delayed until the specific batch is sent into the generator
    # (there's probably an easier way to do this without the kornia transforms, e.g. using Broad et al.'s transform implementations)
    transform = partial(Translate, h=h, w=w, noise=noise)
    bends += [{"layer": tl, "transform": transform, "modulation": translation}]  # add this network bend to the list

    return bends
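The AddNoise module used by both bends isn't shown either. A plausible minimal version (an assumption, not the repo's actual class) simply adds a fixed noise tensor to the activations, broadcasting over the batch:

class AddNoise(th.nn.Module):
    def __init__(self, noise):
        super().__init__()
        self.noise = noise  # fixed noise tensor, broadcast over the batch

    def forward(self, x):
        return x + self.noise.to(device=x.device, dtype=x.dtype)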