Example #1
def gen_hihat(all_data, fs, fps, cand):
    fps = librosa.samples_to_frames(fs, hop_length=hop_len, n_fft=win_len)
    fps = 100
    print(cand)
    proc = BeatTrackingProcessor(look_aside=0.2, fps=fps)
    act = RNNBeatProcessor()(all_data)
    beat_times = proc(act)

    song_len = librosa.samples_to_time(data.shape, sr=fs)[0]
    hihat = np.zeros(all_data.shape)
    idx = np.where(beat_times <= song_len)[0]
    new_beat_times = np.zeros(idx.shape)
    new_beat_times[idx] = beat_times[idx]
    beat_samples = librosa.time_to_samples(new_beat_times, sr=fs)
    start = librosa.frames_to_samples(cand[0], hop_len, n_fft=win_len)
    end = librosa.frames_to_samples(cand[-1], hop_len, n_fft=win_len)
    cand_len = end - start

    i = 3
    is_hihat = np.zeros(beat_samples.shape)
    while i < len(beat_samples):
        is_hihat[i] = 1
        i = i + 4
    for i, s in enumerate(beat_samples):
        if is_hihat[i] == 1:
            if s + cand_len > hihat.shape[0]:
                break
            hihat[s:s + cand_len] = data[start:end]

    return hihat, new_beat_times, beat_samples
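The placement loop above marks every fourth beat starting at index 3, i.e. the last beat of each assumed 4/4 bar. A minimal sketch of the same selection (toy indices of my own, not project data):

import numpy as np

beat_samples = np.arange(16)           # toy beat indices
is_hihat = np.zeros(beat_samples.shape)
is_hihat[3::4] = 1                     # equivalent to the while-loop above
print(np.where(is_hihat == 1)[0])      # [ 3  7 11 15]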
Example #2
def predict(flac_path,
            title="",
            model_path="./model",
            diff_root_only=True,
            max_num_chord=4):
    label_path = "chord_labels.txt"

    # Estimate the bpm of the audio
    beat_proc = RNNBeatProcessor()
    tempo_proc = TempoEstimationProcessor(min_bpm=50, max_bpm=180, fps=100)

    beat_processed = beat_proc(flac_path)
    tempo_estimation = tempo_proc(beat_processed)

    BPM = BPM_selector(tempo_estimation)
    sec_per_beat = 60 / BPM

    sec_per_frame = 2048 / 16000
    # set eighth note as the minimum duration of the chord
    min_duration = sec_per_beat / 2

    # Read chord labels file
    with open(label_path) as f:
        with torch.no_grad():
            chord_labels = ast.literal_eval(f.read())

            # Process raw audio
            X = cqt_preprocess(flac_path)
            X = Variable(
                torch.from_numpy(np.expand_dims(X, axis=0)).float().cpu())

            # Load model
            model = Net(1).cpu()
            state_dict = torch.load(model_path,
                                    map_location="cpu")["state_dict"]
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[7:]
                new_state_dict[name] = v
            model.load_state_dict(new_state_dict)
            model.eval()

            # Estimate
            estimation = np.zeros((22, X.shape[2]))
            estimation = model(X).data.cpu()[0][0]
            estimation = to_probability(estimation)

            # Post-processing
            estimation = dp_post_processing(estimation)

            # predict_list_majmin = _predict(estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_majmin)
            predict_list_seventh = _predict(estimation, chord_labels[13:],
                                            sec_per_frame, min_duration,
                                            mapping_seventh)

        text = ''
        for chord in predict_list_seventh:
            text += f'{chord[0]}\t{chord[1]}\t{chord[2]}\n'

        return text
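For reference, the timing constants used in predict() work out as follows. This is a toy walk-through assuming 120 BPM (BPM_selector's real output depends on the audio); 2048 and 16000 are the hop size and sample rate hard-coded above.

BPM = 120                        # assumed tempo, for illustration only
sec_per_beat = 60 / BPM          # 0.5 s per beat at 120 BPM
sec_per_frame = 2048 / 16000     # 0.128 s per frame (hop 2048 at 16 kHz)
min_duration = sec_per_beat / 2  # eighth note -> 0.25 s minimum chord length
print(sec_per_beat, sec_per_frame, min_duration)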
Example #3
def calculateBeatsAndSemitones(infile):
    print('Loading audio file...', infile)
    proc = BeatTrackingProcessor(fps=100,
                                 method='comb',
                                 min_bpm=40,
                                 max_bpm=240,
                                 act_smooth=0.09,
                                 hist_smooth=7,
                                 alpha=0.79)
    act = RNNBeatProcessor()(infile)
    beats = proc(act).astype('float32')

    audio = essentia.standard.MonoLoader(filename=infile)()
    #bt = BeatTrackerMultiFeature()
    #beats, _ = bt(audio)
    # TODO: best parameters.
    parameters = {}
    #stepsize, chroma = vamp.collect(
    #    audio, 44100, "nnls-chroma:nnls-chroma", output = "chroma", step_size=2048)["matrix"]
    stepsize, semitones = vamp.collect(audio,
                                       44100,
                                       "nnls-chroma:nnls-chroma",
                                       output="semitonespectrum",
                                       step_size=2048)["matrix"]
    return len(audio), beats, semitones
Example #4
def beat_extractor(queue_beat):
    kwargs = dict(
        fps=100,
        correct=True,
        infile=None,
        outfile=None,
        max_bpm=170,
        min_bpm=60,
        #nn_files = [BEATS_LSTM[0]],
        transition_lambda=100,
        num_frames=1,
        online=True,
        verbose=1)

    def beat_callback(beats, output=None):
        if len(beats) > 0:
            # Do something with the beat (for now, just print the array to stdout)
            queue_beat.put(beats[0])
            #print(beats)

    #print('Process to write beats: %s' % os.getpid())
    in_processor = RNNBeatProcessor(**kwargs)
    beat_processor = DBNBeatTrackingProcessor(**kwargs)
    out_processor = [beat_processor, beat_callback]
    processor = IOProcessor(in_processor, out_processor)
    process_online(processor, **kwargs)
Example #5
def get_beat_processor():
    print('START BEAT PROCESSOR   >> ', str(datetime.now()))
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    from madmom.processors import SequentialProcessor
    print('BEAT PROCESSOR         >> ', str(datetime.now()))
    return SequentialProcessor(
        [RNNBeatProcessor(),
         DBNBeatTrackingProcessor(fps=100)])
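A possible usage of the returned processor ('song.wav' is a placeholder path): the SequentialProcessor feeds the RNN beat activations straight into the DBN tracker, so calling it on an audio file should yield beat times in seconds.

proc = get_beat_processor()
beat_times = proc('song.wav')   # placeholder path; returns beat times in seconds
print(beat_times)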
Example #6
def beatSyncFeature(feature, audio, sr, hop_length):
    # Aggregate feature between beat events
    fps = sr / hop_length
    beat_proc = DBNBeatTrackingProcessor(fps=100)
    beat_act = RNNBeatProcessor()(audio)
    beat_times = beat_proc(beat_act)
    # We'll use the median value of each feature between beat frames
    feature = librosa.feature.sync(feature, (beat_times * fps).astype(int),
                                   aggregate=np.median)
    return feature, beat_times
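A hedged usage sketch of the beat-synchronous aggregation above ('song.wav' is a placeholder path; newer librosa releases expose the same aggregation as librosa.util.sync):

import librosa
import numpy as np

y, sr = librosa.load('song.wav', sr=None)          # placeholder path
hop_length = 512
chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
beat_chroma, beat_times = beatSyncFeature(chroma, 'song.wav', sr, hop_length)
print(beat_chroma.shape, len(beat_times))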
Example #7
File: main.py Project: lhl2617/radetzky
def get_beats(file_path: str) -> List[float]:
    """
    Given the path to an audio file get a list of detected beat timings (in seconds)
    """
    print(f"Getting beats for {file_path}")
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(file_path)
    res: List[float] = proc(act)
    print(f"Got {len(res)} beats")
    print(res)
    return res
Example #8
def getMadmomTempo(filename):
    """
    Call Madmom Tempo Estimation
    :return: Array of tempos sorted in decreasing order of strength
    """
    from madmom.features.beats import RNNBeatProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    act = RNNBeatProcessor()(filename)
    proc = TempoEstimationProcessor(fps=100)
    res = proc(act)
    return res[:, 0]
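As a usage sketch ('song.wav' is a placeholder path): TempoEstimationProcessor returns tempo/strength pairs ordered by decreasing strength, so the first element of the returned column is the strongest tempo estimate.

tempos = getMadmomTempo('song.wav')   # placeholder path
print(tempos[0])                      # strongest tempo estimate in BPM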
Example #9
def getRNNDBNOnsets(filename):
    """
    Call Madmom's implementation of RNN + DBN beat tracking
    :param filename: Path to audio file
    """
    print("Computing madmom beats...")
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(filename)
    b = proc(act)
    return b
Example #10
def extract(yt_id):
    beats = SequentialProcessor(
        [RNNBeatProcessor(),
         DBNBeatTrackingProcessor(fps=100)])
    chordrec = SequentialProcessor(
        [CNNChordFeatureProcessor(),
         CRFChordRecognitionProcessor()])
    processMulti = ParallelProcessor([])
    processMulti.append(beats)
    processMulti.append(chordrec)
    beatSync = SequentialProcessor(
        [printTime, processMulti, printTime, arrange, printTime])
    return beatSync('tmp/' + yt_id + '.wav')
Example #11
File: features.py Project: ctralie/acoss
 def madmom_features(self, fps=100):
     """
     Call Madmom's implementation of RNN + DBN beat tracking. Madmom
     returns beat times in seconds; these are rounded and converted to
     hop-length units so that they line up with the features. The RNN
     novelty function is computed as a side effect (and is the bottleneck
     of the computation), so it is returned as well.
     Parameters
     ----------
     fps: int
         Frames per second in processing
     Returns
     -------
     {
         'tempos': ndarray(n_levels, 2)
             An array of tempo estimates in beats per minute,
             along with their confidences
         'onsets': ndarray(n_onsets)
             Array of onsets, where each onset indexes into a particular window
         'novfn': ndarray(n_frames)
             Evaluation of the rnn audio novelty function at each audio
             frame, in time increments equal to self.hop_length
         'snovfn': ndarray(n_frames)
             Superflux audio novelty function at each audio frame,
             in time increments equal to self.hop_length
     }
     """
     from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
     from madmom.features.tempo import TempoEstimationProcessor
     from madmom.features.onsets import SpectralOnsetProcessor
     from madmom.audio.filters import LogarithmicFilterbank
     beatproc = DBNBeatTrackingProcessor(fps=fps)
     tempoproc = TempoEstimationProcessor(fps=fps)
     novfn = RNNBeatProcessor()(self.audio_file) # This step is the computational bottleneck
     beats = beatproc(novfn)
     tempos = tempoproc(novfn)
     onsets = np.array(np.round(beats*self.fs/float(self.hop_length)), dtype=np.int64)
     # Resample the audio novelty function to correspond to the 
     # correct hop length
     nframes = len(self.librosa_noveltyfn())
     novfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(novfn))/float(fps), novfn) 
     
     # For good measure, also compute and return superflux
     sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps, \
                         filterbank=LogarithmicFilterbank,\
                           num_bands=24, log=np.log10)
     snovfn = sodf(self.audio_file)
     snovfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(snovfn))/float(fps), snovfn) 
     return {'tempos':tempos, 'onsets':onsets, 'novfn':novfn, 'snovfn':snovfn}
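Since 'onsets' holds beat positions rounded to hop-length windows (see the line computing them above), converting them back to seconds is the inverse scaling. A minimal sketch, assuming feats is a hypothetical instance of the feature class this method belongs to:

res = feats.madmom_features(fps=100)                             # feats: hypothetical instance
beat_times = res['onsets'] * feats.hop_length / float(feats.fs)  # back to seconds
strongest_bpm = res['tempos'][0][0]                              # strongest tempo estimate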
Example #12
def getRNNDBNOnsets(filename, Fs, hopSize):
    """
    Call Madmom's implementation of RNN + DBN beat tracking
    :param filename: Path to audio file
    :param Fs: Sample rate
    :param hopSize: Hop size of each onset function value
    :returns (tempo, beats): Average tempo, numpy array
        of beat intervals in seconds
    """
    print("Computing madmom beats...")
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(filename)
    b = proc(act)
    tempo = 60 / np.mean(b[1::] - b[0:-1])
    beats = np.array(np.round(b * Fs / hopSize), dtype=np.int64)
    return (tempo, beats)
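To make the tempo formula concrete, a toy check with evenly spaced beats (my own numbers, not output from the function above): a 0.5 s mean inter-beat interval gives 60 / 0.5 = 120 BPM.

import numpy as np

b = np.array([0.5, 1.0, 1.5, 2.0])      # toy beat times in seconds
tempo = 60 / np.mean(b[1:] - b[:-1])    # mean interval 0.5 s -> 120 BPM
print(tempo)                            # 120.0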
Example #13
def chordBeats(infile, outfile):
    print('Loading audio file...', infile)

    #proc = BeatTrackingProcessor(
    #    fps = 100,
    #    method='comb', min_bpm=40,
    #    max_bpm=240, act_smooth=0.09,
    #    hist_smooth=7, alpha=0.79)
    proc = DBNBeatTrackingProcessor(fps=100,
                                    method='comb',
                                    min_bpm=40,
                                    max_bpm=240)
    act = RNNBeatProcessor()(infile)
    beats = proc(act).astype('float32')
    audio = essentia.standard.MonoLoader(filename=infile)()
    # TODO: best parameters.
    parameters = {}
    stepsize, semitones = vamp.collect(audio,
                                       44100,
                                       "nnls-chroma:nnls-chroma",
                                       output="semitonespectrum",
                                       step_size=2048)["matrix"]
    np.savez(outfile, [len(audio)], beats, semitones)
Example #14
def get_tempo(loop):
    #tempo
    proc2 = bt.TempoEstimationProcessor(fps=100)
    act2 = RNNBeatProcessor()(loop)
    tempo = proc2(act2)[0][0]
    return tempo
Example #15
from madmom.features.beats import RNNBeatProcessor
from madmom.features.tempo import TempoEstimationProcessor
import os
import numpy as np
proc = TempoEstimationProcessor(fps=100)

source_dir = r"D:\Program Files\StepMania 5\Songs\Albumix 3.I"
np.set_printoptions(suppress=True)

for (dirpath, dirnames, filenames) in os.walk(source_dir):
    name = os.path.relpath(dirpath, source_dir)
    #print(name)
    musicname = ""
    chartname = ""
    if len(dirnames) > 5:
        print(">Subdirectories found in " + name + ", continuing.")
        continue
    if dirnames:
        print("----<5 subdirectories in " + name + ", investigate!")
    for f in filenames:
        if f.endswith(".sm") or f.endswith(".SM"):
            chartname = f  # .ssc files are very similar; my packs don't need those. .dwi files would need work
        elif f.endswith(".mp3") or f.endswith(".ogg"):
            musicname = f
    if musicname == "" or chartname == "":
        print("----Music/Chart (" + musicname + "," + chartname +
              ") not found in " + name)
        continue
    print(musicname)
    act = RNNBeatProcessor()(os.path.join(dirpath, musicname))
    print(str(proc(act)))
Example #16
test.extend(sapce)
test.extend(drum)
test.extend(sapce)
test.extend(beattt)
test.extend(sapce)
test.extend(drum)
test.extend(drum[int(0.5 * drum.shape[0]):])
sss = np.zeros(data.shape)
sss[:np.array(test).shape[0]] = np.array(test)
#sd.play(sss*5+data*5, fs)
'''gen drum'''
fps = librosa.samples_to_frames(fs, hop_length=hop_len, n_fft=win_len)
fps = 100
print(fps)
proc = BeatTrackingProcessor(look_aside=0.2, fps=fps)
act = RNNBeatProcessor()(all_data)
beat_times = proc(act)

song_len = librosa.samples_to_time(data.shape, sr=fs)[0]
beat = np.zeros(all_data.shape)
idx = np.where(beat_times <= song_len)[0]
new_beat_times = np.zeros(idx.shape)
new_beat_times[idx] = beat_times[idx]

beat_samples = librosa.time_to_samples(new_beat_times, sr=fs)
cand_len = len(drum)

end = len(beat_samples)
is_drum = np.zeros(beat_samples.shape)
group = np.arange(len(beat_samples)) % 8
idx = np.where((group == 1) | (group == 6))
Example #17
import pydub
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
from multiprocessing import Process
from madmom.features.tempo import TempoEstimationProcessor
from madmom.features.beats import RNNBeatProcessor
import stretch_body.robot

robot = stretch_body.robot.Robot()
robot.startup()

robot.stow()

filename = "./audios/forest.wav"
proc = TempoEstimationProcessor(fps=100)
act = RNNBeatProcessor()(filename)
tempo = proc(act)

tempo = tempo[0][0]
t = 60.0 / tempo * 4

# interonsets = np.ediff1d(onsets)
# interonsets = np.add.reduceat(interonsets, np.arange(0, len(interonsets), 8))

# print(interonsets)

xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5
#start
Example #18
def main():
    video_dir = 'dance_videos\\Danny Ocean - Baby I Wont.mp4'
    beat_dir = video_dir.strip('mp4') + 'npy'
    interval = [32, 36]  #in second
    REDU = True

    motion_base_dir = 'MyNao\\motion_base\\motion_base.json'
    if not os.path.exists(motion_base_dir):
        motion_base = {}
        with open(motion_base_dir, 'w') as f:
            json.dump(motion_base, f)
    with open(motion_base_dir, 'r') as f:
        motion_base = json.load(f)
    if REDU:
        pose_save_dir = 'MyNao\\motion_glance\\' + str(len(motion_base) - 1)
    else:
        pose_save_dir = 'MyNao\\motion_glance\\' + str(len(motion_base))
    if not os.path.exists(pose_save_dir):
        os.mkdir(pose_save_dir)

    motion = {}
    motion['feature'] = {}
    motion['feature']['bps'] = [None]
    motion['feature']['symmetric'] = False
    motion['feature']['repeat'] = True
    motion['frame'] = {}
    #args = parse_args()
    #cfg.set_args(args.gpu_ids)
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    time_0 = time.time()
    tester = Tester(24)

    ##loading 3D pose estimation model
    tester._make_model()

    time_1 = time.time()
    print('loading integral pose model elapse:', round(time_1 - time_0, 2),
          's')

    ##loading yolo detector
    detector = YOLOv3(
        model_def=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\config\\yolov3.cfg",
        class_path=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\data\\coco.names",
        weights_path=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\weights\\yolov3.weights",
        classes=('person', ),
        max_batch_size=16,
        device=torch.device('cuda:{}'.format(cfg.gpu_ids[0])))
    print('loading yolo elapse:', round(time.time() - time_1, 2), 's')
    skeleton = ((0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13),
                (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4),
                (4, 5), (5, 6))
    fig = plt.figure(figsize=(10, 10))
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    ])
    ##load model

    if not os.path.exists(video_dir.strip('mp4') + 'wav'):
        videoclip = VideoFileClip(video_dir)
        audioclip = videoclip.audio
        audioclip.write_audiofile(video_dir.strip('mp4') + 'wav')

    video = cv2.VideoCapture(video_dir)
    if not os.path.exists(beat_dir):
        time_2 = time.time()
        videoclip = VideoFileClip(video_dir)
        audioclip = videoclip.audio
        beat_activation = RNNBeatProcessor()(video_dir.strip('mp4') + 'wav')
        processor = DBNBeatTrackingProcessor(fps=100)
        beats = processor(beat_activation)
        frames_at_beat = (beats / audioclip.duration *
                          video.get(cv2.CAP_PROP_FRAME_COUNT)).astype(int)
        print('extracting beat sequence elapse:',
              round(time.time() - time_2, 2), 's')
        np.save(beat_dir, frames_at_beat)
    frames_at_beat = np.load(beat_dir).tolist()

    for beat in frames_at_beat:
        if interval[0] * video.get(cv2.CAP_PROP_FPS) > beat:
            continue
        else:
            interval[0] = beat
            break
    for beat in frames_at_beat:
        if interval[1] * video.get(cv2.CAP_PROP_FPS) > beat:
            continue
        else:
            interval[1] = beat
            break

    video.set(1, interval[0])
    frame = 0
    next_beat = 0
    last_beat = 0
    num_beat = 0
    num_frame_between_beats = []
    with torch.no_grad():
        while True:
            time_start = time.time()
            current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
            ret_val, raw_image = video.read()
            if current_frame == interval[1]:
                break
            input_img = raw_image.copy()
            ##using yolo to get human bounding box
            detections = detector.predict_single(input_img)
            # if not detections.cpu().numpy().all():
            #     detections = (0,0,input_img.shape[1],input_img.shape[0],1,1)
            #     print('not detected')

            if detections is None:
                detections = np.array(
                    [[0, 0, input_img.shape[1], input_img.shape[0], 1, 1, 1]])
                print('not detected')
            elif detections.size()[0] == 0:
                detections = np.array(
                    [[0, 0, input_img.shape[1], input_img.shape[0], 1, 1, 1]])
                print('not detected')
            last_conf = 0
            last_last_conf = 0
            for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf,
                    cls_pred) in enumerate(detections):
                if conf.item() > last_conf:
                    x1 = int(round(x1_pred.item())) - 40
                    x2 = int(round(x2_pred.item())) + 40
                    y1 = int(round(y1_pred.item())) - 20
                    y2 = int(
                        round(y2_pred.item())
                    ) + 20  #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                    last_last_conf = last_conf
                    last_conf = conf.item()
                print(last_conf, last_last_conf)
                if last_last_conf != 0:
                    sys.exit()
            #print(x1, x2, y1, y2, last_conf)
            img_patch = (input_img[y1:y2,
                                   x1:x2, ::-1]).copy().astype(np.float32)
            input_patch = cv2.resize(img_patch, (cfg.input_shape))

            input_patch = transform(input_patch).unsqueeze(0)
            coord_out = tester.model(input_patch)
            print('Running model time:', round(time.time() - time_start, 2),
                  's')

            motion['frame'][frame] = {}
            if frame + interval[0] in frames_at_beat:
                motion['frame'][frame]['next_beat'] = 0
                motion['frame'][frame]['last_beat'] = 0
                #frames_at_beat.remove(frame)
                next_beat = frames_at_beat.index(frame + interval[0]) + 1
                last_beat = frames_at_beat.index(frame + interval[0])
                num_beat += 1
                num_frame_between_beats.append(frames_at_beat[next_beat] -
                                               frames_at_beat[last_beat])
                print('Record key frame with beat:', current_frame)
            else:
                motion['frame'][frame]['next_beat'] = frames_at_beat[
                    next_beat] - (frame + interval[0])
                motion['frame'][frame]['last_beat'] = (
                    frame + interval[0]) - frames_at_beat[last_beat]

            coord_out = coord_out.cpu().numpy()
            coord_out_resize = coord_out * np.array([
                img_patch.shape[1] / cfg.input_shape[1],
                img_patch.shape[0] / cfg.input_shape[0], 1
            ])

            for idx in range(coord_out_resize.shape[1] - 1):
                motion['frame'][frame][idx] = (
                    coord_out_resize[0][idx][0].item(),
                    coord_out_resize[0][idx][2].item(),
                    coord_out_resize[0][idx][1].item())

            vis = True
            vis_3d = False
            if vis:
                tmpimg = input_patch[0].cpu().numpy()
                tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(
                    3, 1, 1) + np.array(cfg.pixel_mean).reshape(3, 1, 1)
                tmpimg = (tmpimg).astype(np.uint8)
                tmpimg = tmpimg[::-1, :, :]
                tmpimg = np.transpose(tmpimg, (1, 2, 0)).copy()
                tmpkps = np.zeros((3, 18))
                tmpkps[:2, :] = coord_out[0, :, :2].transpose(
                    1, 0) / cfg.output_shape[0] * cfg.input_shape[0]
                tmpkps[2, :] = 1
                tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton)
                tmpimg = cv2.resize(tmpimg,
                                    (img_patch.shape[1], img_patch.shape[0]))
                file_name = pose_save_dir + '\\{0}.png'.format(
                    str(frame).zfill(4))
                cv2.imwrite(file_name, tmpimg)
            if vis_3d:
                #coord_out = coord_out.cpu().numpy()
                #coord_out = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])
                pred = coord_out_resize.squeeze(
                )  #remove first batch dimension

                ax = plt.subplot('121', projection='3d')
                plt.axis('off')
                show3D_pose(pred, ax, skeleton, radius=40)
                file_name = pose_save_dir + '\\{0}.png'.format(
                    str(frame).zfill(4))
                plt.savefig(file_name)
                # cv2.imwrite(file_name, tmpimg)

            frame += 1
            print('Processing Frame:', round(time.time() - time_start, 2), 's')

        motion['feature']['fpb'] = np.mean(num_frame_between_beats)
        if REDU:
            motion_base[len(motion_base) - 1] = motion
        else:
            motion_base[len(motion_base)] = motion
        #with open(motion_base_dir, 'w') as f:
        #    json.dump(motion_base, f)
    print('done with', num_beat + 1,
          'beats! (This should be even for a normal dance)')
    print('num_frame between beats:')
    print(num_frame_between_beats)
Example #19
    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.c_count = 0
        using_callback = True
        self.buffer = collections.deque(maxlen=self.RATE * 14)
        self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
        self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                                 min_bpm=80.0,
                                                 max_bpm=180.0)
        self.dcp = DeepChromaProcessor()
        self.decode = DeepChromaChordRecognitionProcessor()
        self.start_current_time = None
        if using_callback:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK,
                                       stream_callback=self.callback)
            print(self.pa.get_default_output_device_info())
            print(self.pa.get_default_input_device_info())
            self.t_start = time.time()
            beepsnd, _ = librosa.load('block.wav', sr=None)
            out1 = (beepsnd).tostring()
            #print(beepsnd.size, len(out1))
            self.beepsnd = out1
            self.Flag = False
            self.beep_count = 0
            while self.stream.is_active():
                if len(self.buffer) == self.RATE * 8:
                    print('14 sec')
                    print(self.time_info)
                    print(time.time() - self.t_start)
                    self.tmp = np.array(self.buffer)
                    self.buffer.clear()
                    print(time.time() - self.t_start)
                    chroma_thread = threading.Thread(target=self.chroma_rec,
                                                     args=())
                    chroma_thread.start()
                    #chord = chroma_thread.run()

                    tmp2 = self.rnn(self.tmp)
                    # tmp2 = librosa.onset.onset_strength(tmp,sr=self.RATE, hop_length = int(self.RATE / 100),max_size=1,aggregate=np.median, n_mels=256)
                    # tmp2 /= np.max(tmp2)
                    #t_axes = librosa.frames_to_time(np.arange(len(tmp2)),sr=self.RATE)
                    t_proc = time.time() - self.t_start
                    print(t_proc)
                    tmp3_2 = self.act_proc(tmp2)
                    tmp3_1 = 60 / np.mean(np.diff(tmp3_2))
                    # print(tmp3)
                    #tmp3_1,tmp3_2 = librosa.beat.beat_track(onset_envelope=tmp2, sr=self.RATE)
                    print('tempo is %f' % tmp3_1)
                    print('beat is ', tmp3_2)

                    t_proc = time.time() - self.t_start
                    chroma_thread.join()

                    print(t_proc)
                    t = threading.Timer(60. / tmp3_1 - t_proc, self.flagit, ())
                    t.daemon = True
                    t.start()
                    # self.stream.write(self.beepsnd)

                    print(time.time() - self.t_start)
                else:
                    time.sleep(0.001)

        else:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK)
            self.t_start = time.time()
            self.loop()
Example #20
    x_path = join(base, file_name + "_X.npy")
    y_path = join(base, file_name + "_y.npy")
    return np.load(x_path), np.load(y_path)


processors = [
    SpectralOnsetProcessor(fps=200),
    RNNOnsetProcessor(),
    CNNOnsetProcessor(),
    SpectralOnsetProcessor(onset_method='superflux',
                           fps=200,
                           filterbank=LogarithmicFilterbank,
                           num_bands=24,
                           log=np.log10),
    RNNDownBeatProcessor(),
    lambda sig: np.array(RNNBeatProcessor(post_processor=None)(sig)).T
]
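A hedged note on the last entry above: with post_processor=None, RNNBeatProcessor is expected to return the individual network activations rather than their average, which is why the lambda stacks and transposes them into a (frames, networks) array. A sketch under that assumption ('song.wav' is a placeholder path):

import numpy as np
from madmom.features.beats import RNNBeatProcessor

acts = RNNBeatProcessor(post_processor=None)('song.wav')  # per-network activations (assumed)
stacked = np.array(acts).T                                # shape: (n_frames, n_networks)
print(stacked.shape)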


def rhythm_features_for_signal(signal):
    rhythm_features = [process(signal) for process in processors]
    return concatenate_and_resample(rhythm_features)


def load_and_rhythm_preprocess(audio_dir, max_samples=-1):
    print('...load and preprocess files from folder')

    audio_files = get_list_of_files(audio_dir)

    if max_samples > 0:
        audio_files = audio_files[:max_samples]
Example #21
def beat_activations(path):
    beat_activations_ = RNNBeatProcessor()(path)
    return beat_activations_
Example #22
import sys
import bmaFunctions
import numpy
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.beats import DBNBeatTrackingProcessor
from madmom.features.beats import RNNBeatProcessor

#Setting up Deep Chroma Chord Recognition Processor
dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()
chroma = dcp(sys.argv[1])
chords = decode(chroma)

#Setting up Dynamic Bayesian Network Beat Tracking Processor
proc = DBNBeatTrackingProcessor(fps=100)
act = RNNBeatProcessor()(sys.argv[1])
beats = proc(act)

#calculating msi (mean inter-beat spacing in milliseconds)
beatsArray = numpy.array(beats)
msi = numpy.mean(beatsArray[1:] - beatsArray[:-1]) * 1000

beatmap = bmaFunctions.assignKeys(beats, chords, sys.argv[3])
if msi < 360:
    del beatmap[1::2]

#generating and printing beatmap
bmaFunctions.fancyPrint(beatmap, msi, sys.argv[2])

#TODO: eliminate trailing Ns
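A toy check of the msi threshold above (my own numbers, not from this script): at 150 BPM beats arrive roughly every 400 ms, so msi stays above 360 and no beats are dropped; only above about 60000 / 360 ≈ 167 BPM does the interval fall under 360 ms and every second beat get removed.

import numpy as np

beats = np.arange(0, 10, 60 / 150)       # toy beats at 150 BPM
msi = np.mean(np.diff(beats)) * 1000     # ~400 ms mean inter-beat interval
print(msi, msi < 360)                    # ~400.0 False -> keep every beat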
Example #23
def main():
    """DBNBeatTracker"""

    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    The DBNBeatTracker.py program detects all beats in an audio file according to
    the method described in:

    "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music
     Styles"
    Sebastian Böck, Florian Krebs and Gerhard Widmer.
    Proceedings of the 15th International Society for Music Information
    Retrieval Conference (ISMIR), 2014.

    It does not use the multi-model (Section 2.2.) and selection stage (Section
    2.3), i.e. this version corresponds to the pure DBN version of the
    algorithm for which results are given in Table 2.

    Instead of the originally proposed state space and transition model for the
    DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer.
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    This program can be run in 'single' file mode to process a single audio
    file and write the detected beats to STDOUT or the given output file.

      $ DBNBeatTracker.py single INFILE [-o OUTFILE]

    If multiple audio files should be processed, the program can also be run
    in 'batch' mode to save the detected beats to files with the given suffix.

      $ DBNBeatTracker.py batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES

    If no output directory is given, the program writes the files with the
    detected beats to the same location as the audio files.

    The 'pickle' mode can be used to store the used parameters to be able to
    exactly reproduce experiments.

    ''')

    # version
    p.add_argument('--version',
                   action='version',
                   version='DBNBeatTracker.py.2016')
    # input/output options
    io_arguments(p, output_suffix='.beats.txt', online=True)
    ActivationsProcessor.add_arguments(p)
    # signal processing arguments
    SignalProcessor.add_arguments(p, norm=False, gain=0)
    # peak picking arguments
    DBNBeatTrackingProcessor.add_arguments(p)
    NeuralNetworkEnsemble.add_arguments(p, nn_files=None)

    # parse arguments
    args = p.parse_args()

    # set immutable arguments
    args.fps = 100

    # print arguments
    if args.verbose:
        print(args)

    # input processor
    if args.load:
        # load the activations from file
        in_processor = ActivationsProcessor(mode='r', **vars(args))
    else:
        # use a RNN to predict the beats
        in_processor = RNNBeatProcessor(**vars(args))

    # output processor
    if args.save:
        # save the RNN beat activations to file
        out_processor = ActivationsProcessor(mode='w', **vars(args))
    else:
        # track the beats with a DBN
        beat_processor = DBNBeatTrackingProcessor(**vars(args))
        # output handler
        from madmom.utils import write_events as writer
        # sequentially process everything
        out_processor = [beat_processor, writer]

    # create an IOProcessor
    processor = IOProcessor(in_processor, out_processor)
    # and call the processing function
    args.func(processor, **vars(args))
Example #24
def main():
    video_list = ['Cant stop the feeling - Justin Timberlake - Easy Dance for Kids', 'Dance like yo daddy', 'Danny Ocean - Baby I Wont', 'Si una vez - If I Once', 'Vaiven - MegaMix']
    for video in video_list:
        video_dir = 'dance_videos\\' + video + '.mp4'
        beat_dir = video_dir.strip('mp4') + 'npy'

        cudnn.fastest = True
        cudnn.benchmark = True
        cudnn.deterministic = False
        cudnn.enabled = True

        time_0 = time.time()
        tester = Tester(24)

        ##loading 3D pose estimation model
        tester._make_model()

        time_1 = time.time()
        print('loading integral pose model elapse:',round(time_1-time_0,2),'s')

        ##loading yolo detector
        detector = YOLOv3( model_def="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\config\\yolov3.cfg",
                            class_path="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\data\\coco.names",
                            weights_path="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\weights\\yolov3.weights",
                            classes=('person',),
                            max_batch_size=16,
                            device=torch.device('cuda:{}'.format(cfg.gpu_ids[0])))
        print('loading yolo elapse:',round(time.time()-time_1,2),'s')
        skeleton = ( (0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6) )
        #fig = plt.figure(figsize=(10,10)) 
        transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]
                                )
        
        if not os.path.exists(video_dir.strip('mp4')+'wav'):
            videoclip = VideoFileClip(video_dir)
            audioclip = videoclip.audio
            audioclip.write_audiofile(video_dir.strip('mp4')+'wav')

        video = cv2.VideoCapture(video_dir)
        if not os.path.exists(beat_dir):
            time_2 = time.time()
            videoclip = VideoFileClip(video_dir)
            audioclip = videoclip.audio
            beat_activation = RNNBeatProcessor()(video_dir.strip('mp4')+'wav')
            processor = DBNBeatTrackingProcessor(fps=100)
            beats = processor(beat_activation)
            frames_at_beat = (beats/audioclip.duration*video.get(cv2.CAP_PROP_FRAME_COUNT)).astype(int)
            print('extracting beat sequence elapse:', round(time.time()-time_2, 2), 's')
            np.save(beat_dir, frames_at_beat)
        frames_at_beat = np.load(beat_dir).tolist()

        ##########################################
        dance_primitives_dir = '.\\danceprimitives_trial'
        if not os.path.exists(dance_primitives_dir):
            os.mkdir(dance_primitives_dir)
        motion_index = len(os.listdir(dance_primitives_dir))
        for i in range(len(frames_at_beat)-1):

            motion_dir = os.path.join(dance_primitives_dir, '{0}'.format(str(motion_index).zfill(5)))
            if not os.path.exists(motion_dir):
                os.mkdir(motion_dir)

            start = frames_at_beat[i]
            end =frames_at_beat[i+1]
            dance_primitive = np.empty((0, 17*3)) # for motion control
            #dance_primitive_norm = np.empty((0, 17*3)) # for motion clustering
            video.set(1, start)
            jump_flag = 0
            frame = 0
            with torch.no_grad():
                time_start = time.time()
                while True:
                    current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
                    ret_val, raw_image = video.read()
                    if current_frame == end:
                        break
                    ##using yolo to get human bounding box
                    input_img = raw_image.copy()
                    detections = detector.predict_single(input_img)
                    if detections is None or detections.size()[0] == 0:
                        jump_flag = 1
                        break
                    last_conf = 0
                    for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf, cls_pred) in enumerate(detections):
                        if conf.item() > last_conf:
                            x1 = max(int(round(x1_pred.item())) - 40, 0)
                            x2 = min(int(round(x2_pred.item())) + 40, input_img.shape[1]-1)
                            y1 = max(int(round(y1_pred.item())) - 20, 0)
                            y2 = min(int(round(y2_pred.item())) + 20, input_img.shape[0]-1)   #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                            last_conf = conf.item()
                    img_patch = (input_img[y1:y2, x1:x2, ::-1]).copy().astype(np.float32)
                    ##using ResPoseNet to get 3D human pose
                    input_patch = cv2.resize(img_patch,(cfg.input_shape))
                    input_patch = transform(input_patch).unsqueeze(0)
                    coord_out = tester.model(input_patch).cpu().numpy() #dimension: 1 X 18 X 3, where '3' refers to x, z, y in sequence.
                    #show_pose(input_patch, img_patch, coord_out, skeleton, motion_dir, frame)
                    coord_out_resize = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1]) #transform to original scale
                    coord_out = coord_out_resize[:, :-1, :] # neglect the key point for "thorax"
                    #coord_out_norm = (coord_out-np.mean(coord_out, axis=1))/np.std(coord_out, axis=1)
                    dance_primitive = np.vstack((dance_primitive, np.reshape(coord_out[0], -1)))
                    #dance_primitive_norm = np.vstack((dance_primitive_norm, np.reshape(coord_out_norm[0], -1)))
                    frame += 1
                print('Processing Time Elapse:', round(time.time()-time_start,2), 's')

            if jump_flag == 1:
                continue

            #norm_sample = np.empty((0, 17*3))
            #num_sample = 10
            #print(dance_primitive_norm.shape[0])
            #sample_step = (dance_primitive_norm.shape[0]-1)/(num_sample-1)
            #for i in range(num_sample):
            #    norm_sample = np.vstack((norm_sample, dance_primitive_norm[round(i * sample_step)]))
            
            #print(norm_sample.shape)
            print(dance_primitive.shape)
            #np.save(os.path.join(motion_dir, 'dance_motion_normlized_'+ str(motion_index)), norm_sample)
            np.save(os.path.join(motion_dir, 'dance_motion_'+ str(motion_index)), dance_primitive)

            motion_index+=1



    ###########################################
    sys.exit()
    video.set(1, interval[0])
    frame=0
    next_beat = 0
    last_beat = 0
    num_beat = 0
    num_frame_between_beats = []
    with torch.no_grad():
        while True:
            time_start = time.time()
            current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
            ret_val, raw_image = video.read()
            if current_frame == interval[1]:
                break
            input_img = raw_image.copy()
                    ##using yolo to get human bounding box
            detections = detector.predict_single(input_img)
            # if not detections.cpu().numpy().all():
            #     detections = (0,0,input_img.shape[1],input_img.shape[0],1,1)
            #     print('not detected')

            if detections is None:
                detections = np.array([[0,0,input_img.shape[1],input_img.shape[0],1,1,1]])
                print('not detected')
            elif detections.size()[0] == 0:
                detections = np.array([[0,0,input_img.shape[1],input_img.shape[0],1,1,1]])
                print('not detected')
            last_conf = 0
            last_last_conf = 0
            for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf, cls_pred) in enumerate(detections):
                if conf.item() > last_conf:
                    x1 = int(round(x1_pred.item())) - 40
                    x2 = int(round(x2_pred.item())) + 40
                    y1 = int(round(y1_pred.item())) - 20
                    y2 = int(round(y2_pred.item())) + 20    #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                    last_last_conf = last_conf
                    last_conf = conf.item()
                print(last_conf, last_last_conf)
                if last_last_conf != 0:
                    sys.exit()
            #print(x1, x2, y1, y2, last_conf)
            img_patch = (input_img[y1:y2, x1:x2, ::-1]).copy().astype(np.float32)
            input_patch = cv2.resize(img_patch,(cfg.input_shape))

            input_patch = transform(input_patch).unsqueeze(0)
            coord_out = tester.model(input_patch)
            print('Running model time:',round(time.time()-time_start,2),'s')

            motion['frame'][frame] = {}
            if frame+interval[0] in frames_at_beat:
                motion['frame'][frame]['next_beat'] = 0
                motion['frame'][frame]['last_beat'] = 0
                #frames_at_beat.remove(frame)
                next_beat = frames_at_beat.index(frame+interval[0]) + 1
                last_beat = frames_at_beat.index(frame+interval[0])
                num_beat += 1
                num_frame_between_beats.append(frames_at_beat[next_beat] - frames_at_beat[last_beat])
                print('Record key frame with beat:', current_frame)
            else:
                motion['frame'][frame]['next_beat'] = frames_at_beat[next_beat] - (frame+interval[0])
                motion['frame'][frame]['last_beat'] = (frame+interval[0]) - frames_at_beat[last_beat]

            coord_out = coord_out.cpu().numpy()
            coord_out_resize = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])

            for idx in range(coord_out_resize.shape[1]-1):
                motion['frame'][frame][idx]=(coord_out_resize[0][idx][0].item(), coord_out_resize[0][idx][2].item(), coord_out_resize[0][idx][1].item())
            
            vis = True
            vis_3d = False
            if vis:
                    tmpimg = input_patch[0].cpu().numpy()
                    tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(3,1,1) + np.array(cfg.pixel_mean).reshape(3,1,1)
                    tmpimg = (tmpimg).astype(np.uint8)
                    tmpimg = tmpimg[::-1, :, :]
                    tmpimg = np.transpose(tmpimg,(1,2,0)).copy()
                    tmpkps = np.zeros((3,18))
                    tmpkps[:2,:] = coord_out[0,:,:2].transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0]
                    tmpkps[2,:] = 1
                    tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton)
                    tmpimg = cv2.resize(tmpimg,(img_patch.shape[1],img_patch.shape[0]))
                    file_name = pose_save_dir+'\\{0}.png'.format(str(frame).zfill(4))
                    cv2.imwrite(file_name, tmpimg)
            if vis_3d:
                #coord_out = coord_out.cpu().numpy()
                #coord_out = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])
                pred=coord_out_resize.squeeze() #remove first batch dimension

                ax=plt.subplot('121',projection='3d')
                plt.axis('off')
                show3D_pose(pred,ax,skeleton,radius=40)
                file_name = pose_save_dir + '\\{0}.png'.format(str(frame).zfill(4))
                plt.savefig(file_name)
                # cv2.imwrite(file_name, tmpimg)

            frame+=1
            print('Processing Frame:',round(time.time()-time_start,2),'s')

        motion['feature']['fpb'] = np.mean(num_frame_between_beats)
        if REDU:
            motion_base[len(motion_base)-1] = motion
        else:
            motion_base[len(motion_base)] = motion
        #with open(motion_base_dir, 'w') as f:
        #    json.dump(motion_base, f)
    print('done with', num_beat + 1, 'beats! (This should be even for a normal dance)')
    print('num_frame between beats:')
    print(num_frame_between_beats)
Example #25
    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.c_count = 0
        using_callback = True
        self.buffer = collections.deque(maxlen=self.RATE * 14)
        self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
        self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                                 min_bpm=80.0,
                                                 max_bpm=180.0)
        self.dcp = DeepChromaProcessor()
        self.decode = DeepChromaChordRecognitionProcessor()
        self.start_current_time = None
        self.beep_count = 0
        source_path = 'tool'
        style_name = 'test_midi_folder'

        self.test = InstScheduler(FoxDot.lib.Clock, source_path)
        self.test.AddMidiFolder(style_name)
        self.test.Live_event(
        )  # Online random playing event determined by prosperity function
        self.test.set_tempo_pattern(
            4, 4
        )  # if the meta file is exist, calling this routine is not required
        if using_callback:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK,
                                       stream_callback=self.callback)
            print(self.pa.get_default_output_device_info())
            print(self.pa.get_default_input_device_info())
            self.t_start = time.time()
            beepsnd, _ = librosa.load('block.wav', sr=None)
            out1 = (beepsnd).tostring()
            #print(beepsnd.size, len(out1))
            self.beepsnd = out1
            self.Flag = False

            while self.stream.is_active():
                if len(self.buffer) == self.RATE * 14:
                    print('14 sec')
                    print(self.time_info)
                    print(time.time() - self.t_start)
                    self.tmp = np.array(self.buffer)
                    self.buffer.clear()
                    print(time.time() - self.t_start)
                    chroma_thread = threading.Thread(target=self.chroma_rec,
                                                     args=())
                    chroma_thread.start()
                    #chord = chroma_thread.run()

                    tmp2 = self.rnn(self.tmp)
                    # tmp2 = librosa.onset.onset_strength(tmp,sr=self.RATE, hop_length = int(self.RATE / 100),max_size=1,aggregate=np.median, n_mels=256)
                    # tmp2 /= np.max(tmp2)
                    #t_axes = librosa.frames_to_time(np.arange(len(tmp2)),sr=self.RATE)
                    t_proc = time.time() - self.t_start
                    print(t_proc)
                    tmp3_2 = self.act_proc(tmp2)
                    tmp3_1 = 60 / np.mean(np.diff(tmp3_2))
                    # print(tmp3)
                    #tmp3_1,tmp3_2 = librosa.beat.beat_track(onset_envelope=tmp2, sr=self.RATE)
                    print('tempo is %f' % tmp3_1)
                    print('beat is ', tmp3_2)

                    t_proc = time.time() - self.t_start
                    chroma_thread.join()

                    print(t_proc)
                    t = threading.Timer(60. / tmp3_1 - t_proc, self.flagit, ())
                    t.daemon = True
                    t.start()
                    print(int(tmp3_1))
                    self.test.StartInTime(
                        np.mean(np.diff(tmp3_2)) * 4 - (14 - tmp3_2[-1]) -
                        t_proc, int(tmp3_1))
                    break
                    # self.stream.write(self.beepsnd)

                    print(time.time() - self.t_start)
                else:
                    time.sleep(0.001)
            while (1):
                time.sleep(0.01)

        else:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK)
            self.t_start = time.time()
            self.loop()
Example #26
"""kwargs = dict(
    fps = 100,
    correct = True,
    infile = 'C:\\Users\\lenovo\\Desktop\\dance_videos\\better_when_im_dancing.wav',
    outfile = 'C:\\Users\\lenovo\\Desktop\\dance_videos\\beats.txt',
    max_bpm = 170,
    min_bpm = 60,
    #nn_files = [BEATS_LSTM[0]],
    transition_lambda = 100,
    num_frames = 1,
    online = False,
    verbose = 0
)

def beat_callback(beats, output=None):
    if len(beats) > 0:
        # Do something with the beat (for now, just print the array to stdout)
        print(beats)

in_processor = RNNBeatProcessor(**kwargs)
beat_processor = DBNBeatTrackingProcessor(**kwargs)
out_processor = [beat_processor, write_beats]
processor = IOProcessor(in_processor, out_processor)
#process_offline(processor, **kwargs)
processor.process('C:\\Users\\lenovo\\Desktop\\dance_videos\\better_when_im_dancing.wav')
"""
act = RNNBeatProcessor()(
    'C:\\Users\\lenovo\\Desktop\\dance_videos\\better_when_im_dancing.wav')
proc = DBNBeatTrackingProcessor(fps=100)
beat = proc(act)
write_beats(beat, 'C:\\Users\\lenovo\\Desktop\\dance_videos\\beats.txt')