def main(DIRECTORY, args):
    
    print("[Start] making audio for the training.")
    
    TR_SPEECH_PATH = DIRECTORY['TR_SPEECH_PATH']
    TR_NOISE_PATH = DIRECTORY['TR_NOISE_PATH']
    OUTPUT_PATH = DIRECTORY['OUTPUT_PATH']
    
    # set output path
    merged_output_path = os.path.join(OUTPUT_PATH,'merged_audio')
   
    # make output path
    makedirs(merged_output_path)
    
    # make merged speech audio using librosa
    speech_merged_audio,rate = get_merged_audio(TR_SPEECH_PATH, audio_type='speech', args=args)
    
    # make merged noise audio using librosa
    noise_merged_audio,rate = get_merged_audio(TR_NOISE_PATH, audio_type='noise', args=args )
   
    # make RMS of speech and noise equal
    noise_merged_audio = equalizingRMS(speech_merged_audio, noise_merged_audio)
    
    # write wav files
    write_audio(os.path.join(merged_output_path, 'merged_train_speech_audio.wav'), speech_merged_audio, args.sampling_rate)
    write_audio(os.path.join(merged_output_path, 'merged_train_noise_audio.wav'), noise_merged_audio, args.sampling_rate)
    

    print("[Finish] making audio for the training.")
Example 2
def run_openpose(args, video_path):
    video_idx = path.basename(video_path).split('.')[0]
    try:
        img_dir = path.join(args.output_root, args.img_folder, video_idx)
        op_dir = path.join(args.output_root, args.openpose_folder, video_idx)
        img_names = sorted(glob.glob(img_dir + '/*.jpg'))
        op_names = sorted(glob.glob(op_dir + '/*.json'))

        # If the frames haven't been extracted or OpenPose hasn't been run or
        # finished processing.
        if (not os.path.isdir(img_dir) or not os.path.isdir(op_dir)
                or len(img_names) != len(op_names)):
            makedirs(img_dir)

            # First run OpenPose on key frames, then decide whether to run
            # the whole batch of frames.
            extract_key_frames(args, video_path, img_dir)
            run_openpose_cmd(args, video_idx)

            # If key frame looks good, extract all frames in the batch and
            # run OpenPose.
            if args.n_skip_frames > 1:
                extract_valid_frames(args, video_path, img_dir)
                run_openpose_cmd(args, video_idx)

            # Remove all unusable frames.
            remove_invalid_frames(args, video_idx)
            remove_static_frames(args, video_idx)
            remove_isolated_frames(args, video_idx)
    except Exception as e:
        raise ValueError('video %s running openpose failed' % video_idx) from e
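A small driver loop (hypothetical; args.video_root and the *.mp4 pattern are assumptions, not part of the original snippet) that applies run_openpose to every video in a folder could look like:

import glob
from os import path

def run_all_videos(args):
    # Apply run_openpose to each video found under the assumed args.video_root.
    video_paths = sorted(glob.glob(path.join(args.video_root, '*.mp4')))
    for video_path in video_paths:
        run_openpose(args, video_path)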
Example 3
def process_video(videopath, savedir, min_interval_time=0.1, crop_mode='peak', crop_time=0.2, rate=44100, fc=[20,8000], saveimage=True):
    """
    videopath:
    savedir:
    min_interval_time:
    crop_mode: 'peak' | 'time'
    crop_time:
    rate:
    saveimage:

    return:
    video_infos :fps,endtime,height,width
    peakindexs
    bias
    syllables

    """
    util.makedirs(savedir)
    
    # process video
    video_infos = ffmpeg.get_video_infos(videopath)
    ffmpeg.video2voice(videopath, os.path.join(savedir,'video_tmp.wav'),samplingrate=44100)
    if saveimage:
        util.makedirs(os.path.join(savedir,'imgs'))
        ffmpeg.video2image(videopath,os.path.join(savedir,'imgs','%05d.png'))
    
    # process audio
    audio,syllables,features,peakindexs,bias = process_audio(os.path.join(savedir,'video_tmp.wav'), 
        savedir, min_interval_time,crop_mode, crop_time, rate, fc)

    np.save(os.path.join(savedir,'video_infos.npy'), np.array(video_infos))
    
    return audio,syllables,features,peakindexs,bias,video_infos
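A typical call (all paths and parameter values below are placeholders) that mirrors the defaults documented above might be:

audio, syllables, features, peakindexs, bias, video_infos = process_video(
    'input.mp4', './output/clip01',
    min_interval_time=0.1, crop_mode='peak', crop_time=0.2,
    rate=44100, fc=[20, 8000], saveimage=True)
print('fps, endtime, height, width:', video_infos)
print('detected %d syllables' % len(syllables))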
Example 4
def process_audio(audiopath, savedir, min_interval_time=0.1, crop_mode='peak', crop_time=0.2, rate=44100, fc=[20,8000], hpss=''):
    util.makedirs(savedir)
    # to wav
    if (os.path.splitext(audiopath)[1]).lower() != '.wav':
        ffmpeg.video2voice(audiopath, os.path.join(savedir,'video_tmp.wav'),samplingrate=44100)
        audiopath = os.path.join(savedir,'video_tmp.wav')
    
    _,audio = sound.load(audiopath,ch=0)
    _audio = audio.copy()
    if hpss == 'harmonic':
        harmonic,percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(harmonic,fc,rate), 4410, 441, 4410)
    elif hpss == 'percussive':
        harmonic,percussive = librosa.effects.hpss(_audio)
        energy = dsp.energy(sound.filter(percussive,fc,rate), 4410, 441, 4410)
    else:
        energy = dsp.energy(sound.filter(_audio,fc,rate), 4410, 441, 4410)
    
    # energy is computed with a 4410-sample window and a 441-sample hop,
    # i.e. one value per 10 ms at 44.1 kHz, so `interval` is given in 10 ms frames
    peakindexs = arrop.findpeak(energy, interval=int(min_interval_time*100))
    y = arrop.get_y(peakindexs, energy)
    plt.plot(energy)
    plt.scatter(peakindexs, y, c='orange')
    plt.show()

    # convert frame indices back to sample indices (hop size of 441 samples)
    peakindexs = peakindexs*441


    bias = []
    if crop_mode == 'peak':
        # crop each peak from the preceding valley to the following valley,
        # clipping each side to [0.5, 5] x min_interval_time (in samples)
        valleyindexs = arrop.findpeak(energy, interval=int(min_interval_time*100), reverse=True)*441
        for i in range(len(peakindexs)):
            for j in range(len(valleyindexs)-1):
                if valleyindexs[j] < peakindexs[i] < valleyindexs[j+1]:
                    left = np.clip(peakindexs[i]-valleyindexs[j], int(min_interval_time*rate*0.5), int(min_interval_time*rate*5))
                    right = np.clip(valleyindexs[j+1]-peakindexs[i], int(min_interval_time*rate*0.5), int(min_interval_time*rate*5))
                    bias.append([left, right])
    elif crop_mode == 'time':
        for i in range(len(peakindexs)):
            bias.append([int(rate*crop_time/2),int(rate*crop_time/2)])

    syllables = []
    features = []        
    for i in range(len(peakindexs)):
        syllable = audio[peakindexs[i]-bias[i][0]:peakindexs[i]+bias[i][1]]
        
        syllables.append(syllable)
        features.append(sound.freqfeatures(syllable, 44100))

    # save
    np.save(os.path.join(savedir,'peakindexs.npy'), np.array(peakindexs))
    np.save(os.path.join(savedir,'bias.npy'), np.array(bias))
    # syllables have different lengths, so store them as an object array
    np.save(os.path.join(savedir,'syllables.npy'), np.array(syllables, dtype=object))
    np.save(os.path.join(savedir,'features.npy'), np.array(features))
    
    # for syllable in syllables:
    #     sound.playtest(syllable)
    
    return audio,syllables,features,peakindexs,bias
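arrop.findpeak is a project-local helper. A rough stand-in for the peak picking used above, sketched with scipy (interval maps to the distance argument, measured in 10 ms energy frames; this is an assumption about its behavior, not the original code):

import numpy as np
from scipy.signal import find_peaks

def findpeak_sketch(energy, interval, reverse=False):
    # Return indices of local maxima (or minima when reverse=True)
    # separated by at least `interval` frames.
    x = -np.asarray(energy) if reverse else np.asarray(energy)
    peaks, _ = find_peaks(x, distance=max(1, interval))
    return peaks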
Example 5
def run_densepose(args, video_idx):
    try:
        img_dir = path.join(args.output_root, args.img_folder, video_idx)
        dp_dir = path.join(args.output_root, args.densepose_folder, video_idx)
        img_names = sorted(glob.glob(img_dir + '/*.jpg'))
        dp_names = sorted(glob.glob(dp_dir + '/*.png'))

        if not os.path.isdir(dp_dir) or len(img_names) != len(dp_names):
            makedirs(dp_dir)

            # Run densepose.
            run_densepose_cmd(args, video_idx)
    except Exception as e:
        raise ValueError('video %s running densepose failed' % video_idx) from e
Example 6
def extract_all_frames(args, video_path):
    video_idx = path.basename(video_path).split('.')[0]
    img_dir = path.join(args.output_root, args.img_folder, video_idx)
    makedirs(img_dir)

    vidcap = cv2.VideoCapture(video_path)
    success, image = vidcap.read()
    frame_count = 0
    while success:
        write_name = path.join(img_dir, "frame%06d.jpg" % frame_count)
        cv2.imwrite(write_name, image)
        success, image = vidcap.read()
        frame_count += 1
    vidcap.release()
    print('Extracted %d frames' % frame_count)
Example 7
def run_openpose_cmd(args, video_idx):
    pwd = os.getcwd()
    img_dir = path.join(pwd, args.output_root, args.img_folder, video_idx)
    op_dir = path.join(pwd, args.output_root, args.openpose_folder, video_idx)
    render_dir = path.join(pwd, args.output_root,
                           args.openpose_folder + '_rendered', video_idx)
    makedirs(op_dir)
    makedirs(render_dir)

    cmd = 'cd %s; ./build/examples/openpose/openpose.bin --display 0 ' \
          '--disable_blending --image_dir %s --write_images %s --face --hand ' \
          '--face_render_threshold 0.1 --hand_render_threshold 0.02 ' \
          '--write_json %s; cd %s' \
          % (args.openpose_root, img_dir, render_dir, op_dir,
             path.join(pwd, args.output_root))
    os.system(cmd)
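Building the command with os.system and string interpolation is fragile when paths contain spaces or shell metacharacters. A sketch of the same invocation with subprocess (flags copied from the command string above; the function name and argument list are an assumed refactoring, not the project's code):

import subprocess

def run_openpose_cmd_subprocess(openpose_root, img_dir, render_dir, op_dir):
    # Run the OpenPose binary from its root directory on a folder of images.
    cmd = [
        './build/examples/openpose/openpose.bin',
        '--display', '0', '--disable_blending',
        '--image_dir', img_dir,
        '--write_images', render_dir,
        '--face', '--hand',
        '--face_render_threshold', '0.1',
        '--hand_render_threshold', '0.02',
        '--write_json', op_dir,
    ]
    subprocess.run(cmd, cwd=openpose_root, check=True)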
Example 8
def handlepost():
    if request.form['token'] != key:
        return {'return': 'token error'}

    if request.form['mode'] == 'clean':
        if os.path.isdir(opt.rec_tmp):
            shutil.rmtree(opt.rec_tmp)
        return {'return': 'done'}

    if request.form['mode'] == 'send':
        data = request.form['data']
        util.makedirs(os.path.join(opt.rec_tmp, request.form['label']))
        util.savetxt(
            data,
            os.path.join(opt.rec_tmp, request.form['label'],
                         util.randomstr(8)))
        return {'return': 'done'}

    if request.form['mode'] == 'train':
        train(opt)
        label_map = {category: i for i, category in enumerate(categorys)}

        file = util.loadfile(os.path.join(opt.save_dir, 'model.pt'))
        file = base64.b64encode(file).decode('utf-8')
        heatmap = util.loadfile(os.path.join(opt.save_dir,
                                             'final_heatmap.png'))
        heatmap = base64.b64encode(heatmap).decode('utf-8')
        return {
            'return': 'done',
            'report': 'macro-prec,reca,F1,err,kappa:' +
                      str(statistics.report(core.confusion_mats[-1])),
            'label_map': json.dumps(label_map),
            'heatmap': heatmap,
            'network': file
        }

    return {'return': 'error'}
Example 9
def main(DIRECTORY, args):

    print("[Start] making audio for the test.")

    TE_SPEECH_PATH = DIRECTORY['TE_SPEECH_PATH']
    TE_NOISE_PATH = DIRECTORY['TE_NOISE_PATH']
    OUTPUT_PATH = DIRECTORY['OUTPUT_PATH']

    # set output path
    output_path = os.path.join(OUTPUT_PATH, 'test_noisy_audio')

    # make output path
    makedirs(output_path)

    speech_names = [
        na for na in os.listdir(TE_SPEECH_PATH) if na.lower().endswith(".wav")
    ]
    noise_names = [
        na for na in os.listdir(TE_NOISE_PATH) if na.lower().endswith(".wav")
    ]

    for speech_na in speech_names:
        speech_path = os.path.join(TE_SPEECH_PATH, speech_na)
        speech, _ = read_audio(speech_path, target_fs=args.sampling_rate)

        for noise_na in noise_names:
            noise_path = os.path.join(TE_NOISE_PATH, noise_na)
            noise, _ = read_audio(noise_path, target_fs=args.sampling_rate)
            noise = equalizingRMS(ref=speech, target=noise)
            noisy = addNoise(speech=speech, noise=noise, snr=0)
            noisy_na = "%s_%s.wav" % (os.path.splitext(speech_na)[0],
                                      os.path.splitext(noise_na)[0])
            noisy_path = os.path.join(output_path, noisy_na)
            write_audio(noisy_path, noisy, args.sampling_rate)

    print("[Finish] making audio for the test.")
Example 10
import os
import sys
sys.path.append("..")
from cores import Options
from util import util, ffmpeg

opt = Options()
opt.parser.add_argument('--datadir',
                        type=str,
                        default='',
                        help='your video dir')
opt.parser.add_argument('--savedir',
                        type=str,
                        default='../datasets/video2image',
                        help='')
opt = opt.getparse()

files = util.Traversal(opt.datadir)
videos = util.is_videos(files)

util.makedirs(opt.savedir)
for video in videos:
    ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
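ffmpeg.continuous_screenshot is a project wrapper. A hedged sketch of an equivalent frame dump using the ffmpeg command-line tool (the output naming pattern and use of -r are assumptions about what the wrapper does):

import os
import subprocess

def continuous_screenshot_sketch(video_path, save_dir, fps):
    # Dump `fps` frames per second of the video as numbered JPEGs into save_dir.
    name = os.path.splitext(os.path.basename(video_path))[0]
    pattern = os.path.join(save_dir, name + '_%06d.jpg')
    subprocess.run(['ffmpeg', '-i', video_path, '-r', str(fps), pattern], check=True)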
Example 11
opt.parser.add_argument('--perload_num',type=int,default=16, help='')
opt.parser.add_argument('--norm',type=str,default='instance', help='')

opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
opt.parser.add_argument('--startiter',type=int,default=0, help='')
opt.parser.add_argument('--continuetrain', action='store_true', help='')
opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='')


'''
--------------------------Init--------------------------
'''
opt = opt.getparse()
dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint,'loss.txt'), 
              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))

N = opt.N
loss_sum = [0.,0.,0.,0.]
loss_plot = [[],[]]
item_plot = []

videos = os.listdir('./dataset')
videos.sort()
lengths = []
print('check dataset...')
for video in videos:
    video_images = os.listdir('./dataset/'+video+'/ori')
    lengths.append(len(video_images))
Example 12
from util import image_processing as impro

opt = Options()
opt.parser.add_argument('--datadir',
                        type=str,
                        default=' ',
                        help='your images dir')
opt.parser.add_argument('--savedir',
                        type=str,
                        default='../datasets/draw/face',
                        help='')
opt = opt.getparse()

mask_savedir = os.path.join(opt.savedir, 'mask')
img_savedir = os.path.join(opt.savedir, 'origin_image')
util.makedirs(mask_savedir)
util.makedirs(img_savedir)

filepaths = util.Traversal(opt.datadir)
filepaths = util.is_imgs(filepaths)
random.shuffle(filepaths)
print('find image:', len(filepaths))

# mouse callback function
drawing = False  # true if mouse is pressed
ix, iy = -1, -1
brushsize = 20


def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing, brushsize
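The callback body is cut off at this point. A typical OpenCV brush callback of this shape (everything below, including the mask_sketch canvas and the function name, is a hypothetical illustration, not the original body) proceeds roughly like this, and would be registered with cv2.setMouseCallback:

import cv2
import numpy as np

mask_sketch = np.zeros((512, 512, 3), dtype=np.uint8)  # hypothetical canvas

def draw_circle_sketch(event, x, y, flags, param):
    # Paint filled circles onto the canvas while the left button is held down.
    global ix, iy, drawing, brushsize
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
    elif event == cv2.EVENT_MOUSEMOVE and drawing:
        cv2.circle(mask_sketch, (x, y), brushsize, (255, 255, 255), -1)
    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False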
Example 13
import numpy as np
import cv2
import os
import sys

sys.path.append("..")
from util import image_processing as impro
from util import util

img_dir = './datasets_img/pix2pix/edges2cat/images'
output_dir = './datasets_img/pix2pix/edges2cat/train'
util.makedirs(output_dir)

img_names = os.listdir(img_dir)
for i, img_name in enumerate(img_names, 2000):  # output file numbering starts at 02000
    try:
        img = impro.imread(os.path.join(img_dir, img_name))
        img = impro.resize(img, 286)
        h, w = img.shape[:2]
        edges = cv2.Canny(img, 150, 250)
        edges = impro.ch_one2three(edges)
        out_img = np.zeros((h, w * 2, 3), dtype=np.uint8)
        out_img[:, 0:w] = edges
        out_img[:, w:2 * w] = img
        cv2.imwrite(os.path.join(output_dir, '%05d' % i + '.jpg'), out_img)
    except Exception as e:
        print('skip %s: %s' % (img_name, e))
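impro.ch_one2three presumably replicates the single-channel Canny output into three channels so it can sit next to the RGB image in the paired training picture; a one-function equivalent (an assumption about its behavior):

import cv2

def ch_one2three_sketch(gray):
    # Replicate a single-channel image into a 3-channel image.
    return cv2.merge([gray, gray, gray])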
Example 14
def main(DIRECTORY, args):
    
    OUTPUT_PATH = DIRECTORY['OUTPUT_PATH']
    
    hop_size = int(args.win_size - args.overlap) # hop size to make spectrogram 
    
    print("[Start] trainning.. NMF algoritm.")
    
    
    #-------------------train------------------
    target_dir = os.path.join(OUTPUT_PATH,'merged_audio')
     
    speech_path = os.path.join(target_dir, "merged_train_speech_audio.wav")
    noise_path = os.path.join(target_dir, "merged_train_noise_audio.wav") 
    
    # get magnitude of spectrograms 
    V_speech = abs(audio_to_spectrogram(speech_path, args.sampling_rate, args.window, args.fft, hop_size))
    V_noise = abs(audio_to_spectrogram(noise_path, args.sampling_rate, args.window, args.fft, hop_size))

    # do NMF (V ~= WH)
    V = np.concatenate((V_speech, V_noise), axis=1)
    num_basis_train = args.num_basis_speech + args.num_basis_noise
    W_train, H_train = nmf_train(V, args.max_iter_train, args.epsilon, num_basis_train)
    print("[End] trainning.. NMF algoritm.")
    
    
    #-------------------test-------------------
    print("[Start] Test.. NMF algoritm (output: enhanced speech).")
    test_dir = os.path.join(OUTPUT_PATH,'test_noisy_audio') 
    noisy_names = [na for na in os.listdir(test_dir) if na.lower().endswith(".wav")]
        
    output_path = os.path.join(OUTPUT_PATH,'enhanced_audio',str(args.nmf_mode))
    makedirs(output_path)
    
    for noisy_na in noisy_names:
        print("%s"%(noisy_na))
        noisy_path = os.path.join(test_dir, noisy_na)
        
        stft_noisy = audio_to_spectrogram(noisy_path, args.sampling_rate, args.window, args.fft, hop_size) # spectrogram for noisy
        
        V_noisy = abs(stft_noisy) # magnitude spectrogram for noisy 
        H_noisy = nmf_test(V_noisy, W_train, H_train, args.max_iter_test, args.epsilon, penalty=args.penalty, algorithm=args.nmf_mode) # new encoding vector obtained through nmf algorithm
        
        enhanced_V = wienner_filtering(V_noisy, W_train, H_noisy, args.num_basis_speech, args.p) # magnitude spectrogram for enhanced 
        reconstructed_audio = spectrogram_to_audio(enhanced_V, np.angle(stft_noisy), args.window, hop_size) # reconstruct audio 
        
        # write enhanced audio
        out_audio_path = os.path.join(output_path,"enhanced_%s"%noisy_na)
        write_audio(out_audio_path, reconstructed_audio, args.sampling_rate)
        
        #-------------------plot-------------------      

        if args.visualize:
            
            # spectrogram for clean speech
            clean_speech_na = ("_").join(noisy_na.split("_")[0:-1])+os.path.splitext(noisy_na)[-1]
            clean_speech_path = os.path.join(DIRECTORY['TE_SPEECH_PATH'],clean_speech_na)
            stft_clean = audio_to_spectrogram(clean_speech_path, args.sampling_rate, args.window, args.fft, hop_size)
            
            # visualize 
            fig, axs = plt.subplots(3,1, sharex=False)
            axs[0].matshow(np.log10(np.abs(stft_noisy)**2), origin='lower', aspect='auto', cmap='jet')
            axs[1].matshow(np.log10(np.abs(stft_clean)**2), origin='lower', aspect='auto', cmap='jet')
            axs[2].matshow(np.log10(enhanced_V**2), origin='lower', aspect='auto', cmap='jet')
            axs[0].set_title("0 db mixture log spectrogram (%s)" % noisy_na)
            axs[1].set_title("Clean speech log spectrogram")
            axs[2].set_title("Enhanced speech log spectrogram")
            for j1 in range(3):
                axs[j1].xaxis.tick_bottom()
            plt.tight_layout()
            fig = plt.gcf()
            plt.show()
            
            if args.plot_save:
                
                plot_path = os.path.join(OUTPUT_PATH,"plot",str(args.nmf_mode))
                makedirs(plot_path)
                
                seg_result_path = os.path.join(plot_path,"%s.png"%(os.path.splitext(noisy_na)[0]))
                fig.savefig(seg_result_path, dpi=300)

            
    print("[End] Test.. NMF algoritm (output: enhanced speech).")
Example 15
import sys
sys.path.append("..")
from util import util, transformer, dataloader, statistics, plot, options
from util import array_operation as arr
from models import creatnet, core

# -----------------------------Init-----------------------------
opt = options.Options()
opt.parser.add_argument('--rec_tmp',
                        type=str,
                        default='./server_data/rec_data',
                        help='')
opt = opt.getparse()
opt.k_fold = 0
opt.save_dir = './checkpoints'
util.makedirs(opt.save_dir)
util.makedirs(opt.rec_tmp)

# -----------------------------Load original data-----------------------------
signals, labels = dataloader.loaddataset(opt)
ori_signals_train, ori_labels_train = signals[:opt.fold_index[0]].copy(), labels[:opt.fold_index[0]].copy()
ori_signals_eval, ori_labels_eval = signals[opt.fold_index[0]:].copy(), labels[opt.fold_index[0]:].copy()
label_cnt, label_cnt_per, label_num = statistics.label_statistics(labels)
opt = options.get_auto_options(opt, label_cnt_per, label_num,
                               ori_signals_train)

# -----------------------------def network-----------------------------
core = core.Core(opt)
core.network_init(printflag=True)

Example 16
import os
import numpy as np
from scipy.io import wavfile
import matplotlib.pylab as plt
import random
import scipy.signal
import time
import librosa
from util import util, ffmpeg, dsp, sound, notation

dataset = './dataset/诸葛亮'
video_names = os.listdir(dataset)
video_names.sort()
util.clean_tempfiles(tmp_init=False)
util.makedirs('./tmp/voice')

for i in range(len(video_names)):
    ffmpeg.video2voice(os.path.join(dataset, video_names[i]),
                       os.path.join('./tmp/voice', '%03d' % i + '.wav'),
                       samplingrate=44100)
    voice = sound.load(os.path.join('./tmp/voice', '%03d' % i + '.wav'))[1]
    base_freq = sound.basefreq(voice, 44100, 5000, mode='mean')
    print(video_names[i])
    print('basefreq:', base_freq)
    print('note:', librosa.hz_to_note(base_freq))
    f, fft = dsp.showfreq(voice, 44100, 5000)
    plt.plot(f, fft)
    plt.show()
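sound.basefreq is a project helper. A rough sketch of a dominant-frequency estimate over the 0..fc band via the magnitude spectrum (the real helper's mode='mean' option likely averages over several frames; the version below is a simplification and an assumption):

import numpy as np

def basefreq_sketch(signal, fs, fc):
    # Return the frequency (Hz) of the strongest spectral component below fc.
    spectrum = np.abs(np.fft.rfft(signal))
    freqs = np.fft.rfftfreq(len(signal), d=1.0 / fs)
    band = (freqs > 0) & (freqs <= fc)
    return freqs[band][np.argmax(spectrum[band])]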
Example 17
import cv2
import numpy as np
import datetime
import os
import random

import sys

sys.path.append("..")
from util import util
from util import image_processing as impro

image_dir = './datasets_img/v2im'
mask_dir = './datasets_img/v2im_mask'
util.makedirs(mask_dir)

files = os.listdir(image_dir)
files_new = files.copy()
print('find image:', len(files))
masks = os.listdir(mask_dir)
print('mask:', len(masks))

# mouse callback function
drawing = False  # true if mouse is pressed
ix, iy = -1, -1
brushsize = 20


def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing, brushsize
Example 18
opt = Options().getparse()
system_type = 'Linux'
if 'Windows' in platform.platform():
    system_type = 'Windows'

#-------------------------------Media Init-------------------------------
if util.is_img(opt.media):
    img = cv2.imread(opt.media)
    h_media,w_media = img.shape[:2]
elif util.is_video(opt.media): 
    fps,endtime,h_media,w_media = ffmpeg.get_video_infos(opt.media)
    if opt.frame_num == 0:
        opt.frame_num = int(endtime*fps-5)
    if opt.ori_fps == 0:
        opt.ori_fps = fps
    util.makedirs('./tmp')
else:
    print('Cannot load this file!')

#-------------------------------Image Shape Init-------------------------------
if opt.screen == 1:
    limw, limh = 80, 24
elif opt.screen == 2:
    limw, limh = 132, 43
elif opt.screen == 3:
    limw, limh = 203, 55
screen_scale = limh/limw

img_scale = h_media/w_media/opt.char_scale
if img_scale >= screen_scale:
    strshape = (limh,int(limh/img_scale))
Example 19
opt.parser.add_argument('--time', type=int, default=5, help='split video time')
opt.parser.add_argument('--minmaskarea', type=int, default=2000, help='')
opt.parser.add_argument('--quality',
                        type=int,
                        default=45,
                        help='minimal quality')
opt.parser.add_argument('--outsize', type=int, default=286, help='')
opt.parser.add_argument('--startcnt', type=int, default=0, help='')
opt.parser.add_argument('--minsize',
                        type=int,
                        default=96,
                        help='minimal roi size')
opt.parser.add_argument('--no_sclectscene', action='store_true', help='')
opt = opt.getparse()

util.makedirs(opt.savedir)
util.writelog(
    os.path.join(opt.savedir, 'opt.txt'),
    str(time.asctime(time.localtime(time.time()))) + '\n' + util.opt2str(opt))

videopaths = util.Traversal(opt.datadir)
videopaths = util.is_videos(videopaths)
random.shuffle(videopaths)

# def network
net = loadmodel.bisenet(opt, 'roi')

result_cnt = opt.startcnt
video_cnt = 1
starttime = datetime.datetime.now()
for videopath in videopaths:
Example 20
# (ASCII-art "HypoX64" banner from the original file header omitted here)

print('Init...')
notations = notation.readscore('./music/CroatianRhapsody.txt')
dataset = './dataset/诸葛亮'
video_names = os.listdir(dataset)
video_names.sort()
util.clean_tempfiles(tmp_init=False)
util.makedirs('./tmp/voice')

seed_voices = []
seed_freqs = []
for i in range(len(video_names)):
    ffmpeg.video2voice(os.path.join(dataset, video_names[i]),
                       os.path.join('./tmp/voice', '%03d' % i + '.wav'),
                       samplingrate=44100)
    voice = sound.load(os.path.join('./tmp/voice', '%03d' % i + '.wav'))[1]
    #voice = dsp.bpf(voice, 44100, 20, 2000)
    base_freq = sound.basefreq(voice, 44100, 4000, mode='mean')
    seed_voices.append(voice)
    seed_freqs.append(base_freq)

    fps, endtime, height, width = ffmpeg.get_video_infos(
        os.path.join(dataset, video_names[i]))
Example 21
            os.path.join(send_data_dir, labels[i], samples[j]))
        data = {
            'token': opt.token,
            'mode': 'send',
            'label': labels[i],
            'data': txt_data
        }
        r = requests.post(opt.url, data)
print(r.json())
"""Train and get network weight
return: {'return' : 'done',  # txt
        'report'  : 'macro-prec,reca,F1,err,kappa:'+str(statistics.report(core.confusion_mats[-1])), # txt
        'label_map': {'user_nameA':0,'user_nameB':1,'user_nameC':2} # json
        'heatmap' : heatmap, # .png file, encode by base64
        'network' : file     # .pth file, encode by base64
        }
"""
data = {'token': opt.token, 'mode': 'train'}
r = requests.post(opt.url, data, timeout=60)
rec_data = r.json()
print('report:', rec_data['report'])
print('label_map:', rec_data['label_map'])

# save model.pt
util.makedirs('./client_data')
file = base64.b64decode(rec_data['network'])
util.savefile(file, './client_data/model.pt')
# save heatmap.png
file = base64.b64decode(rec_data['heatmap'])
util.savefile(file, './client_data/heatmap.png')
Example 22
    sys.exit(0)

opt = Options().getparse(test_flag = False)
if not os.path.isdir(opt.temp_dir):
    util.file_init(opt)

def saveScriptModel(model, example, savepath):
    model.cpu()
    traced_script_module = torch.jit.trace(model, example)
    # run the traced module once as a sanity check before saving
    output = traced_script_module(example)
    print(output)
    traced_script_module.save(savepath)

savedir = '../cpp/res/models/'
util.makedirs(savedir)

opt.mosaic_position_model_path = '../pretrained_models/mosaic/mosaic_position.pth'
model = loadmodel.bisenet(opt,'mosaic')
example = torch.ones((1,3,360,360))
saveScriptModel(model,example,os.path.join(savedir,'mosaic_position.pt'))
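Once saved, the traced module can be reloaded without the original Python model class, either from Python or from the libtorch C++ API that the ../cpp directory suggests. The Python side, for a quick check, is simply:

loaded = torch.jit.load(os.path.join(savedir, 'mosaic_position.pt'))
with torch.no_grad():
    print(loaded(torch.ones((1, 3, 360, 360))))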



# def main():
    
#     if os.path.isdir(opt.media_path):
#         files = util.Traversal(opt.media_path)
#     else:
#         files = [opt.media_path]        
#     if opt.mode == 'add':
Example 23
                        help='if specified,save mask')
opt.parser.add_argument('--outsize', type=int, default=512, help='')
opt.parser.add_argument('--fold', type=int, default=1, help='')
opt.parser.add_argument('--start', type=int, default=0, help='')
opt.parser.add_argument('--minsize',
                        type=int,
                        default=128,
                        help='when [square], minimal roi size')
opt.parser.add_argument('--quality',
                        type=int,
                        default=40,
                        help='when [square], minimal quality')

opt = opt.getparse()

util.makedirs(opt.savedir)
util.writelog(
    os.path.join(opt.savedir, 'opt.txt'),
    str(time.asctime(time.localtime(time.time()))) + '\n' + util.opt2str(opt))
opt.mod = (opt.mod).split(',')

#save dir
if opt.hd:
    train_A_path = os.path.join(opt.savedir, 'train_A')
    train_B_path = os.path.join(opt.savedir, 'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
else:
    train_path = os.path.join(opt.savedir, 'train')
    util.makedirs(train_path)
if opt.savemask:
Example 24
SamplingRate = 44100
IntervalTime = 0.03

#video2voice
if 'preprocess' in STEP or 'full' in STEP:
    util.clean_tempfiles(tmp_init=True)
    names = os.listdir(video_dir)
    for i, name in enumerate(names, 0):
        video_path = os.path.join(video_dir, name)
        ffmpeg.video2voice(
            video_path,
            os.path.join('./tmp/video_voice', name.replace('mp4', 'wav')),
            'wav')

        img_dir = os.path.join('./tmp/video_imgs', '%02d' % i)
        util.makedirs(img_dir)
        ffmpeg.video2image(video_path, os.path.join(img_dir, '%05d.jpg'))

    ffmpeg.video2voice(music_path, './tmp/music/music.wav', 'wav')

if 'matchtest' in STEP or 'generate_video' in STEP or 'full' in STEP:
    '''
    dst crop and get features
    '''
    print('loading...')
    names = os.listdir('./tmp/video_voice')
    names.sort()
    audios = []
    for name in names:
        path = os.path.join('./tmp/video_voice', name)
        sampling_freq, audio = wavfile.read(path)
Example 25
import cv2
import os
from torchvision import transforms
from PIL import Image
import random
import sys
sys.path.append("..")
import util.image_processing as impro
from util import util, mosaic
import datetime
import shutil

mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
img_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
output_dir = './datasets_img'
util.makedirs(output_dir)
HD = True  # if False, make a dataset for pix2pix; if True, for pix2pix_HD
MASK = True  # if True, also output the mask
OUT_SIZE = 256
FOLD_NUM = 2
Bounding = False

if HD:
    train_A_path = os.path.join(output_dir, 'train_A')
    train_B_path = os.path.join(output_dir, 'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
else:
    train_path = os.path.join(output_dir, 'train')
    util.makedirs(train_path)
if MASK: