Example #1
def sensitivity_analysis(index, s_cfg, cfg):
    arrays = np.load('analysis/arrays%d.npz' %index)
    sa = Saliency_Analyzor('../obj/MsPacman-v0')
    s0, s, saliency, act, r = arrays['s0'], arrays['s'], arrays['saliency'], int(arrays['act']), float(arrays['r'])
    # exp = generate_explanation(obj_sals)
    # Mask the object with ghost
    # plt.imshow(saliency, cmap='gray', aspect='equal')
    saliency = saliency[:, :, 3]
    saliency = cv2.resize(saliency, (160, 210))

    obj_sals = sa.object_saliencies(s0, saliency)
    obj_sals = sa.top_saliency_filter(obj_sals)
    saliency, obj, position = obj_sals[0]
    # // keeps integer pixel coordinates (the original Python 2 behaviour of /)
    x, y = (position.left + position.right) // 2, (position.up + position.down) // 2
    masked_s0 = mask(s0, x, y, 'ghost', sa)
    #show_images(masked_s0, gray=True)
    # Calculate new saliency for masked image
    s_func = OfflinePredictor(s_cfg)
    #predfunc = OfflinePredictor(cfg)
    #masked_saliency =  s_func([[expand_state(masked_s0, need_grey=False)]])[0][0]
    saliency = s_func([[expand_state(masked_s0, need_grey=False)]])[0][0]
    saliency = saliency[:, :, 3]
    desc = generate_description(act)
    exp = generate_explanation(obj_sals)
    sal_img = sa.saliency_analysis_image(masked_s0, obj_sals)

    show_analyze(0, desc, exp, s0, saliency, masked_s0, 0, save=False)
Example #2
def eval(logdir):
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
Example #3
def do_convert(args, logdir):
    # Load graph
    model = Net()

    df = NetDataFlow(hp.convert.data_path, hp.convert.batch_size)

    ckpt = '{}/{}'.format(logdir, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir)
    session_inits = []
    if ckpt:
        session_inits.append(SaverRestore(ckpt))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio = convert(predictor, df)

    soundfile.write("a.wav", y_audio[0], 16000, format="wav", subtype="PCM_16")
    soundfile.write("b.wav", audio[0], 16000, format="wav", subtype="PCM_16")

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    writer = tf.summary.FileWriter(logdir)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
Example #4
def do_convert(args, logdir1, logdir2):
    # Load graph
    model = Net2()

    df = Net2DataFlow(hp.convert.data_path, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)

    # Write the result
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(logdir2)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
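The PPG visualization above turns the posteriorgram batch into a single-channel image tensor for `tf.summary.image`. A minimal standalone illustration of that reshape; the shapes are assumptions, not values from the examples:

import numpy as np

ppgs = np.random.rand(2, 100, 61).astype(np.float32)  # (batch, timesteps, n_phns), hypothetical
heatmap = np.expand_dims(ppgs, 3)                     # -> (2, 100, 61, 1), i.e. channel=1
print(heatmap.shape)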
Example #5
def set_enviroment(args, logdir1, logdir2):
    # Load graph
    set_env_s = datetime.datetime.now()

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    model = Net2ForConvert()

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits)
    )

    print("OfflinePredictor")
    predictor = OfflinePredictor(pred_conf)

    set_env_e = datetime.datetime.now()
    set_env_t = set_env_e - set_env_s
    print("Setting Environment time:{}s".format(set_env_t.seconds))

    return predictor
Example #6
def eval(logdir1, logdir2):
    # Load graph
    model = Net2()

    # dataflow
    df = Net2DataFlow(hp.test2.data_path, hp.test2.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # x_mfccs, y_spec, _ = next(df().get_data())
    summ_loss, = predictor(next(df().get_data()))

    writer = tf.summary.FileWriter(logdir2)
    writer.add_summary(summ_loss)
    writer.close()
Example #7
def do_convert(logdir1, logdir2, input_path, output_path):
    # Load graph
    model = Net2()
    model.actual_duration = librosa.core.get_duration(filename=input_path,
                                                      sr=hp.default.sr)

    # TODO: isolate out logdirs, and work out how to pre-download from S3

    assert len(input_path) > 0, "must be non-empty input path"

    df = Net2DataFlow(data_path=input_path, batch_size=1)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = []
    session_inits.append(SaverRestore(ckpt2))
    session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)

    write_wav(audio[0], hp.default.sr, output_path)
Example #8
    def get_predictor(cls):
        ''' load trained model'''

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ['TENSORPACK_FP16'] = 'true'
        
            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ['SM_MODEL_DIR']
            except KeyError:
                model_dir = '/opt/ml/model'

            try:
                cls.pretrained_model = os.environ['PRETRAINED_MODEL']
            except KeyError:
                pass

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f'Using model: {trained_model}')

            # fixed resnet50 backbone weights
            cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True

            # calling detection dataset gets the number of coco categories 
            # and saves in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=['images', 'orig_image_dims'],
                output_names=[
                    'generate_{}_proposals_topk_per_image/boxes'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                    'generate_{}_proposals_topk_per_image/scores'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                    'fastrcnn_all_scores',
                    'output/boxes',
                    'output/scores',
                    'output/labels',
                    'output/masks'
                ]))
            return cls.predictor
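A note on the checkpoint lookup above: comparing `model-*.index` file names as plain strings mis-orders steps with different digit counts (e.g. `model-9000.index` sorts after `model-10000.index`). A minimal sketch of a numeric variant, assuming tensorpack's usual `model-<step>.index` naming; `latest_checkpoint_index` is a hypothetical helper, not part of the examples:

import glob
import os
import re

def latest_checkpoint_index(model_dir):
    # Hypothetical helper: pick the newest 'model-*.index' file by its
    # parsed training step instead of by lexicographic file name.
    candidates = glob.glob(os.path.join(model_dir, 'model-*.index'))
    if not candidates:
        return None

    def step(path):
        m = re.search(r'model-(\d+)\.index$', path)
        return int(m.group(1)) if m else -1

    return max(candidates, key=step)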
Example #9
def sample_epoch_for_analysis(cfg, s_cfg, output):
    """
    :param cfg: cfg to predict Q values
    :param s_cfg: cfg to predict pixel saliency maps in 84 * 84 * 4
    :param output: output folder name
    :return: saves the sampled-epoch arrays in the output folder.
    Saved arrays: original_state (210*160*3), unresized_states (210*160*4), states (84*84*4), saliency (84*84*4), act, r, timestep.
    """
    player = get_player(dumpdir=output)
    predfunc = OfflinePredictor(cfg)
    s_func   = OfflinePredictor(s_cfg)
    timestep = 0
    sa = Saliency_Analyzor('../obj/MsPacman-v0')
    R = 0
    history = deque(maxlen=FRAME_HISTORY)
    while True:
        timestep += 1
        s0 = player.original_current_state()
        unresized_state = grey(s0)
        history.append(unresized_state)
        us = get_history_state(history)
        # Use the four saved frames to predict the action
        s = player.current_state()
        # s = expand_state(s0)
        # Actions: 0-none; 1-up; 2-right; 3-left; 4-down; 5-upright; 6-leftup; 7-rightdown; 8-leftdown
        Qvalues = predfunc([[s]])[0][0]
        act = Qvalues.argmax()
        saliency = s_func([[s]])[0][0]
        #description = generate_description(Qvalues, act)
        r, isOver = player.action(act)
        if isOver:
            history.clear()
        save_arrays(s0, us, s, saliency, act, r, timestep, output)
        #show(s, saliency, act, timestep, output, last=True, save=True)
        #show_large(s0, saliency, act, timestep, output, save=True, save_npy=False, analyzor=sa, description=description, explanation=True)
        #print r, act
        R += r
        if timestep % 50 == 0:
            print(timestep)
            print('Total Reward:', R)
        if isOver:
            return
Example #10
def compute_accuracy(model, mel_spec, speaker_id, ckpt=None):
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=SaverRestore(ckpt) if ckpt else None)
    accuracy_pred = OfflinePredictor(pred_conf)

    acc, = accuracy_pred(mel_spec, speaker_id)

    return acc
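A hypothetical call site for `compute_accuracy`; the `DataLoader` batch layout and the `hp` fields are assumptions borrowed from the embedding snippet further below, not a confirmed API:

# Hedged usage sketch: model, audio_meta and hp.* are assumptions.
data_loader = DataLoader(audio_meta, hp.embed.batch_size)
wav, mel_spec, speaker_id = next(data_loader.dataflow().get_data())

ckpt = tf.train.latest_checkpoint(hp.logdir)
acc = compute_accuracy(model, mel_spec, speaker_id, ckpt=ckpt)
print('speaker-id accuracy: {:.3f}'.format(acc))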
Example #11
def run(cfg, s_cfg, output):
    player = get_player(dumpdir=output)
    predfunc = OfflinePredictor(cfg)
    s_func = OfflinePredictor(s_cfg)
    timestep = 0
    #sa = Saliency_Analyzor('../obj/MsPacman-v0')
    while True:
        timestep += 1
        s = player.current_state()
        s0 = player.original_current_state()
        act = predfunc([[s]])[0][0].argmax()
        saliency = s_func([[s]])[0][0]
        r, isOver = player.action(act)
        show(s, saliency, act, timestep, output, last=True, save=True)
        #show_large(s0, saliency, act, timestep, output, save=True, save_npy=False)
        #print r, act
        if timestep % 50 == 0:
            print(timestep)
        if isOver:
            return
Example #12
def do_convert(args, logdir1, logdir2):
    # Load graph
    model = Net2()

    data = get_mfccs_and_spectrogram(args.file)

    ckpt1 = '{}/{}'.format(
        logdir1,
        args.net1) if args.net1 else tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2,
        args.net2) if args.net2 else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, data)

    target_file = args.file.split('/')[-1]
    portion = os.path.splitext(target_file)
    # converted_file = target_file.split('.')[0] + '_converted.wav'
    converted_file = portion[0] + '.wav'
    write_wav(audio[0], hp.Default.sr, args.savepath + converted_file)

    # Write the result
    tf.summary.audio('A',
                     y_audio,
                     hp.Default.sr,
                     max_outputs=hp.Convert.batch_size)
    tf.summary.audio('B',
                     audio,
                     hp.Default.sr,
                     max_outputs=hp.Convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(args.savepath)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
    writer.add_summary(summ)
    writer.close()
Example #13
    def get_predictor(cls):
        ''' load trained model'''

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel()

            try:
                model_dir = os.environ['SM_MODEL_DIR']
            except KeyError:
                model_dir = '/opt/ml/model'

            try:
                cls.pretrained_model = os.environ['PRETRAINED_MODEL']
            except KeyError:
                pass

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model[:-6]
            print(f'Using model: {trained_model}')

            # fixed resnet50 backbone weights
            cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            input_tensors = mask_rcnn_model.get_inference_tensor_names()[0]
            output_tensors = mask_rcnn_model.get_inference_tensor_names()[1]

            cls.predictor = OfflinePredictor(
                PredictConfig(model=mask_rcnn_model,
                              session_init=get_model_loader(trained_model),
                              input_names=input_tensors,
                              output_names=output_tensors))
            return cls.predictor
Example #14
def init_predictor(ckpt_dir):
    """ Initializes an OfflinePredictor for the 'Net1' Phoneme classifier, given a directory of tf-checkpoints.

    :param ckpt_dir: Checkpoint directory.
    :return: OfflinePredictor
    """
    ckpt1 = tf.train.latest_checkpoint(ckpt_dir)
    assert ckpt1 is not None, "Failed to load checkpoint in '{}'".format(
        ckpt_dir)

    net1 = Net1()
    pred_conf = PredictConfig(
        model=net1,
        input_names=['x_mfccs'],
        output_names=['net1/ppgs'],
        session_init=ChainInit([SaverRestore(ckpt1, ignore=['global_step'])]))
    predictor = OfflinePredictor(pred_conf)
    return predictor
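A short usage sketch for `init_predictor`: the input and output follow the `x_mfccs` and `net1/ppgs` tensor names above, but the dataflow call and the PPG shape are assumptions:

# Hedged sketch: df() and the output shape are assumptions.
predictor = init_predictor('logdir/train1')

x_mfccs, _ = next(df().get_data())   # one batch of MFCC frames
ppgs, = predictor(x_mfccs)           # phoneme posteriorgrams from 'net1/ppgs'
print(ppgs.shape)                    # e.g. (batch, timesteps, n_phns)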
Example #15
def do_convert(args, logdir1, logdir2, input_dir):
    # Load graph
    model = Net2()
    # input_dir = hp.convert.data_base_dir_original + hp.convert.data_path
    df = Net2DataFlow(input_dir, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2,
        args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # loop over all the audio files
    for wav_file in df.wav_files:
        # derive the output path for the converted audio
        out_path = wav_file.replace(hp.convert.data_base_dir_original,
                                    hp.convert.data_base_dir_convert)
        # change file extension from wv1/wv2 to wav
        out_path = out_path[:-2] + 'av'
        if os.path.isfile(out_path):
            # file is already present, move on to the next one.
            print("skipping " + wav_file)
            continue

        print("converting " + wav_file)
        # convert audio
        audio_len, feats = df.get_features(wav_file)
        audio_full = []
        for feat in feats:
            input_arr = ([feat[0]], [feat[1]], [feat[2]])
            audio, ppgs = convert(predictor, input_arr)
            audio_full.append(
                (audio[0] * hp.convert.amplitude_multiplier).astype(np.int16))
        scipy.io.wavfile.write(out_path, hp.default.sr,
                               np.concatenate(audio_full)[:audio_len])
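The extension swap above (`out_path[:-2] + 'av'`) rewrites the last two characters so `.wv1`/`.wv2` becomes `.wav`. An equivalent, arguably clearer spelling with `os.path.splitext`; the WSJ-style path is only an illustration:

import os

wav_file = 'data/wsj/441c0401.wv1'   # hypothetical input path
root, _ = os.path.splitext(wav_file)
out_path = root + '.wav'             # same result as out_path[:-2] + 'av'
print(out_path)                      # data/wsj/441c0401.wav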
Example #16
def eval(logdir):
    # Load graph
    model = Net()

    # dataflow
    df = NetDataFlow(hp.test.data_path, hp.test.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)
    session_inits = []
    if ckpt:
        session_inits.append(SaverRestore(ckpt))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    r_mel, t_spec, _ = next(df().get_data())
    summ_loss, = predictor(r_mel, t_spec)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.close()
Example #17
    def get_predictor(cls):
        ''' load trained model'''

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ['TENSORPACK_FP16'] = 'true'

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ['SM_MODEL_DIR']
            except KeyError:
                model_dir = '/opt/ml/model'

            try:
                cls.pretrained_model = os.environ['PRETRAINED_MODEL']
            except KeyError:
                pass

            try:
                div = int(eval(os.environ['divisor']))
            except KeyError:
                div = 1

            rpn_anchor_stride = int(16 / div)
            rpn_anchor_sizes = (int(32 / div), int(64 / div), int(128 / div),
                                int(256 / div), int(512 / div))

            try:
                rpn_anchor_stride = int(eval(os.environ['rpnanchor_stride']))
            except KeyError:
                pass

            try:
                nms_topk = int(eval(os.environ['NMS_TOPK']))
            except KeyError:
                nms_topk = 2

            try:
                nms_thresh = eval(os.environ['NMS_THRESH'])
            except KeyError:
                nms_thresh = 0.7

            try:
                results_per_img = eval(os.environ['res_perimg'])
            except KeyError:
                results_per_img = 400

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f'Using model: {trained_model}')

            # fixed resnet50 backbone weights
            cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            cfg.RPN.ANCHOR_STRIDE = rpn_anchor_stride
            cfg.RPN.ANCHOR_SIZES = rpn_anchor_sizes
            cfg.RPN.TEST_PRE_NMS_TOPK = int(6000 * nms_topk)
            cfg.RPN.TEST_POST_NMS_TOPK = int(1000 * nms_topk)
            cfg.RPN.TEST_PER_LEVEL_NMS_TOPK = int(1000 * nms_topk)
            # testing -----------------------
            cfg.TEST.FRCNN_NMS_THRESH = nms_thresh
            cfg.TEST.RESULT_SCORE_THRESH = 0.05
            cfg.TEST.RESULT_SCORE_THRESH_VIS = 0.2  # only visualize confident results
            cfg.TEST.RESULTS_PER_IM = results_per_img

            # calling detection dataset gets the number of coco categories
            # and saves in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(
                PredictConfig(
                    model=mask_rcnn_model,
                    session_init=get_model_loader(trained_model),
                    input_names=['images', 'orig_image_dims'],
                    output_names=[
                        'generate_{}_proposals_topk_per_image/boxes'.format(
                            'fpn' if cfg.MODE_FPN else 'rpn'),
                        'generate_{}_proposals_topk_per_image/scores'.format(
                            'fpn' if cfg.MODE_FPN else 'rpn'),
                        'fastrcnn_all_scores', 'output/boxes', 'output/scores',
                        'output/labels', 'output/masks'
                    ]))
            return cls.predictor
Example #18
    def get_predictor(cls):
        """load trained model"""

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            os.environ["TENSORPACK_FP16"] = "true"

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel(True)

            try:
                model_dir = os.environ["SM_MODEL_DIR"]
            except KeyError:
                model_dir = "/opt/ml/model"
            try:
                resnet_arch = os.environ["RESNET_ARCH"]
            except KeyError:
                resnet_arch = "resnet50"

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model
            print(f"Using model: {trained_model}")

            # FPN + mask heads; backbone depth is chosen below
            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            if resnet_arch == "resnet101":
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
            else:
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]

            cfg_prefix = "CONFIG__"
            for key, value in dict(os.environ).items():
                if key.startswith(cfg_prefix):
                    attr_name = key[len(cfg_prefix):]
                    attr_name = attr_name.replace("__", ".")
                    value = eval(value)
                    print(f"update config: {attr_name}={value}")
                    nested_var = cfg
                    attr_list = attr_name.split(".")
                    for attr in attr_list[0:-1]:
                        nested_var = getattr(nested_var, attr)
                    setattr(nested_var, attr_list[-1], value)

            # calling detection dataset gets the number of coco categories
            # and saves in the configuration
            DetectionDataset()
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            cls.predictor = OfflinePredictor(
                PredictConfig(
                    model=mask_rcnn_model,
                    session_init=get_model_loader(trained_model),
                    input_names=["images", "orig_image_dims"],
                    output_names=[
                        "generate_{}_proposals_topk_per_image/boxes".format(
                            "fpn" if cfg.MODE_FPN else "rpn"),
                        "generate_{}_proposals_topk_per_image/scores".format(
                            "fpn" if cfg.MODE_FPN else "rpn"),
                        "fastrcnn_all_scores",
                        "output/boxes",
                        "output/scores",
                        "output/labels",
                        "output/masks",
                    ],
                ))
            return cls.predictor
Example #19
    if hp.embed.meta_path:
        params['meta_path'] = hp.embed.meta_path
    audio_meta = audio_meta_class(**params)
    data_loader = DataLoader(audio_meta, hp.embed.batch_size)

    # samples
    wav, mel_spec, speaker_id = next(data_loader.dataflow().get_data())

    ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)

    pred_conf = PredictConfig(
        model=model,
        input_names=['x'],
        output_names=['embedding/embedding', 'prediction'],
        session_init=SaverRestore(ckpt) if ckpt else None)
    embedding_pred = OfflinePredictor(pred_conf)

    embedding, pred_speaker_id = embedding_pred(mel_spec)

    # get a random audio of the predicted speaker.
    wavfile_pred_speaker = np.array([audio_meta_train.get_random_audio(s) for s in pred_speaker_id])
    length = int(hp.signal.duration * hp.signal.sr)
    wav_pred_speaker = np.array(
        [fix_length(read_wav(w, hp.signal.sr, duration=hp.signal.duration), length)
         for w in wavfile_pred_speaker])

    # write audio
    tf.summary.audio('wav', wav, hp.signal.sr, max_outputs=10)
    tf.summary.audio('wav_pred', wav_pred_speaker, hp.signal.sr, max_outputs=10)

    # write prediction
Example #20
def do_convert(args, logdir1, logdir2):
    print("do_convert")
    # Load graph

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2,
        args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    model = Net2ForConvert()

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))

    print("OfflinePredictor")
    set_env_s = datetime.datetime.now()
    predictor = OfflinePredictor(pred_conf)
    set_env_e = datetime.datetime.now()
    set_env_t = set_env_e - set_env_s
    print("Setting Environment time:{}s".format(set_env_t.seconds))

    input_name = ''
    while True:
        input_name = input("Enter the path of the audio file to convert (or 'quit'): ")
        if input_name == 'quit':
            break
        elif len(glob.glob(input_name)) == 0:
            print("That audio file doesn't exist! Try something else.")
            continue

        convert_s = datetime.datetime.now()
        mfcc, spec, mel_spec = get_mfccs_and_spectrogram(input_name,
                                                         trim=False,
                                                         isConverting=True)
        mfcc = np.expand_dims(mfcc, axis=0)
        spec = np.expand_dims(spec, axis=0)
        mel_spec = np.expand_dims(mel_spec, axis=0)
        output_audio, ppgs = convert(predictor, mfcc, spec, mel_spec)

        input_audio, samplerate = load(input_name,
                                       sr=hp.default.sr,
                                       dtype=np.float64)
        """
        # F0 adaptation with WORLD Vocoder
        f0_conv_s = datetime.datetime.now()
        output_audio = f0_adapt(input_audio, output_audio, logdir2, samplerate)
        f0_conv_e = datetime.datetime.now()
        f0_conv_time = f0_conv_e - f0_conv_s
        print("F0 Adapting Time:{}s".format(f0_conv_time.seconds))
        """

        # Saving voice-converted audio to 32-bit float wav file
        # print(audio.dtype)
        output_audio = output_audio.astype(np.float32)
        write_wav(path="./converted/" + input_name,
                  y=output_audio,
                  sr=hp.default.sr)

        # Saving PPGS data to Grayscale Image and raw binary file
        ppgs = np.squeeze(ppgs, axis=0)
        plt.imsave('./converted/debug/' + input_name + '.png',
                   ppgs,
                   cmap='binary')
        np.save('./converted/debug/' + input_name + '.npy', ppgs)

        convert_e = datetime.datetime.now()
        convert_time = convert_e - convert_s
        print("Total Converting Time:{}s".format(convert_time.seconds))
Example #21
    def get_predictor(cls):
        """load trained model"""

        with cls.lock:
            # check if model is already loaded
            if cls.predictor:
                return cls.predictor

            # create a mask r-cnn model
            mask_rcnn_model = ResNetFPNModel()

            try:
                model_dir = os.environ["SM_MODEL_DIR"]
            except KeyError:
                model_dir = "/opt/ml/model"

            try:
                resnet_arch = os.environ["RESNET_ARCH"]
            except KeyError:
                resnet_arch = "resnet50"

            # file path to previously trained mask r-cnn model
            latest_trained_model = ""
            model_search_path = os.path.join(model_dir, "model-*.index")
            for model_file in glob.glob(model_search_path):
                if model_file > latest_trained_model:
                    latest_trained_model = model_file

            trained_model = latest_trained_model[:-6]
            print(f"Using model: {trained_model}")

            cfg.MODE_FPN = True
            cfg.MODE_MASK = True
            if resnet_arch == "resnet101":
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
            else:
                cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]

            cfg_prefix = "CONFIG__"
            for key, value in dict(os.environ).items():
                if key.startswith(cfg_prefix):
                    attr_name = key[len(cfg_prefix) :]
                    attr_name = attr_name.replace("__", ".")
                    value = eval(value)
                    print(f"update config: {attr_name}={value}")
                    nested_var = cfg
                    attr_list = attr_name.split(".")
                    for attr in attr_list[0:-1]:
                        nested_var = getattr(nested_var, attr)
                    setattr(nested_var, attr_list[-1], value)

            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
            cfg.DATA.BASEDIR = "/data"
            cfg.DATA.TRAIN = "coco_train2017"
            cfg.DATA.VAL = "coco_val2017"
            register_coco(cfg.DATA.BASEDIR)
            finalize_configs(is_training=False)

            # Create an inference model
            # PredictConfig takes a model, input tensors and output tensors
            input_tensors = mask_rcnn_model.get_inference_tensor_names()[0]
            output_tensors = mask_rcnn_model.get_inference_tensor_names()[1]

            cls.predictor = OfflinePredictor(
                PredictConfig(
                    model=mask_rcnn_model,
                    session_init=get_model_loader(trained_model),
                    input_names=input_tensors,
                    output_names=output_tensors,
                )
            )
            return cls.predictor
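Examples #18 and #21 override nested config attributes from environment variables prefixed with `CONFIG__`, mapping `__` to attribute access. A self-contained sketch of that walk, using a stand-in namespace instead of tensorpack's global `cfg`, and `ast.literal_eval` rather than bare `eval` (an assumption on my part, but safer for untrusted input):

import ast
import os
from types import SimpleNamespace

# Stand-in config tree; the real examples mutate tensorpack's global cfg.
cfg = SimpleNamespace(TEST=SimpleNamespace(RESULT_SCORE_THRESH=0.05))

os.environ['CONFIG__TEST__RESULT_SCORE_THRESH'] = '0.3'

cfg_prefix = 'CONFIG__'
for key, raw in os.environ.items():
    if not key.startswith(cfg_prefix):
        continue
    attr_list = key[len(cfg_prefix):].split('__')   # e.g. ['TEST', 'RESULT_SCORE_THRESH']
    value = ast.literal_eval(raw)                   # parse '0.3' -> 0.3
    node = cfg
    for attr in attr_list[:-1]:
        node = getattr(node, attr)
    setattr(node, attr_list[-1], value)

print(cfg.TEST.RESULT_SCORE_THRESH)                 # 0.3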
Example #22
def play_model(cfg, player):
    predfunc = OfflinePredictor(cfg)
    while True:
        score = play_one_episode(player, predfunc)
        print("Total:", score)
Example #23
def ckpt2mel(predictor, ppgs_dir, mel_dir, save_dir):
    print("get into ckpt")
    for fi in os.listdir(ppgs_dir):
        print("fi",fi)
        #ppgs_name = os.path.join(ppgs_dir, fi)

        mel, ppgs = queue_input(fi, ppgs_dir, mel_dir)
        pred_mel = predictor(mel, ppgs)
        #print("pred_mel",pred_mel.size())
        pred_mel = np.array(pred_mel)
        print("pred_mel", pred_mel.shape)
        length = pred_mel.shape[2]
        width = pred_mel.shape[3]
        pred_mel = pred_mel.reshape((length, width))
        save_name = fi.split('.npy')[0]
        if hp.default.n_mels == 20:
            npy_dir = os.path.join(save_dir,'lpc20')
            if not os.path.exists(npy_dir):
                os.makedirs(npy_dir)
            npy_path = os.path.join(npy_dir, '%s_20.npy' %save_name)
            np.save(npy_path, pred_mel)
            print('saved', npy_dir)
        if hp.default.n_mels == 32:
            npy_dir = os.path.join(save_dir,'lpc32')
            if not os.path.exists(npy_dir):
                os.makedirs(npy_dir)
            npy_path = os.path.join(npy_dir, '%s_32.npy' %save_name)
            np.save(npy_path, pred_mel)
            print('saved', npy_dir)

def do_convert(args, logdir2):
    # Load graph
    model = Net2()
    index = 0
    ppgs_dir = hp.convert.ppgs_path
    mel_dir = hp.convert.mel_path
    #for fi in os.listdir(ppgs_dir):
    #print("fi",fi)
    #ppgs_path = os.path.join(ppgs_dir, fi)
    #df = Net2DataFlow(hp.convert.mel_path, ppgs_path, hp.convert.batch_size)
    #print("finish df")
    ckpt2 = '{}/{}'.format(logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    print("ckpt2",ckpt2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)
    print("after predictor")
    #import pdb;pdb.set_trace()
    ckpt2mel(predictor, ppgs_dir, mel_dir, hp.convert.save_path)
    print("success")
def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('case2', type=str, help='experiment case name of train2')
    parser.add_argument('-ckpt', help='checkpoint to load model.')
    arguments = parser.parse_args()
    return arguments


if __name__ == '__main__':
    args = get_arguments()
    hp.set_hparam_yaml(args.case2)
    logdir_train2 = '{}/{}/train2'.format(hp.logdir_path, args.case2)

    print('case2: {}, logdir2: {}'.format(args.case2, logdir_train2))

    s = datetime.datetime.now()

    do_convert(args, logdir2=logdir_train2)

    e = datetime.datetime.now()
    diff = e - s
    print("Done. elapsed time:{}s".format(diff.seconds))
Example #24
            model=Model(),
            session_init=SaverRestore(args.load),
            input_var_names=['state'],
            output_var_names=['saliency'])

    #sample_epoch_for_analysis(cfg, s_cfg, args.output)
    #exit()
    #analyze('arrays1', args.output)
    #sensitivity_analysis(667, s_cfg, cfg)
    #run_submission(cfg, args.output, args.episode)
    #do_submit(args.output, args.api)
    #for i in xrange(100,300):
    #    object_saliencies(i, cfg, draw=True)
    #object_saliencies(120, cfg, draw=True)
    tm = TemplateMatcher('../obj/MsPacman-v0')
    predfunc = OfflinePredictor(cfg)
    s_func   = OfflinePredictor(s_cfg)
    acts = [0]
    #for i in xrange(1,1850):
    #    act = object_saliencies(i, predfunc, s_func, tm, draw=True)
    #    acts.append(act)
    #pickle.dump(acts, open('models/DDQN/acts-O-DDQN', 'w'))
    #real_act(tm)
    object_saliencies(641, predfunc, s_func, tm, draw=True)
    #change_points(tm, predfunc)



    #saliency = cv2.resize(saliency, (160, 210))
    #obj_sals = [(-17.05189323425293, 'ghost', Position(left=141, right=151, up=158, down=171))]
    #act = 3