def sensitivity_analysis(index, s_cfg, cfg):
    """Sensitivity analysis for one saved game step: mask the most salient
    object with a ghost sprite, recompute the saliency map on the masked
    frame, and display the comparison.

    :param index: index of the saved arrays file 'analysis/arrays<index>.npz'.
    :param s_cfg: PredictConfig for the saliency-map predictor.
    :param cfg: PredictConfig for the Q-value predictor (only referenced by
        the commented-out code below).
    """
    arrays = np.load('analysis/arrays%d.npz' %index)
    sa = Saliency_Analyzor('../obj/MsPacman-v0')
    s0, s, saliency, act, r = arrays['s0'], arrays['s'], arrays['saliency'], int(arrays['act']), float(arrays['r'])
    # exp = generate_explanation(obj_sals)
    # Mask the object with ghost
    # plt.imshow(saliency, cmap='gray', aspect='equal')
    # Channel 3 of the 84x84x4 saliency stack (presumably the newest of the 4
    # stacked frames — TODO confirm), upscaled to the 160x210 screen.
    saliency = saliency[:, :, 3]
    saliency = cv2.resize(saliency, (160, 210))
    obj_sals = sa.object_saliencies(s0, saliency)
    obj_sals = sa.top_saliency_filter(obj_sals)
    # Most salient object; the centre of its bounding box anchors the mask.
    saliency, obj, position = obj_sals[0]
    x, y = (position.left + position.right) / 2, (position.up + position.down) / 2
    masked_s0 = mask(s0, x, y, 'ghost', sa)
    #show_images(masked_s0, gray=True)
    # Calculate new saliency for masked image
    s_func = OfflinePredictor(s_cfg)
    #predfunc = OfflinePredictor(cfg)
    #masked_saliency = s_func([[expand_state(masked_s0, need_grey=False)]])[0][0]
    saliency = s_func([[expand_state(masked_s0, need_grey=False)]])[0][0]
    saliency = saliency[:,:,3]
    desc = generate_description(act)
    exp = generate_explanation(obj_sals)
    sal_img = sa.saliency_analysis_image(masked_s0, obj_sals)
    show_analyze(0, desc, exp, s0, saliency, masked_s0, 0, save=False)
def eval(logdir):
    """Evaluate the Net1 phoneme classifier on one test batch.

    Restores the latest checkpoint found in ``logdir`` (if any), runs a
    single evaluation batch, and writes loss, accuracy and confusion-matrix
    summaries to TensorBoard under the same directory.

    :param logdir: directory holding checkpoints; also the summary target.
    """
    net = Net1()
    dataflow = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    config = PredictConfig(model=net,
                           input_names=get_eval_input_names(),
                           output_names=get_eval_output_names())
    checkpoint = tf.train.latest_checkpoint(logdir)
    if checkpoint:
        config.session_init = SaverRestore(checkpoint)
    predictor = OfflinePredictor(config)

    mfcc_batch, ppg_batch = next(dataflow().get_data())
    true_ppg, predicted_ppg, summ_loss, summ_acc = predictor(mfcc_batch, ppg_batch)

    # Map phoneme indices back to symbols before plotting the confusion matrix.
    _, idx2phn = load_vocab()
    true_ppg = [idx2phn[i] for i in true_ppg]
    predicted_ppg = [idx2phn[i] for i in predicted_ppg]
    summ_cm = plot_confusion_matrix(true_ppg, predicted_ppg, phns)

    writer = tf.summary.FileWriter(logdir)
    for summary in (summ_loss, summ_acc, summ_cm):
        writer.add_summary(summary)
    writer.close()
def do_convert(args, logdir):
    """Convert one batch of audio and write the results.

    Restores the checkpoint named by ``args.ckpt`` (or the latest one in
    ``logdir``), converts a batch from the dataflow, saves both signals as
    wav files, and logs audio summaries to TensorBoard.

    :param args: parsed CLI arguments (uses ``args.ckpt``).
    :param logdir: checkpoint directory; also the summary target.
    """
    net = Net()
    dataflow = NetDataFlow(hp.convert.data_path, hp.convert.batch_size)

    if args.ckpt:
        checkpoint = '{}/{}'.format(logdir, args.ckpt)
    else:
        checkpoint = tf.train.latest_checkpoint(logdir)
    restorers = [SaverRestore(checkpoint)] if checkpoint else []

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    audio, y_audio = convert(predictor, dataflow)

    soundfile.write("a.wav", y_audio[0], 16000, format="wav", subtype="PCM_16")
    soundfile.write("b.wav", audio[0], 16000, format="wav", subtype="PCM_16")

    # Log both signals so they can be compared in TensorBoard.
    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    writer = tf.summary.FileWriter(logdir)
    with tf.Session() as sess:
        writer.add_summary(sess.run(tf.summary.merge_all()))
    writer.close()
def do_convert(args, logdir1, logdir2):
    """Run Net2 voice conversion and log audio + PPG summaries.

    Restores the checkpoint from ``logdir2`` (``args.ckpt`` wins over the
    latest one) on top of the ``logdir1`` checkpoint, converts one dataflow
    batch, and writes audio and PPG-image summaries to ``logdir2``.
    """
    net = Net2()
    dataflow = Net2DataFlow(hp.convert.data_path, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    if args.ckpt:
        ckpt2 = '{}/{}'.format(logdir2, args.ckpt)
    else:
        ckpt2 = tf.train.latest_checkpoint(logdir2)

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        # Restore from logdir1 as well, but keep logdir2's global_step.
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    audio, y_audio, ppgs = convert(predictor, dataflow)

    tf.summary.audio('A', y_audio, hp.default.sr, max_outputs=hp.convert.batch_size)
    tf.summary.audio('B', audio, hp.default.sr, max_outputs=hp.convert.batch_size)

    # Render the PPG matrices as single-channel images.
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(logdir2)
    with tf.Session() as sess:
        writer.add_summary(sess.run(tf.summary.merge_all()))
    writer.close()
def set_enviroment(args, logdir1, logdir2):
    """Build and return an OfflinePredictor for Net2ForConvert, timing setup.

    NOTE(review): the public name keeps the original spelling
    ('enviroment') because callers may depend on it.

    :param args: parsed CLI arguments (uses ``args.ckpt`` and ``args.gpu``).
    :param logdir1: checkpoint directory restored with global_step ignored.
    :param logdir2: primary checkpoint directory.
    :return: an initialized OfflinePredictor.
    """
    start = datetime.datetime.now()

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = ('{}/{}'.format(logdir2, args.ckpt)
             if args.ckpt else tf.train.latest_checkpoint(logdir2))

    model = Net2ForConvert()

    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    # Select the requested GPU, or force CPU-only execution.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.gpu else '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)
    )
    print("OfflinePredictor")
    predictor = OfflinePredictor(pred_conf)

    elapsed = datetime.datetime.now() - start
    print("Setting Environment time:{}s".format(elapsed.seconds))
    return predictor
def eval(logdir1, logdir2):
    """Evaluate Net2 on one test batch and write the loss summary.

    :param logdir1: auxiliary checkpoint directory (global_step ignored).
    :param logdir2: primary checkpoint directory; also the summary target.
    """
    net = Net2()
    dataflow = Net2DataFlow(hp.test2.data_path, hp.test2.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    restorers = []
    if ckpt2:
        restorers.append(SaverRestore(ckpt2))
    if ckpt1:
        restorers.append(SaverRestore(ckpt1, ignore=['global_step']))

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    # x_mfccs, y_spec, _ = next(df().get_data())
    summ_loss, = predictor(next(dataflow().get_data()))

    writer = tf.summary.FileWriter(logdir2)
    writer.add_summary(summ_loss)
    writer.close()
def do_convert(logdir1, logdir2, input_path, output_path):
    """Convert the audio at ``input_path`` and write the result wav.

    Restores checkpoints from both logdirs (logdir1 with global_step
    ignored), converts the single-file dataflow, and saves the converted
    audio to ``output_path``.

    Fix: the non-empty-path validation now runs *before* the path is first
    used; previously the assert only fired after librosa had already tried
    to open the file.

    :param logdir1: auxiliary checkpoint directory.
    :param logdir2: primary checkpoint directory.
    :param input_path: path to the source audio file; must be non-empty.
    :param output_path: destination path for the converted wav.
    :raises AssertionError: if ``input_path`` is empty.
    """
    assert len(input_path) > 0, "must be non-empty input path"

    # Load graph
    model = Net2()
    model.actual_duration = librosa.core.get_duration(filename=input_path,
                                                      sr=hp.default.sr)

    # TODO isolate out logdirs, uhh and how to pre-dl from s3?
    df = Net2DataFlow(data_path=input_path, batch_size=1)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = tf.train.latest_checkpoint(logdir2)
    session_inits = [SaverRestore(ckpt2),
                     SaverRestore(ckpt1, ignore=['global_step'])]
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, df)
    write_wav(audio[0], hp.default.sr, output_path)
def get_predictor(cls):
    """Load (or return the cached) Mask R-CNN OfflinePredictor.

    Thread-safe lazy initialization: the first caller builds the predictor
    from the newest ``model-*.index`` checkpoint under the model directory;
    later callers get the cached instance.

    Fix: strip the ".index" suffix from the checkpoint file before handing
    it to ``get_model_loader`` — the loader expects the checkpoint prefix,
    consistent with the other predictor loaders in this file.

    :return: the shared OfflinePredictor instance.
    """
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor

        os.environ['TENSORPACK_FP16'] = 'true'

        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)

        try:
            model_dir = os.environ['SM_MODEL_DIR']
        except KeyError:
            model_dir = '/opt/ml/model'

        try:
            cls.pretrained_model = os.environ['PRETRAINED_MODEL']
        except KeyError:
            pass

        # Pick the lexicographically-largest checkpoint index file, i.e. the
        # highest-numbered previously trained mask r-cnn model.
        latest_trained_model = ""
        model_search_path = os.path.join(model_dir, "model-*.index")
        for model_file in glob.glob(model_search_path):
            if model_file > latest_trained_model:
                latest_trained_model = model_file

        # Fix: drop the trailing ".index" to obtain the checkpoint prefix.
        trained_model = latest_trained_model[:-6]
        print(f'Using model: {trained_model}')

        # fixed resnet50 backbone weights
        cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True

        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)

        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(PredictConfig(
            model=mask_rcnn_model,
            session_init=get_model_loader(trained_model),
            input_names=['images', 'orig_image_dims'],
            output_names=[
                'generate_{}_proposals_topk_per_image/boxes'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                'generate_{}_proposals_topk_per_image/scores'.format('fpn' if cfg.MODE_FPN else 'rpn'),
                'fastrcnn_all_scores',
                'output/boxes',
                'output/scores',
                'output/labels',
                'output/masks'
            ]))
        return cls.predictor
def sample_epoch_for_analysis(cfg, s_cfg, output):
    """
    Sample gameplay until the episode ends, saving per-step arrays.

    :param cfg: cfg to predict Q values
    :param s_cfg: cfg to predict pixel saliency maps in 84 * 84 * 4
    :param output: output folder name
    :return: save the sampled epoch arrays in output folder
        Arrays including (original_state(210*160*3), unresized_states(210*160*4),
        states(84*84*4), saliency(84*84*4), act, r, timestep)

    Fix: Python-2 ``print`` statements replaced with ``print()`` calls so
    this block is valid Python 3 (other blocks in the file use f-strings).
    """
    player = get_player(dumpdir=output)
    predfunc = OfflinePredictor(cfg)
    s_func = OfflinePredictor(s_cfg)
    timestep = 0
    # Analyzer retained for the (removed) visualization hooks.
    sa = Saliency_Analyzor('../obj/MsPacman-v0')
    R = 0  # cumulative reward over the sampled epoch
    history = deque(maxlen=FRAME_HISTORY)
    while True:
        timestep += 1
        s0 = player.original_current_state()
        unresized_state = grey(s0)
        history.append(unresized_state)
        us = get_history_state(history)  # last FRAME_HISTORY grey frames
        # Try to use four save frames to predict action
        s = player.current_state()
        # Actions: 0-none; 1-Up; 2-right; 3-left; 4-down; 5-upright,
        # 6-leftup, 7-rightdown, 8-leftdown
        Qvalues = predfunc([[s]])[0][0]
        act = Qvalues.argmax()
        saliency = s_func([[s]])[0][0]
        r, isOver = player.action(act)
        if isOver:
            history.clear()
        save_arrays(s0, us, s, saliency, act, r, timestep, output)
        R += r
        if timestep % 50 == 0:
            print(timestep)
            print('Total Reward:', R)
        if isOver:
            return
def compute_accuracy(model, mel_spec, speaker_id, ckpt=None):
    """Return the model's accuracy on one (mel_spec, speaker_id) batch.

    :param model: model whose eval graph defines the accuracy output.
    :param mel_spec: batch of mel-spectrogram inputs.
    :param speaker_id: batch of ground-truth speaker ids.
    :param ckpt: optional checkpoint path to restore before evaluating.
    :return: scalar accuracy value.
    """
    session_init = SaverRestore(ckpt) if ckpt else None
    predictor = OfflinePredictor(PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=session_init))
    acc, = predictor(mel_spec, speaker_id)
    return acc
def run(cfg, s_cfg, output):
    """Play one episode, saving a saliency visualization for every step.

    :param cfg: PredictConfig for the Q-value predictor.
    :param s_cfg: PredictConfig for the saliency predictor.
    :param output: dump directory for the player and the saved images.

    Fix: Python-2 ``print timestep`` statement replaced with a ``print()``
    call so this block is valid Python 3.
    """
    player = get_player(dumpdir=output)
    predfunc = OfflinePredictor(cfg)
    s_func = OfflinePredictor(s_cfg)
    #sa = Saliency_Analyzor('../obj/MsPacman-v0')
    timestep = 0
    while True:
        timestep += 1
        s = player.current_state()
        # Full-resolution frame; only needed by the commented-out
        # show_large call below.
        s0 = player.original_current_state()
        act = predfunc([[s]])[0][0].argmax()
        saliency = s_func([[s]])[0][0]
        r, isOver = player.action(act)
        show(s, saliency, act, timestep, output, last=True, save=True)
        #show_large(s0, saliency, act, timestep, output, save=True, save_npy=False)
        if timestep % 50 == 0:
            print(timestep)
        if isOver:
            return
def do_convert(args, logdir1, logdir2):
    """Convert the single audio file ``args.file`` with Net2.

    Restores checkpoints from both logdirs (explicit names in ``args.net1``
    / ``args.net2`` win over the latest checkpoint), writes the converted
    wav under ``args.savepath`` with the input's base name, and logs audio
    and PPG-image summaries to TensorBoard.
    """
    # Load graph
    model = Net2()
    data = get_mfccs_and_spectrogram(args.file)
    ckpt1 = '{}/{}'.format(
        logdir1, args.net1) if args.net1 else tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2, args.net2) if args.net2 else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # the logdir1 checkpoint is restored without its global_step
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    audio, y_audio, ppgs = convert(predictor, data)

    # Save under the input file's base name, extension replaced with .wav.
    target_file = args.file.split('/')[-1]
    portion = os.path.splitext(target_file)
    # converted_file = target_file.split('.')[0] + '_converted.wav'
    converted_file = portion[0] + '.wav'
    write_wav(audio[0], hp.Default.sr, args.savepath + converted_file)

    # Write the result
    tf.summary.audio('A', y_audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)
    tf.summary.audio('B', audio, hp.Default.sr, max_outputs=hp.Convert.batch_size)

    # Visualize PPGs
    heatmap = np.expand_dims(ppgs, 3)  # channel=1
    tf.summary.image('PPG', heatmap, max_outputs=ppgs.shape[0])

    writer = tf.summary.FileWriter(args.savepath)
    with tf.Session() as sess:
        summ = sess.run(tf.summary.merge_all())
        writer.add_summary(summ)
    writer.close()
def get_predictor(cls):
    '''Load the trained Mask R-CNN model and return a cached OfflinePredictor.

    Thread-safe lazy initialization: the first caller builds the predictor
    from the newest "model-*.index" checkpoint under the model directory;
    subsequent callers receive the cached instance.
    '''
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor
        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel()
        try:
            model_dir = os.environ['SM_MODEL_DIR']
        except KeyError:
            model_dir = '/opt/ml/model'
        try:
            cls.pretrained_model = os.environ['PRETRAINED_MODEL']
        except KeyError:
            pass
        # file path to previously trained mask r-cnn model: pick the
        # lexicographically-largest checkpoint index file
        latest_trained_model = ""
        model_search_path = os.path.join(model_dir, "model-*.index")
        for model_file in glob.glob(model_search_path):
            if model_file > latest_trained_model:
                latest_trained_model = model_file
        # strip the ".index" suffix to get the checkpoint prefix
        trained_model = latest_trained_model[:-6]
        print(f'Using model: {trained_model}')
        # fixed resnet50 backbone weights
        cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        # use the (stricter) visualization threshold for served results
        cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
        finalize_configs(is_training=False)
        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        input_tensors = mask_rcnn_model.get_inference_tensor_names()[0]
        output_tensors = mask_rcnn_model.get_inference_tensor_names()[1]
        cls.predictor = OfflinePredictor(
            PredictConfig(model=mask_rcnn_model,
                          session_init=get_model_loader(trained_model),
                          input_names=input_tensors,
                          output_names=output_tensors))
        return cls.predictor
def init_predictor(ckpt_dir):
    """
    Initializes an OfflinePredictor for the 'Net1' Phoneme classifier,
    given a directory of tf-checkpoints.

    :param ckpt_dir: Checkpoint directory.
    :return: OfflinePredictor
    :raises AssertionError: if no checkpoint exists in ``ckpt_dir``.
    """
    checkpoint = tf.train.latest_checkpoint(ckpt_dir)
    assert checkpoint is not None, "Failed to load checkpoint in '{}'".format(
        ckpt_dir)

    model = Net1()
    restorer = SaverRestore(checkpoint, ignore=['global_step'])
    return OfflinePredictor(PredictConfig(
        model=model,
        input_names=['x_mfccs'],
        output_names=['net1/ppgs'],
        session_init=ChainInit([restorer])))
def do_convert(args, logdir1, logdir2, input_dir):
    """Batch-convert every audio file found by the dataflow under
    ``input_dir``, writing converted wavs into the mirror directory tree.

    Files whose output already exists are skipped, so the conversion can be
    resumed after an interruption.
    """
    # Load graph
    model = Net2()
    # input_dir = hp.convert.data_base_dir_original + hp.convert.data_path
    df = Net2DataFlow(input_dir, hp.convert.batch_size)

    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # the logdir1 checkpoint is restored without its global_step
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)

    # loop over all the audio files
    for wav_file in df.wav_files:
        # check if file is present audio
        out_path = wav_file.replace(hp.convert.data_base_dir_original,
                                    hp.convert.data_base_dir_convert)
        # change file extension from wv1/wv2 to wav
        out_path = out_path[:-2] + 'av'
        if os.path.isfile(out_path):
            # file is already present, move on to the next one.
            print("skipping " + wav_file)
            continue
        print("converting " + wav_file)

        # convert audio: run each feature chunk through the predictor and
        # concatenate the int16 results, trimmed to the original length
        audio_len, feats = df.get_features(wav_file)
        audio_full = []
        for feat in feats:
            input_arr = ([feat[0]], [feat[1]], [feat[2]])
            audio, ppgs = convert(predictor, input_arr)
            audio_full.append(
                (audio[0] * hp.convert.amplitude_multiplier).astype(np.int16))
        scipy.io.wavfile.write(out_path, hp.default.sr,
                               np.concatenate(audio_full)[:audio_len])
def eval(logdir):
    """Evaluate the model on one test batch and write the loss summary.

    :param logdir: checkpoint directory; also the summary target.
    """
    net = Net()
    dataflow = NetDataFlow(hp.test.data_path, hp.test.batch_size)

    checkpoint = tf.train.latest_checkpoint(logdir)
    restorers = [SaverRestore(checkpoint)] if checkpoint else []

    predictor = OfflinePredictor(PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(restorers)))

    r_mel, t_spec, _ = next(dataflow().get_data())
    summ_loss, = predictor(r_mel, t_spec)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.close()
def get_predictor(cls):
    '''Load the trained Mask R-CNN model, with RPN/NMS knobs taken from
    environment variables, and return a cached OfflinePredictor.

    Thread-safe lazy initialization: the first caller builds the predictor;
    subsequent callers receive the cached instance.
    '''
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor
        os.environ['TENSORPACK_FP16'] = 'true'
        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)
        try:
            model_dir = os.environ['SM_MODEL_DIR']
        except KeyError:
            model_dir = '/opt/ml/model'
        try:
            cls.pretrained_model = os.environ['PRETRAINED_MODEL']
        except KeyError:
            pass
        # NOTE(review): eval() on environment variables is dangerous if the
        # environment is attacker-controllable — consider int()/float() parsing.
        # anchor divisor: shrinks stride and anchor sizes proportionally
        try:
            div = int(eval(os.environ['divisor']))
        except KeyError:
            div = 1
            pass
        rpn_anchor_stride = int(16 / div)
        rpn_anchor_sizes = (int(32 / div), int(64 / div), int(128 / div),
                            int(256 / div), int(512 / div))
        # explicit stride override wins over the divisor-derived value
        try:
            rpn_anchor_stride = int(eval(os.environ['rpnanchor_stride']))
        except KeyError:
            pass
        # multiplier for the pre/post/per-level NMS top-k limits
        try:
            nms_topk = int(eval(os.environ['NMS_TOPK']))
        except KeyError:
            nms_topk = 2
            pass
        try:
            nms_thresh = eval(os.environ['NMS_THRESH'])
        except KeyError:
            nms_thresh = 0.7
            pass
        try:
            results_per_img = eval(os.environ['res_perimg'])
        except KeyError:
            results_per_img = 400
            pass
        # file path to previously trained mask r-cnn model: pick the
        # lexicographically-largest checkpoint index file
        latest_trained_model = ""
        model_search_path = os.path.join(model_dir, "model-*.index")
        for model_file in glob.glob(model_search_path):
            if model_file > latest_trained_model:
                latest_trained_model = model_file
        # NOTE(review): the ".index" suffix is NOT stripped here, unlike the
        # sibling loaders that use latest_trained_model[:-6] — confirm that
        # get_model_loader tolerates the suffixed path.
        trained_model = latest_trained_model
        print(f'Using model: {trained_model}')
        # fixed resnet50 backbone weights
        cfg.BACKBONE.WEIGHTS = os.path.join(cls.pretrained_model)
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        cfg.RPN.ANCHOR_STRIDE = rpn_anchor_stride
        cfg.RPN.ANCHOR_SIZES = rpn_anchor_sizes
        cfg.RPN.TEST_PRE_NMS_TOPK = int(6000 * nms_topk)
        cfg.RPN.TEST_POST_NMS_TOPK = int(1000 * nms_topk)
        cfg.RPN.TEST_PER_LEVEL_NMS_TOPK = int(1000 * nms_topk)
        # testing -----------------------
        cfg.TEST.FRCNN_NMS_THRESH = nms_thresh
        cfg.TEST.RESULT_SCORE_THRESH = 0.05
        cfg.TEST.RESULT_SCORE_THRESH_VIS = 0.2  # only visualize confident results
        cfg.TEST.RESULTS_PER_IM = results_per_img
        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)
        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(
            PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=['images', 'orig_image_dims'],
                output_names=[
                    'generate_{}_proposals_topk_per_image/boxes'.format(
                        'fpn' if cfg.MODE_FPN else 'rpn'),
                    'generate_{}_proposals_topk_per_image/scores'.format(
                        'fpn' if cfg.MODE_FPN else 'rpn'),
                    'fastrcnn_all_scores',
                    'output/boxes',
                    'output/scores',
                    'output/labels',
                    'output/masks'
                ]))
        return cls.predictor
def get_predictor(cls):
    """Load the trained Mask R-CNN model and return a cached OfflinePredictor.

    Thread-safe lazy initialization. The ResNet depth is selected via the
    RESNET_ARCH env var, and any config attribute can be overridden through
    CONFIG__-prefixed environment variables (double underscore separates
    nesting levels, e.g. CONFIG__TEST__RESULTS_PER_IM).
    """
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor
        os.environ["TENSORPACK_FP16"] = "true"
        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel(True)
        try:
            model_dir = os.environ["SM_MODEL_DIR"]
        except KeyError:
            model_dir = "/opt/ml/model"
        try:
            resnet_arch = os.environ["RESNET_ARCH"]
        except KeyError:
            resnet_arch = "resnet50"
        # file path to previously trained mask r-cnn model: pick the
        # lexicographically-largest checkpoint index file
        latest_trained_model = ""
        model_search_path = os.path.join(model_dir, "model-*.index")
        for model_file in glob.glob(model_search_path):
            if model_file > latest_trained_model:
                latest_trained_model = model_file
        # NOTE(review): the ".index" suffix is NOT stripped here, unlike the
        # sibling loaders that use latest_trained_model[:-6] — confirm that
        # get_model_loader tolerates the suffixed path.
        trained_model = latest_trained_model
        print(f"Using model: {trained_model}")
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        # backbone depth by architecture name
        if resnet_arch == "resnet101":
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
        else:
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
        # apply CONFIG__* environment overrides onto the nested cfg object
        cfg_prefix = "CONFIG__"
        for key, value in dict(os.environ).items():
            if key.startswith(cfg_prefix):
                attr_name = key[len(cfg_prefix):]
                attr_name = attr_name.replace("__", ".")
                # NOTE(review): eval() on env-supplied values is dangerous if
                # the environment is attacker-controllable.
                value = eval(value)
                print(f"update config: {attr_name}={value}")
                nested_var = cfg
                attr_list = attr_name.split(".")
                for attr in attr_list[0:-1]:
                    nested_var = getattr(nested_var, attr)
                setattr(nested_var, attr_list[-1], value)
        # calling detection dataset gets the number of coco categories
        # and saves in the configuration
        DetectionDataset()
        finalize_configs(is_training=False)
        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        cls.predictor = OfflinePredictor(
            PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=["images", "orig_image_dims"],
                output_names=[
                    "generate_{}_proposals_topk_per_image/boxes".format(
                        "fpn" if cfg.MODE_FPN else "rpn"),
                    "generate_{}_proposals_topk_per_image/scores".format(
                        "fpn" if cfg.MODE_FPN else "rpn"),
                    "fastrcnn_all_scores",
                    "output/boxes",
                    "output/scores",
                    "output/labels",
                    "output/masks",
                ],
            ))
        return cls.predictor
# NOTE(review): fragment of an embedding-inference script. `params`, `model`,
# `args`, `audio_meta_class` and `audio_meta_train` are defined earlier,
# outside this excerpt.
if hp.embed.meta_path:
    params['meta_path'] = hp.embed.meta_path
audio_meta = audio_meta_class(**params)
data_loader = DataLoader(audio_meta, hp.embed.batch_size)

# samples
# NOTE(review): `.next()` and bare `map(...)` fed to np.array below are
# Python-2 idioms; under Python 3 these would need next()/list(map(...)).
wav, mel_spec, speaker_id = data_loader.dataflow().get_data().next()

# restore the requested checkpoint (or the latest one) and build a predictor
# that outputs the embedding and the predicted speaker id
ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)
pred_conf = PredictConfig(
    model=model,
    input_names=['x'],
    output_names=['embedding/embedding', 'prediction'],
    session_init=SaverRestore(ckpt) if ckpt else None)
embedding_pred = OfflinePredictor(pred_conf)

embedding, pred_speaker_id = embedding_pred(mel_spec)

# get a random audio of the predicted speaker.
wavfile_pred_speaker = np.array(map(lambda s: audio_meta_train.get_random_audio(s), pred_speaker_id))
# load the predicted speakers' audio, fixed to hp.signal.duration seconds
length = int(hp.signal.duration * hp.signal.sr)
wav_pred_speaker = np.array(
    map(lambda w: fix_length(read_wav(w, hp.signal.sr, duration=hp.signal.duration), length), wavfile_pred_speaker))

# write audio
tf.summary.audio('wav', wav, hp.signal.sr, max_outputs=10)
tf.summary.audio('wav_pred', wav_pred_speaker, hp.signal.sr, max_outputs=10)
# write prediction
def do_convert(args, logdir1, logdir2):
    """Interactive voice-conversion loop.

    Builds a Net2ForConvert predictor from the checkpoints in both logdirs,
    then repeatedly prompts for an audio path, converts it, and writes the
    converted wav plus PPG debug artifacts under ./converted/. Type 'quit'
    to exit the loop.
    """
    print("do_convert")
    # Load graph
    ckpt1 = tf.train.latest_checkpoint(logdir1)
    ckpt2 = '{}/{}'.format(
        logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    model = Net2ForConvert()

    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    if ckpt1:
        # the logdir1 checkpoint is restored without its global_step
        session_inits.append(SaverRestore(ckpt1, ignore=['global_step']))

    # Select the requested GPU, or force CPU-only execution.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("PredictConfig")
    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names(),
                              session_init=ChainInit(session_inits))
    print("OfflinePredictor")
    set_env_s = datetime.datetime.now()
    predictor = OfflinePredictor(pred_conf)
    set_env_e = datetime.datetime.now()
    set_env_t = set_env_e - set_env_s
    print("Setting Environment time:{}s".format(set_env_t.seconds))

    input_name = ''
    while True:
        input_name = input("Write your audio file\'s path for converting : ")
        if input_name == 'quit':
            break
        elif len(glob.glob(input_name)) == 0:
            # NOTE(review): message reconstructed across a mangled line break
            # in the source — confirm the exact original wording.
            print("That audio file doesn't exist! Try something else.")
            continue

        convert_s = datetime.datetime.now()
        # Add a leading batch axis to each feature before prediction.
        mfcc, spec, mel_spec = get_mfccs_and_spectrogram(input_name, trim=False, isConverting=True)
        mfcc = np.expand_dims(mfcc, axis=0)
        spec = np.expand_dims(spec, axis=0)
        mel_spec = np.expand_dims(mel_spec, axis=0)
        output_audio, ppgs = convert(predictor, mfcc, spec, mel_spec)

        input_audio, samplerate = load(input_name, sr=hp.default.sr, dtype=np.float64)

        """
        # F0 adaptation with WORLD Vocoder
        f0_conv_s = datetime.datetime.now()
        output_audio = f0_adapt(input_audio, output_audio, logdir2, samplerate)
        f0_conv_e = datetime.datetime.now()
        f0_conv_time = f0_conv_e - f0_conv_s
        print("F0 Adapting Time:{}s".format(f0_conv_time.seconds))
        """

        # Saving voice-converted audio to 32-bit float wav file
        # print(audio.dtype)
        output_audio = output_audio.astype(np.float32)
        write_wav(path="./converted/" + input_name, y=output_audio, sr=hp.default.sr)

        # Saving PPGS data to Grayscale Image and raw binary file
        ppgs = np.squeeze(ppgs, axis=0)
        plt.imsave('./converted/debug/' + input_name + '.png', ppgs, cmap='binary')
        np.save('./converted/debug/' + input_name + '.npy', ppgs)

        convert_e = datetime.datetime.now()
        convert_time = convert_e - convert_s
        print("Total Converting Time:{}s".format(convert_time.seconds))
def get_predictor(cls):
    """Load the trained Mask R-CNN model and return a cached OfflinePredictor.

    Thread-safe lazy initialization. The ResNet depth is selected via the
    RESNET_ARCH env var, config attributes can be overridden through
    CONFIG__-prefixed environment variables, and the COCO dataset under
    /data is registered before the configs are finalized.
    """
    with cls.lock:
        # check if model is already loaded
        if cls.predictor:
            return cls.predictor
        # create a mask r-cnn model
        mask_rcnn_model = ResNetFPNModel()
        try:
            model_dir = os.environ["SM_MODEL_DIR"]
        except KeyError:
            model_dir = "/opt/ml/model"
        try:
            resnet_arch = os.environ["RESNET_ARCH"]
        except KeyError:
            resnet_arch = "resnet50"
        # file path to previously trained mask r-cnn model: pick the
        # lexicographically-largest checkpoint index file
        latest_trained_model = ""
        model_search_path = os.path.join(model_dir, "model-*.index")
        for model_file in glob.glob(model_search_path):
            if model_file > latest_trained_model:
                latest_trained_model = model_file
        # strip the ".index" suffix to get the checkpoint prefix
        trained_model = latest_trained_model[:-6]
        print(f"Using model: {trained_model}")
        cfg.MODE_FPN = True
        cfg.MODE_MASK = True
        # backbone depth by architecture name
        if resnet_arch == "resnet101":
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 23, 3]
        else:
            cfg.BACKBONE.RESNET_NUM_BLOCKS = [3, 4, 6, 3]
        # apply CONFIG__* environment overrides onto the nested cfg object
        cfg_prefix = "CONFIG__"
        for key, value in dict(os.environ).items():
            if key.startswith(cfg_prefix):
                attr_name = key[len(cfg_prefix) :]
                attr_name = attr_name.replace("__", ".")
                # NOTE(review): eval() on env-supplied values is dangerous if
                # the environment is attacker-controllable.
                value = eval(value)
                print(f"update config: {attr_name}={value}")
                nested_var = cfg
                attr_list = attr_name.split(".")
                for attr in attr_list[0:-1]:
                    nested_var = getattr(nested_var, attr)
                setattr(nested_var, attr_list[-1], value)
        # use the (stricter) visualization threshold for served results
        cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS
        cfg.DATA.BASEDIR = "/data"
        cfg.DATA.TRAIN = "coco_train2017"
        cfg.DATA.VAL = "coco_val2017"
        register_coco(cfg.DATA.BASEDIR)
        finalize_configs(is_training=False)
        # Create an inference model
        # PredictConfig takes a model, input tensors and output tensors
        input_tensors = mask_rcnn_model.get_inference_tensor_names()[0]
        output_tensors = mask_rcnn_model.get_inference_tensor_names()[1]
        cls.predictor = OfflinePredictor(
            PredictConfig(
                model=mask_rcnn_model,
                session_init=get_model_loader(trained_model),
                input_names=input_tensors,
                output_names=output_tensors,
            )
        )
        return cls.predictor
def play_model(cfg, player):
    """Play episodes forever with the configured predictor, printing scores.

    :param cfg: PredictConfig used to build the policy predictor.
    :param player: environment/player instance to run episodes on.
    """
    predict = OfflinePredictor(cfg)
    while True:
        total = play_one_episode(player, predict)
        print("Total:", total)
def ckpt2mel(predictor, ppgs_dir, mel_dir, save_dir):
    """Predict mel/LPC features for every PPG file and save them as .npy.

    Fix: the two duplicated ``n_mels == 20`` / ``n_mels == 32`` save
    branches are folded into one parameterized path (behavior unchanged:
    other n_mels values save nothing, as before).

    :param predictor: OfflinePredictor mapping (mel, ppgs) -> predicted mel.
    :param ppgs_dir: directory with input PPG .npy files.
    :param mel_dir: directory with the matching mel inputs.
    :param save_dir: output root; 'lpc20' / 'lpc32' subdirs are created.
    """
    print("get into ckpt")
    for fi in os.listdir(ppgs_dir):
        print("fi", fi)
        #ppgs_name = os.path.join(ppgs_dir, fi)
        mel, ppgs = queue_input(fi, ppgs_dir, mel_dir)
        pred_mel = np.array(predictor(mel, ppgs))
        print("pred_mel", pred_mel.shape)
        # collapse the leading axes: (..., ..., length, width) -> (length, width)
        length = pred_mel.shape[2]
        width = pred_mel.shape[3]
        pred_mel = pred_mel.reshape((length, width))
        save_name = fi.split('.npy')[0]
        n_mels = hp.default.n_mels
        if n_mels in (20, 32):
            npy_dir = os.path.join(save_dir, 'lpc%d' % n_mels)
            if not os.path.exists(npy_dir):
                os.makedirs(npy_dir)
            npy_path = os.path.join(npy_dir, '%s_%d.npy' % (save_name, n_mels))
            np.save(npy_path, pred_mel)
            print('saved', npy_dir)


def do_convert(args, logdir2):
    """Restore the Net2 checkpoint and convert all PPG files to mel features.

    :param args: parsed CLI arguments (uses ``args.ckpt``).
    :param logdir2: checkpoint directory for the train2 experiment.
    """
    # Load graph
    model = Net2()
    ppgs_dir = hp.convert.ppgs_path
    mel_dir = hp.convert.mel_path

    ckpt2 = '{}/{}'.format(
        logdir2, args.ckpt) if args.ckpt else tf.train.latest_checkpoint(logdir2)
    print("ckpt2", ckpt2)
    session_inits = []
    if ckpt2:
        session_inits.append(SaverRestore(ckpt2))
    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
        session_init=ChainInit(session_inits))
    predictor = OfflinePredictor(pred_conf)
    print("after predictor")

    ckpt2mel(predictor, ppgs_dir, mel_dir, hp.convert.save_path)
    print("success")


def get_arguments():
    """Parse the experiment case name and the optional checkpoint name."""
    parser = argparse.ArgumentParser()
    parser.add_argument('case2', type=str, help='experiment case name of train2')
    parser.add_argument('-ckpt', help='checkpoint to load model.')
    arguments = parser.parse_args()
    return arguments


if __name__ == '__main__':
    args = get_arguments()
    hp.set_hparam_yaml(args.case2)
    logdir_train2 = '{}/{}/train2'.format(hp.logdir_path, args.case2)
    print('case2: {},logdir2: {}'.format(args.case2, logdir_train2))
    s = datetime.datetime.now()
    do_convert(args, logdir2=logdir_train2)
    e = datetime.datetime.now()
    diff = e - s
    print("Done. elapsed time:{}s".format(diff.seconds))
model=Model(), session_init=SaverRestore(args.load), input_var_names=['state'], output_var_names=['saliency']) #sample_epoch_for_analysis(cfg, s_cfg, args.output) #exit() #analyze('arrays1', args.output) #sensitivity_analysis(667, s_cfg, cfg) #run_submission(cfg, args.output, args.episode) #do_submit(args.output, args.api) #for i in xrange(100,300): # object_saliencies(i, cfg, draw=True) #object_saliencies(120, cfg, draw=True) tm = TemplateMatcher('../obj/MsPacman-v0') predfunc = OfflinePredictor(cfg) s_func = OfflinePredictor(s_cfg) acts = [0] #for i in xrange(1,1850): # act = object_saliencies(i, predfunc, s_func, tm, draw=True) # acts.append(act) #pickle.dump(acts, open('models/DDQN/acts-O-DDQN', 'w')) #real_act(tm) object_saliencies(641, predfunc, s_func, tm, draw=True) #change_points(tm, predfunc) #saliency = cv2.resize(saliency, (160, 210)) #obj_sals = [(-17.05189323425293, 'ghost', Position(left=141, right=151, up=158, down=171))] #act = 3