# However, if SGE is used, we cannot simply set CUDA_VISIBLE_DEVICES.
# So it is better to specify the GPU id outside the program.
# Give any number other than -1 to --gpu to enable the GPU; leave it blank to disable it.
import tensorflow as tf

if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find config.json in %s" % config_json)
    params = Params(config_json)

    # Change the output node if necessary
    if len(args.node) != 0:
        params.embedding_node = args.node
    tf.logging.info("Extract embedding from %s" % params.embedding_node)

    trainer = Trainer(params, args.model_dir, single_cpu=True)
    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    # trainer.build("predict", dim=dim)
    trainer.build("predict", dim=dim, loss_type="extract_asoftmax", num_speakers=154)
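# The argument parsing is not shown in the fragment above. The block below is a
# minimal, hedged sketch of the --gpu convention the comments describe (the flag
# name comes from the comments; the default value and parser layout are
# assumptions). It runs before TensorFlow is imported, matching the other
# fragments in this section.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("--gpu", type=int, default=-1,
                    help="GPU id; -1 (the default) disables the GPU")
args = parser.parse_args()

if args.gpu == -1:
    # Hide all GPUs so TensorFlow falls back to the CPU. When a GPU is requested,
    # the actual device id is exported outside the program (e.g. by the SGE wrapper).
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"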
parser.add_argument("weights", type=str, help="The output weights") parser.add_argument("embeddings", type=str, help="Embeddings (label vector).") parser.add_argument("embedding_pic", type=str, help="The output pic") args = parser.parse_args() import tensorflow as tf if __name__ == '__main__': tf.reset_default_graph() tf.logging.set_verbosity(tf.logging.INFO) nnet_dir = os.path.join(args.model_dir, "nnet") config_json = os.path.join(args.model_dir, "nnet/config.json") if not os.path.isfile(config_json): sys.exit("Cannot find params.json in %s" % config_json) params = Params(config_json) # params.loss_func = "generalized_angular_triplet_loss" # params.dict["triplet_center"] = "average" # params.dict["triplet_center_momentum"] = 0.9 # params.dict["loss_compute"] = "softplus" # params.dict["margin"] = 0.1 num_total_train_speakers = KaldiDataRandomQueue( args.data_dir, args.data_spklist).num_total_speakers dim = FeatureReader(args.data_dir).get_dim() trainer = Trainer(params, args.model_dir, single_cpu=True) trainer.build("valid", dim=dim, loss_type=params.loss_func, num_speakers=num_total_train_speakers)
args = parser.parse_args()

if args.gpu == -1:
    # Disable GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf

if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find config.json in %s" % config_json)
    params = Params(config_json)

    # Extract the attention weights instead of the embedding
    params.embedding_node = "attention_weights"

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    # The rspecifier cannot be scp
    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
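        # The loop body is truncated in this fragment. The lines below are a hedged
        # sketch only: they assume Trainer exposes a predict(features) method that
        # returns the node selected by params.embedding_node (here the frame-level
        # attention weights), and that write_mat comes from the same kaldi_io module
        # as read_mat_ark / open_or_fd.
        weights = trainer.predict(feature)
        write_mat(fp_out, weights, key=key)  # append the weights to the output ark
        if index % 100 == 0:
            tf.logging.info("Processed %d utterances" % index)
    fp_out.close()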
    return mat


if __name__ == '__main__':
    reader = tf.train.NewCheckpointReader(nnet_path + 'model-570000')

    # Low-rank factorization: SVD of the kernel of the layer to be split,
    # truncated by abandon(), then A = U*S and B = V become the two new kernels.
    u, s, v = np.linalg.svd(reader.get_tensor(nsplitname + 'kernel'))
    u, s, v = abandon(u, s, v, dimension=0.8)
    u = np.mat(u)
    s = np.mat(np.diag(np.array(s).squeeze()))
    v = np.mat(v)
    A = u * s
    B = v
    C = reader.get_tensor(nsplitname + 'bias')

    params = Params(json_path)
    x = tf.placeholder(tf.float32, [10, 175, 30], name='x')
    features, endpoints = tdnn_svd6(features=x, params=params, mid_channels=A.shape[1])

    init = tf.global_variables_initializer()
    graph = tf.get_default_graph()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        # Copy every variable except the split layer and the softmax layer
        for name in reader.get_variable_to_shape_map():
            if nsplitname in name or 'softmax' in name:
                continue
            herename = nname + re.match(r'(.+?)(\/.*)', name).group(2) + ':0'
            sess.run(tf.assign(graph.get_tensor_by_name(herename), reader.get_tensor(name)))
        # Load the two low-rank factors into the new split layers
        sess.run(tf.assign(graph.get_tensor_by_name(nname + '/tdnn6.0_dense/kernel:0'), A))
        sess.run(tf.assign(graph.get_tensor_by_name(nname + '/tdnn6.5_dense/kernel:0'), B))
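# abandon() is not defined in this fragment. The function below is a minimal
# sketch of a rank-truncation helper consistent with its call site; the criterion
# used here (keep enough singular values to cover `dimension` of the cumulative
# energy) is an assumption, and the original code may truncate differently.
import numpy as np

def abandon(u, s, v, dimension=0.8):
    energy = np.cumsum(s) / np.sum(s)
    rank = int(np.searchsorted(energy, dimension)) + 1  # smallest rank reaching the target fraction
    # Keep the leading components so that (u * diag(s)) * v still approximates the kernel.
    return u[:, :rank], s[:rank], v[:rank, :]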
args = parser.parse_args()

if args.gpu == -1:
    # Disable GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf

if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find config.json in %s" % config_json)
    params = Params(config_json)

    # First, we need to extract the weights
    num_total_train_speakers = KaldiDataRandomQueue(
        os.path.dirname(args.spklist), args.spklist).num_total_speakers
    dim = FeatureReader(os.path.dirname(args.spklist)).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim

    trainer = Trainer(params, args.model_dir, dim, num_total_train_speakers, single_cpu=True)
    trainer.build("valid")
    trainer.sess.run(tf.global_variables_initializer())
    trainer.sess.run(tf.local_variables_initializer())
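    # The fragment stops after variable initialization. The lines below are a hedged
    # sketch of what could follow: restore the newest checkpoint into the freshly
    # built graph and dump a weight matrix. tf.train.Saver / latest_checkpoint are
    # standard TF 1.x calls; picking the softmax kernel and the "weights.npy" output
    # path are assumptions for illustration only.
    import numpy as np  # presumably imported at the top of the full script

    saver = tf.train.Saver()
    checkpoint = tf.train.latest_checkpoint(nnet_dir)
    saver.restore(trainer.sess, checkpoint)

    for var in tf.trainable_variables():
        if "softmax" in var.name and "kernel" in var.name:
            np.save("weights.npy", trainer.sess.run(var))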