def load_custom_op(self, custom_op_paths):
    """Load every ``.so`` op library found in the given directories.

    Args:
        custom_op_paths: Comma-separated string of directory paths.
            Whitespace around each entry is ignored and empty entries
            (``""``, trailing commas, ``"a,,b"``) are skipped.

    Each entry that is a directory is scanned non-recursively and every
    file whose name ends in ``.so`` is passed to ``tf.load_op_library``.
    An entry that is not a directory is reported via ``logger.error`` and
    skipped.
    """
    for raw_path in custom_op_paths.split(","):
        custom_op_path = raw_path.strip()
        if not custom_op_path:
            # Nothing was specified for this slot; do not report a bogus
            # "path does not exist" error for the empty string.
            continue
        if not os.path.isdir(custom_op_path):
            logger.error("The path does not exist: {}".format(custom_op_path))
            continue
        # os.listdir returns names in arbitrary order; sort them so the
        # libraries are loaded in a reproducible order.
        for filename in sorted(os.listdir(custom_op_path)):
            if filename.endswith(".so"):
                op_filepath = os.path.join(custom_op_path, filename)
                logger.info("Load the so file from: {}".format(op_filepath))
                tf.load_op_library(op_filepath)
def build_plasma_tensorflow_op():
    """Compile the Plasma TensorFlow op with g++ and load it.

    Does nothing when TensorFlow cannot be imported. Otherwise compiles
    ``tensorflow/plasma_op.cc`` (relative to this file) into
    ``tensorflow/plasma_op.so`` and stores the module returned by
    ``tf.load_op_library(TF_PLASMA_OP_PATH)`` in the global
    ``tf_plasma_op``.

    Raises:
        subprocess.CalledProcessError: If the compiler exits non-zero.
    """
    global tf_plasma_op
    try:
        import tensorflow as tf
        print("TensorFlow version: " + tf.__version__)
    except ImportError:
        # TensorFlow is optional; without it there is nothing to build.
        return

    print("Compiling Plasma TensorFlow Op...")
    op_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                          "tensorflow")
    source_file = os.path.join(op_dir, "plasma_op.cc")
    shared_object = os.path.join(op_dir, "plasma_op.so")

    compile_flags = tf.sysconfig.get_compile_flags()
    if sys.platform == 'darwin':
        compile_flags = ["-undefined", "dynamic_lookup"] + compile_flags

    command = ["g++", "-std=c++11", "-g", "-shared", source_file,
               "-o", shared_object, "-DNDEBUG", "-I" + pa.get_include()]
    command.extend("-L" + lib_dir for lib_dir in pa.get_library_dirs())
    command.extend(["-lplasma", "-larrow_python", "-larrow", "-fPIC"])
    command.extend(compile_flags)
    command.extend(tf.sysconfig.get_link_flags())
    command.append("-O2")
    if tf.test.is_built_with_cuda():
        command.append("-DGOOGLE_CUDA")

    print("Running command " + str(command))
    subprocess.check_call(command)
    tf_plasma_op = tf.load_op_library(TF_PLASMA_OP_PATH)
def _load_dynamiclib_module():
    """Build (when missing) and load the dynamiclibop shared library.

    The loaded module is cached in ``Operator._dynamiclibop_module``; a
    call after the first successful load is a no-op. The library lives in
    ``cache_directory`` and is compiled with g++ from ``dynamiclibop.cc``
    next to this file when it is not already on disk.

    Raises:
        subprocess.CalledProcessError: If g++ fails; its output is logged
            at ERROR level before the exception is re-raised.
    """
    if Operator._dynamiclibop_module is not None:
        return

    dynamiclibop_path = os.path.join(cache_directory,
                                     'dynamiclibop.so.' + version)
    if not os.path.exists(dynamiclibop_path):
        tf_include = tf.sysconfig.get_include()
        source_dir = os.path.dirname(os.path.abspath(__file__))

        base_flags = ['g++', '-fPIC', '-Wall', '-shared', '-std=c++11',
                      '-O2', '-Wextra']
        output_and_source = ['-o', dynamiclibop_path,
                             source_dir + '/dynamiclibop.cc']
        try:
            if cuda_enabled:
                tf.logging.log(tf.logging.INFO,
                               '*** building dynamiclibop for GPU')
                command = (base_flags + ['-DGOOGLE_CUDA=1'] +
                           output_and_source +
                           ['-isystem', cuda_directory + '/include',
                            '-isystem', tf_include])
            else:
                tf.logging.log(tf.logging.INFO,
                               '*** building dynamiclibop for CPU')
                command = (base_flags + output_and_source +
                           ['-isystem', tf_include])
            subprocess.check_output(command, stderr=subprocess.STDOUT,
                                    universal_newlines=True)
        except subprocess.CalledProcessError as build_error:
            tf.logging.log(tf.logging.ERROR,
                           'g++ error: ' + build_error.output)
            raise

    Operator._dynamiclibop_module = tf.load_op_library(dynamiclibop_path)
def do_test(self):
    """Run ``assemble_boxes_cpu`` on the test data and store the result.

    Uses ``self.d.gpu_test`` as input, or a built-in sample when that is
    empty. The evaluated op output is stored in ``self.r``; each group of
    six result values is printed, followed by the distinct values found at
    offset 4 of each group (in first-seen order).
    """
    test = self.d.gpu_test
    assemble_module = tf.load_op_library('./../assemble_boxes_gpu.so')
    with tf.Session():
        with tf.device("/gpu:0"):
            if len(test) == 0:
                # Fallback sample: nine 6-value records and a 3-value
                # trailer. A fresh list is built so self.d.gpu_test is
                # left untouched.
                test = [
                    1, 1, 3, 3, 1, 15,
                    4, 1, 3, 3, 2, 15,
                    7, 1, 3, 3, 3, 15,
                    1, 4, 3, 3, 4, 15,
                    4, 4, 3, 3, 5, 15,
                    7, 4, 3, 3, 6, 15,
                    1, 7, 3, 3, 7, 15,
                    4, 7, 3, 3, 8, 15,
                    7, 7, 3, 3, 9, 15,
                    6, 9, 16,
                ]

            test = tf.constant(test, dtype=tf.uint16)
            # print(...) with a single argument behaves the same on
            # Python 2 and Python 3.
            print(test)
            result = assemble_module.assemble_boxes_cpu(test)

            self.r = result.eval()
            s = []
            for i in range(len(self.r) // 6):
                record = self.r[i * 6: i * 6 + 6]
                print(record)
                g = record[4]
                if g not in s:
                    s.append(g)
            # Pre-formatted so the output matches the old
            # `print "simple list:", s` on both Python versions.
            print("simple list: %s" % (s,))
def f_segm_match(iou, s_gt):
    """Match segmentation outputs to groundtruth with the Hungarian op.

    Args:
        iou: Pairwise score tensor; it is multiplied element-wise with the
            two broadcast masks built from ``s_gt`` (presumably
            [B, N, M] IoU values -- TODO confirm against callers).
        s_gt: [B, T], groundtruth score sequence, used as a mask along
            both trailing axes of ``iou``.

    Returns:
        The first output of ``hungarian_module.hungarian`` multiplied by
        the same two masks.
    """
    global hungarian_module
    if hungarian_module is None:
        # Load the custom op library lazily, once per process.
        hungarian_module = tf.load_op_library('hungarian.so')
        log.info('Loaded library "hungarian.so"')

    # Mask X, [B, M] => [B, 1, M]
    mask_x = tf.expand_dims(s_gt, dim=1)
    # Mask Y, [B, M] => [B, N, 1]
    mask_y = tf.expand_dims(s_gt, dim=2)
    iou_mask = iou * mask_x * mask_y

    # Keep certain precision so that we can get optimal matching within
    # reasonable time.
    eps = 1e-5
    precision = 1e6
    iou_mask = tf.round(iou_mask * precision) / precision
    match_eps = hungarian_module.hungarian(iou_mask + eps)[0]

    # Mask the graph algorithm output.
    match = match_eps * mask_x * mask_y
    return match
def __init__(self, run_dir):
    """Connect to the game, start the simulator and load the pipe op.

    Args:
        run_dir: Directory containing the ``simulator`` executable and
            ``pipe.so``. A trailing slash is optional.
    """
    # Local import so this block does not depend on the file's import list.
    import os

    r = 10.
    game_params = {
        'r': r,
        'dt': 1. / 9,
        'host_speed': 10 / 3.6,
        'target_speed': 5.,
        'num_of_targets': 5,
    }
    self._connect(game_params)
    self._train_params()

    self.fig = plt.figure()
    self.ax = plt.subplot2grid((2, 2), (0, 0), colspan=2, rowspan=2)

    self.run_dir = run_dir
    # os.path.join keeps the original result when run_dir ends with a
    # slash (or is empty) and also works when it does not. The process
    # handle is kept so the simulator can be waited on or terminated.
    self._simulator_process = subprocess.Popen(
        os.path.join(run_dir, "./simulator"))
    self.pipe_module = tf.load_op_library(os.path.join(run_dir, 'pipe.so'))

    plt.ion()
    plt.show()
def testBasic(self):
    """duplicate_op.so loads with an empty OP_LIST and tf.add still works."""
    data_dir = tf.resource_loader.get_data_files_path()
    duplicate = tf.load_op_library(os.path.join(data_dir, 'duplicate_op.so'))

    op_count = len(duplicate.OP_LIST.op)
    self.assertEqual(op_count, 0)

    with self.test_session():
        total = tf.add(1, 41).eval()
        self.assertEqual(total, 42)
def testBasic(self):
    """ackermann_op.so exposes exactly the Ackermann op, which evaluates."""
    data_dir = tf.resource_loader.get_data_files_path()
    ackermann = tf.load_op_library(os.path.join(data_dir, "ackermann_op.so"))

    op_defs = ackermann.OP_LIST.op
    self.assertEqual(len(op_defs), 1)
    self.assertEqual(op_defs[0].name, "Ackermann")

    with self.test_session():
        evaluated = ackermann.ackermann().eval()
        self.assertEqual(evaluated, "A(m, 0) == A(m-1, 1)")
def Load():
    """Return the TopN ops module, loading the library on first use.

    The module is cached in the global ``_topn_ops``; loading is done
    while holding ``_ops_lock``.
    """
    global _topn_ops
    with _ops_lock:
        if _topn_ops:
            return _topn_ops
        library_path = tf.resource_loader.get_path_to_datafile(TOPN_OPS_FILE)
        tf.logging.info('data path: %s', library_path)
        _topn_ops = tf.load_op_library(library_path)
        assert _topn_ops, 'Could not load topn_ops.so'
        return _topn_ops
def Load(library_base_dir=""):
    """Return the quantized ops module, loading the library on first use.

    Args:
        library_base_dir: Path joined in front of TensorFlow's data-files
            path when locating the library.

    The module is cached in the global ``_quantized_ops``; loading is done
    while holding ``_ops_lock``.
    """
    global _quantized_ops
    with _ops_lock:
        if _quantized_ops:
            return _quantized_ops
        data_files_path = os.path.join(
            library_base_dir, tf.resource_loader.get_data_files_path())
        tf.logging.info("q:data path: %s", data_files_path)
        library_path = os.path.join(data_files_path, QUANTIZED_OPS_FILE)
        _quantized_ops = tf.load_op_library(library_path)
        assert _quantized_ops, "Could not load quantized_ops.so"
        return _quantized_ops
def Load():
    """Return the inference ops module, loading the library on first use.

    The module is cached in the global ``_inference_ops``; loading is done
    while holding ``_ops_lock``.
    """
    global _inference_ops
    with _ops_lock:
        if _inference_ops:
            return _inference_ops
        data_files_path = tf.resource_loader.get_data_files_path()
        tf.logging.info('data path: %s', data_files_path)
        library_path = os.path.join(data_files_path, INFERENCE_OPS_FILE)
        _inference_ops = tf.load_op_library(library_path)
        assert _inference_ops, 'Could not load inference_ops.so'
        return _inference_ops
def Load():
    """Return the training ops module, loading the library on first use.

    The module is cached in the global ``_training_ops``; loading is done
    while holding ``_ops_lock``.
    """
    global _training_ops
    with _ops_lock:
        if _training_ops:
            return _training_ops
        data_files_path = tf.resource_loader.get_data_files_path()
        tf.logging.info('data path: %s', data_files_path)
        library_path = os.path.join(data_files_path, TRAINING_OPS_FILE)
        _training_ops = tf.load_op_library(library_path)
        assert _training_ops, 'Could not load _training_ops.so'
        return _training_ops
def Load(library_base_dir=''):
    """Return the quantized kernels module, loading it on first use.

    Args:
        library_base_dir: Path joined in front of TensorFlow's data-files
            path when locating the library.

    The module is cached in the global ``_quantized_kernels``; loading is
    done while holding ``_kernels_lock``.
    """
    global _quantized_kernels
    with _kernels_lock:
        if _quantized_kernels:
            return _quantized_kernels
        data_files_path = os.path.join(
            library_base_dir, tf.resource_loader.get_data_files_path())
        tf.logging.info('data path: %s', data_files_path)
        library_path = os.path.join(data_files_path, QUANTIZED_KERNELS_FILE)
        _quantized_kernels = tf.load_op_library(library_path)
        assert _quantized_kernels, 'Could not load _quantized_kernels.so'
        return _quantized_kernels
def load(library_path):
    """Load the fuzzy CTC op library and wrap its decoder.

    Registers a gradient for the ``FuzzyCTCLoss`` op as a side effect.

    Args:
        library_path: Path passed to ``tf.load_op_library``.

    Returns:
        A dict with ``"module"`` (the loaded op module) and
        ``"decoder_op"`` (a Python wrapper around the module's
        ``fuzzy_ctc_greedy_decoder`` op).
    """
    fuzzy_module = tf.load_op_library(library_path)

    @ops.RegisterGradient("FuzzyCTCLoss")
    def _FuzzyCTCLossGrad(op, grad_loss, _):
        # The op's second output is routed through prevent_gradient with
        # the explanatory message below, then scaled by the incoming
        # loss gradient.
        guarded_grad = array_ops.prevent_gradient(
            op.outputs[1],
            message="Currently there is no way to take the second "
            " derivative of ctc_loss due to the fused implementation's interaction "
            " with tf.gradients()")
        expanded_loss_grad = tf.expand_dims(grad_loss, -1)
        scaled_grad = _BroadcastMul(expanded_loss_grad, guarded_grad)
        # Only the first of the op's four inputs receives a gradient.
        return [scaled_grad, None, None, None]

    def fuzzy_ctc_greedy_decoder(inputs, sequence_length):
        # Repackage the op's first three outputs as one SparseTensor.
        indices, values, dense_shape, log_probabilities = (
            fuzzy_module.fuzzy_ctc_greedy_decoder(inputs, sequence_length))
        decoded = sparse_tensor.SparseTensor(indices, values, dense_shape)
        return ([decoded], log_probabilities)

    return {"module": fuzzy_module, "decoder_op": fuzzy_ctc_greedy_decoder}
import tensorflow as tf from tensorflow.python.framework import ops nn_distance_module = tf.load_op_library('./external/tf_nndistance_so.so') def nn_distance(xyz1, xyz2): ''' Computes the distance of nearest neighbors for a pair of point clouds input: xyz1: (batch_size,#points_1,3) the first point cloud input: xyz2: (batch_size,#points_2,3) the second point cloud output: dist1: (batch_size,#point_1) distance from first to second output: idx1: (batch_size,#point_1) nearest neighbor from first to second output: dist2: (batch_size,#point_2) distance from second to first output: idx2: (batch_size,#point_2) nearest neighbor from second to first ''' xyz1 = tf.expand_dims(xyz1, 0) xyz2 = tf.expand_dims(xyz2, 0) return nn_distance_module.nn_distance(xyz1, xyz2) #@tf.RegisterShape('NnDistance') #def _nn_distance_shape(op): #shape1=op.inputs[0].get_shape().with_rank(3) #shape2=op.inputs[1].get_shape().with_rank(3) #return [tf.TensorShape([shape1.dims[0],shape1.dims[1]]),tf.TensorShape([shape1.dims[0],shape1.dims[1]]), #tf.TensorShape([shape2.dims[0],shape2.dims[1]]),tf.TensorShape([shape2.dims[0],shape2.dims[1]])] @ops.RegisterGradient('NnDistance') def _nn_distance_grad(op, grad_dist1, grad_idx1, grad_dist2, grad_idx2): xyz1 = op.inputs[0] xyz2 = op.inputs[1] idx1 = op.outputs[1]
import tensorflow as tf from tensorflow.python.framework import ops import os dot_slash = os.path.dirname(__file__) # Making roi_pooling_layer available for import as a library roi_location = os.path.join(dot_slash, "rpl.so") op_module = tf.load_op_library(roi_location) roi_pooling_layer = op_module.roi_pooler # Maknig nms available for import as a library nms_location = os.path.join(dot_slash, "nms.so") nms_module = tf.load_op_library(nms_location) nms = nms_module.nms # Making roi_pooling_layer's gradient available for import roi_grad_location = os.path.join(dot_slash, "rpl_grad.so") roi_grad_module = tf.load_op_library(roi_grad_location) roi_pooling_layer_grad = roi_grad_module.roi_pooler_grad @ops.RegisterGradient("RoiPooler") def _roi_pool_grad_cc(op, grad): return [roi_pooling_layer_grad(op.inputs[0], op.inputs[1], op.inputs[2], grad, op.get_attr("pooled_height"), op.get_attr("pooled_width"), op.get_attr("feature_stride")), None, None] # Making iou_labeler available for import iou_labeler_location = os.path.join(dot_slash, "iou_labeler.so")
import lattice_filter_op_loader import cv2 import matplotlib.pyplot as plt from copy import deepcopy from scipy.misc import imresize from keras.models import Sequential, Model from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D from keras.layers.core import Activation, Dropout, Flatten, Lambda from keras.layers.normalization import BatchNormalization from keras.optimizers import SGD, Adam, Nadam from keras.utils import np_utils, plot_model from keras import objectives, layers from keras import backend as K from os import path module = tf.load_op_library(path.join(path.dirname(path.abspath(__file__)), 'lattice_filter.so')) input_path = 'datasets/A' output_path = 'datasets/B' m = 256 n = 256 sketch_dim = (m,n,3) img_dim = (m,n,3) num_images = 16 num_epochs = 2 batch_size = 4 file_names = [] def load_file_names(path): return os.listdir(path)
import subprocess import sys import tensorflow as tf import zipfile sys.path.insert(0, "third_party/syntaxnet") from dragnn.protos import spec_pb2 from dragnn.python import dragnn_ops from dragnn.python import graph_builder from dragnn.python import trainer_lib from dragnn.python import check from google.protobuf import text_format from convert import convert_model tf.load_op_library('bazel-bin/nlp/parser/trainer/sempar.so') flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_string('master_spec', '', 'Path to a complete dragnn master spec text proto.') flags.DEFINE_string('hyperparams', '', 'Training grid spec text proto.') flags.DEFINE_string('output_folder', '', 'Full path of the output folder.') flags.DEFINE_string('commons', '', 'Path to commons.') flags.DEFINE_string('train_corpus', '', 'Training corpus.') flags.DEFINE_string('dev_corpus', '', 'Dev corpus with gold frames.') flags.DEFINE_string('tf_master', '', 'TensorFlow execution engine to connect to.') flags.DEFINE_integer('train_steps', 200000, 'Number of training steps') flags.DEFINE_integer('report_every', 500, 'Checkpoint interval')
if 'OCTREE_KEY' in os.environ and os.environ['OCTREE_KEY'] == '64': print('INFO from ocnn: The octree key is 64 bits') octree_key64 = True tf_uintk = tf.uint64 tf_uints = tf.uint16 tf_intk = tf.int64 else: print('INFO from ocnn: The octree key is 32 bits, ' 'the octree depth should be smaller than 8. ') octree_key64 = False tf_uintk = tf.uint32 tf_uints = tf.uint8 tf_intk = tf.int32 _current_path = os.path.dirname(os.path.realpath(__file__)) _tf_ocnn_module = tf.load_op_library(os.path.join(_current_path, 'libocnn.so')) bounding_sphere = _tf_ocnn_module.bounding_sphere points_property = _tf_ocnn_module.points_property transform_points = _tf_ocnn_module.transform_points normalize_points = _tf_ocnn_module.normalize_points points_new = _tf_ocnn_module.points_new points_set_property = _tf_ocnn_module.points_set_property octree_drop = _tf_ocnn_module.octree_drop octree_scan = _tf_ocnn_module.octree_scan octree_cast = _tf_ocnn_module.octree_cast octree_batch = _tf_ocnn_module.octree_batch points2octree = _tf_ocnn_module.points_to_octree octree_property = _tf_ocnn_module.octree_property octree_pad = _tf_ocnn_module.octree_pad octree_depad = _tf_ocnn_module.octree_depad
# ``` from __future__ import division, print_function import os import numpy as np import tensorflow as tf from tensorflow.python.framework import ops from collections import namedtuple import logger from tf_conv_dims import calc_padding_4d, calc_out_size_4d, calc_out_size_4d_np log = logger.get() sbnet_module = tf.load_op_library('../sbnet_ops/libsbnet.so') BlockParams = namedtuple( 'BlockParams', ['bsize', 'bsize_out', 'boffset', 'bcount', 'bstrides']) # Gradients registration. @ops.RegisterGradient("SparseGather") def _sparse_gather_grad(op, grad): # x is shaped like full tensor [NHWC] # grad is shaped as gathered blocks [Nblocks*BH*BW*C] x = op.inputs[0] binCounts = op.inputs[1] activeBlockIndices = op.inputs[2] bsize = op.inputs[3] bstride = op.inputs[4]
#!/usr/bin/env python3
"""
Gradients for prod force.
"""

import os
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

# Directory of this file, with a trailing slash so that file names can be
# appended directly.
force_module_path = os.path.dirname(os.path.realpath(__file__)) + "/"
_grad_library_path = force_module_path + "libprod_force_grad.so"
assert os.path.isfile(_grad_library_path), "force module grad does not exist"
prod_force_grad_module = tf.load_op_library(_grad_library_path)


@ops.RegisterGradient("ProdForce")
def _prod_force_grad_cc(op, grad):
    """Gradient of the ``ProdForce`` op, computed by ``prod_force_grad``.

    The custom op receives the incoming gradient, the five inputs of the
    forward op, and the forward op's ``n_a_sel`` / ``n_r_sel`` attributes.
    Only the first input gets a gradient; the other four get ``None``.
    """
    forward_inputs = [op.inputs[index] for index in range(5)]
    net_grad = prod_force_grad_module.prod_force_grad(
        grad,
        *forward_inputs,
        n_a_sel=op.get_attr("n_a_sel"),
        n_r_sel=op.get_attr("n_r_sel"))
    return [net_grad, None, None, None, None]
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow_addons.utils import keras_utils from tensorflow_addons.utils.resource_loader import get_path_to_datafile _activation_ops_so = tf.load_op_library( get_path_to_datafile("custom_ops/activations/_activation_ops.so")) @keras_utils.register_keras_custom_object @tf.function def mish(x): """Mish: A Self Regularized Non-Monotonic Neural Activation Function. Computes mish activation: x * tanh(softplus(x)) See [Mish: A Self Regularized Non-Monotonic Neural Activation Function](https://arxiv.org/abs/1908.08681). Args: x: A `Tensor`. Must be one of the following types: `float16`, `float32`, `float64`. Returns:
def build_forward_backward(H, x, phase, boxes, flags):
    '''
    Call build_forward() and then setup the loss functions

    Args:
        H: hyperparameter dict. Keys read here: 'grid_width', 'grid_height',
            'batch_size', 'use_rezoom', 'rnn_len', 'use_lstm',
            'num_classes', 'rezoom_change_loss', 'reregress' and
            H['solver'] with 'hungarian_iou' and 'head_weights'.
        x: passed through unchanged to build_forward().
        phase: 'train' or 'test'; selects variable reuse (None / True) and
            prefixes the summary names.
        boxes: reshaped to [outer_size, rnn_len, 4].
        flags: reshaped to [outer_size, rnn_len].

    Returns:
        (pred_boxes, pred_confidences, loss, confidences_loss, boxes_loss)
    '''
    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    reuse = {'train': None, 'test': True}[phase]
    # build_forward() returns two extra "delta" tensors when rezoom is on.
    if H['use_rezoom']:
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
         pred_boxes_deltas) = build_forward(H, x, phase, reuse)
    else:
        pred_boxes, pred_logits, pred_confidences = build_forward(
            H, x, phase, reuse)
    with tf.variable_scope('decoder', reuse={
            'train': None,
            'test': True
    }[phase]):
        outer_boxes = tf.reshape(boxes, [outer_size, H['rnn_len'], 4])
        outer_flags = tf.cast(tf.reshape(flags, [outer_size, H['rnn_len']]),
                              'int32')
        if H['use_lstm']:
            # The custom Hungarian op yields the class targets, the
            # permuted groundtruth boxes and the prediction mask.
            hungarian_module = tf.load_op_library(
                'utils/hungarian/hungarian.so')
            assignments, classes, perm_truth, pred_mask = (
                hungarian_module.hungarian(pred_boxes, outer_boxes,
                                           outer_flags,
                                           H['solver']['hungarian_iou']))
        else:
            # Without the LSTM the targets are taken directly from the
            # inputs, reshaped to one entry per cell.
            classes = tf.reshape(flags, (outer_size, 1))
            perm_truth = tf.reshape(outer_boxes, (outer_size, 1, 4))
            pred_mask = tf.reshape(tf.cast(tf.greater(classes, 0), 'float32'),
                                   (outer_size, 1, 1))
        # Confidence loss: cross-entropy against "class > 0" labels.
        true_classes = tf.reshape(tf.cast(tf.greater(classes, 0), 'int64'),
                                  [outer_size * H['rnn_len']])
        pred_logit_r = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        confidences_loss = (tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=pred_logit_r, labels=true_classes))
        ) / outer_size * H['solver']['head_weights'][0]
        # Box loss: L1 distance between groundtruth and masked predictions.
        residual = tf.reshape(perm_truth - pred_boxes * pred_mask,
                              [outer_size, H['rnn_len'], 4])
        boxes_loss = tf.reduce_sum(
            tf.abs(residual)) / outer_size * H['solver']['head_weights'][1]
        if H['use_rezoom']:
            # 'inside' holds the int64 labels for the rezoom confidence
            # head; how they are derived depends on rezoom_change_loss.
            if H['rezoom_change_loss'] == 'center':
                error = (perm_truth[:, :, 0:2] -
                         pred_boxes[:, :, 0:2]) / tf.maximum(
                             perm_truth[:, :, 2:4], 1.)
                square_error = tf.reduce_sum(tf.square(error), 2)
                inside = tf.reshape(
                    tf.to_int64(
                        tf.logical_and(tf.less(square_error, 0.2**2),
                                       tf.greater(classes, 0))), [-1])
            elif H['rezoom_change_loss'] == 'iou':
                iou = train_utils.iou(
                    train_utils.to_x1y1x2y2(tf.reshape(pred_boxes, [-1, 4])),
                    train_utils.to_x1y1x2y2(tf.reshape(perm_truth, [-1, 4])))
                inside = tf.reshape(tf.to_int64(tf.greater(iou, 0.5)), [-1])
            else:
                assert H['rezoom_change_loss'] == False
                inside = tf.reshape(tf.to_int64((tf.greater(classes, 0))),
                                    [-1])
            new_confs = tf.reshape(
                pred_confs_deltas,
                [outer_size * H['rnn_len'], H['num_classes']])
            delta_confs_loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=new_confs, labels=inside)
            ) / outer_size * H['solver']['head_weights'][0] * 0.1

            # The returned confidences are replaced by the softmax of the
            # rezoom logits.
            pred_logits_squash = tf.reshape(
                new_confs, [outer_size * H['rnn_len'], H['num_classes']])
            pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
            pred_confidences = tf.reshape(
                pred_confidences_squash,
                [outer_size, H['rnn_len'], H['num_classes']])
            loss = confidences_loss + boxes_loss + delta_confs_loss
            if H['reregress']:
                # Squared error of the refined boxes, clipped at 10**2.
                delta_residual = tf.reshape(
                    perm_truth - (pred_boxes + pred_boxes_deltas) * pred_mask,
                    [outer_size, H['rnn_len'], 4])
                delta_boxes_loss = (tf.reduce_sum(
                    tf.minimum(tf.square(delta_residual), 10.**2)) /
                                    outer_size *
                                    H['solver']['head_weights'][1] * 0.03)
                # The returned boxes_loss becomes the refined-box loss; the
                # L1 term above is already part of `loss`.
                boxes_loss = delta_boxes_loss
                tf.summary.histogram(phase + '/delta_hist0_x',
                                     pred_boxes_deltas[:, 0, 0])
                tf.summary.histogram(phase + '/delta_hist0_y',
                                     pred_boxes_deltas[:, 0, 1])
                tf.summary.histogram(phase + '/delta_hist0_w',
                                     pred_boxes_deltas[:, 0, 2])
                tf.summary.histogram(phase + '/delta_hist0_h',
                                     pred_boxes_deltas[:, 0, 3])
                loss += delta_boxes_loss
        else:
            loss = confidences_loss + boxes_loss
    return pred_boxes, pred_confidences, loss, confidences_loss, boxes_loss
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Python API for ScaNN - single machine, dense vector similarity search.""" import os import uuid from scann.scann_ops.py import scann_builder import tensorflow as tf _scann_ops_so = tf.load_op_library( os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cc/_scann_ops.so")) scann_create_searcher = _scann_ops_so.scann_scann_create_searcher scann_search = _scann_ops_so.scann_scann_search scann_search_batched = _scann_ops_so.scann_scann_search_batched scann_to_tensors = _scann_ops_so.scann_scann_to_tensors tensors_to_scann = _scann_ops_so.scann_tensors_to_scann def searcher_from_module(module, db=None): del db # Unused. return ScannSearcher(module.recreate_handle()) class ScannState(tf.Module): """Class that wraps ScaNN searcher assets for object-based checkpointing."""
#--Ayan Chakrabarti <*****@*****.**> import tensorflow as tf import os sopath = os.path.abspath(os.path.dirname(__file__)) + '/ops/quant.so' try: mod = tf.load_op_library(sopath) except: mod = None print("WARNING: COULD NOT LOAD CUDA LIBRARY") def cu_quant(qtype, act, bias): assert qtype == 4 or qtype == 8 vshp = act.get_shape().as_list() if qtype == 8: assert vshp[-1] >= 4 and vshp[-1] % 4 == 0 vshp[-1] = vshp[-1] // 4 else: assert vshp[-1] >= 8 and vshp[-1] % 8 == 0 vshp[-1] = vshp[-1] // 8 var = tf.Variable(tf.zeros(vshp, dtype=tf.float32)) sOp = [mod.save_act(var, act, bias, qtype).op] outs, Rm = mod.rest_act(var, bias, qtype) return sOp, outs, Rm def tf_quant(qtype, act, bias): assert qtype == 4 or qtype == 8
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import math_ops
import tensorflow as tf

sdd_module = tf.load_op_library('./sparse_dense_dense.so')


def sdd(a, b, indices):
    """Call the custom ``sparse_dense_dense`` op on ``a`` and ``b`` transposed.

    ``indices`` is printed and forwarded unchanged (int64 per the original
    note).
    """
    print(indices)
    b_transposed = tf.transpose(b)
    return sdd_module.sparse_dense_dense(a, b_transposed, indices)


@ops.RegisterGradient("SparseDenseDense")
def _sparse_dense_dense_grad(op, grad):
    """Gradient of ``SparseDenseDense`` with respect to its two dense inputs.

    The incoming gradient values are placed into a SparseTensor at the
    op's ``indices``; each dense gradient is a sparse-dense matmul of that
    tensor (or its transpose) with the other input. ``indices`` itself
    gets no gradient.
    """
    a, b, indices = op.inputs[0], op.inputs[1], op.inputs[2]
    print(a.get_shape(), b.get_shape(), grad)

    rows_and_cols = tf.stack([tf.shape(a)[0], tf.shape(b)[0]])
    result_shape = tf.cast(rows_and_cols, dtype=dtypes.int64)
    sparse_grad = tf.SparseTensor(indices=indices, values=grad,
                                  dense_shape=result_shape)
    sparse_grad_t = sparse_ops.sparse_transpose(sparse_grad)

    grad_a = sparse_ops.sparse_tensor_dense_matmul(sparse_grad, b)
    grad_b = sparse_ops.sparse_tensor_dense_matmul(sparse_grad_t, a)
    return [grad_a, grad_b, None]
import tensorflow as tf from tensorflow.python.framework import ops import sys import os BASE_DIR = os.path.dirname(__file__) sys.path.append(BASE_DIR) evaluate_module = tf.load_op_library( os.path.join(BASE_DIR, 'tf_evaluate_so.so')) def evaluate(detections, names, numlist): ''' Input: detections: (n, 12) names: (m,) numlist: (m,) Output: precision_image: (NUM_CLASS, 3, 41) aos_image: (NUM_CLASS, 3, 41) precision_ground: (NUM_CLASS, 3, 41) aos_ground: (NUM_CLASS, 3, 41) precision_3d: (NUM_CLASS, 3, 41) aos_3d: (NUM_CLASS, 3, 41) ''' return evaluate_module.evaluate(detections, names, numlist) ops.NoGradient('Evaluate') def calc_iou(detections, groundtruths):
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.! r"""Ops for SentencePiece Encoding/Decoding.""" # TODO(taku): Implements n-best output from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import tensorflow as tf _gen_sentencepiece_processor_op = tf.load_op_library( os.path.join(os.path.dirname(__file__), '_sentencepiece_processor_ops.so')) def piece_size(model_file=None, model_proto=None, name=None): """Returns the piece size (vocabulary size). Args: model_file: The sentencepiece model file path. model_proto: The sentencepiece model serialized proto. Either `model_file` or `model_proto` must be set. name: The name argument that is passed to the op function. Returns: A scalar representing the vocabulary size. """ return _gen_sentencepiece_processor_op.sentencepiece_get_piece_size(
# # The Rosetta library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with the Rosetta library. If not, see <http://www.gnu.org/licenses/>. # ==============================================================================" import tensorflow as tf from tensorflow.python.ops import math_ops import os _secureop_lib = os.path.dirname(__file__) + '/../../../libsecure-ops.so' _secure_ops = tf.load_op_library(_secureop_lib) # ----------------------------- # secure reduction ops # ----------------------------- def SecureMax(input_tensor, axis=None, keepdims=None, name=None, reduction_indices=None, keep_dims=None): keepdims = False if keepdims is None else keepdims axis = math_ops._ReductionDims(input_tensor, axis) return _secure_ops.secure_reduce_max(input_tensor, reduction_indices=axis, name=name, keep_dims=keepdims)
# Imports and global variables # \**********************************/ # # Basic libs import numpy as np import tensorflow as tf import time # Subsampling extension import cpp_wrappers.cpp_subsampling.grid_subsampling as cpp_subsampling from utils.ply import read_ply # Load custom operation tf_neighbors_module = tf.load_op_library('tf_custom_ops/tf_neighbors.so') tf_batch_neighbors_module = tf.load_op_library( 'tf_custom_ops/tf_batch_neighbors.so') tf_subsampling_module = tf.load_op_library('tf_custom_ops/tf_subsampling.so') tf_batch_subsampling_module = tf.load_op_library( 'tf_custom_ops/tf_batch_subsampling.so') # ---------------------------------------------------------------------------------------------------------------------- # # Utility functions # \***********************/ # def grid_subsampling(points, features=None,
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import sys import threading import time from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np import tensorflow as tf word2vec = tf.load_op_library(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'word2vec_ops.so')) flags = tf.app.flags flags.DEFINE_string("save_path", None, "Directory to write the model and " "training summaries.") flags.DEFINE_string("train_data", None, "Training text file. " "E.g., unzipped file http://mattmahoney.net/dc/text8.zip.") flags.DEFINE_string( "eval_data", None, "File consisting of analogies of four tokens." "embedding 2 - embedding 1 + embedding 3 should be close " "to embedding 4." "See README.md for how to get 'questions-words.txt'.") flags.DEFINE_integer("embedding_size", 200, "The embedding dimension size.") flags.DEFINE_integer( "epochs_to_train", 15,
import tensorflow as tf
import numpy as np
import scipy.io

# Write a random 2x3x4 array to test.mat under the variable name 'test'.
test_matrix = np.random.rand(2, 3, 4)
test_mat = {'test': test_matrix}
scipy.io.savemat('test.mat', test_mat)
print('Generated matrix:')
print(test_matrix)

# Read the same variable back through the custom parse_mat op so the two
# printouts can be compared.
parse_mat_module = tf.load_op_library('parse_mat.so')
test_parse_tensor = parse_mat_module.parse_mat(
    'test.mat', 'test', dtype=tf.float64)
sess = tf.InteractiveSession()
test_parse = sess.run(test_parse_tensor)
print('Parsed matrix:')
print(test_parse)
''' This is the demo for loading and running sparse convolution op. ''' import tensorflow as tf import numpy as np from scipy.sparse import random import time # load op try: _conv_sparse = tf.load_op_library('./build/libconv_sparse.so') except Exception as e: _conv_sparse = tf.load_op_library('./libconv_sparse.so') sparse_convolution = _conv_sparse.custom_convolution # tensor setting input_channel = 32 output_channel = 64 kernel_size = 11 sparse_density = 0.1 batch = 1 stride = 1 tolerance = 0.001 input_size = (256, 512) input_shape = (batch, input_size[0], input_size[1], input_channel ) # batch * height * width * ch_in kernel_shape = (kernel_size, kernel_size, input_channel, output_channel ) # k_h * k_w * ch_in * ch_out
""" Functions for reconstructing the DPC images from the raw phase steps. """ import os import logging import numpy as np import tensorflow as tf angle_module = tf.load_op_library(os.path.join("src", "arg.so")) log = logging.getLogger(__name__) def visibility(data): return 2 * data[..., 2] / data[..., 0] def get_signals(phase_stepping_curves, n_periods=1): """Get the average a_0, the phase phi and the amplitude |a_1| from the phase stepping curves. Input: phase_stepping_curves is a tensorflow.Tensor with the phase stepping curves along the third axis, (x, y) pixels along the first two axes. n_periods is the number of periods used in the phase stepping. returns a0, phi and a1 along the last axis. """
import tensorflow as tf
import numpy as np

# Demo: feed a random 5x5 float32 matrix through the custom `triangle` op.
# print(...) with a single argument is used so the script runs unchanged on
# Python 2 and Python 3.
arr = np.random.randn(5, 5).astype(np.float32)
print(arr)

mod = tf.load_op_library('/home/tron/nicksontf_tests/triangle.so')
a = tf.placeholder(tf.float32, arr.shape)
d = mod.triangle(a, 'upper')
print(arr)

# log_device_placement makes TensorFlow report where each op is placed.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
print(sess.run(d, {a: arr}))
def nn_configure_assemble(self):
    """Load the assemble_boxes_gpu op library into ``self.assemble_module``.

    The load runs inside a ``tf.Session`` context whose session object is
    also stored on ``self.sess``.
    """
    with tf.Session() as session:
        # NOTE(review): the session context ends with this block, so
        # self.sess presumably refers to a closed session afterwards --
        # confirm this is intended.
        self.sess = session
        self.assemble_module = tf.load_op_library(
            'tensorflow/core/user_ops/assemble_boxes_gpu.so')
''' Furthest point sampling Original author: Haoqiang Fan Modified by Charles R. Qi All Rights Reserved. 2017. ''' import tensorflow as tf from tensorflow.python.framework import ops import sys import os BASE_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append(BASE_DIR) sampling_module = tf.load_op_library( os.path.join(BASE_DIR, 'tf_sampling_so.so')) def prob_sample(inp, inpr): ''' input: batch_size * ncategory float32 batch_size * npoints float32 returns: batch_size * npoints int32 ''' return sampling_module.prob_sample(inp, inpr) ops.NoGradient('ProbSample') # TF1.0 API requires set shape in C++ #@tf.RegisterShape('ProbSample')
#!/usr/bin/env python3
"""
Gradients for inner product.
"""

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

mitsuba_grad_module = tf.load_op_library('./mitsuba_v2_grad_op.so')


@ops.RegisterGradient("Mitsuba")
def _inner_product_grad_cc(op, grad):
    """
    The gradient for the `Mitsuba` op, delegated to the C++ `mitsuba_grad` op.

    :param op: the `Mitsuba` `Operation` being differentiated; its first
        three inputs are forwarded to the gradient op.
    :param grad: gradient with respect to the output of the `Mitsuba` op.
    :return: whatever `mitsuba_grad` returns for (grad, input 0, input 1,
        input 2).
    """
    first_input, second_input, third_input = (
        op.inputs[0], op.inputs[1], op.inputs[2])
    return mitsuba_grad_module.mitsuba_grad(
        grad, first_input, second_input, third_input)
import tensorflow as tf _sketch_op = tf.load_op_library('/data/shmsw25/vqa/model/CBP/build/count_sketch.so') def count_sketch(probs, project_size): """ Calculates count-min sketch of a tensor. Args: probs: A `Tensor` project_size: output size (`int`) Returns:c A projected count-min sketch `Tensor` with shape [batch_size, project_size]. """ with tf.variable_scope('CountSketch_'+probs.name.replace(':', '_')) as scope: input_size = int(probs.get_shape()[1]) # h, s must be sampled once history = tf.get_collection('__countsketch') if scope.name in history: scope.reuse_variables() tf.add_to_collection('__countsketch', scope.name) h = tf.get_variable('h', [input_size], initializer=tf.random_uniform_initializer(0, project_size), trainable=False) s = tf.get_variable('s', [input_size], initializer=tf.random_uniform_initializer(0, 2), trainable=False) h = tf.cast(h, 'int32') s = tf.cast(tf.floor(s) * 2 - 1, 'int32') # 1 or -1 sk = _sketch_op.count_sketch(probs, h, s, project_size)
from scipy.ndimage.filters import gaussian_filter from random import randint from PIL import Image import json random.seed(2018) letters = " !\"#&\\'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÂÊÔàáâãèéêìíòóôõùúýăĐđĩũƠơưạảấầẩậắằẵặẻẽếềểễệỉịọỏốồổỗộớờởỡợụủỨứừửữựỳỵỷỹ" MAX_LEN = 70 WIDTH, HEIGHT = 1280, 64 SIZE = WIDTH, HEIGHT CHAR_DICT = len(letters) + 1 chars = letters wordChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÂÊÔàáâãèéêìíòóôõùúýăĐđĩũƠơưạảấầẩậắằẵặẻẽếềểễệỉịọỏốồổỗộớờởỡợụủỨứừửữựỳỵỷỹ" corpus = ' \n '.join(json.load(open('labels.json')).values()) word_beam_search_module = tf.load_op_library('lib/TFWordBeamSearch.so') mat = tf.placeholder(tf.float32) beamsearch_decoder = word_beam_search_module.word_beam_search( mat, 25, 'Words', 0.1, corpus, chars, wordChars) def text_to_labels(text): return list(map(lambda x: letters.index(x), text)) def labels_to_text(labels): return ''.join( list(map(lambda x: letters[x] if x < len(letters) else "", labels))) def beamsearch(sess, y_pred):
# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ZeroOut op Python library.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os.path import tensorflow as tf _zero_out_module = tf.load_op_library( os.path.join(tf.resource_loader.get_data_files_path(), 'zero_out_op_kernel_3.so')) zero_out = _zero_out_module.zero_out
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for the add_one op loaded from cuda_op_kernel.so."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
#from tensorflow.g3doc.how_tos.adding_an_op import cuda_op
cuda_op = tf.load_op_library('cuda_op_kernel.so')


class AddOneTest(tf.test.TestCase):
  """Runs the add_one op when TensorFlow was built with CUDA."""

  def test(self):
    """Print the op's result, or "no cuda" when CUDA is unavailable."""
    if not tf.test.is_built_with_cuda():
      print("no cuda")
      return
    with self.test_session():
      incremented = cuda_op.add_one([5, 4, 3, 2, 1])
      #self.assertAllEqual(result.eval(), [6, 5, 4, 3, 2])
      print(incremented.eval(), "test")


if __name__ == '__main__':
  tf.test.main()
def testLoadTwice(self):
    """Loading the same library again must yield the already-loaded module."""
    library_path = os.path.join(
        tf.resource_loader.get_data_files_path(), 'zero_out_op_kernel_1.so')
    reloaded_module = tf.load_op_library(library_path)
    self.assertEqual(reloaded_module, zero_out_op_1._zero_out_module)
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import sys import threading import time from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np import tensorflow as tf word2vec = tf.load_op_library(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'word2vec_ops.so')) flags = tf.app.flags flags.DEFINE_string("save_path", None, "Directory to write the model and " "training summaries.") flags.DEFINE_string("train_data", None, "Training text file. " "E.g., unzipped file http://mattmahoney.net/dc/text8.zip.") flags.DEFINE_string( "eval_data", None, "File consisting of analogies of four tokens." "embedding 2 - embedding 1 + embedding 3 should be close " "to embedding 4." "See README.md for how to get 'questions-words.txt'.") flags.DEFINE_integer("embedding_size", 200, "The embedding dimension size.") flags.DEFINE_integer( "epochs_to_train", 15,
def testZeroOutFloat(self):
    """zero_out on a float vector keeps the first element and zeroes the rest."""
    op_library = tf.load_op_library('zero_out.so')
    float_input = [5., 4., 3., 2., 1.]
    with self.test_session():
        zeroed = op_library.zero_out(float_input)
        self.assertAllEqual(zeroed.eval(), [5., 0., 0., 0., 0.])
import tensorflow as tf
import os.path as osp

# Locate the compiled library next to this module. ``__file__`` may be a
# relative path (e.g. when the file is run as a script on older Pythons), in
# which case ``dirname`` would be '' and a bare file name would be handed to
# the dynamic loader, which does not search the current directory. Resolving
# to an absolute path first makes the lookup independent of how the module
# was started.
filename = osp.join(osp.dirname(osp.abspath(__file__)), 'psroi_pooling.so')
_psroi_pooling_module = tf.load_op_library(filename)

# Public handles to the forward op and its gradient op.
psroi_pool = _psroi_pooling_module.psroi_pool
psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
from os.path import exists
from os import mkdir
from os.path import join
from PIL import Image
import json
import tensorflow as tf
import threading
import roi_pooling_op_grad
# NOTE(review): absolute, machine-specific path to the compiled op library.
module = tf.load_op_library('/Programs/tensorflow/roi_pooling.so')
import numpy as np
import h5py
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.misc import imread, imresize
from utils import load_vocab
from time import time
import datetime


def load_images(ps):
    # Read every path in `ps` as an RGB image and report how long it took.
    tic = time()
    images = [imread(p,mode='RGB') for p in ps]
    toc = time()
    print("imread = %1.3fs" % (toc-tic))
    treated_images = []
    sizes = []
    for img in images:
        # Record the first two dimensions of each image as loaded.
        sizes.append(img.shape[:2])
# The Plasma TensorFlow Operator needs to be compiled on the end user's # machine since the TensorFlow ABI is not stable between versions. # The following code checks if the operator is already present. If not, # the function build_plasma_tensorflow_op can be used to compile it. TF_PLASMA_OP_PATH = os.path.join(pa.__path__[0], "tensorflow", "plasma_op.so") tf_plasma_op = None if os.path.exists(TF_PLASMA_OP_PATH): import tensorflow as tf tf_plasma_op = tf.load_op_library(TF_PLASMA_OP_PATH) def build_plasma_tensorflow_op(): global tf_plasma_op try: import tensorflow as tf print("TensorFlow version: " + tf.__version__) except ImportError: pass else: print("Compiling Plasma TensorFlow Op...") dir_path = os.path.dirname(os.path.realpath(__file__)) cc_path = os.path.join(dir_path, "tensorflow", "plasma_op.cc") so_path = os.path.join(dir_path, "tensorflow", "plasma_op.so") tf_cflags = tf.sysconfig.get_compile_flags()
import tensorflow as tf
import os.path as osp
import numpy as np
from config.config import cfg

# The compiled op lives under <ROOT_DIR>/lib/reorg_layer/.
filename = osp.join(cfg.ROOT_DIR, 'lib', 'reorg_layer', 'reorg.so')
assert osp.exists(filename), 'Path {} does not exist!'.format(filename)

_reorg_module = tf.load_op_library(filename)

# Public handles to the forward op and its gradient op.
reorg, reorg_grad = _reorg_module.reorg, _reorg_module.reorg_grad
import tensorflow as tf
import os.path as osp

# NOTE(review): absolute path into one developer's bazel output tree.
filename = '/home/alfonso/tensorflow/bazel-bin/tensorflow/core/user_ops/girshick_roipool/roi_pooling_girshick.so'
_roi_pooling_module = tf.load_op_library(filename)

# Public handles to the forward op and its gradient op.
roi_pool, roi_pool_grad = (
    _roi_pooling_module.roi_pool,
    _roi_pooling_module.roi_pool_grad,
)
def testZeroOut(self):
    """zero_out on an int vector keeps the first element and zeroes the rest."""
    op_library = tf.load_op_library('zero_out.so')
    int_input = [5, 4, 3, 2, 1]
    with self.test_session():
        zeroed = op_library.zero_out(int_input)
        self.assertAllEqual(zeroed.eval(), [5, 0, 0, 0, 0])
def __init__(self, data, is_training):
    """Build the permutation-prediction graph and its losses.

    Reads `data["image"]` and `data["label"]`, runs the backbone returned by
    `get_backbone()`, then for `FLAGS.iter_num` iterations predicts a
    permutation with the compiled `hungarian` op. Results are exposed as
    `self.perm_list`, `self.compute_gradients_losses` and
    `self.display_losses`.

    NOTE(review): the literal 9 used below appears to assume
    FLAGS.config == 3 (so FLAGS.config**2 == 9) -- confirm.
    """
    # NOTE(review): absolute, machine-specific path to the compiled op library.
    self.hungarian_module = tf.load_op_library(
        '/tmp/work/munkres/hungarian.so')
    self.tower_size = FLAGS.batch_size // FLAGS.gpu_num
    # For sample k this holds k * FLAGS.config**2, repeated 9 times and
    # flattened; it is added to per-sample indices further down.
    self.indices_bottom = tf.reshape(tf.tile(tf.expand_dims(
        list(range(self.tower_size)), 1), [1, 9]), [-1]) * \
        (FLAGS.config**2)

    images = tf.reshape(data["image"],
                        [-1, FLAGS.patch_size, FLAGS.patch_size, 3])
    labels = tf.reshape(data["label"], [-1])

    # Subtract the mean taken over axes -2 and -3 of each patch.
    mean = tf.reduce_mean(images, [-2, -3], keepdims=True)
    images = images - mean

    backbone = get_backbone()
    features, _ = backbone(images, is_training)

    if FLAGS.binary:
        features_b = tf.reshape(
            features,
            [self.tower_size, 9, features.get_shape().as_list()[-1]])
        print(features.shape)
        print(features_b.shape)
        binary_loss_list = []
        # One classification loss per ordered pair of distinct positions.
        for pair_i in range(FLAGS.config**2):
            for pair_j in range(FLAGS.config**2):
                if pair_i == pair_j:
                    continue
                pair_input = tf.concat(
                    [features_b[:, pair_i, :], features_b[:, pair_j, :]],
                    axis=1)
                pair_fc2 = fcLayer(pair_input, 512, bias_init=1.0,
                                   name="fc2_binary")
                pair_fc3 = fcLayer(pair_fc2, 9, std_init=0.01,
                                   name="fc3_binary", reluFlag=False)
                # The same target label is used for every sample in the tower.
                binary_one_hot = tf.one_hot(
                    [utils.pair_label_3[pair_i][pair_j]] * self.tower_size, 9)
                binary_loss = tf.losses.softmax_cross_entropy(
                    binary_one_hot, pair_fc3)
                binary_loss_list.append(binary_loss)
        mean_binary_loss = tf.reduce_mean(binary_loss_list)

    entropy_loss_list = []
    column_loss_list = []
    self.perm_list = [labels]
    # feature are firstly permuated by the input labels
    perm = labels
    for _ in range(FLAGS.iter_num):
        # reorder the features by indices
        indices = perm + self.indices_bottom
        gathered_features = tf.gather(features, indices)
        gathered_features = tf.reshape(gathered_features,
                                       [self.tower_size, -1])

        fc2 = fcLayer(gathered_features, 4096, bias_init=1.0, name="fc2")
        fc3 = fcLayer(fc2, FLAGS.config**4, std_init=0.01, name="fc3",
                      reluFlag=False)
        logits = tf.reshape(
            fc3, [self.tower_size, FLAGS.config**2, FLAGS.config**2])

        # loss function
        reshaped_perm = tf.reshape(perm, [self.tower_size,
                                          FLAGS.config**2])
        one_hot_perm = tf.one_hot(reshaped_perm, FLAGS.config**2)
        entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=one_hot_perm)
        prob = tf.nn.softmax(logits, axis=2)
        # Squared deviation from 1 of the probabilities summed over axis 1.
        column_loss = tf.square(tf.reduce_sum(prob, axis=1) - 1)

        # predict permuation of the next iteration by hungarian algorithm
        predicted_perm = self.hungarian_module.hungarian(-1 * prob)
        predicted_perm = tf.reshape(predicted_perm, [-1])

        # reorder the permutation for the next iteration based on the
        # current iteration
        predicted_indices = predicted_perm + self.indices_bottom
        predicted_indices = tf.reshape(predicted_indices, [-1, 1])
        new_perm = tf.scatter_nd(
            indices=predicted_indices,
            updates=perm,
            shape=[self.tower_size * (FLAGS.config**2)])
        perm = new_perm

        self.perm_list.append(perm)
        entropy_loss_list.append(entropy_loss)
        column_loss_list.append(column_loss)

    mean_column_loss = tf.reduce_mean(column_loss_list)
    mean_entropy_loss = tf.reduce_mean(entropy_loss_list)

    # Split trainable variables into three groups by substring of their name.
    all_var = tf.trainable_variables()
    backbone_var = [
        var for var in all_var
        if ("alexnet" in var.name) or ("resnet" in var.name)
    ]
    binary_var = [var for var in all_var if "binary" in var.name]
    unary_var = [
        var for var in all_var
        if (var not in backbone_var) and (var not in binary_var)
    ]

    # Only the entropy loss (and, if enabled, the binary loss) is listed for
    # gradient computation; the column loss is listed for display only.
    self.compute_gradients_losses = [{
        'value': mean_entropy_loss,
        'var_list': backbone_var + unary_var
    }]
    self.display_losses = [{
        'name': tf.convert_to_tensor('c_loss'),
        'value': mean_column_loss
    }, {
        'name': tf.convert_to_tensor('e_loss'),
        'value': mean_entropy_loss
    }]
    if FLAGS.binary:
        self.compute_gradients_losses.append({
            'value': mean_binary_loss,
            'var_list': backbone_var + binary_var
        })
        self.display_losses.append({
            'name': tf.convert_to_tensor('b_loss'),
            'value': mean_binary_loss
        })
import tensorflow as tf
import os.path as osp

# Locate the compiled library next to this module. ``__file__`` may be a
# relative path (e.g. when the file is run as a script on older Pythons), in
# which case ``dirname`` would be '' and a bare file name would be handed to
# the dynamic loader, which does not search the current directory. Resolving
# to an absolute path first makes the lookup independent of how the module
# was started.
filename = osp.join(osp.dirname(osp.abspath(__file__)), 'psalign_pooling.so')
_psalign_pooling_module = tf.load_op_library(filename)

# Public handles; the compiled ops are named ps_align_pool(_grad).
psalign_pool = _psalign_pooling_module.ps_align_pool
psalign_pool_grad = _psalign_pooling_module.ps_align_pool_grad
import tensorflow as tf
from tensorflow.python.framework import ops
import sys, os

# The compiled op library is expected next to this module.
base_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(base_dir)
nnquery_module = tf.load_op_library(os.path.join(base_dir, 'tf_nnquery_so.so'))


def build_sphere_neighbor(database, query, radius=0.1,
                          dilation_rate=None, nnsample=100):
    '''
    Input:
        database: (batch, npoint, 3+x) float32 array, database points
        query:    (batch, mpoint, 3) float32 array, query points
        radius:   float32, range search radius
        dilation_rate: float32, dilation rate of range search
        nnsample: int32, maximum number of neighbors to be sampled
    Output:
        nn_index: (batch, mpoint, nnsample) int32 array, neighbor and filter bin indices
        nn_count: (batch, mpoint) int32 array, number of neighbors
        nn_dist(optional): (batch, mpoint, nnsample) float32, sqrt distance array
    '''
    # Keep only the first three channels of both point sets.
    database = database[:, :, 0:3]
    query = query[:, :, 0:3]

    # A dilation rate, when given, scales the search radius.
    if dilation_rate is not None:
        radius = dilation_rate * radius
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops

# NOTE: the path is relative to the current working directory.
zero_out_module = tf.load_op_library('./zero_out.so')
zero_out = zero_out_module.zero_out
zero_out_float = zero_out_module.zero_out_float


@ops.RegisterGradient("ZeroOut")
def _zero_out_grad(op, grad):
  """The gradients for `zero_out`.

  Args:
    op: The `zero_out` `Operation` that we are differentiating, which we can use
      to find the inputs and outputs of the original op.
    grad: Gradient with respect to the output of the `zero_out` op.

  Returns:
    Gradients with respect to the input of `zero_out`.
  """
  to_zero = op.inputs[0]
  shape = array_ops.shape(to_zero)
  # An all-zero index with one entry per input dimension.
  index = array_ops.zeros_like(shape)
  # First element of the flattened incoming gradient.
  first_grad = array_ops.reshape(grad, [-1])[0]
  # Tensor of the input's shape: first_grad at `index`, 0 everywhere else.
  to_zero_grad = sparse_ops.sparse_to_dense([index], shape, first_grad, 0)
  return [to_zero_grad]  # List of one Tensor, since we have one input


@ops.RegisterGradient("ZeroOutFloat")
def _zero_out_float_grad(op, grad):
  """The gradients for `zero_out_float`.
import tensorflow as tf
from tensorflow.python.framework import ops
import sys
import os

# Directory holding this module; the compiled op library sits next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
grouping_module = tf.load_op_library(
    os.path.join(BASE_DIR, 'tf_grouping_so.so'))


def query_ball_point(radius, nsample, xyz1, xyz2):
    '''Run the compiled ball-query op.

    Input:
        radius: float32, ball search radius
        nsample: int32, number of points selected in each ball region
        xyz1: (batch_size, ndataset, 3) float32 array, input points
        xyz2: (batch_size, npoint, 3) float32 array, query points
    Output:
        idx: (batch_size, npoint, nsample) int32 array, indices to input points
        pts_cnt: (batch_size, npoint) int32 array, number of unique points in each local region
    '''
    # The op takes the point sets first, then radius and nsample, i.e. a
    # different order from this wrapper's own signature.
    ball_query_op = grouping_module.query_ball_point
    return ball_query_op(xyz1, xyz2, radius, nsample)


ops.NoGradient('QueryBallPoint')


def select_top_k(k, dist):
    '''Run the compiled selection-sort op.

    Input:
        k: int32, number of k SMALLEST elements selected
        dist: (b,m,n) float32 array, distance matrix, m query points, n dataset points
    Output:
        idx: (b,m,n) int32 array, first k in n are indices to the top k
        dist_out: (b,m,n) float32 array, first k in n are the top k
    '''
    # The op takes the distance matrix first, then k.
    selection_sort_op = grouping_module.selection_sort
    return selection_sort_op(dist, k)
# limitations under the License. # usage example #python ckpt_quantization.py --init_checkpoint=squad_model/QAT_noresidualQuant/model.ckpt-5474 --quantized_checkpoint=squad_model/QAT_noresidualQuant_quantized/model.ckpt import tensorflow as tf import numpy as np from tensorflow.contrib.framework.python.framework import checkpoint_utils from tensorflow.python.ops import io_ops from tensorflow.python.training.saver import BaseSaverBuilder import os import re build_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../lib') transformer_op_module = tf.load_op_library( os.path.join(build_path, 'libtf_weight_quantize.so')) ACTIVATION_AMAX_NUM = 80 INT8O_GEMM_NUM = 8 def checkpoint_quantization(in_checkpoint_file, out_checkpoint_file, per_channel_quantization): var_list = checkpoint_utils.list_variables(tf.flags.FLAGS.init_checkpoint) def init_graph(): restore_vars = [] layer_num = 0 regex = re.compile('layer_\d+') amaxTotalNum = 0 for name, shape in var_list:
import numpy
import tensorflow as tf

# Handle to the compiled op; the bare file name leaves lookup to the dynamic loader.
test = tf.load_op_library("calculate_proportions.so").calculate_proportions

a = numpy.ones([10, 10, 10, 10], dtype=numpy.float32)
b = numpy.zeros([10, 10, 10, 10], dtype=numpy.int32)
for i in range(10):
    # Slice i along the last axis of `b` is filled with the value i.
    b[:, :, :, i] = i
    d = 0
    for ii in range(10):
        for iii in range(10):
            for iiii in range(10):
                # Every entry of `a` is overwritten with a random value.
                a[ii, iii, iiii, i] = numpy.random.rand()
                # a[ii,iii,iiii,i] = float(d*i)
                d += 1

aa = tf.placeholder(tf.float32, [10, 10, 10, 10])
bb = tf.placeholder(tf.int32, [10, 10, 10, 10])
an = tf.placeholder(tf.float32, [10, 3, 3, 10, 10])

# Build the same op once on the CPU and once on the GPU; both results are
# fetched in a single run below.
with tf.device('cpu:0'):
    c = test(aa, bb, an)
with tf.device('gpu:0'):
    e = test(aa, bb, an)

# init = tf.global_variables_initializer()
sess = tf.Session()
# sess.run(init)
res = sess.run([c, e], feed_dict={
import tensorflow as tf
from tensorflow.python.framework import ops
import os

# The compiled op library is expected next to this module (symlinks resolved).
module_path = os.path.realpath(__file__)
module_dir = os.path.dirname(module_path)
lib_path = os.path.join(module_dir, 'roi_pooling.so')
roi_pooling_module = tf.load_op_library(lib_path)


def roi_pooling(input, rois, pool_height, pool_width):
    """
      returns a tensorflow operation for computing the Region of Interest Pooling

      @arg input: feature maps on which to perform the pooling operation
      @arg rois: list of regions of interest in the format (feature map index, upper left, bottom right)
      @arg pool_height: passed to the op as `pool_height`
      @arg pool_width: size of the pooling sections
    """
    # TODO(maciek): ops scope
    out = roi_pooling_module.roi_pooling(input, rois, pool_height=pool_height, pool_width=pool_width)
    # The op yields two outputs; only the first is returned to the caller.
    output, argmax_output = out[0], out[1]
    return output


@ops.RegisterGradient("RoiPooling")
def _RoiPoolingGrad(op, *grads):
    # Unpack the forward op's inputs and outputs, plus the first incoming gradient.
    orig_inputs = op.inputs[0]
    orig_rois = op.inputs[1]
    orig_output = op.outputs[0]
    orig_argmax_output = op.outputs[1]

    orig_output_grad = grads[0]
parser.add_argument("-steps", help="number of steps in task graph") parser.add_argument( "-kernel_type", help="kernel type for task graph [OPTIONS: busy_wait, ...]") parser.add_argument("-iter", help="number of iterations for task graph") args = parser.parse_args() # load core header file, set up calling core in c with ffi core_header = subprocess.check_output( ['gcc', '-D', '__attribute__(x)=', '-E', '-P', '../../../core/core_c.h']).decode('utf-8') ffi = cffi.FFI() ffi.cdef(core_header) c = ffi.dlopen("../../../core/libcore.so") no_input_module = tf.load_op_library("../../input_ops/no_input_op/no_input.so") no_input = no_input_module.no_input two_input_module = tf.load_op_library( "../../input_ops/two_input_op/two_input.so") two_input = two_input_module.two_input three_input_module = tf.load_op_library( "../../input_ops/three_input_op/three_input.so") three_input = three_input_module.three_input def app_from_core(): argv_elements = [ ffi.new("char[]", ""), ffi.new("char[]", "-type"), ffi.new("char[]", args.type), ffi.new("char[]", "-width"),