コード例 #1
0
ファイル: clustering.py プロジェクト: kbrems/opveclib
    def op(self, data, center):
        """Assign every data point to its nearest cluster center.

        Thread pool over nData (one worker per data point). This may generate too many threads for
        some input data. Each worker scans all centers, sums the squared per-dimension differences
        between its data point and the center, and remembers the index of the center with the
        smallest sum. On ties the center with the lowest index wins because the comparison is strict.

        :param data: 2D matrix as data input with dimensions: nDim x nData.
        :type data: numpy array.
        :param center: 2D matrix of initial cluster centers with dimensions: nDim x nCenter.
        :type center: numpy array.
        :return minIndex: 1D matrix of assignments of data points to cluster centers: nData x 1.
            The indices are stored as float64 values.
        :raises AssertionError: if data and center do not have the same number of dimensions (rows).
        """
        nDimData    = data.shape[0]
        nDimCenter  = center.shape[0]
        nData       = data.shape[1]
        nCenter     = center.shape[1]
        # Both placeholders must be %d; a bare "% d..." is parsed as a space-flagged %d and garbles the text.
        assert nDimData == nDimCenter, \
            "Data has %d dimensions and centers have %d dimensions, but these must match!" % (nDimData, nDimCenter)
        nDim        = nDimData
        iSample     = ops.position_in(nData)[0]
        # Start from the largest representable float so that the first center always becomes the minimum.
        minDist     = ops.variable(sys.float_info.max, data.dtype)
        iMin        = ops.variable(0, ops.int64)
        for iCenter in ops.arange(nCenter):
            dist = ops.variable(0, center.dtype)
            for iDim in ops.arange(nDim):
                diff = data[iDim,iSample]-center[iDim,iCenter]
                dist <<= dist + diff*diff
            with ops.if_(dist < minDist):
                iMin <<= iCenter
                minDist <<= dist

        # TODO: Change this to uint64 whenever ovl supports non-floating point types for tensors.
        minIndex = ops.output(nData, ops.float64) # Use float64 because tensorflow does not support uint64 as type yet.
        minIndex[iSample] = ops.cast(iMin, ops.float64)

        return minIndex
コード例 #2
0
ファイル: graph.py プロジェクト: kbrems/opveclib
    def op(self, startEdge, fromVertex, toVertex):
        """Count triangles per edge by merging the neighbor lists of the edge's two end points.

        The array toVertex is a flattened list of lists structure, where startEdge encodes the start
        indices of the separate lists. One worker is mapped to each entry of toVertex, i.e. to each edge.

        :param startEdge: Indices into toVertex where edges start.
        :type startEdge: list.
        :param fromVertex: The from-vertex of each edge.
        :type fromVertex: list.
        :param toVertex: The to-vertex of each edge.
        :type toVertex: list.
        :return: Counts of triangles per edge.
        """
        edge        = ops.position_in(toVertex.shape)[0]
        perEdge     = ops.output(toVertex.shape, ops.uint64)
        found       = ops.variable(0, ops.uint64)

        # Cursor over the neighbor list of the edge's from-vertex.
        src         = ops.variable(fromVertex[edge], fromVertex.dtype)
        srcCur      = ops.variable(startEdge[src], startEdge.dtype)
        srcEnd      = ops.variable(startEdge[src+1], startEdge.dtype)
        srcNbr      = ops.variable(toVertex[srcCur], toVertex.dtype)

        # Cursor over the neighbor list of the edge's to-vertex.
        dst         = ops.variable(toVertex[edge], toVertex.dtype)
        dstCur      = ops.variable(startEdge[dst], startEdge.dtype)
        dstEnd      = ops.variable(startEdge[dst+1], startEdge.dtype)
        dstNbr      = ops.variable(toVertex[dstCur], toVertex.dtype)

        # Upper bound on the number of merge steps: the combined length of both neighbor lists.
        maxSteps    = dstEnd-dstCur + srcEnd-srcCur

        # A bounded for-loop stands in for a while loop, which the operator language lacks.
        # TODO: Replace this construct by a while loop once it is available in ovl.
        for _ in ops.arange(maxSteps):
            # Keep merging while both cursors are inside their lists and the current neighbor of the
            # from-vertex is still smaller than the to-vertex.
            active = ops.logical_and(
                ops.logical_and(srcCur < srcEnd, dstCur < dstEnd),
                srcNbr < dst)

            with ops.if_(active):

                with ops.if_(srcNbr < dstNbr):
                    # from-list is behind: advance it.
                    srcCur <<= srcCur+1
                    srcNbr <<= toVertex[srcCur]

                with ops.elif_(srcNbr > dstNbr):
                    # to-list is behind: advance it.
                    dstCur <<= dstCur+1
                    dstNbr <<= toVertex[dstCur]

                with ops.else_():
                    # Common neighbor: one triangle, advance both lists.
                    found <<= found+1
                    srcCur <<= srcCur+1
                    dstCur <<= dstCur+1
                    srcNbr <<= toVertex[srcCur]
                    dstNbr <<= toVertex[dstCur]

        # TODO: Use a reduction function that computes a partial or complete sum.
        perEdge[edge] = found # Save the triangles for each edge.

        return perEdge
コード例 #3
0
ファイル: clustering.py プロジェクト: kbrems/opveclib
    def op(self, data, minIndex, nCenter):
        """Recompute each cluster center as the mean of the data points assigned to it.

        Thread pool over nCenter (one worker per center). That should be fine for most cases.
        Each worker walks over all data points, sums those whose assignment equals its own center
        index, and divides the sum by the number of matching points.

        :param data: 2D matrix as data input with dimensions: nDim x nData.
        :type data: numpy array.
        :param minIndex: 1D matrix of assignments of data points to cluster centers: nData x 1.
        :type minIndex: numpy array.
        :param nCenter: Number of cluster centers; used as the workgroup size and as the second
            extent of the output.
        :type nCenter: int (assumed from its use as a size; confirm against the caller).
        :return: 2D matrix with computed cluster centers with dimensions: nDim x nCenter.
        :raises AssertionError: if minIndex does not hold exactly one entry per data point.
        """
        nDim    = data.shape[0]
        nData   = data.shape[1]
        # The message names minIndex, the argument that is actually being checked.
        assert nData==minIndex.shape[0], \
            "Data has %d values and minIndex has %d values, but these must match!" % (nData, minIndex.shape[0])
        iCenter = ops.position_in(nCenter)[0]
        center  = ops.zeros([nDim,nCenter], data.dtype)
        count   = ops.variable(0, data.dtype)
        for iSample in ops.arange(nData):
            # minIndex stores indices as floating point values, so cast before comparing.
            with ops.if_(iCenter==ops.cast(minIndex[iSample], ops.uint32)):
                count <<= count + 1
                for iDim in ops.arange(nDim):
                    center[iDim,iCenter] = center[iDim,iCenter] + data[iDim,iSample]

        newCenter = ops.output([nDim,nCenter],data.dtype)
        # NOTE(review): count stays 0 for a center without assigned points, so this divides by zero
        # for empty clusters - confirm how callers expect that case to be handled.
        for iDim in ops.arange(nDim):
            newCenter[iDim,iCenter] = center[iDim,iCenter]/count
        return newCenter
コード例 #4
0
ファイル: lstm.py プロジェクト: spacekitteh/opveclib
def sig_grad(arg):
    """Return exp(-arg) / (1 + exp(-arg))**2, the derivative of the logistic sigmoid.

    The value is only computed for -50 < arg < 50; outside of that open interval the result is 0.
    (Presumably the range limit keeps exp() from overflowing - confirm.)

    :param arg: scalar expression at which to evaluate the gradient.
    :return: a variable of arg's dtype holding the gradient.
    """
    in_range = ovl.logical_and(arg > -50, arg < 50)
    grad = ovl.variable(0, arg.dtype)
    with ovl.if_(in_range):
        decay = ovl.exp(-arg)
        denominator = (1+decay)*(1+decay)
        grad <<= decay/denominator
    return grad
コード例 #5
0
def accumulate(x, inner_fcn=None, axis=None):
    """
    Define the operator function for a running accumulation (scan) along one axis.

    Along the accumulation axis, output element 0 equals input element 0 and output element i equals
    ``inner_fcn(output element i-1, input element i)``. One worker handles one complete line along the
    accumulation axis.

    :param x: The input tensor
    :param inner_fcn: a lambda function to be applied for accumulation; called as
        ``inner_fcn(accumulated_so_far, next_element)``
    :param axis: The axis across which accumulation will be applied; an int with 0 <= axis < x.rank
    :return: The accumulated result, an output tensor created like x
    :raises AssertionError: if axis is not an int or is out of range for x
    """

    # assert that the axis parameter makes sense
    assert isinstance(axis, int)
    assert axis >= 0
    assert axis < x.rank

    # Define the workgroup shape. Here we use a single worker to perform the accumulation across the
    # accumulation axis. The workgroup shape is then the size of all other axes with accumulation axis removed.
    # Compare the rank by value: 'is' tests object identity and is not a reliable integer comparison.
    if x.rank == 1:
        workgroup_shape = [1]
    else:
        workgroup_shape = [num_elements
                           for cur_dim, num_elements in enumerate(x.shape)
                           if cur_dim != axis]
    pos = ovl.position_in(workgroup_shape)

    # Define the accumulated output to be the same type as the input
    out = ovl.output_like(x)

    # Define a function for determining the index of the input tensor as a function of accumulation axis position
    # and the current worker position. This is equal to the worker position with the accumulation axis position
    # inserted where it should be in the indexing order.
    def resolve_position(axis_n):
        cur_pos = []
        offset = 0
        for cur_dim in range(x.rank):
            if cur_dim == axis:
                cur_pos.append(axis_n)
                # dimensions after the accumulation axis sit one slot earlier in the worker position
                offset = 1
            else:
                cur_pos.append(pos[cur_dim - offset])
        return cur_pos

    # initialize accumulator to be the first element along the accumulation axis
    initial_value = x[resolve_position(0)]
    accum = ovl.variable(initial_value, x.dtype)
    out[resolve_position(0)] = accum

    # use this worker to iterate over and accumulate the rest of the elements in the accumulation axis
    for i in ovl.arange(1, x.shape[axis]):
        accum <<= inner_fcn(accum, x[resolve_position(i)])
        out[resolve_position(i)] = accum

    return out
コード例 #6
0
def accumulate(x, inner_fcn=None, axis=None):
    """
    Define the operator function for a running accumulation (scan) along one axis.

    Along the accumulation axis, output element 0 equals input element 0 and output element i equals
    ``inner_fcn(output element i-1, input element i)``. One worker handles one complete line along the
    accumulation axis.

    :param x: The input tensor
    :param inner_fcn: a lambda function to be applied for accumulation; called as
        ``inner_fcn(accumulated_so_far, next_element)``
    :param axis: The axis across which accumulation will be applied; an int with 0 <= axis < x.rank
    :return: The accumulated result, an output tensor created like x
    :raises AssertionError: if axis is not an int or is out of range for x
    """

    # assert that the axis parameter makes sense
    assert isinstance(axis, int)
    assert axis >= 0
    assert axis < x.rank

    # Define the workgroup shape. Here we use a single worker to perform the accumulation across the
    # accumulation axis. The workgroup shape is then the size of all other axes with accumulation axis removed.
    # Compare the rank by value: 'is' tests object identity and is not a reliable integer comparison.
    if x.rank == 1:
        workgroup_shape = [1]
    else:
        workgroup_shape = [num_elements
                           for cur_dim, num_elements in enumerate(x.shape)
                           if cur_dim != axis]
    pos = ovl.position_in(workgroup_shape)

    # Define the accumulated output to be the same type as the input
    out = ovl.output_like(x)

    # Define a function for determining the index of the input tensor as a function of accumulation axis position
    # and the current worker position. This is equal to the worker position with the accumulation axis position
    # inserted where it should be in the indexing order.
    def resolve_position(axis_n):
        cur_pos = []
        offset = 0
        for cur_dim in range(x.rank):
            if cur_dim == axis:
                cur_pos.append(axis_n)
                # dimensions after the accumulation axis sit one slot earlier in the worker position
                offset = 1
            else:
                cur_pos.append(pos[cur_dim-offset])
        return cur_pos

    # initialize accumulator to be the first element along the accumulation axis
    initial_value = x[resolve_position(0)]
    accum = ovl.variable(initial_value, x.dtype)
    out[resolve_position(0)] = accum

    # use this worker to iterate over and accumulate the rest of the elements in the accumulation axis
    for i in ovl.arange(1, x.shape[axis]):
        accum <<= inner_fcn(accum, x[resolve_position(i)])
        out[resolve_position(i)] = accum

    return out
コード例 #7
0
def conv_1d(x,
            v,
            kernel_orientation='as-is',
            stride=1,
            mode='same',
            data_format='NCE'):
    """
    Define the operator function for a batched, multi-channel 1D convolution / cross-correlation.

    With the block sizes hard-coded below, each worker produces up to 10 consecutive output elements
    for one batch entry and one filter. Contributions of all input channels are summed in a float64
    accumulator and cast to the dtype of x when written out. Input positions outside of x count as 0.

    :param x: A rank-3 input tensor whose axes are ordered as given by data_format, e.g.
        [num_batches, num_channels, num_elements] for 'NCE'.
    :param v: A rank-3 filter/kernel. It is indexed with the same axis positions as x, e.g.
        [num_filters, num_channels, kernel_size] for 'NCE'.
    :param kernel_orientation: The orientation of the kernel to use: 'as-is' or 'flipped'. This language is used
        rather than 'convolution' or 'cross-correlation' since the terms have become overloaded and ambiguous across
        some fields. As defined in https://en.wikipedia.org/wiki/Cross-correlation#Properties, 'as-is' yields the
        cross-correlation and 'flipped' yields the convolution.
    :param stride: kernel stride to use. It is validated (an int between 1 and num_elements) but is not
        referenced anywhere else in this function. NOTE(review): confirm whether strides other than 1
        are meant to be supported.
    :param mode: border mode: 'same', 'valid', or 'full'. The number of output elements is num_elements
        for 'same', num_elements - kernel_size + 1 for 'valid' and num_elements + kernel_size - 1 for 'full'.
    :param data_format: order of the dimensions in the input: 'NCE', 'NEC' etc. Must contain each of
        'N', 'C' and 'E' exactly once.
    :return: an output tensor with the dtype of x whose axes, in data_format order, have the sizes
        num_batches ('N'), num_filters ('C') and the mode-dependent number of output elements ('E').
    :raises ValueError: for an unknown kernel_orientation or mode, a channel count mismatch between x
        and v, an invalid stride, or (modes 'same' and 'valid') a kernel longer than the input.
    :raises AssertionError: if x or v is not of rank 3 or data_format is malformed.
    """

    if kernel_orientation != 'as-is' and kernel_orientation != 'flipped':
        raise ValueError("kernel_orientation must be 'as-is' or 'flipped'")

    # resolve data layout based on data_format input
    assert x.rank == 3
    assert len(data_format) == 3
    assert data_format.count('N') == 1
    assert data_format.count('C') == 1
    assert data_format.count('E') == 1

    n_axis = data_format.find('N')
    c_axis = data_format.find('C')
    e_axis = data_format.find('E')

    num_elements = x.shape[e_axis]
    num_channels = x.shape[c_axis]
    num_batches = x.shape[n_axis]

    assert v.rank == 3
    if num_channels != v.shape[c_axis]:
        raise ValueError(
            'Channel axis size of input must match that of the filter.')

    # The filter tensor is read with the same axis positions as the input: filters on the 'N' axis,
    # kernel taps on the 'E' axis.
    num_filters = v.shape[n_axis]
    filter_size = v.shape[e_axis]
    # Number of kernel taps to the left / right of the kernel's center tap.
    left_apron = filter_size // 2
    right_apron = filter_size - left_apron - 1

    # Note: the message is also raised when stride exceeds num_elements.
    if not isinstance(stride, int) or stride < 1 or stride > num_elements:
        raise ValueError('Stride must be a positive integer')

    # starting_element / ending_element delimit the (possibly negative) input positions at which the
    # first kernel tap is placed; their difference is the number of output elements.
    if mode == 'same':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ',  cannot be larger than number of elements, ' +
                             str(num_elements))

        starting_element = -left_apron
        ending_element = num_elements - left_apron
    elif mode == 'valid':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ',  cannot be larger than number of elements, ' +
                             str(num_elements))

        starting_element = 0
        ending_element = num_elements - (left_apron + right_apron)
    elif mode == 'full':
        starting_element = -(filter_size - 1)
        ending_element = num_elements
    else:
        raise ValueError("mode must be 'same', 'valid', or 'full'.")

    output_elements = (ending_element - starting_element)

    output_shape = [0, 0, 0]
    output_shape[n_axis] = num_batches
    output_shape[c_axis] = num_filters
    output_shape[e_axis] = output_elements
    output = ovl.output(output_shape, x.dtype)

    # Split the work into blocks: each worker owns filters_per_worker x batches_per_worker x
    # elements_per_worker outputs. The worker count per axis is rounded up, so the last worker of an
    # axis may own a smaller block (the remainder).
    filters_per_worker = 1
    filter_workers, filter_remainder = divmod(num_filters, filters_per_worker)
    if filter_remainder > 0:
        filter_workers += 1

    batches_per_worker = 1
    batch_workers, batch_remainder = divmod(num_batches, batches_per_worker)
    if batch_remainder > 0:
        batch_workers += 1

    elements_per_worker = 10
    element_workers, element_remainder = divmod(output_elements,
                                                elements_per_worker)
    if element_remainder > 0:
        element_workers += 1

    workgroup_shape = [batch_workers, filter_workers, element_workers]
    ovl.logger.debug(u'    workgroup_shape: ' + str(workgroup_shape))
    pos = ovl.position_in(workgroup_shape)
    cur_batch_block = pos[0]
    cur_filter_block = pos[1]
    cur_element_block = pos[2]

    # Actual block extents for this worker: the full block size, except for the last worker along an
    # axis when the axis size is not a multiple of the block size.
    num_block_batches = ovl.variable(batches_per_worker, ovl.uint32)
    if batch_remainder > 0:
        with ovl.if_(cur_batch_block == batch_workers - 1):
            num_block_batches <<= batch_remainder

    num_block_filters = ovl.variable(filters_per_worker, ovl.uint32)
    if filter_remainder > 0:
        with ovl.if_(cur_filter_block == filter_workers - 1):
            num_block_filters <<= filter_remainder

    num_block_elements = ovl.variable(elements_per_worker, ovl.uint32)
    if element_remainder > 0:
        with ovl.if_(cur_element_block == element_workers - 1):
            num_block_elements <<= element_remainder

    # Worker-local float64 accumulator for every output of the block; it sums over all channels.
    accum = ovl.zeros(
        (batches_per_worker, filters_per_worker, elements_per_worker),
        ovl.float64)  #4*4

    # Worker-local copies of the kernel taps and of a window of filter_size input elements.
    filter_block = ovl.zeros((filters_per_worker, filter_size), v.dtype)  #4*10
    input_block = ovl.zeros((batches_per_worker, filter_size), x.dtype)  #4*10
    for cur_channel in ovl.arange(num_channels):

        # load all filters for this channel
        for intra_block_filter in ovl.arange(filters_per_worker):
            for f_pos in ovl.arange(filter_size):
                filter_index = [None, None, None]
                filter_index[c_axis] = cur_channel
                filter_index[n_axis] = ovl.cast(
                    intra_block_filter,
                    ovl.uint32) + cur_filter_block * filters_per_worker
                if kernel_orientation == 'as-is':
                    filter_index[e_axis] = f_pos
                elif kernel_orientation == 'flipped':
                    # read the kernel back to front
                    filter_index[e_axis] = filter_size - f_pos - 1
                else:
                    # Not reachable: kernel_orientation was already validated at the top.
                    raise ValueError(
                        "kernel_orientation must be 'as-is' or 'flipped'")
                filter_block[intra_block_filter, f_pos] = v[filter_index]

        # load initial inputs for this channel
        # input_block is used as a circular buffer; buffer_head is the slot holding the oldest
        # (left-most) element of the current window.
        buffer_head = ovl.variable(0, ovl.uint32)
        for intra_block_batch in ovl.arange(num_block_batches):
            cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
            for f_pos in ovl.arange(filter_size):
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch

                # NOTE(review): starting_element can be negative while the other operands are cast to
                # uint64 - confirm that ovl's type promotion keeps the '>= 0' bound check meaningful.
                x_elem_index = starting_element + ovl.cast(
                    cur_element_block * elements_per_worker,
                    ovl.uint64) + ovl.cast(f_pos, ovl.uint64)
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0,
                                                  x_elem_index < num_elements)
                # positions outside of the input contribute zeros (zero padding)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, f_pos] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, f_pos] = 0

        # Slide the window over the block's output elements: accumulate the products of window and
        # kernel, then replace the oldest window element by the next input element.
        for intra_block_element in ovl.arange(num_block_elements):
            cur_elem = intra_block_element + cur_element_block * elements_per_worker
            for intra_block_batch in ovl.arange(num_block_batches):
                cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
                for intra_block_filter in ovl.arange(num_block_filters):
                    for f_pos in ovl.arange(filter_size):
                        # slot of the f_pos-th window element inside the circular buffer
                        x_pos = (buffer_head +
                                 ovl.cast(f_pos, ovl.uint32)) % filter_size
                        cur_x = ovl.cast(input_block[intra_block_batch, x_pos],
                                         ovl.float64)
                        cur_v = ovl.cast(
                            filter_block[intra_block_filter, f_pos],
                            ovl.float64)
                        accum[intra_block_batch, intra_block_filter, intra_block_element] = \
                            accum[intra_block_batch, intra_block_filter, intra_block_element] + cur_x * cur_v

                # load new element
                # It overwrites the slot at buffer_head, i.e. the oldest element of the window.
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch
                x_elem_index = starting_element + cur_elem + filter_size
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0,
                                                  x_elem_index < num_elements)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, buffer_head] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, buffer_head] = 0

            # advance the head once per output element, after all batches of the block were updated
            buffer_head <<= (buffer_head + 1) % filter_size

    # Write the accumulated block to the output tensor, cast back to the output dtype.
    for intra_block_batch in ovl.arange(num_block_batches):
        cur_batch = intra_block_batch + cur_batch_block * batches_per_worker
        for intra_block_filter in ovl.arange(num_block_filters):
            cur_filter = intra_block_filter + cur_filter_block * filters_per_worker
            for intra_block_element in ovl.arange(num_block_elements):
                cur_elem = intra_block_element + cur_element_block * elements_per_worker

                output_index = [None, None, None]
                output_index[n_axis] = cur_batch
                output_index[e_axis] = cur_elem
                output_index[c_axis] = cur_filter
                output[output_index] = ovl.cast(
                    accum[intra_block_batch, intra_block_filter,
                          intra_block_element], output.dtype)

    return output
コード例 #8
0
ファイル: test_graph.py プロジェクト: codeaudit/opveclib
def graph_triangle_count(startEdge, fromVertex, toVertex):
    """Counts the triangles in an undirected graph.

    The graph must be given as an adjacency list in which every list of vertex neighbors is sorted.

    Parallelization follows the edge-based iterator strategy: one thread is mapped to each edge.

    Outline of the algorithm:
        1. Visit every edge (u, v).
        2. Let N(u) and N(v) be the neighbor indices of vertex u and vertex v.
        3. Add | N(u) /\\ N(v) | to the triangle counter, where /\\ is the set intersection operator.

    An order is enforced on the vertices so that a triangle is counted once rather than three times.

    Attributes: None.

    The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the
    separate lists.

    :param startEdge: Indices into toVertex where edges start.
    :type startEdge: list.
    :param fromVertex: The from-vertex of each edge.
    :type fromVertex: list.
    :param toVertex: The to-vertex of each edge.
    :type toVertex: list.
    :return: Counts of triangles per edge.
    """
    edge_id = ovl.position_in(toVertex.shape)[0]
    triangles_per_edge = ovl.output(toVertex.shape, ovl.uint64)
    triangles = ovl.variable(0, ovl.uint64)

    # Cursor over the sorted neighbor list of the edge's from-vertex.
    u = ovl.variable(fromVertex[edge_id], fromVertex.dtype)
    u_cursor = ovl.variable(startEdge[u], startEdge.dtype)
    u_stop = ovl.variable(startEdge[u + 1], startEdge.dtype)
    u_neighbor = ovl.variable(toVertex[u_cursor], toVertex.dtype)

    # Cursor over the sorted neighbor list of the edge's to-vertex.
    v = ovl.variable(toVertex[edge_id], toVertex.dtype)
    v_cursor = ovl.variable(startEdge[v], startEdge.dtype)
    v_stop = ovl.variable(startEdge[v + 1], startEdge.dtype)
    v_neighbor = ovl.variable(toVertex[v_cursor], toVertex.dtype)

    # Maximum number of merges: the combined length of both neighbor lists.
    merge_limit = v_stop - v_cursor + u_stop - u_cursor

    # A bounded for-loop stands in for a while loop, which the operator language lacks.
    # TODO: Replace this construct by a while loop once it is available in ovl.
    for _ in ovl.arange(merge_limit):
        both_in_range = ovl.logical_and(u_cursor < u_stop, v_cursor < v_stop)
        # Only neighbors of u that are smaller than v take part (vertex ordering).
        keep_merging = ovl.logical_and(both_in_range, u_neighbor < v)

        with ovl.if_(keep_merging):

            with ovl.if_(u_neighbor < v_neighbor):
                # u's list is behind: advance it.
                u_cursor <<= u_cursor + 1
                u_neighbor <<= toVertex[u_cursor]

            with ovl.elif_(u_neighbor > v_neighbor):
                # v's list is behind: advance it.
                v_cursor <<= v_cursor + 1
                v_neighbor <<= toVertex[v_cursor]

            with ovl.else_():
                # Shared neighbor found: count it and advance both lists.
                triangles <<= triangles + 1
                u_cursor <<= u_cursor + 1
                v_cursor <<= v_cursor + 1
                u_neighbor <<= toVertex[u_cursor]
                v_neighbor <<= toVertex[v_cursor]

    # TODO: Use a reduction function that computes a partial or complete sum.
    triangles_per_edge[edge_id] = triangles  # Save the triangles for each edge.

    return triangles_per_edge
コード例 #9
0
ファイル: conv1D.py プロジェクト: hewlettpackardlabs/opveclib
def conv_1d(x, v, kernel_orientation='as-is', stride=1, mode='same', data_format='NCE'):
    """
    Define the operator function for a batched, multi-channel 1D convolution / cross-correlation.

    With the block sizes hard-coded below, each worker produces up to 10 consecutive output elements
    for one batch entry and one filter. Contributions of all input channels are summed in a float64
    accumulator and cast to the dtype of x when written out. Input positions outside of x count as 0.

    :param x: A rank-3 input tensor whose axes are ordered as given by data_format, e.g.
        [num_batches, num_channels, num_elements] for 'NCE'.
    :param v: A rank-3 filter/kernel. It is indexed with the same axis positions as x, e.g.
        [num_filters, num_channels, kernel_size] for 'NCE'.
    :param kernel_orientation: The orientation of the kernel to use: 'as-is' or 'flipped'. This language is used
        rather than 'convolution' or 'cross-correlation' since the terms have become overloaded and ambiguous across
        some fields. As defined in https://en.wikipedia.org/wiki/Cross-correlation#Properties, 'as-is' yields the
        cross-correlation and 'flipped' yields the convolution.
    :param stride: kernel stride to use. It is validated (an int between 1 and num_elements) but is not
        referenced anywhere else in this function. NOTE(review): confirm whether strides other than 1
        are meant to be supported.
    :param mode: border mode: 'same', 'valid', or 'full'. The number of output elements is num_elements
        for 'same', num_elements - kernel_size + 1 for 'valid' and num_elements + kernel_size - 1 for 'full'.
    :param data_format: order of the dimensions in the input: 'NCE', 'NEC' etc. Must contain each of
        'N', 'C' and 'E' exactly once.
    :return: an output tensor with the dtype of x whose axes, in data_format order, have the sizes
        num_batches ('N'), num_filters ('C') and the mode-dependent number of output elements ('E').
    :raises ValueError: for an unknown kernel_orientation or mode, a channel count mismatch between x
        and v, an invalid stride, or (modes 'same' and 'valid') a kernel longer than the input.
    :raises AssertionError: if x or v is not of rank 3 or data_format is malformed.
    """

    if kernel_orientation != 'as-is' and kernel_orientation != 'flipped':
        raise ValueError("kernel_orientation must be 'as-is' or 'flipped'")

    # resolve data layout based on data_format input
    assert x.rank == 3
    assert len(data_format) == 3
    assert data_format.count('N') == 1
    assert data_format.count('C') == 1
    assert data_format.count('E') == 1

    n_axis = data_format.find('N')
    c_axis = data_format.find('C')
    e_axis = data_format.find('E')

    num_elements = x.shape[e_axis]
    num_channels = x.shape[c_axis]
    num_batches = x.shape[n_axis]

    assert v.rank == 3
    if num_channels != v.shape[c_axis]:
        raise ValueError('Channel axis size of input must match that of the filter.')

    # The filter tensor is read with the same axis positions as the input: filters on the 'N' axis,
    # kernel taps on the 'E' axis.
    num_filters = v.shape[n_axis]
    filter_size = v.shape[e_axis]
    # Number of kernel taps to the left / right of the kernel's center tap.
    left_apron = filter_size // 2
    right_apron = filter_size - left_apron - 1

    # Note: the message is also raised when stride exceeds num_elements.
    if not isinstance(stride, int) or stride < 1 or stride > num_elements:
        raise ValueError('Stride must be a positive integer')

    # starting_element / ending_element delimit the (possibly negative) input positions at which the
    # first kernel tap is placed; their difference is the number of output elements.
    if mode == 'same':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ',  cannot be larger than number of elements, ' + str(num_elements))

        starting_element = -left_apron
        ending_element = num_elements - left_apron
    elif mode == 'valid':
        if filter_size > num_elements:
            raise ValueError('filter size, ' + str(filter_size) +
                             ',  cannot be larger than number of elements, ' + str(num_elements))

        starting_element = 0
        ending_element = num_elements - (left_apron + right_apron)
    elif mode == 'full':
        starting_element = -(filter_size - 1)
        ending_element = num_elements
    else:
        raise ValueError("mode must be 'same', 'valid', or 'full'.")

    output_elements = (ending_element - starting_element)

    output_shape = [0, 0, 0]
    output_shape[n_axis] = num_batches
    output_shape[c_axis] = num_filters
    output_shape[e_axis] = output_elements
    output = ovl.output(output_shape, x.dtype)

    # Split the work into blocks: each worker owns filters_per_worker x batches_per_worker x
    # elements_per_worker outputs. The worker count per axis is rounded up, so the last worker of an
    # axis may own a smaller block (the remainder).
    filters_per_worker = 1
    filter_workers, filter_remainder = divmod(num_filters, filters_per_worker)
    if filter_remainder > 0:
        filter_workers += 1

    batches_per_worker = 1
    batch_workers, batch_remainder = divmod(num_batches, batches_per_worker)
    if batch_remainder > 0:
        batch_workers += 1

    elements_per_worker = 10
    element_workers, element_remainder = divmod(output_elements, elements_per_worker)
    if element_remainder > 0:
        element_workers += 1

    workgroup_shape = [batch_workers, filter_workers, element_workers]
    ovl.logger.debug(u'    workgroup_shape: ' + str(workgroup_shape))
    pos = ovl.position_in(workgroup_shape)
    cur_batch_block = pos[0]
    cur_filter_block = pos[1]
    cur_element_block = pos[2]

    # Actual block extents for this worker: the full block size, except for the last worker along an
    # axis when the axis size is not a multiple of the block size.
    num_block_batches = ovl.variable(batches_per_worker, ovl.uint32)
    if batch_remainder > 0:
        with ovl.if_(cur_batch_block == batch_workers-1):
            num_block_batches <<= batch_remainder

    num_block_filters = ovl.variable(filters_per_worker, ovl.uint32)
    if filter_remainder > 0:
        with ovl.if_(cur_filter_block == filter_workers-1):
            num_block_filters <<= filter_remainder

    num_block_elements = ovl.variable(elements_per_worker, ovl.uint32)
    if element_remainder > 0:
        with ovl.if_(cur_element_block == element_workers-1):
            num_block_elements <<= element_remainder

    # Worker-local float64 accumulator for every output of the block; it sums over all channels.
    accum = ovl.zeros((batches_per_worker, filters_per_worker, elements_per_worker), ovl.float64) #4*4

    # Worker-local copies of the kernel taps and of a window of filter_size input elements.
    filter_block = ovl.zeros((filters_per_worker, filter_size), v.dtype)  #4*10
    input_block = ovl.zeros((batches_per_worker, filter_size), x.dtype)  #4*10
    for cur_channel in ovl.arange(num_channels):

        # load all filters for this channel
        for intra_block_filter in ovl.arange(filters_per_worker):
            for f_pos in ovl.arange(filter_size):
                filter_index = [None, None, None]
                filter_index[c_axis] = cur_channel
                filter_index[n_axis] = ovl.cast(intra_block_filter, ovl.uint32) + cur_filter_block * filters_per_worker
                if kernel_orientation == 'as-is':
                    filter_index[e_axis] = f_pos
                elif kernel_orientation == 'flipped':
                    # read the kernel back to front
                    filter_index[e_axis] = filter_size - f_pos - 1
                else:
                    # Not reachable: kernel_orientation was already validated at the top.
                    raise ValueError("kernel_orientation must be 'as-is' or 'flipped'")
                filter_block[intra_block_filter, f_pos] = v[filter_index]

        # load initial inputs for this channel
        # input_block is used as a circular buffer; buffer_head is the slot holding the oldest
        # (left-most) element of the current window.
        buffer_head = ovl.variable(0, ovl.uint32)
        for intra_block_batch in ovl.arange(num_block_batches):
            cur_batch = intra_block_batch + cur_batch_block*batches_per_worker
            for f_pos in ovl.arange(filter_size):
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch

                # NOTE(review): starting_element can be negative while the other operands are cast to
                # uint64 - confirm that ovl's type promotion keeps the '>= 0' bound check meaningful.
                x_elem_index = starting_element + ovl.cast(cur_element_block * elements_per_worker, ovl.uint64) + ovl.cast(f_pos, ovl.uint64)
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0, x_elem_index < num_elements)
                # positions outside of the input contribute zeros (zero padding)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, f_pos] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, f_pos] = 0

        # Slide the window over the block's output elements: accumulate the products of window and
        # kernel, then replace the oldest window element by the next input element.
        for intra_block_element in ovl.arange(num_block_elements):
            cur_elem = intra_block_element + cur_element_block*elements_per_worker
            for intra_block_batch in ovl.arange(num_block_batches):
                cur_batch = intra_block_batch + cur_batch_block*batches_per_worker
                for intra_block_filter in ovl.arange(num_block_filters):
                    for f_pos in ovl.arange(filter_size):
                        # slot of the f_pos-th window element inside the circular buffer
                        x_pos = (buffer_head + ovl.cast(f_pos, ovl.uint32)) % filter_size
                        cur_x = ovl.cast(input_block[intra_block_batch, x_pos], ovl.float64)
                        cur_v = ovl.cast(filter_block[intra_block_filter, f_pos], ovl.float64)
                        accum[intra_block_batch, intra_block_filter, intra_block_element] = \
                            accum[intra_block_batch, intra_block_filter, intra_block_element] + cur_x * cur_v

                # load new element
                # It overwrites the slot at buffer_head, i.e. the oldest element of the window.
                x_index = [None, None, None]
                x_index[c_axis] = cur_channel
                x_index[n_axis] = cur_batch
                x_elem_index = starting_element + cur_elem + filter_size
                x_index[e_axis] = x_elem_index
                index_in_bounds = ovl.logical_and(x_elem_index >= 0, x_elem_index < num_elements)
                with ovl.if_(index_in_bounds):
                    input_block[intra_block_batch, buffer_head] = x[x_index]
                with ovl.else_():
                    input_block[intra_block_batch, buffer_head] = 0

            # advance the head once per output element, after all batches of the block were updated
            buffer_head <<= (buffer_head + 1) % filter_size

    # Write the accumulated block to the output tensor, cast back to the output dtype.
    for intra_block_batch in ovl.arange(num_block_batches):
        cur_batch = intra_block_batch + cur_batch_block*batches_per_worker
        for intra_block_filter in ovl.arange(num_block_filters):
            cur_filter = intra_block_filter + cur_filter_block*filters_per_worker
            for intra_block_element in ovl.arange(num_block_elements):
                cur_elem = intra_block_element + cur_element_block*elements_per_worker

                output_index = [None, None, None]
                output_index[n_axis] = cur_batch
                output_index[e_axis] = cur_elem
                output_index[c_axis] = cur_filter
                output[output_index] = ovl.cast(accum[intra_block_batch, intra_block_filter, intra_block_element],
                                                output.dtype)

    return output
コード例 #10
0
def triangles_op(startEdge, fromVertex, toVertex):
    """Counts the triangles in an undirected graph.

    The graph is expected as an adjacency list whose per-vertex neighbor lists are all sorted.

    The parallel strategy is the edge-based iterator: one thread is mapped to each edge.

    Sketch of the algorithm:
        1. Go over all edges (u, v).
        2. Take the neighbor indices N(u) of vertex u and N(v) of vertex v.
        3. Increase the triangle counter by | N(u) /\\ N(v) |, with /\\ the set intersection operator.

    Enforcing an order on the vertices makes sure each triangle is counted once instead of three times.

    Attributes: None.

    The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the
    separate lists.

    :param startEdge: Indices into toVertex where edges start.
    :type startEdge: list.
    :param fromVertex: The from-vertex of each edge.
    :type fromVertex: list.
    :param toVertex: The to-vertex of each edge.
    :type toVertex: list.
    :return: Counts of triangles per edge.
    """
    thisEdge    = ovl.position_in(toVertex.shape)[0]
    result      = ovl.output(toVertex.shape, ovl.uint64)
    hits        = ovl.variable(0, ovl.uint64)

    # Walk position inside the neighbor list of the edge's from-vertex.
    head        = ovl.variable(fromVertex[thisEdge], fromVertex.dtype)
    headPos     = ovl.variable(startEdge[head], startEdge.dtype)
    headLast    = ovl.variable(startEdge[head + 1], startEdge.dtype)
    headNbr     = ovl.variable(toVertex[headPos], toVertex.dtype)

    # Walk position inside the neighbor list of the edge's to-vertex.
    tail        = ovl.variable(toVertex[thisEdge], toVertex.dtype)
    tailPos     = ovl.variable(startEdge[tail], startEdge.dtype)
    tailLast    = ovl.variable(startEdge[tail + 1], startEdge.dtype)
    tailNbr     = ovl.variable(toVertex[tailPos], toVertex.dtype)

    # Maximum number of merges: the summed lengths of the two neighbor lists.
    stepBudget  = tailLast-tailPos + headLast-headPos

    # The fixed-length loop below emulates a while loop, which ovl does not offer.
    # TODO: Replace this construct by a while loop once it is available in ovl.
    for _ in ovl.arange(stepBudget):
        # Continue only while both walks are inside their lists and the from-vertex's current
        # neighbor is smaller than the to-vertex.
        proceed = ovl.logical_and(
            ovl.logical_and(headPos < headLast, tailPos < tailLast),
            headNbr < tail)

        with ovl.if_(proceed):

            with ovl.if_(headNbr < tailNbr):
                # from-vertex list lags behind: step it forward.
                headPos <<= headPos+1
                headNbr <<= toVertex[headPos]

            with ovl.elif_(headNbr > tailNbr):
                # to-vertex list lags behind: step it forward.
                tailPos <<= tailPos+1
                tailNbr <<= toVertex[tailPos]

            with ovl.else_():
                # Both lists show the same neighbor: a triangle. Step both forward.
                hits <<= hits+1
                headPos <<= headPos+1
                tailPos <<= tailPos+1
                headNbr <<= toVertex[headPos]
                tailNbr <<= toVertex[tailPos]

    # TODO: Use a reduction function that computes a partial or complete sum.
    result[thisEdge] = hits # Save the triangles for each edge.

    return result