Ejemplo n.º 1
0
def apply_biases_to_last_layer(graph, counts):
    r"""
    Subtract the user-provided counts from the biases of the last layer.

    Per the original author's notes, the counts file holds log a priori
    probabilities which have to be subtracted from the biases of the last
    layer; when the last layer is a SoftMax, the layer feeding it is
    adjusted instead.

    Case 1 - the network ends with SoftMax:

            weights ---\
            biases  ---\
        some layer  ---> AffineTransform ---> SoftMax

      becomes

            weights             ---\
            (biases - counts)   ---\
        some layer              ---> AffineTransform ---> SoftMax

    Case 2 - the network ends with the affine layer itself:

            weights ---\
            biases  ---\
        some layer  ---> AffineTransform

      becomes

            weights             ---\
            (biases - counts)   ---\
        some layer              ---> AffineTransform

    Parameters
    ----------
    graph
        Graph to modify in place.
    counts
        Values to subtract from the biases; must support ``* -1`` and expose
        ``.shape`` (presumably a numpy array - confirm against the caller).

    Returns
    -------
    None
        The biases node of the graph is updated in place.

    Raises
    ------
    Error
        If, after discarding outputs fed by a 'Memory' node, there is not
        exactly one output left.
    """
    outputs_ids = find_outputs(graph)

    # Outputs whose producer is a 'Memory' op are not candidates; drop them
    # from the list in place while iterating over a snapshot of it.
    for candidate in list(outputs_ids):
        if Node(graph, candidate).in_node().op == 'Memory':
            outputs_ids.remove(candidate)

    remaining = len(outputs_ids)
    if remaining == 0:
        raise Error('No outputs were found')
    if remaining > 1:
        raise Error('Ambiguity in applying counts to several outputs.')

    target_node = Node(graph, outputs_ids[0]).in_node()
    if target_node and target_node['op'] == 'SoftMax':
        # Step over the data node between SoftMax and the op that feeds it.
        target_node = target_node.in_node().in_node()

    # Input order of the target: 0 - input, 1 - weights, 2 - biases.
    biases_node = target_node.in_nodes()[2]
    if biases_node.value is None:
        # No bias values yet: the negated counts become the biases.
        biases_node.value = counts * -1
        biases_node.shape = counts.shape
    else:
        biases_node.value = np.subtract(biases_node.value, counts)  # pylint: disable=assignment-from-no-return
Ejemplo n.º 2
0
def apply_biases_to_last_layer(graph, counts):
    r"""
    Apply the user-provided counts to the output of the last layer.

    Per the original author's notes, the counts file holds log a priori
    probabilities which have to be subtracted from the last layer; when the
    last layer is a SoftMax, the layer feeding it is adjusted instead.

    The subtraction is implemented by inserting an Add node named
    'sub_counts', whose second input is ``-counts``, on output port 0 of
    the selected layer.

    Case 1 - the network ends with SoftMax:

        some layer ---> AffineTransform ---> SoftMax

      becomes

                                       -counts ---\
        some layer ---> AffineTransform ---------> Add ---> SoftMax

    Case 2 - the network ends with the affine layer itself:

        some layer ---> AffineTransform

      becomes

                                       -counts ---\
        some layer ---> AffineTransform ---------> Add

    Parameters
    ----------
    graph
        Graph to modify in place.
    counts
        Values to subtract; must support unary minus (presumably a numpy
        array - confirm against the caller).

    Returns
    -------
    None
        The graph is rewired in place.

    Raises
    ------
    Error
        If, after discarding 'Memory' outputs, there is not exactly one
        output left.
    """
    outputs_ids = find_outputs(graph)

    # 'Memory' outputs are not candidates; drop them from the list in place
    # while iterating over a snapshot of it.
    for candidate in list(outputs_ids):
        if Node(graph, candidate).op == 'Memory':
            outputs_ids.remove(candidate)

    remaining = len(outputs_ids)
    if remaining == 0:
        raise Error('No outputs were found')
    if remaining > 1:
        raise Error('Ambiguity in applying counts to several outputs.')

    last_node = Node(graph, outputs_ids[0])
    if last_node.op == 'SoftMax':
        # Adjust the node that produces the SoftMax input instead.
        last_node = last_node.in_port(0).get_source().node

    add_attrs = {'name': 'sub_counts'}
    counts_adder = create_op_node_with_second_input(graph, Add, -counts,
                                                    add_attrs)

    # Statement order is kept from the original: first the existing
    # connection of output port 0 is re-sourced to the Add output, and only
    # then is the layer output connected to the Add input.
    outgoing = last_node.out_port(0).get_connection()
    outgoing.set_source(counts_adder.out_port(0))
    counts_adder.in_port(0).connect(last_node.out_port(0))