def apply_biases_to_last_layer(graph, counts): """ The idea is the following. If the user provides counts file, it is a file that contains log-apriory probabilities, technically it should be subtracted from the bias of the last layer unless it is a SoftMax. Case 1: weights ---\ biases ---\ some layer ---> AffineTransform ---> SoftMax Then, counts are applied to biases of Affine Transform: weights ---\ (biases - counts) ---\ some layer ---> AffineTransform ---> SoftMax Case 2: weights ---\ biases ---\ some layer ---> AffineTransform Just takes the last layer and updates biases: weights ---\ (biases - counts) ---\ some layer ---> AffineTransform Parameters ---------- graph counts Returns ------- """"" outputs_ids = find_outputs(graph) for output in outputs_ids.copy(): node = Node(graph, output) if node.in_node().op != 'Memory': continue outputs_ids.remove(output) if len(outputs_ids) > 1: raise Error('Ambiguity in applying counts to several outputs.') elif len(outputs_ids) == 0: raise Error('No outputs were found') node = Node(graph, outputs_ids[0]) target_node = node.in_node() if target_node and target_node['op'] == 'SoftMax': data_node = target_node.in_node() target_node = data_node.in_node() biases_node = target_node.in_nodes()[2] # first - input, second - weights, third - biases if biases_node.value is not None: biases_node.value = np.subtract(biases_node.value, counts) # pylint: disable=assignment-from-no-return else: biases_node.value = counts * -1 biases_node.shape = counts.shape
def apply_biases_to_last_layer(graph, counts): """ When user provides counts file, it is a file that contains log-apriory probabilities, technically it should be subtracted from the bias of the last layer unless it is a SoftMax. Case 1: weights ---\ biases ---\ some layer ---> AffineTransform ---> SoftMax Then, counts are applied to biases of Affine Transform: weights ---\ (biases - counts) ---\ some layer ---> AffineTransform ---> SoftMax Case 2: weights ---\ biases ---\ some layer ---> AffineTransform Just takes the last layer and updates biases: weights ---\ (biases - counts) ---\ some layer ---> AffineTransform Parameters ---------- graph counts Returns ------- """ "" outputs_ids = find_outputs(graph) for output in outputs_ids.copy(): node = Node(graph, output) if node.op != 'Memory': continue outputs_ids.remove(output) if len(outputs_ids) > 1: raise Error('Ambiguity in applying counts to several outputs.') elif len(outputs_ids) == 0: raise Error('No outputs were found') target_node = Node(graph, outputs_ids[0]) if target_node.op == 'SoftMax': target_node = target_node.in_port(0).get_source().node sub_node = create_op_node_with_second_input(graph, Add, -counts, {'name': 'sub_counts'}) target_node.out_port(0).get_connection().set_source(sub_node.out_port(0)) sub_node.in_port(0).connect(target_node.out_port(0))