예제 #1
0
	def compose_edge(self, edge, max_size, minimal_only=False, aligned_nodes=()):
		"""Add the edges obtained by composing `edge` with child edges of its tails.

		Each tail of `edge` is either kept as it is or replaced by the tails of
		one of the edges returned by `tail.get_child_edges(self)`. Every
		combination that has at most `max_size` tails and whose tails differ from
		`edge.tails` is registered through `self.add`, weighted by the product of
		`self.weights` over `edge` and all child edges that were substituted in.

		Args:
			edge: The edge to expand; its `head` is the head of every new edge.
			max_size: Maximum number of tails a composed edge may have.
			minimal_only: When true, tails found in `aligned_nodes` are never
				expanded, and a child edge is only used if each of its tails is
				in `aligned_nodes` or has `is_terminal_flag` set.
			aligned_nodes: Collection of aligned nodes. Only consulted when
				`minimal_only` is true.

		Returns:
			None. Results are delivered through `self.add`.
		"""
		# Every tail contributes at least one node to a composed edge (see the
		# assertion below), so an edge with more than max_size tails can never
		# produce anything: bail out before doing any work.
		tail_count = len(edge.tails)
		if tail_count > max_size:
			return

		# Largest child edge that can replace a single tail while all the other
		# tails are kept unchanged.
		max_child_tails = max_size - tail_count + 1

		tail_choices = []
		for tail in edge.tails:
			# The first option is always to keep the tail itself: neutral weight,
			# no internal edge.
			choices = [((tail,), 1.0, None)]

			# Composing through virtual nodes leads to us counting a ton of stuff twice.
			# Note that composing through virtual nodes will never add any extra edges either.
			# In minimal mode, we don't compose through aligned nodes.
			expandable = not tail.is_virtual and (not minimal_only or tail not in aligned_nodes)
			if expandable:
				for child_edge in tail.get_child_edges(self):
					# In minimal mode, each tail must be an aligned node (or a terminal).
					if minimal_only and any(
						t not in aligned_nodes and not t.is_terminal_flag
						for t in child_edge.tails
					):
						continue
					if len(child_edge.tails) <= max_child_tails:
						choices.append((child_edge.tails, self.weights[child_edge], child_edge))
			tail_choices.append(choices)

		# NOTE(review): enumerate_subsets is defined elsewhere; it is used here as
		# yielding one (tails, weight, internal_edge) selection per entry of
		# tail_choices -- confirm against its definition.
		for chosen_child_edges in enumerate_subsets(tail_choices):
			new_tails = []
			new_weight = self.weights[edge]
			composed_edges = [edge]
			for tails, weight, internal_edge in chosen_child_edges:
				assert len(tails) >= 1
				new_tails.extend(tails)
				new_weight *= weight
				if internal_edge is not None:
					composed_edges.append(internal_edge)

			if len(new_tails) > max_size:
				continue

			# NOTE(review): the third Edge argument presumably flags the edge as
			# composed -- confirm against the Edge constructor.
			new_edge = Edge(edge.head, tuple(new_tails), True)
			new_edge.composed_edges = tuple(composed_edges)
			# Skip the selection that leaves every tail untouched.
			if edge.tails != new_edge.tails:
				self.add(new_edge, new_weight)
예제 #2
0
def add_experimental_virtual_edges(target_tree, source_tree, s2t_node_alignments, t2s_node_alignments, target_terminals):
	"""Project source edges onto the target tree as virtual edges.

	For every source edge whose head has at least one aligned target node, the
	edge's tails are rewritten in terms of terminals and node-aligned
	non-terminals. Rewritten source edges that differ from the original are
	added to `source_tree`, and for each combination of target alignments of
	the rewritten tails a corresponding edge is added to `target_tree`.
	Positions of the target head's span not covered by any aligned tail are
	filled with the matching entries of `target_terminals`.

	Args:
		target_tree: Receives the virtual edges through its `add` method.
		source_tree: Provides `topsort()`, `head_index` and `edges`; composed
			source edges are added to it.
		s2t_node_alignments: Mapping from a source node to the collection of
			target nodes aligned to it.
		t2s_node_alignments: Unused by this function; kept for the interface.
		target_terminals: Sequence indexed by target position, used to fill
			uncovered positions of a target span.

	Returns:
		None. Both trees are modified in place.
	"""
	def project(source_node):
		"""Return the unique target node aligned to `source_node`, else None."""
		alignments = s2t_node_alignments[source_node]
		#assert len(alignments) <= 1 # TODO: Could unaligned words invalidate this?
		return list(alignments)[0] if len(alignments) == 1 else None

	# Derivation[source_node] will hold the minimal way(s) of representing source_node using minimal constituents.
	# For terminals and well-aligned NTs, there is only one such way: using the node itself.
	# For NTs that are not node aligned, we will find sets of minimally aligned children that cover source_node.
	# Each entry is a pair (tuple of nodes, list of edges skipped to reach them).
	# NOTE(review): this relies on topsort() yielding tails before their heads,
	# since derivations[tail] is looked up while processing a head -- confirm.
	derivations = {}
	for source_node in source_tree.topsort():
		if source_node.is_terminal_flag or project(source_node) is not None:
			derivations[source_node] = [((source_node,), [])]
			continue

		node_derivations = []
		derivations[source_node] = node_derivations
		for edge in source_tree.head_index[source_node]:
			for subset in enumerate_subsets([derivations[tail] for tail in edge.tails]):
				derivation = reduce(operator.add, [nodes for nodes, _ in subset])
				skipped_edges = reduce(operator.add, [edges for _, edges in subset])
				for node in derivation:
					assert len(s2t_node_alignments[node]) >= 1 or node.is_terminal_flag
				node_derivations.append((derivation, [edge] + skipped_edges))

	# Iterate over a copy because composed edges are added to source_tree below.
	for edge in source_tree.edges.copy():
		source_head = edge.head
		for target_head in s2t_node_alignments[source_head]:
			for source_subset in enumerate_subsets([derivations[tail] for tail in edge.tails]):
				source_tails = reduce(operator.add, [nodes for nodes, _ in source_subset])
				skipped_edges = reduce(operator.add, [edges for _, edges in source_subset])
				composed_edge = Edge(source_head, source_tails)
				if len(skipped_edges) > 0:
					composed_edge.composed_edges = tuple([edge] + skipped_edges)
					composed_edge.is_composed = True
					assert len(edge.composed_edges) == 0
				if composed_edge != edge:
					assert len(skipped_edges) > 0
					source_tree.add(composed_edge)

				aligned_targets = [list(s2t_node_alignments[tail]) for tail in source_tails if not tail.is_terminal_flag]
				for target_subset in enumerate_subsets(aligned_targets):
					# Work on a copy: gap filling must not mutate (or depend on the
					# mutability of) whatever enumerate_subsets yielded.
					target_tails = list(target_subset)
					# Fill every position of the head's span that no tail covers
					# with the terminal at that position.
					for i in range(*target_head.span):
						is_included = any(
							tail.span.start <= i < tail.span.end
							for tail in target_tails
						)
						if not is_included:
							target_tails.append(target_terminals[i])
					target_tails = tuple(sorted(target_tails, key=lambda node: node.span.start))
					virtual_edge = Edge(target_head, target_tails)
					target_tree.add(virtual_edge)