def parseOffer(mo, i, j, start, end, unit, root):
    """Annotate a trade-offer event and the resources it mentions.

    The unit is retyped as 'Offer' and receives a 'Surface_act' feature
    ('Assertion') and an 'Addressee' feature ('?').  One 'Resource' unit
    is then appended through `append_unit` for every (quantity, kind)
    pair captured by the regex: the first `i` pairs are flagged
    'Givable', the following `j` pairs 'Receivable'.

    Parameters
    ----------
    mo :
        Match object; group 1 is the offering player, groups 2k and
        2k + 1 are the quantity and the kind of the k-th resource.
    i : int
        Number of offered resources.
    j : int
        Number of requested resources.
    start : int
        Start position of the event; resource spans are derived from it.
    end : int
        End position of the event (not used here).
    unit :
        XML element of the unit annotation to update.
    root :
        Root of the XML tree, handed over to `append_unit`.
    """
    offerer = mo.group(1)

    # 1. update the unit annotation itself
    unit.find('characterisation/type').text = 'Offer'
    feature_set = unit.find('characterisation/featureSet')
    for feat_name, feat_value in (('Surface_act', 'Assertion'),
                                  ('Addressee', '?')):
        feature = ET.SubElement(feature_set, 'feature', {'name': feat_name})
        feature.text = feat_value

    # 2. one 'Resource' unit per captured (quantity, kind) pair
    sections = (
        ('Givable', 1, range(1, i + 1)),
        ('Receivable', i + 1, range(i + 1, i + j + 1)),
    )
    for status, first_index, indices in sections:
        for index in indices:
            quantity = mo.group(2 * index)
            kind = mo.group(2 * index + 1)
            if index != first_index:
                # skip the ', ' that separates two resources
                left = right + 2
            elif status == 'Givable':
                # 24 == len(' made an offer to trade ')
                left = start + len(offerer) + 24
            else:
                # 5 == len(' for ')
                left = right + 5
            right = left + len(quantity) + 1 + len(kind)
            append_unit(root, 'Resource',
                        [('Status', status),
                         ('Quantity', quantity),
                         ('Correctness', 'True'),
                         ('Kind', kind)],
                        left, right)
def parseOffer(mo, i, j, start, end, unit, root):
    """Turn a trade-offer event into an 'Offer' unit plus 'Resource' units.

    The unit type becomes 'Offer' and two features are appended to its
    feature set: 'Surface_act' = 'Assertion' and 'Addressee' = '?'.
    Every resource captured by the regex then yields a call to
    `append_unit`: the first `i` resources with status 'Givable', the
    next `j` with status 'Receivable'.

    Parameters
    ----------
    mo :
        Match object; group 1 is the offering player, groups 2k and
        2k + 1 are the quantity and the kind of the k-th resource.
    i : int
        Number of offered resources.
    j : int
        Number of requested resources.
    start : int
        Start position of the event; resource spans are derived from it.
    end : int
        End position of the event (not used here).
    unit :
        XML element of the unit annotation to update.
    root :
        Root of the XML tree, handed over to `append_unit`.
    """
    player = mo.group(1)

    unit.find('characterisation/type').text = 'Offer'
    feats = unit.find('characterisation/featureSet')
    surface_act = ET.SubElement(feats, 'feature', {'name': 'Surface_act'})
    surface_act.text = 'Assertion'
    addressee = ET.SubElement(feats, 'feature', {'name': 'Addressee'})
    addressee.text = '?'

    # (status, index of the first resource, index after the last one)
    for status, first, stop in (('Givable', 1, i + 1),
                                ('Receivable', i + 1, i + j + 1)):
        for index in range(first, stop):
            qty = mo.group(2 * index)
            res = mo.group(2 * index + 1)
            if index == first and status == 'Givable':
                # 24 == len(' made an offer to trade ')
                left = start + len(player) + 24
            elif index == first:
                # 5 == len(' for ')
                left = right + 5
            else:
                # 2 == len(', ')
                left = right + 2
            right = left + len(qty) + 1 + len(res)
            append_unit(root, 'Resource',
                        [('Status', status),
                         ('Quantity', qty),
                         ('Correctness', 'True'),
                         ('Kind', res)],
                        left, right)
def parse_offer(m, start, end, unit, root):
    """Annotate a trade-offer event and the resources it lists.

    The unit is retyped as 'Offer' and gets a 'Surface_act' feature
    ('Assertion') and an 'Addressee' feature ('?').  The resource lists
    captured in groups 'V' (offered) and 'W' (asked) are split on ', '
    and each '<quantity> <kind>' item produces a 'Resource' unit through
    `append_unit`, with status 'Givable' and 'Receivable' respectively.

    Parameters
    ----------
    m :
        Match object for the offer, with named groups 'X', 'V' and 'W'.
    start : int
        Start of the offer.
    end : int
        End of the offer (not used here).
    unit :
        XML element for this unit annotation.
    root :
        Root of the XML tree, handed over to `append_unit`.
    """
    def annotate_resources(cursor, listing, status):
        """Emit one 'Resource' unit per item of `listing`.

        `cursor` is the expected position of the first item; the
        position right after the last item is returned, or `cursor`
        itself when the group did not take part in the match.
        """
        span_end = cursor
        if listing is not None:
            for item in listing.split(', '):
                span_end = cursor + len(item)
                qty, kind = item.split(' ')
                append_unit(root, 'Resource',
                            [('Status', status),
                             ('Quantity', qty),
                             ('Correctness', 'True'),
                             ('Kind', kind)],
                            cursor, span_end, author=_AUTHOR)
                # skip the ', ' before the next item (if any)
                cursor = span_end + 2
        return span_end

    offerer = m.group('X')

    # 1. update the unit annotation
    unit.find('characterisation/type').text = 'Offer'
    feature_set = unit.find('characterisation/featureSet')
    for feat_name, feat_value in (('Surface_act', 'Assertion'),
                                  ('Addressee', '?')):
        feature = ET.SubElement(feature_set, 'feature', {'name': feat_name})
        feature.text = feat_value

    # 2. add 'Resource' annotations
    # offered resources start right after '<X> made an offer to trade '
    # (24 == len(' made an offer to trade '))
    offered_end = annotate_resources(start + len(offerer) + 24,
                                     m.group('V'), 'Givable')
    # asked resources follow ' for ' (5 characters)
    annotate_resources(offered_end + 5, m.group('W'), 'Receivable')
def add_units_annotations(tree, text):
    """Add units annotations on non-linguistic events.

    Each child of `tree` whose 'characterisation/type' is
    'NonplayerSegment' is retyped ('Offer', 'Accept', 'Refusal' or
    'Other') according to the text of the event it spans, and receives a
    'Surface_act' feature ('Assertion') and an 'Addressee' feature.
    Events that mention resources additionally get one 'Resource' unit
    per resource, created through `append_unit`.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        modified XML tree with additional units annotations on
        non-linguistic events (the very object passed as `tree`)
    """
    root = tree

    reject_prog = re.compile(r'(.+) rejected trade offer\.')
    get_prog = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.')
    # It is impossible in "Settlers of Catan" to get more than 2 different
    # types of resources with one dice roll, so two patterns are enough.
    get2_prog = re.compile(
        r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood), '
        r'(\d+) (clay|ore|sheep|wheat|wood)\.')
    monopoly_prog = re.compile(
        r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')

    def set_characterisation(unit, act_type, addressee):
        """Retype `unit` and append its 'Surface_act'/'Addressee' features."""
        unit.find('characterisation/type').text = act_type
        feats = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feats, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee_elm = ET.SubElement(feats, 'feature',
                                      {'name': 'Addressee'})
        addressee_elm.text = addressee

    def add_resource(root, status, qty, kind, left, right):
        """Append one 'Resource' unit spanning [left, right)."""
        append_unit(root, 'Resource',
                    [('Status', status),
                     ('Quantity', qty),
                     ('Correctness', 'True'),
                     ('Kind', kind)],
                    left, right, author=_AUTHOR)

    def add_resource_list(root, listing, status, left):
        """Append one 'Resource' unit per item of a ', '-separated list.

        `left` is the expected position of the first item.  Returns the
        end position of the last item, or `left` when `listing` is None
        (the group did not take part in the match).
        """
        right = left
        if listing is not None:
            for resource in listing.split(', '):
                right = left + len(resource)  # end of span
                qty, kind = resource.split(' ')
                add_resource(root, status, qty, kind, left, right)
                # expected position of the next resource (if any)
                left = right + 2
        return right

    def parse_offer(m, start, end, unit, root):
        """Annotate '<X> made an offer to trade <V> for <W>[...]'.

        Parameters
        ----------
        m :
            Match object for the offer (named groups 'X', 'V', 'W').
        start : int
            Start of the offer.
        end : int
            End of the offer (unused).
        unit :
            XML element for this unit annotation.
        root :
            Root of the XML tree.
        """
        set_characterisation(unit, 'Offer', '?')
        # 24 == len(' made an offer to trade ')
        first_left = start + len(m.group('X')) + 24
        right = add_resource_list(root, m.group('V'), 'Givable', first_left)
        # 5 == len(' for ')
        add_resource_list(root, m.group('W'), 'Receivable', right + 5)
        # the eventual Y (if m comes from BANK_OFFER_PROG) is currently
        # unused

    def parse_trade(m, start, end, unit, root):
        """Annotate '<X> traded <V> for <W> from <Y>.'.

        Parameters
        ----------
        m :
            Match object for the trade (named groups 'X', 'Y', 'V', 'W').
        start : int
            Start of the trade event.
        end : int
            End of the trade event (unused).
        unit :
            XML element for this unit annotation.
        root :
            Root of the XML tree.
        """
        partner = m.group('Y')
        # trades with the bank or a port are addressed to everybody
        if partner == 'the bank' or partner == 'a port':
            addressee = 'All'
        else:
            addressee = partner
        set_characterisation(unit, 'Accept', addressee)
        # 8 == len(' traded ')
        first_left = start + len(m.group('X')) + 8
        right = add_resource_list(root, m.group('V'), '?', first_left)
        # 5 == len(' for ')
        add_resource_list(root, m.group('W'), 'Possessed', right + 5)

    # player who made the most recent offer; '' until an offer is seen
    trader = ''

    # Iterate over a snapshot: append_unit is handed `root` and presumably
    # adds the new 'Resource' units to it while we are looping.
    for unit in list(root):
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue

        start = int(
            unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        # <X> made an offer to trade <N1> <R1> for <N2> <R2>.
        # (possibly with the bank or a port)
        m = OFFER_PROG.search(event)
        if m is None:
            m = BANK_OFFER_PROG.search(event)
        if m is not None:
            parse_offer(m, start, end, unit, root)
            trader = m.group('X')
            continue

        # <X> traded <N1> <R1> for <N2> <R2> from <Y>.
        m = TRADE_PROG.search(event)
        if m is not None:
            parse_trade(m, start, end, unit, root)
            continue

        # <Y> rejected trade offer.
        if reject_prog.search(event) is not None:
            set_characterisation(unit, 'Refusal',
                                 trader if trader != '' else 'All')
            continue

        if event == "You can't make that trade.":
            set_characterisation(unit, 'Other',
                                 trader if trader != '' else 'All')
            continue

        # <Y> gets <N> <R>.
        mo = get_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            # 6 == len(' gets '); the span stops before the final '.'
            left = start + len(mo.group(1)) + 6
            right = end - 1
            add_resource(root, 'Possessed', mo.group(2), mo.group(3),
                         left, right)
            continue

        # <Y> gets <N1> <R1>, <N2> <R2>.
        mo = get2_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            left = start + len(mo.group(1)) + 6
            # groups (2, 3) describe the first resource and (4, 5) the
            # second one; the former code read (2, 3) twice
            for qty, kind in ((mo.group(2), mo.group(3)),
                              (mo.group(4), mo.group(5))):
                right = left + len(qty) + 1 + len(kind)
                add_resource(root, 'Possessed', qty, kind, left, right)
                left = right + 2  # skip ', '
            continue

        # <X> monopolized <R>.
        mo = monopoly_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            kind = mo.group(2)
            # the resource name sits right before the final '.'
            right = end - 1
            left = right - len(kind)
            add_resource(root, 'Possessed', '?', kind, left, right)
            continue

        # any other non-linguistic event
        set_characterisation(unit, 'Other', 'All')

    return root
def parse_trade(m, start, end, unit, root):
    """Annotate a completed-trade event and the resources it lists.

    The unit is retyped as 'Accept' and gets a 'Surface_act' feature
    ('Assertion') and an 'Addressee' feature: the trading partner 'Y',
    or 'All' when the partner is 'the bank' or 'a port'.  The resource
    lists captured in groups 'V' and 'W' are split on ', ' and each
    '<quantity> <kind>' item produces a 'Resource' unit through
    `append_unit`, with status '?' and 'Possessed' respectively.

    Parameters
    ----------
    m :
        Match object for the trade, with named groups 'X', 'Y', 'V'
        and 'W'.
    start : int
        Start of the trade event.
    end : int
        End of the trade event (not used here).
    unit :
        XML element for this unit annotation.
    root :
        Root of the XML tree, handed over to `append_unit`.
    """
    def annotate_resources(cursor, listing, status):
        """Emit one 'Resource' unit per item of `listing`.

        `cursor` is the expected position of the first item; the
        position right after the last item is returned, or `cursor`
        itself when the group did not take part in the match.
        """
        span_end = cursor
        if listing is not None:
            for item in listing.split(', '):
                span_end = cursor + len(item)
                qty, kind = item.split(' ')
                append_unit(root, 'Resource',
                            [('Status', status),
                             ('Quantity', qty),
                             ('Correctness', 'True'),
                             ('Kind', kind)],
                            cursor, span_end, author=_AUTHOR)
                # skip the ', ' before the next item (if any)
                cursor = span_end + 2
        return span_end

    trader = m.group('X')
    partner = m.group('Y')

    # 1. update the unit annotation
    unit.find('characterisation/type').text = 'Accept'
    feature_set = unit.find('characterisation/featureSet')
    surface_act = ET.SubElement(feature_set, 'feature',
                                {'name': 'Surface_act'})
    surface_act.text = 'Assertion'
    addressee = ET.SubElement(feature_set, 'feature', {'name': 'Addressee'})
    # trades with the bank or a port are addressed to everybody
    addressee.text = 'All' if partner in ('the bank', 'a port') else partner

    # 2. add 'Resource' annotations
    # the first list starts right after '<X> traded '
    # (8 == len(' traded '))
    first_end = annotate_resources(start + len(trader) + 8,
                                   m.group('V'), '?')
    # the second list follows ' for ' (5 characters)
    annotate_resources(first_end + 5, m.group('W'), 'Possessed')
def add_units_annotations(tree, text):
    """Add units annotations on non-linguistic events.

    Each child of `tree` whose 'characterisation/type' is
    'NonplayerSegment' is retyped ('Offer', 'Accept', 'Refusal' or
    'Other') according to the text of the event it spans, and receives a
    'Surface_act' feature ('Assertion') and an 'Addressee' feature.
    Events that mention resources additionally get one 'Resource' unit
    per resource, created through `append_unit`.

    Parameters
    ----------
    tree :
        XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root :
        modified XML tree with additional units annotations on
        non-linguistic events (the very object passed as `tree`)
    """
    root = tree

    reject_prog = re.compile(r'(.+) rejected trade offer\.')
    get_prog = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.')
    # It is impossible in "Settlers of Catan" to get more than 2 different
    # types of resources with one dice roll, so two patterns are enough.
    get2_prog = re.compile(
        r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood), '
        r'(\d+) (clay|ore|sheep|wheat|wood)\.')
    monopoly_prog = re.compile(
        r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')

    def set_characterisation(unit, act_type, addressee):
        """Retype `unit` and append its 'Surface_act'/'Addressee' features."""
        unit.find('characterisation/type').text = act_type
        feats = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feats, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee_elm = ET.SubElement(feats, 'feature',
                                      {'name': 'Addressee'})
        addressee_elm.text = addressee

    def add_resource(root, status, qty, kind, left, right):
        """Append one 'Resource' unit spanning [left, right)."""
        append_unit(root, 'Resource',
                    [('Status', status),
                     ('Quantity', qty),
                     ('Correctness', 'True'),
                     ('Kind', kind)],
                    left, right, author=_AUTHOR)

    def add_resource_list(root, listing, status, left):
        """Append one 'Resource' unit per item of a ', '-separated list.

        `left` is the expected position of the first item.  Returns the
        end position of the last item, or `left` when `listing` is None
        (the group did not take part in the match).
        """
        right = left
        if listing is not None:
            for resource in listing.split(', '):
                right = left + len(resource)  # end of span
                qty, kind = resource.split(' ')
                add_resource(root, status, qty, kind, left, right)
                # expected position of the next resource (if any)
                left = right + 2
        return right

    def parse_offer(m, start, end, unit, root):
        """Annotate '<X> made an offer to trade <V> for <W>[...]'.

        Parameters
        ----------
        m :
            Match object for the offer (named groups 'X', 'V', 'W').
        start : int
            Start of the offer.
        end : int
            End of the offer (unused).
        unit :
            XML element for this unit annotation.
        root :
            Root of the XML tree.
        """
        set_characterisation(unit, 'Offer', '?')
        # 24 == len(' made an offer to trade ')
        first_left = start + len(m.group('X')) + 24
        right = add_resource_list(root, m.group('V'), 'Givable', first_left)
        # 5 == len(' for ')
        add_resource_list(root, m.group('W'), 'Receivable', right + 5)
        # the eventual Y (if m comes from BANK_OFFER_PROG) is currently
        # unused

    def parse_trade(m, start, end, unit, root):
        """Annotate '<X> traded <V> for <W> from <Y>.'.

        Parameters
        ----------
        m :
            Match object for the trade (named groups 'X', 'Y', 'V', 'W').
        start : int
            Start of the trade event.
        end : int
            End of the trade event (unused).
        unit :
            XML element for this unit annotation.
        root :
            Root of the XML tree.
        """
        partner = m.group('Y')
        # trades with the bank or a port are addressed to everybody
        if partner == 'the bank' or partner == 'a port':
            addressee = 'All'
        else:
            addressee = partner
        set_characterisation(unit, 'Accept', addressee)
        # 8 == len(' traded ')
        first_left = start + len(m.group('X')) + 8
        right = add_resource_list(root, m.group('V'), '?', first_left)
        # 5 == len(' for ')
        add_resource_list(root, m.group('W'), 'Possessed', right + 5)

    # player who made the most recent offer; '' until an offer is seen
    trader = ''

    # Iterate over a snapshot: append_unit is handed `root` and presumably
    # adds the new 'Resource' units to it while we are looping.
    for unit in list(root):
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue

        start = int(
            unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        # <X> made an offer to trade <N1> <R1> for <N2> <R2>.
        # (possibly with the bank or a port)
        m = OFFER_PROG.search(event)
        if m is None:
            m = BANK_OFFER_PROG.search(event)
        if m is not None:
            parse_offer(m, start, end, unit, root)
            trader = m.group('X')
            continue

        # <X> traded <N1> <R1> for <N2> <R2> from <Y>.
        m = TRADE_PROG.search(event)
        if m is not None:
            parse_trade(m, start, end, unit, root)
            continue

        # <Y> rejected trade offer.
        if reject_prog.search(event) is not None:
            set_characterisation(unit, 'Refusal',
                                 trader if trader != '' else 'All')
            continue

        if event == "You can't make that trade.":
            set_characterisation(unit, 'Other',
                                 trader if trader != '' else 'All')
            continue

        # <Y> gets <N> <R>.
        mo = get_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            # 6 == len(' gets '); the span stops before the final '.'
            left = start + len(mo.group(1)) + 6
            right = end - 1
            add_resource(root, 'Possessed', mo.group(2), mo.group(3),
                         left, right)
            continue

        # <Y> gets <N1> <R1>, <N2> <R2>.
        mo = get2_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            left = start + len(mo.group(1)) + 6
            # groups (2, 3) describe the first resource and (4, 5) the
            # second one; the former code read (2, 3) twice
            for qty, kind in ((mo.group(2), mo.group(3)),
                              (mo.group(4), mo.group(5))):
                right = left + len(qty) + 1 + len(kind)
                add_resource(root, 'Possessed', qty, kind, left, right)
                left = right + 2  # skip ', '
            continue

        # <X> monopolized <R>.
        mo = monopoly_prog.search(event)
        if mo is not None:
            set_characterisation(unit, 'Other', 'All')
            kind = mo.group(2)
            # the resource name sits right before the final '.'
            right = end - 1
            left = right - len(kind)
            add_resource(root, 'Possessed', '?', kind, left, right)
            continue

        # any other non-linguistic event
        set_characterisation(unit, 'Other', 'All')

    return root
def add_units_annotations(tree, text): """ Add units annotations for non-linguistical event Parameters ---------- tree : XML tree extracted from the .aa file to modify text : string raw text extracted from the .ac file Returns ------- root : modified XML tree with units annotations for non-linguistical event """ root = tree #So I know this is like the ugliest possible way to solve a problem #but those offer/trade events can be really tricky #and for the moment I really see no other way than #considering every possibilities in a exhaustive way... Offer11RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer12RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer21RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer13RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer22RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer31RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' 
) Offer14RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer23RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer32RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) Offer41RegEx = re.compile( r'(.+) made an offer to trade (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood)( from the bank or a port)?\.' ) FromRegEx = re.compile(r'from (.+)') Trade11RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade12RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade21RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade13RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade22RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' 
) Trade31RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade14RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade23RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade32RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) Trade41RegEx = re.compile( r'(.+) traded (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood) for (\d+) (clay|ore|sheep|wheat|wood) from (.+)\.' ) #That's the moment I hope I didn't make any typo... RejectRegEx = re.compile(r'(.+) rejected trade offer\.') GetRegEx = re.compile(r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood)\.') Get2RegEx = re.compile( r'(.+) gets (\d+) (clay|ore|sheep|wheat|wood), (\d+) (clay|ore|sheep|wheat|wood)\.' ) #It is impossible in "Settlers of Catan" to get more than 2 different types of resources with one roll dice. #That's why we actually don't need to bother with complex regular expression since there are in fact just two cases to consider. 
:) MonopolyRegEx = re.compile( r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.') Trader = '' def parseOffer(mo, i, j, start, end, unit, root): X = mo.group(1) unit.find('characterisation/type').text = 'Offer' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) f_elm2.text = '?' #maybe we can shorten this function and make a single loop for index in range(1, i + 1): N = mo.group(2 * index) R = mo.group(2 * index + 1) if index == 1: left = start + len(X) + 24 right = left + len(N) + 1 + len(R) else: left = right + 2 right = left + len(N) + 1 + len(R) append_unit(root, 'Resource', [('Status', 'Givable'), ('Quantity', N), ('Correctness', 'True'), ('Kind', R)], left, right) for index in range(i + 1, i + j + 1): N = mo.group(2 * index) R = mo.group(2 * index + 1) if index == i + 1: left = right + 5 right = left + len(N) + 1 + len(R) else: left = right + 2 right = left + len(N) + 1 + len(R) append_unit(root, 'Resource', [('Status', 'Receivable'), ('Quantity', N), ('Correctness', 'True'), ('Kind', R)], left, right) def parseTrade(mo, i, j, start, end, unit, root): X = mo.group(1) Y = mo.group(2 * (i + j + 1)) unit.find('characterisation/type').text = 'Accept' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) if Y == 'the bank' or Y == 'a port': f_elm2.text = 'All' else: f_elm2.text = Y #maybe we can shorten this function and make a single loop for index in range(1, i + 1): N = mo.group(2 * index) R = mo.group(2 * index + 1) if index == 1: left = start + len(X) + 8 right = left + len(N) + 1 + len(R) else: left = right + 2 right = left + len(N) + 1 + len(R) append_unit(root, 'Resource', [('Status', '?'), ('Quantity', N), ('Correctness', 'True'), ('Kind', R)], left, right) 
for index in range(i + 1, i + j + 1): N = mo.group(2 * index) R = mo.group(2 * index + 1) if index == i + 1: left = right + 5 right = left + len(N) + 1 + len(R) else: left = right + 2 right = left + len(N) + 1 + len(R) append_unit(root, 'Resource', [('Status', 'Possessed'), ('Quantity', N), ('Correctness', 'True'), ('Kind', R)], left, right) for unit in root: if unit.findtext('characterisation/type') == 'NonplayerSegment': start = int( unit.find('positioning/start/singlePosition').get('index')) end = int(unit.find('positioning/end/singlePosition').get('index')) event = text[start:end] if Offer11RegEx.search( event ) != None: #<X> made an offer to trade <N1> <R1> for <N2> <R2>. mo = Offer11RegEx.search(event) parseOffer(mo, 1, 1, start, end, unit, root) Trader = mo.group(1) continue elif Offer12RegEx.search(event) != None: mo = Offer12RegEx.search(event) parseOffer(mo, 1, 2, start, end, unit, root) Trader = mo.group(1) continue elif Offer21RegEx.search(event) != None: mo = Offer21RegEx.search(event) parseOffer(mo, 2, 1, start, end, unit, root) Trader = mo.group(1) continue elif Offer13RegEx.search(event) != None: mo = Offer13RegEx.search(event) parseOffer(mo, 1, 3, start, end, unit, root) Trader = mo.group(1) continue elif Offer22RegEx.search(event) != None: mo = Offer22RegEx.search(event) parseOffer(mo, 2, 2, start, end, unit, root) Trader = mo.group(1) continue elif Offer31RegEx.search(event) != None: mo = Offer31RegEx.search(event) parseOffer(mo, 3, 1, start, end, unit, root) Trader = mo.group(1) continue elif Offer14RegEx.search(event) != None: mo = Offer14RegEx.search(event) parseOffer(mo, 1, 4, start, end, unit, root) Trader = mo.group(1) continue elif Offer23RegEx.search(event) != None: mo = Offer23RegEx.search(event) parseOffer(mo, 2, 3, start, end, unit, root) Trader = mo.group(1) continue elif Offer32RegEx.search(event) != None: mo = Offer32RegEx.search(event) parseOffer(mo, 3, 2, start, end, unit, root) Trader = mo.group(1) continue elif 
Offer41RegEx.search(event) != None: mo = Offer41RegEx.search(event) parseOffer(mo, 4, 1, start, end, unit, root) Trader = mo.group(1) continue elif Trade11RegEx.search( event ) != None: #<X> traded <N1> <R1> for <N2> <R2> from <Y>. parseTrade(Trade11RegEx.search(event), 1, 1, start, end, unit, root) continue elif Trade12RegEx.search(event) != None: parseTrade(Trade12RegEx.search(event), 1, 2, start, end, unit, root) continue elif Trade21RegEx.search(event) != None: parseTrade(Trade21RegEx.search(event), 2, 1, start, end, unit, root) continue elif Trade13RegEx.search(event) != None: parseTrade(Trade13RegEx.search(event), 1, 3, start, end, unit, root) continue elif Trade22RegEx.search(event) != None: parseTrade(Trade22RegEx.search(event), 2, 2, start, end, unit, root) continue elif Trade31RegEx.search(event) != None: parseTrade(Trade31RegEx.search(event), 3, 1, start, end, unit, root) continue elif Trade14RegEx.search(event) != None: parseTrade(Trade14RegEx.search(event), 1, 4, start, end, unit, root) continue elif Trade23RegEx.search(event) != None: parseTrade(Trade23RegEx.search(event), 2, 3, start, end, unit, root) continue elif Trade32RegEx.search(event) != None: parseTrade(Trade32RegEx.search(event), 3, 2, start, end, unit, root) continue elif Trade41RegEx.search(event) != None: parseTrade(Trade41RegEx.search(event), 4, 1, start, end, unit, root) continue elif RejectRegEx.search(event) != None: #<Y> rejected trade offer. 
mo = RejectRegEx.search(event) Y = mo.group(1) unit.find('characterisation/type').text = 'Refusal' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) if Trader != '': f_elm2.text = Trader else: f_elm2.text = 'All' continue elif event == "You can't make that trade.": unit.find('characterisation/type').text = 'Other' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) if Trader != '': f_elm2.text = Trader else: f_elm2.text = 'All' continue elif GetRegEx.search(event) != None: #<Y> gets <N> <R>. mo = GetRegEx.search(event) Y = mo.group(1) N = mo.group(2) R = mo.group(3) unit.find('characterisation/type').text = 'Other' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) f_elm2.text = 'All' left = start + len(Y) + 6 right = end - 1 append_unit(root, 'Resource', [('Status', 'Possessed'), ('Quantity', N), ('Correctness', 'True'), ('Kind', R)], left, right) continue elif Get2RegEx.search( event) != None: #<Y> gets <N1> <R1>, <N2> <R2>. 
mo = Get2RegEx.search(event) Y = mo.group(1) N1 = mo.group(2) R1 = mo.group(3) N2 = mo.group(2) R2 = mo.group(3) unit.find('characterisation/type').text = 'Other' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) f_elm2.text = 'All' left1 = start + len(Y) + 6 right1 = left1 + len(N1) + 1 + len(R1) append_unit(root, 'Resource', [('Status', 'Possessed'), ('Quantity', N1), ('Correctness', 'True'), ('Kind', R1)], left1, right1) left2 = right1 + 2 right2 = left2 + len(N2) + 1 + len(R2) append_unit(root, 'Resource', [('Status', 'Possessed'), ('Quantity', N2), ('Correctness', 'True'), ('Kind', R2)], left2, right2) continue elif MonopolyRegEx.search(event) != None: #<X> monopolized <R>. mo = MonopolyRegEx.search(event) X = mo.group(1) R = mo.group(2) unit.find('characterisation/type').text = 'Other' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) f_elm2.text = 'All' right = end - 1 left = right - len(R) append_unit( root, 'Resource', [('Status', 'Possessed'), ('Quantity', '?'), ('Correctness', 'True'), ('Kind', R)], left, right, ) continue else: unit.find('characterisation/type').text = 'Other' feats = unit.find('characterisation/featureSet') f_elm1 = ET.SubElement(feats, 'feature', {'name': 'Surface_act'}) f_elm1.text = 'Assertion' f_elm2 = ET.SubElement(feats, 'feature', {'name': 'Addressee'}) f_elm2.text = 'All' continue return root
def add_units_annotations(tree, text):
    """
    Add units annotations for non-linguistic events.

    Every unit of ``tree`` whose type is 'NonplayerSegment' is re-typed
    from the text it covers ('Offer', 'Accept', 'Refusal' or 'Other') and
    receives a 'Surface_act' and an 'Addressee' feature. For every
    "<quantity> <resource>" mention found in that text, a 'Resource' unit
    is added through ``append_unit``.

    Parameters
    ----------
    tree : XML tree extracted from the .aa file to modify
    text : string
        raw text extracted from the .ac file

    Returns
    -------
    root : the same XML tree (modified in place) with units annotations
        for non-linguistic events
    """
    root = tree

    # One "<N> <R>" mention: two capture groups (quantity, kind).
    resource = r'(\d+) (clay|ore|sheep|wheat|wood)'

    def resource_list(count):
        # "<N1> <R1>, <N2> <R2>, ..." with `count` mentions.
        return ', '.join([resource] * count)

    # (given, received) counts of the supported offer/trade messages, in the
    # order in which they are tried.
    shapes = [(1, 1), (1, 2), (2, 1), (1, 3), (2, 2),
              (3, 1), (1, 4), (2, 3), (3, 2), (4, 1)]

    # <X> made an offer to trade <N1> <R1>[, ...] for <N2> <R2>[, ...].
    offer_patterns = [
        (re.compile(r'(.+) made an offer to trade ' + resource_list(i)
                    + ' for ' + resource_list(j)
                    + r'( from the bank or a port)?\.'), i, j)
        for i, j in shapes]
    # <X> traded <N1> <R1>[, ...] for <N2> <R2>[, ...] from <Y>.
    trade_patterns = [
        (re.compile(r'(.+) traded ' + resource_list(i)
                    + ' for ' + resource_list(j) + r' from (.+)\.'), i, j)
        for i, j in shapes]

    # <Y> rejected trade offer.
    reject_regex = re.compile(r'(.+) rejected trade offer\.')
    # <Y> gets <N> <R>.  /  <Y> gets <N1> <R1>, <N2> <R2>.
    # Original author's note: one dice roll never yields more than two
    # different kinds of resources, so these two shapes are enough.
    get_regex = re.compile(r'(.+) gets ' + resource_list(1) + r'\.')
    get2_regex = re.compile(r'(.+) gets ' + resource_list(2) + r'\.')
    # <X> monopolized <R>.
    monopoly_regex = re.compile(
        r'(.+) monopolized (clay|ore|sheep|wheat|wood)\.')

    def first_match(patterns, event):
        # Return (match, given, received) for the first pattern found in
        # `event`, or (None, 0, 0) when none applies.
        for regex, given, received in patterns:
            found = regex.search(event)
            if found is not None:
                return found, given, received
        return None, 0, 0

    def characterise(unit, unit_type, addressee):
        # Set the unit type and append its Surface_act/Addressee features.
        unit.find('characterisation/type').text = unit_type
        feats = unit.find('characterisation/featureSet')
        surface_act = ET.SubElement(feats, 'feature',
                                    {'name': 'Surface_act'})
        surface_act.text = 'Assertion'
        addressee_elm = ET.SubElement(feats, 'feature',
                                      {'name': 'Addressee'})
        addressee_elm.text = addressee

    def add_resources(mo, first, last, status, start):
        # Add one 'Resource' unit per "<N> <R>" mention number first..last.
        # Mention k is captured by groups 2k (quantity) and 2k+1 (kind); its
        # span in `text` is taken from the match itself instead of being
        # recomputed from hard-coded literal lengths.
        for index in range(first, last + 1):
            append_unit(root, 'Resource',
                        [('Status', status),
                         ('Quantity', mo.group(2 * index)),
                         ('Correctness', 'True'),
                         ('Kind', mo.group(2 * index + 1))],
                        start + mo.start(2 * index),
                        start + mo.end(2 * index + 1))

    # Name of the player who made the most recent offer ('' if none yet);
    # used as addressee of refusals and of "You can't make that trade.".
    trader = ''

    for unit in root:
        if unit.findtext('characterisation/type') != 'NonplayerSegment':
            continue
        start = int(unit.find('positioning/start/singlePosition').get('index'))
        end = int(unit.find('positioning/end/singlePosition').get('index'))
        event = text[start:end]

        mo, given, received = first_match(offer_patterns, event)
        if mo is not None:
            characterise(unit, 'Offer', '?')
            add_resources(mo, 1, given, 'Givable', start)
            add_resources(mo, given + 1, given + received,
                          'Receivable', start)
            trader = mo.group(1)
            continue

        mo, given, received = first_match(trade_patterns, event)
        if mo is not None:
            # <Y> is the last group, right after the 2 * (given + received)
            # quantity/kind groups.
            partner = mo.group(2 * (given + received + 1))
            if partner == 'the bank' or partner == 'a port':
                partner = 'All'
            characterise(unit, 'Accept', partner)
            add_resources(mo, 1, given, '?', start)
            add_resources(mo, given + 1, given + received,
                          'Possessed', start)
            continue

        if reject_regex.search(event) is not None:
            characterise(unit, 'Refusal', trader or 'All')
            continue

        if event == "You can't make that trade.":
            characterise(unit, 'Other', trader or 'All')
            continue

        mo = get_regex.search(event)
        if mo is not None:
            characterise(unit, 'Other', 'All')
            add_resources(mo, 1, 1, 'Possessed', start)
            continue

        mo = get2_regex.search(event)
        if mo is not None:
            characterise(unit, 'Other', 'All')
            # The second mention comes from groups 4 and 5 (the previous code
            # read groups 2 and 3 again and duplicated the first mention).
            add_resources(mo, 1, 2, 'Possessed', start)
            continue

        mo = monopoly_regex.search(event)
        if mo is not None:
            characterise(unit, 'Other', 'All')
            append_unit(root, 'Resource',
                        [('Status', 'Possessed'),
                         ('Quantity', '?'),
                         ('Correctness', 'True'),
                         ('Kind', mo.group(2))],
                        start + mo.start(2),
                        start + mo.end(2))
            continue

        # Any other server message.
        characterise(unit, 'Other', 'All')

    return root