def resolve_coreferences(self, all_tokenized_sentences, all_tags, all_lemmas,
                         all_heads, all_deprels, all_entity_tags, language):
    """Compute per-sentence coreference info for an analysed document.

    All ``all_*`` arguments are lists indexed in parallel with
    ``all_tokenized_sentences``. The sentences are wrapped in an
    ``NLPDocument``, coreferences are computed with the worker for
    ``language``, and the resulting spans of every sentence are converted with
    ``nlp_utils.construct_coreference_info_from_spans``.

    Returns a list with one coreference-info object per sentence.
    """
    worker = self.get_worker(language)

    sentences = []
    for index, words in enumerate(all_tokenized_sentences):
        sentence = NLPSentence()
        sentence['words'] = words
        sentence['tags'] = all_tags[index]
        sentence['lemmas'] = all_lemmas[index]
        # Heads are stored exactly as received (no 1-based -> 0-based shift).
        sentence['heads'] = all_heads[index]
        sentence['dependency_relations'] = all_deprels[index]
        # The original note says entity tags are not really used yet (they
        # would have to be coded as spans); they are stored regardless.
        sentence['entity_tags'] = all_entity_tags[index]
        sentences.append(sentence)

    document = NLPDocument(sentences)
    document.compute_coreferences(worker)

    # Convert from spans to coref info.
    python_major = sys.version_info[0]
    all_coref_info = []
    for sentence in document['sentences']:
        spans = []
        for start, end, name in sentence['coreference_spans']:
            if python_major == 2:
                span = Span(start, end, name)
            if python_major == 3:
                # On Python 3 the name is decoded from UTF-8 bytes.
                span = Span(start, end, name.decode(encoding='UTF-8'))
            spans.append(span)
        all_coref_info.append(
            nlp_utils.construct_coreference_info_from_spans(
                spans, len(sentence['words'])))
    return all_coref_info
def remove_entites(train_insts: List[Instance], config: Config) -> set:
    """Relabel a random subset of the training entities as ``config.O``.

    Every entity span found in the ``output`` label sequences (``B-`` ...
    ``E-`` pairs and ``S-`` singletons) is collected, the spans are shuffled,
    and the first ``round(n * (1 - config.entity_keep_ratio))`` of them have
    their labels overwritten in place with ``config.O``.

    :param train_insts: instances whose ``output`` lists are modified in place
    :param config: provides ``entity_keep_ratio`` and the outside label ``O``
    :return: set of ``"<type> <word> <word> ..."`` strings, one per removed
        entity
    """
    all_spans = []
    for inst in train_insts:
        start = -1
        for i, label in enumerate(inst.output):
            if label.startswith("B-"):
                start = i
            if label.startswith("E-"):
                all_spans.append(Span(start, i, label[2:], inst_id=inst.id))
            if label.startswith("S-"):
                all_spans.append(Span(i, i, label[2:], inst_id=inst.id))
    shuffle(all_spans)

    num_entity_removed = round(
        len(all_spans) * (1 - config.entity_keep_ratio))
    # Clamp so an out-of-range keep ratio can neither index past the list nor
    # (being negative) select spans from the tail of the slice below.
    num_entity_removed = max(0, min(num_entity_removed, len(all_spans)))

    span_set = set()
    for span in all_spans[:num_entity_removed]:
        # NOTE(review): assumes inst.id equals the instance's position in
        # train_insts -- confirm against the code that assigns ids.
        inst = train_insts[span.inst_id]
        for j in range(span.left, span.right + 1):
            inst.output[j] = config.O
        words = inst.input.words[span.left:(span.right + 1)]
        span_set.add(span.type + " " + ' '.join(words))
    return span_set
def test_equality(self):
    """Spans with identical bounds are ``==``; a different end makes them ``!=``."""
    start = now
    reference = Span(start, start + timedelta(minutes=60))
    twin = Span(start, start + timedelta(minutes=60))
    shorter = Span(start, start + timedelta(minutes=59))

    self.assertTrue(reference == twin)
    self.assertTrue(reference != shorter)
def try_align_as_single_concept(self,cur_var,cur_concept,amr,alignment,tokens,unmatched_vars,triples):
    '''
    Try to align one concept to a single token of the sentence.

    The (index, token) pairs in `tokens` are scanned in order and the first
    token accepted by one of the matchers (exact match, fuzzy match, negated
    form, WordNet lemma match, special form) yields a one-token Span that is
    appended to alignment[cur_var].

    Returns (update, span): update is False and span is None when no token
    matched; in that case a message is written to stderr.
    '''
    span = None
    update = True
    rule_type = 'SingleConcept'
    tmp = cur_concept.rsplit('-',1)
    sense = None
    # A trailing "-<digits>" is split off as the sense, except for string literals.
    if not isinstance(cur_var,StrLiteral) and len(tmp) == 2 and re.match('[0-9]+',tmp[1]):
        sense = tmp[1]
        cur_concept = tmp[0].lower()

    for idx,token in tokens:
        t = token.lower()
        cur_concept = cur_concept.lower()
        if t == cur_concept:  # exact match
            span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE[rule_type],[t])
            break
        elif self.fuzzy_match(t,cur_concept,Aligner.fuzzy_max_len):
            span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE[rule_type],[t])
            break
        elif self.is_neg_form(t,cur_concept):
            #print cur_concept
            neg_var = None
            span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE['NegForm'],[t])
            # The same span is also attached to the polarity variable, found
            # either directly on cur_var or below its 'possible' child.
            if 'polarity' in amr[cur_var]:
                neg_var = amr[cur_var]['polarity'][0]
                self.remove_aligned_concepts(cur_var,'polarity',neg_var,unmatched_vars,triples)
                alignment[neg_var].append(span)
            elif 'possible' in amr[cur_var]:
                posb_var = amr[cur_var]['possible'][0]
                neg_var = amr[posb_var]['polarity'][0]
                alignment[posb_var].append(span)
                alignment[neg_var].append(span)
            else:
                pass
            break
        elif self.WN_lemma_match(t,cur_concept,sense):
            span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE[rule_type],[t])
            break
        elif self.is_spec_form(t,cur_concept):
            span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE[rule_type],[t])
            break
        #elif len(cur_concept) > 1 and self.is_plural(t,cur_concept):
        #    span = Span(idx,idx+1,Aligner.ENTITY_TAG_TABLE[rule_type],[t])
        #    break
        else:
            pass

    if span:
        alignment[cur_var].append(span)
    else:
        # Python 2 print-to-stderr syntax.
        print >> sys.stderr, 'Variable/Concept %s/%s cannot be aligned'%(cur_var,cur_concept)
        #alignment[matched_variable].append(matched_variable)
        update = False
    return update,span
def test_intersects(self):
    """Exercise ``intersects``, ``lintersects`` and ``rintersects`` on two overlapping spans."""
    # ``earlier`` covers minutes 0-60, ``later`` covers minutes 30-90.
    earlier = Span(now, now + timedelta(minutes=60))
    later = Span(now + timedelta(minutes=30), now + timedelta(minutes=90))

    # Seen from the earlier span.
    self.assertTrue(earlier.lintersects(later))
    self.assertFalse(earlier.rintersects(later))
    self.assertTrue(earlier.intersects(later))

    # Seen from the later span.
    self.assertTrue(later.rintersects(earlier))
    self.assertFalse(later.lintersects(earlier))
    self.assertTrue(later.intersects(earlier))
def test_touches(self):
    """Exercise ``touches``, ``ltouches`` and ``rtouches`` on two adjacent spans."""
    # ``first`` ends exactly where ``second`` begins (minute 60).
    boundary = now + timedelta(minutes=60)
    first = Span(now, boundary)
    second = Span(now + timedelta(minutes=60), now + timedelta(minutes=120))

    # Seen from the first span.
    self.assertTrue(first.ltouches(second))
    self.assertFalse(first.rtouches(second))
    self.assertTrue(first.touches(second))

    # Seen from the second span.
    self.assertFalse(second.ltouches(first))
    self.assertTrue(second.rtouches(first))
    self.assertTrue(second.touches(first))
def source(source_file: SourceFile, args: Args) -> List[Instruction]:
    """Parse a whole source file into a list of instructions.

    Each non-empty line is parsed with ``_line``; parse errors are collected
    rather than raised immediately. Afterwards loop brackets are checked for
    balance. If any error was collected a single ``MultiParseError`` carrying
    all of them is raised. When ``args.optimize`` is set the instruction list
    is handed to ``optimize.optimize`` before being returned.
    """
    instructions: List[Instruction] = []
    problems: List[ParseError] = []
    whole_file = source_file.span()

    if args.assertions:
        # An assertion at the start makes the property tests happy
        instructions.append(StartTapeAssertion(Span(source_file, 0, 0)))

    for line_span in _split_on(whole_file, {'\n'}):
        try:
            instructions.extend(_line(line_span, args))
        except ParseError as err:
            problems.append(err)

    # Bracket matching: keep a stack of loop openers still waiting to close.
    open_loops = []
    for instr in instructions:
        change = instr.loop_level_change()
        if change == 1:
            open_loops.append(instr)
        elif change == -1:
            if open_loops:
                open_loops.pop()
            else:
                problems.append(
                    SingleParseError('Unmatched "]"', instr.span()))
        else:
            assert change == 0, 'Invalid value ' + str(
                change) + ' for loop level change'

    problems.extend(
        SingleParseError('Unmatched "["', instr.span())
        for instr in open_loops)

    if problems:
        raise MultiParseError(problems)
    if args.optimize:
        optimize.optimize(instructions)
    return instructions
def _line(span: Span, args: Args) -> List[Instruction]:
    """Parse one source line into instructions.

    The line is stripped first; an empty line yields no instructions. With
    ``args.assertions`` enabled, a line starting with ``=`` becomes a tape
    assertion and a line starting with ``$`` becomes a test input; such a
    line must not contain any code, otherwise ``SingleParseError`` is raised.
    Every other line yields the operations found by ``_code``.
    """
    stripped = span.strip()
    text = stripped.text()
    if not text:
        return []

    code = _code(stripped)
    marker = text[0]
    if not (args.assertions and marker in ('=', '$')):
        return code

    if code:
        raise SingleParseError('Brainfuck code in assertion line', stripped)
    if marker == '=':
        return [_tape_assertion(stripped)]
    if marker == '$':
        return [_test_input(stripped)]
    assert False, 'unreachable'
def _split_on(span: Span, split: Set[str]) -> List[Span]:
    """Split ``span`` at every character contained in ``split``.

    Separator characters are dropped and empty pieces are skipped, so
    consecutive, leading or trailing separators produce no empty sub-spans.

    :param span: span to cut; must support ``text()`` and slicing
    :param split: set of single-character separators; may be empty, in which
        case the whole span is returned as the only piece (if non-empty)
    :return: the non-empty sub-spans in source order
    """
    text = span.text()
    pieces: List[Span] = []
    start = 0
    for i, c in enumerate(text):
        if c in split:
            if i > start:
                pieces.append(span[start:i])
            start = i + 1
    # Flush the trailing piece explicitly instead of appending a sentinel
    # separator, which required ``split`` to be non-empty.
    if len(text) > start:
        pieces.append(span[start:len(text)])
    return pieces
def test_encompasses(self):
    """Exercise ``encompasses`` and ``encompassed_by`` on a span nested in another."""
    # ``outer`` covers minutes 0-60, ``inner`` covers minutes 15-45.
    outer = Span(now, now + timedelta(minutes=60))
    inner = Span(now + timedelta(minutes=15), now + timedelta(minutes=45))

    self.assertTrue(outer.encompasses(inner))
    self.assertFalse(inner.encompasses(outer))

    self.assertTrue(inner.encompassed_by(outer))
    self.assertFalse(outer.encompassed_by(inner))
def get_spans(self, data):
    """Append one ``Span`` to ``self.spans`` per entry of the parallel lists in ``data``.

    The lists read are ``list_of_marks``, ``list_of_span_support_numbers``,
    ``list_of_length`` and ``list_of_lamps``; the lamp value is passed to
    ``Span`` as a plain boolean.
    """
    # Build the list of span objects.
    rows = zip(data.list_of_marks, data.list_of_span_support_numbers,
               data.list_of_length, data.list_of_lamps)
    for mark, support_numbers, length, lamp in rows:
        has_lamp = bool(lamp)
        self.spans.append(Span(mark, support_numbers, length, has_lamp))
def findFreeUsers(emails, reqStart, reqEnd):
    """Return the e-mails whose calendars have no conflicting event.

    For every address the events returned by ``getCalEvents`` are checked
    with ``Span.isConflict``; an address is kept only when no check reports
    a conflict. The result preserves the order of ``emails``.
    """
    free_users = []
    for email in emails:
        # NOTE(review): ``beg``/``done`` and ``eventStart``/``eventEnd`` are
        # not defined in this function, so they must exist at module scope.
        # The per-event values are never read from ``event`` -- confirm
        # whether the event's own start/end were intended here.
        events = getCalEvents(email, beg, done)
        conflicts = [
            Span.isConflict(eventStart, eventEnd, reqStart, reqEnd)
            for event in events
        ]
        if not any(conflicts):
            free_users.append(email)
    return free_users
def _extract_spans(labels):
    """Return the set of entity spans encoded by one B-/E-/S- label sequence."""
    spans = set()
    start = -1
    for i, label in enumerate(labels):
        if label.startswith("B-"):
            start = i
        if label.startswith("E-"):
            spans.add(Span(start, i, label[2:]))
        if label.startswith("S-"):
            spans.add(Span(i, i, label[2:]))
    return spans


def evaluate(insts):
    """Compute span-level precision, recall and F-score over ``insts``.

    Gold spans come from ``inst.output`` and predicted spans from
    ``inst.prediction``. A predicted span counts as correct when an equal
    span is present in the gold set of the same instance.

    Each sequence is scanned with its own start marker, so a predicted ``E-``
    label that has no preceding ``B-`` can no longer inherit the start index
    left over from the gold sequence and be counted as a match.

    :param insts: iterable of instances with ``output`` and ``prediction``
        label sequences
    :return: ``[precision, recall, fscore]`` as percentages; a metric whose
        denominator is zero is reported as ``0``
    """
    p = 0
    total_entity = 0
    total_predict = 0
    for inst in insts:
        output_spans = _extract_spans(inst.output)
        predict_spans = _extract_spans(inst.prediction)
        total_entity += len(output_spans)
        total_predict += len(predict_spans)
        p += len(predict_spans.intersection(output_spans))

    precision = p * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = p * 1.0 / total_entity * 100 if total_entity != 0 else 0
    if precision != 0 or recall != 0:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0
    return [precision, recall, fscore]
def _character_literal(span: Span) -> int:
    """Return the cell value denoted by a character literal.

    A literal is either a backslash followed by one character that is a key
    of ``escapes``, or exactly one character with a code point below 256.

    :param span: span whose text is the literal (without any prefix)
    :return: the character's code point
    :raises SingleParseError: for an unknown or malformed escape sequence,
        and for a literal that is empty, longer than one character, or
        outside the 0-255 range
    """
    text = span.text()
    if text.startswith('\\'):
        if len(text) != 2 or text[1] not in escapes:
            raise SingleParseError('Invalid escape sequence: "' + text + '"',
                                   span)
        return ord(escapes[text[1]])
    # ``!= 1`` (rather than ``> 1``) also rejects an empty literal, which
    # previously reached ``ord('')`` and surfaced as a TypeError.
    if len(text) != 1 or ord(text) >= 256:
        raise SingleParseError('Invalid character literal: "' + text + '"',
                               span)
    return ord(text)
def construct_coreference_spans_from_text(span_lines):
    """Build coreference spans from a bracketed per-token column.

    ``span_lines[i]`` describes token ``i`` as ``|``-separated fields. A field
    ``(name)`` is a one-token span, ``(name`` opens a span at token ``i`` and
    ``name)`` closes the most recently opened, still unclosed span with that
    name. Fields without a bracket at either end, and empty fields, are
    ignored.

    :param span_lines: sequence of strings, one per token
    :return: list of ``Span`` objects in the order they were opened
    :raises AssertionError: if a closing field has no matching open span, or
        a span is still open at the end of the input
    """
    left_bracket = '('
    right_bracket = ')'
    spans = []
    # enumerate() replaces the Python-2-only xrange(len(...)) index loop.
    for i, line in enumerate(span_lines):
        for field in line.split('|'):
            if not field:
                # An empty field carries no bracket; indexing it would raise.
                continue
            opens = field[0] == left_bracket
            closes = field[-1] == right_bracket
            if opens and closes:
                spans.append(Span(i, i, field[1:-1]))
            elif opens:
                # End is unknown yet; -1 marks the span as still open.
                spans.append(Span(i, -1, field[1:]))
            elif closes:
                name = field[:-1]
                selected_span = None
                for span in reversed(spans):
                    if span.name == name and span.end == -1:
                        selected_span = span
                        break
                assert selected_span is not None
                selected_span.end = i
    for span in spans:
        assert span.end != -1
    return spans
def postProcessVerbList(amr, sent, alignment):
    """Add alignments for tokens listed in ``VERB_LIST``.

    Each whitespace-separated token of ``sent`` is lower-cased and looked up
    in ``VERB_LIST``. For every subgraph registered for the token, the match
    returned by ``amr.get_match`` is aligned to the token's one-word span
    (1-based start, exclusive end) unless the matched variable already has an
    entry in ``alignment``. ``alignment`` is updated in place.
    """
    for position, raw_token in enumerate(sent.split(), start=1):
        token = raw_token.lower()
        if token not in VERB_LIST:
            continue
        # Several subgraphs may be mapped to the same token.
        for subgraph in VERB_LIST[token]:
            # get_match yields the first match only.
            variable, sub_match = amr.get_match(subgraph)
            if not sub_match or variable in alignment:
                continue
            tag = ETag('+'.join(sub_match))
            alignment[variable].append(
                Span(position, position + 1, token, tag))
def get_all_matches(self, text):
    """
    Collect the spans of every match of ``self.filters`` in ``text``.

    The raw spans of all patterns are passed through ``get_longest_spans``,
    whose result is returned.

    :param text: string searched with each pattern in ``self.filters``
    :return: the value returned by ``get_longest_spans``
    """
    spans_of_matches = []
    for pattern in self.filters:
        spans_of_matches.extend(
            Span(*found.span()) for found in re.finditer(pattern, text))
    return get_longest_spans(spans_of_matches)
def _matcher(span: Span) -> Matcher:
    """Build the matcher described by one assertion cell.

    Recognised forms, tried in this order: ``!cell`` (inverse of the nested
    cell), ``*`` (wildcard), ``@c`` (character literal), a decimal number in
    the range 0-255, and an identifier (variable). Anything else, or a number
    outside the range, raises ``SingleParseError``.
    """
    text = span.text()

    if text[:1] == '!':
        return InverseMatcher(_matcher(span[1:]))
    if text == '*':
        return WildcardMatcher()
    if text[:1] == '@':
        return LiteralMatcher(text, _character_literal(span[1:]))

    if re.match('^[0-9]+$', text):
        value = int(text)
        if not 0 <= value < 256:
            raise SingleParseError(
                'Invalid cell value ' + str(value) +
                ', must be in range 0-255', span)
        return LiteralMatcher(text, value)

    if re.match('^[a-zA-Z_][a-zA-Z_0-9]*$', text):
        return VariableMatcher(text)

    raise SingleParseError('Invalid assertion cell: "' + text + '"', span)
def readJAMRAlignment(amr, JAMR_alignment):
    """Convert a JAMR alignment string into span alignments.

    ``JAMR_alignment`` is a whitespace-separated list of
    ``<start>-<end>|<pos_id>[+<pos_id>...]`` items; items starting with ``*``
    are skipped. Token offsets are shifted by one before being stored.

    :param amr: graph object providing ``get_variable``, ``node_to_concepts``
        and ``find_rel``
    :param JAMR_alignment: the alignment string
    :return: ``(alignment, s2c_alignment)`` where ``alignment`` maps each
        variable to its list of spans and ``s2c_alignment`` maps each
        ``(start, end)`` pair to the variables aligned to it
    :raises Exception: if a position id of a multi-node fragment cannot be
        resolved to a variable
    """
    alignment = defaultdict(list)
    s2c_alignment = defaultdict(list)  # span to concept mapping
    for one_alignment in JAMR_alignment.split():
        if one_alignment.startswith('*'):
            continue
        offset, fragment = one_alignment.split('|')
        start = int(offset.split('-')[0]) + 1
        end = int(offset.split('-')[1]) + 1
        posIDs = fragment.split('+')
        if len(posIDs) == 1:
            variable = amr.get_variable(posIDs[0])
            if variable in amr.node_to_concepts:
                concept = amr.node_to_concepts[variable]
                span = Span(start, end, [concept], concept)
            else:  # constant variable
                concept = variable
                span = Span(start, end, [concept], ConstTag(concept))
            alignment[variable].append(span)
            s2c_alignment[(start, end)].append(variable)
        else:
            tokens = []
            tags = []
            level = 0
            all_variables = []
            succ_variables = []
            # Position ids are consumed from the last one backwards.
            while posIDs:
                pid = posIDs.pop()
                pre_level = level
                level = len(pid.split('.'))
                variable = amr.get_variable(pid)
                if variable is None:
                    # Report the id that failed to resolve; ``variable`` is
                    # always None at this point.
                    raise Exception(
                        'Cannot find variable position id of %s' % (pid))
                if pre_level > level:
                    # Moved up a level: tag this node with the relations to
                    # the successors collected so far.
                    concept = amr.node_to_concepts[variable]
                    concept_tag = concept
                    succ_tags = []
                    for pre_var in succ_variables:  # revisit all the successors
                        rel = amr.find_rel(variable, pre_var)
                        cpt = amr.node_to_concepts[pre_var]
                        if rel:
                            succ_tags.append(rel[0] + '@' + cpt)
                    if succ_tags:
                        tags.insert(0, '='.join(succ_tags))
                    tags.insert(0, concept_tag)
                    succ_variables = [variable]
                    all_variables.append(variable)
                else:
                    if variable in amr.node_to_concepts:
                        concept = amr.node_to_concepts[variable]
                        tokens.insert(0, concept)
                        succ_variables.append(variable)
                        all_variables.append(variable)
                    else:
                        if variable == '-':  # negation
                            tags.insert(0, variable)
                            tokens.insert(0, variable)
            span = Span(start, end, tokens, ETag('+'.join(tags)))
            for v in all_variables:
                alignment[v].append(span)
            s2c_alignment[(start, end)].extend(all_variables)
    return alignment, s2c_alignment
def __init__(self):
    """Build a fixed 8-node network, transmit five signals and plot OSNR.

    Five of the eight links (l1, l2, l5, l6, l7) receive one fibre span each.
    Signals are sent from node 1 to node 8 along the route l1-l2-l4-l5-l6-l8,
    the OSNR of signal 83 is monitored on l1, l2, l5 and l6, and the values
    are handed to ``Graphic.plot_osnr_increment`` together with the span
    lengths.
    """
    # Create a Network object
    self.net = Network()

    nodes = []
    # Create the nodes of the network
    n1 = Node(1, amplifier=Amplifier(target_gain=9))
    nodes.append(n1)
    n2 = Node(2, amplifier=Amplifier(target_gain=9))
    nodes.append(n2)
    n3 = Node(3)
    nodes.append(n3)
    n4 = Node(4, amplifier=Amplifier(target_gain=18))
    nodes.append(n4)
    n5 = Node(5, amplifier=Amplifier(target_gain=18))
    nodes.append(n5)
    n6 = Node(6, amplifier=Amplifier(target_gain=9))
    nodes.append(n6)
    n7 = Node(7)
    nodes.append(n7)
    n8 = Node(8)
    nodes.append(n8)
    for node in nodes:
        self.net.add_node(node)

    links = []
    # Create links of the network
    l1 = Link(n1, n2)
    links.append(l1)
    l2 = Link(n2, n3)
    links.append(l2)
    l3 = Link(n3, n4)
    links.append(l3)
    l4 = Link(n3, n5)
    links.append(l4)
    l5 = Link(n5, n6)
    links.append(l5)
    l6 = Link(n6, n7)
    links.append(l6)
    l7 = Link(n4, n7)
    links.append(l7)
    l8 = Link(n7, n8)
    links.append(l8)
    for link in links:
        self.net.add_link(link)

    # Create spans of the links
    fibre_attenuation = 0.2
    span_link1 = Span(length=45, fibre_attenuation=fibre_attenuation)
    span_link2 = Span(length=70, fibre_attenuation=fibre_attenuation)
    span_link5 = Span(length=45, fibre_attenuation=fibre_attenuation)
    span_link6 = Span(length=20, fibre_attenuation=fibre_attenuation)
    span_link7 = Span(length=25, fibre_attenuation=fibre_attenuation)

    # Add spans to the links; each span is paired with its own amplifier.
    self.net.add_span_to_link(
        l1, span_link1,
        Amplifier(target_gain=9, wavelength_dependent_gain_id='wdg1'))
    self.net.add_span_to_link(
        l2, span_link2,
        Amplifier(target_gain=14, wavelength_dependent_gain_id='wdg1'))
    self.net.add_span_to_link(
        l5, span_link5,
        Amplifier(target_gain=9, wavelength_dependent_gain_id='wdg1'))
    self.net.add_span_to_link(
        l6, span_link6,
        Amplifier(target_gain=4, wavelength_dependent_gain_id='wdg1'))
    self.net.add_span_to_link(
        l7, span_link7,
        Amplifier(target_gain=5, wavelength_dependent_gain_id='wdg1'))

    self.net.build()

    # Route as (node, outgoing link) pairs; the last node has no link.
    route = [(n1, l1), (n2, l2), (n3, l4), (n5, l5), (n6, l6), (n7, l8),
             (n8, None)]
    # OpticalSignal index starts from 1
    signals = [
        OpticalSignal(83),
        OpticalSignal(81),
        OpticalSignal(82),
        OpticalSignal(84),
        OpticalSignal(85)
    ]
    self.net.transmit(n1, n8, signals, route)

    # Only referenced by the commented-out prints below.
    channel = 1529.2 + 83 * 0.4
    osnr_values = []
    spans_length = []
    # First data point, paired with length 0 below.
    # NOTE(review): presumably the launch OSNR derived from power and noise
    # levels in dB -- confirm the meaning of the constants.
    osnr_values.append(
        abs_to_db((10**(-2.0 / 10.0) * 0.8 - (10**(-39.0 / 10.0) * 4)) /
                  (10**(-39.0 / 10.0))))
    # print(abs_to_db((10**(-2.0/10.0)*0.8-(10**(-39.0/10.0)*4))/(10**(-39.0/10.0))))
    spans_length.append(0)

    osnr = self.net.monitor(l1, span_link1, 83, links)
    # print("OSNR of channel %s (nm) is %s dB at span %s." % (
    #     str(channel), str(osnr), span_link1.span_id))
    osnr_values.append(osnr)
    spans_length.append(span_link1.length)

    osnr = self.net.monitor(l2, span_link2, 83, links)
    # print("OSNR of channel %s (nm) is %s dB at span %s." % (
    #     str(channel), str(osnr), span_link2.span_id))
    osnr_values.append(osnr)
    spans_length.append(span_link2.length)

    osnr = self.net.monitor(l5, span_link5, 83, links)
    # print("OSNR of channel %s (nm) is %s dB at span %s." % (
    #     str(channel), str(osnr), span_link5.span_id))
    osnr_values.append(osnr)
    spans_length.append(span_link5.length)

    osnr = self.net.monitor(l6, span_link6, 83, links)
    # print("OSNR of channel %s (nm) is %s dB at span %s." % (
    #     str(channel), str(osnr), span_link6.span_id))
    osnr_values.append(osnr)
    spans_length.append(span_link6.length)

    graphics = Graphic()
    graphics.plot_osnr_increment(osnr_values, spans_length)
def __init__(self, full_text, start, end, type):
    """Initialise the base ``Span`` and remember ``type`` in ``self._type``."""
    Span.__init__(self, full_text, start, end)
    self._type = type
def __init__(self, full_text, token_type, start, end):
    """Initialise the base ``Span`` and remember ``token_type`` in ``self._token_type``."""
    # Note the argument order: token_type comes before start/end here, while
    # the base initialiser receives (full_text, start, end).
    Span.__init__(self, full_text, start, end)
    self._token_type = token_type
def readJAMRAlignment(amr,JAMR_alignment):
    """Convert a JAMR alignment string into span alignments.

    ``JAMR_alignment`` is a whitespace-separated list of
    ``<start>-<end>|<pos_id>[+<pos_id>...]`` items; items starting with ``*``
    are skipped. Token offsets are shifted by one before being stored.

    :param amr: graph object providing ``get_variable``, ``node_to_concepts``
        and ``find_rel``
    :param JAMR_alignment: the alignment string
    :return: ``(alignment, s2c_alignment)`` where ``alignment`` maps each
        variable to its list of spans and ``s2c_alignment`` maps each
        ``(start, end)`` pair to the variables aligned to it
    :raises Exception: if a position id of a multi-node fragment cannot be
        resolved to a variable
    """
    alignment = defaultdict(list)
    s2c_alignment = defaultdict(list)
    for one_alignment in JAMR_alignment.split():
        if one_alignment.startswith('*'):
            continue
        offset, fragment = one_alignment.split('|')
        start = int(offset.split('-')[0])+1
        end = int(offset.split('-')[1])+1
        posIDs = fragment.split('+')
        if len(posIDs) == 1:
            variable = amr.get_variable(posIDs[0])
            if variable in amr.node_to_concepts:
                concept = amr.node_to_concepts[variable]
                span = Span(start,end,[concept],concept)
            else:  # constant variable
                concept = variable
                span = Span(start,end,[concept],ConstTag(concept))
            alignment[variable].append(span)
            s2c_alignment[(start,end)].append(variable)
        else:
            tokens = []
            tags = []
            level = 0
            all_variables = []
            pre_variable = None
            variable = None
            # Position ids are consumed from the last one backwards.
            while posIDs:
                pid = posIDs.pop()
                pre_level = level
                level = len(pid.split('.'))
                pre_variable = variable
                variable = amr.get_variable(pid)
                if variable is None:
                    # Fail loudly instead of dropping into the debugger,
                    # which blocks any non-interactive run.
                    raise Exception(
                        'Cannot find variable position id of %s' % (pid))
                if pre_level > level:
                    # Moved up a level: tag the node with its relation to the
                    # previously visited variable when that one is a concept.
                    concept = amr.node_to_concepts[variable]
                    rel = amr.find_rel(variable,pre_variable)
                    concept_tag = concept
                    if pre_variable in amr.node_to_concepts:
                        concept_tag = concept+'@'+rel[0]
                    tags.insert(0,concept_tag)
                    all_variables.append(variable)
                else:
                    if variable in amr.node_to_concepts:
                        concept = amr.node_to_concepts[variable]
                        tokens.insert(0,concept)
                        all_variables.append(variable)
                    else:
                        if variable == '-':  # negation
                            tags.insert(0,variable)
                            tokens.insert(0,variable)
            span = Span(start,end,tokens,ETag('+'.join(tags)))
            for v in all_variables:
                alignment[v].append(span)
            s2c_alignment[(start,end)].extend(all_variables)
    return alignment,s2c_alignment
def _code(span: Span) -> List[Instruction]:
    """Return an ``Op`` for every character of ``span`` that is in ``op_set``.

    Each ``Op`` receives the character and the one-character sub-span it came
    from; all other characters are ignored.
    """
    return [
        Op(char, span[pos:pos + 1])
        for pos, char in enumerate(span.text())
        if char in op_set
    ]
def __init__(self, full_text, start, end, indentation):
    """Initialise the base ``Span``, store ``indentation`` and tokenize eagerly."""
    Span.__init__(self, full_text, start, end)
    self._indentation = indentation
    # _tokenize() is consumed immediately so the tokens are kept as a list.
    self._tokens = list(self._tokenize())
def span(self):
    """Return a ``Span`` covering this object's contents from 0 to ``len(self._contents)``."""
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import with span.py -- confirm.
    from span import Span
    return Span(self, 0, len(self._contents))
def __init__(self,
             topo='cian',
             link_length=1,
             span_length=1,
             channels=None):
    """
    Build a network for the chosen topology and transmit signals over it.

    :param topo: topology to be used, either 'cian' or 'linear'
    :param link_length: only used when topo='linear'
    :param span_length: only used when topo='linear'
    :param channels: only used when topo='linear'; iterated to create one
        OpticalSignal per entry
    """
    # Create a Network object
    self.net = Network()

    if topo == 'cian':
        nodes = []
        # Create the nodes of the network
        self.n1 = Node(1, amplifier=Amplifier(target_gain=9))  # Tx node
        nodes.append(self.n1)
        self.n2 = Node(2, amplifier=Amplifier(target_gain=9))  # in-line node
        nodes.append(self.n2)
        self.n3 = Node(3)  # in-line node
        nodes.append(self.n3)
        self.n4 = Node(4, amplifier=Amplifier(target_gain=18))  # in-line node
        nodes.append(self.n4)
        self.n5 = Node(5, amplifier=Amplifier(target_gain=18))  # in-line node
        nodes.append(self.n5)
        self.n6 = Node(6, amplifier=Amplifier(target_gain=9))  # in-line node
        nodes.append(self.n6)
        self.n7 = Node(7)  # in-line node
        nodes.append(self.n7)
        self.n8 = Node(8)  # Rx node
        nodes.append(self.n8)
        # Add nodes to the network object
        for node in nodes:
            self.net.add_node(node)

        self.links = []
        # Create links of the network
        self.l1 = Link(self.n1, self.n2)
        self.links.append(self.l1)
        self.l2 = Link(self.n2, self.n3)
        self.links.append(self.l2)
        self.l3 = Link(self.n3, self.n4)
        self.links.append(self.l3)
        self.l4 = Link(self.n3, self.n5)
        self.links.append(self.l4)
        self.l5 = Link(self.n5, self.n6)
        self.links.append(self.l5)
        self.l6 = Link(self.n6, self.n7)
        self.links.append(self.l6)
        self.l7 = Link(self.n4, self.n7)
        self.links.append(self.l7)
        self.l8 = Link(self.n7, self.n8)
        self.links.append(self.l8)
        # Add links to the network object
        for link in self.links:
            self.net.add_link(link)

        # Create spans of the links
        fibre_attenuation = 0.22
        self.span_link1 = Span(length=45, fibre_attenuation=fibre_attenuation)
        self.span_link2 = Span(length=70, fibre_attenuation=fibre_attenuation)
        self.span_link5 = Span(length=45, fibre_attenuation=fibre_attenuation)
        self.span_link6 = Span(length=20, fibre_attenuation=fibre_attenuation)
        self.span_link7 = Span(length=25, fibre_attenuation=fibre_attenuation)

        # Add spans to the links
        # Each target gain equals span length * 0.22 (e.g. 45 * 0.22 = 9.9).
        self.net.add_span_to_link(
            self.l1, self.span_link1,
            Amplifier(target_gain=9.9, wavelength_dependent_gain_id='wdg1'))
        self.net.add_span_to_link(
            self.l2, self.span_link2,
            Amplifier(target_gain=15.4, wavelength_dependent_gain_id='wdg2'))
        self.net.add_span_to_link(
            self.l5, self.span_link5,
            Amplifier(target_gain=9.9, wavelength_dependent_gain_id='wdg2'))
        self.net.add_span_to_link(
            self.l6, self.span_link6,
            Amplifier(target_gain=4.4, wavelength_dependent_gain_id='wdg1'))
        self.net.add_span_to_link(
            self.l7, self.span_link7,
            Amplifier(target_gain=5.5, wavelength_dependent_gain_id='wdg2'))

        # Build network
        self.net.build()

        # Create a route to use for transmission
        route = [(self.n1, self.l1), (self.n2, self.l2), (self.n3, self.l4),
                 (self.n5, self.l5), (self.n6, self.l6), (self.n7, self.l8),
                 (self.n8, None)]
        # OpticalSignal index starts from 1
        # Create OpticalSignal instances to sequentially add to transmission
        signals = [
            OpticalSignal(81),
            OpticalSignal(82),
            OpticalSignal(83),
            OpticalSignal(84),
            OpticalSignal(85)
        ]
        # Invoke network function for transmission
        self.net.transmit(self.n1, self.n8, signals, route)

    if topo == 'linear':
        nodes = []
        self.n1 = Node(1, amplifier=Amplifier(target_gain=9))  # Tx node
        nodes.append(self.n1)
        self.n2 = Node(2, amplifier=Amplifier(target_gain=9))  # in-line node
        nodes.append(self.n2)
        for node in nodes:
            self.net.add_node(node)

        links = []
        self.l1 = Link(self.n1, self.n2)
        links.append(self.l1)
        for link in links:
            self.net.add_link(link)

        # NOTE(review): true division yields a float on Python 3; the loop
        # below then adds one span per started unit (2.5 -> 3 spans) --
        # confirm that rounding up is intended.
        number_of_spans = link_length / span_length
        fibre_attenuation = 0.22
        self.spans = []
        while number_of_spans > 0:
            span = Span(length=span_length,
                        fibre_attenuation=fibre_attenuation)
            # The amplifier gain is set to the span's total attenuation.
            self.net.add_span_to_link(
                self.l1, span,
                Amplifier(target_gain=span_length * fibre_attenuation,
                          wavelength_dependent_gain_id='wdg1'))
            self.spans.append(span)
            number_of_spans -= 1

        self.net.build()

        route = [(self.n1, self.l1), (self.n2, None)]
        # OpticalSignal index starts from 1
        signals = []
        # NOTE(review): channels defaults to None, which is not iterable, so
        # topo='linear' requires the caller to pass channels explicitly.
        for channel in channels:
            signals.append(OpticalSignal(channel))

        self.net.transmit(self.n1, self.n2, signals, route)
# Manual smoke test for span.py: build two 15-minute spans one month apart
# and print them through Span.printSpans.
from datetime import datetime, date, time
from span import Span

d = date(2005, 7, 14)
t = time(12, 30)
start = datetime.combine(d, t)
t = time(12, 45)
end = datetime.combine(d, t)
span1 = Span(start, end)

d = date(2005, 8, 14)
t = time(12, 30)
start = datetime.combine(d, t)
t = time(12, 45)
end = datetime.combine(d, t)
span2 = Span(start, end)

# The parenthesised single-argument form prints the same text on Python 2
# and is valid syntax on Python 3, unlike the bare print statement.
print("testing printspans")
spanlist = [span1, span2]
span1.printSpans(spanlist)
def align_single_concept(self,
                         sent,
                         tokens,
                         cur_var,
                         amr,
                         alignment,
                         unmatched_vars,
                         triples,
                         NEXT=False):
    '''
    Align the concept of cur_var to a span of the sentence.

    The concept is classified with self.concept_patterns and the matching
    rule decides how the span is found; several rules first align child
    variables by calling this method recursively. Spans are appended to
    `alignment` in place. On success the tokens covered by the span are
    removed from `tokens` and `sent` is rebuilt from the remaining tokens.

    tokens is a list of (index, word) pairs. NEXT=True forces a new
    alignment attempt even if cur_var is already present in `alignment`.

    Returns (update, sent, tokens); an already aligned variable returns
    (True, sent, tokens) unchanged.
    '''
    if cur_var in amr.node_to_concepts:
        cur_concept = amr.node_to_concepts[cur_var]
    else:
        # Variables without a concept entry are used as their own concept.
        cur_concept = cur_var

    if cur_var in alignment and not NEXT and not isinstance(
            cur_var, (StrLiteral, Quantity, Polarity)):  # already aligned
        return True, sent, tokens

    match = self.concept_patterns.match(cur_concept)
    if match:
        # The name of the matching regex group selects the rule.
        rule_type = match.lastgroup
        span = None
        update = True
        if rule_type == "NameEntity":
            NE_items = [
                v[0] for k, v in amr[cur_var].items()
                if isinstance(v[0], StrLiteral)
            ]
            # Match either the full name parts or, for parts longer than
            # three characters, their first four characters.
            nep = r'%s|%s' % (r'\s'.join(NE_items), r'\s'.join(
                n[:4] if len(n) > 3 else n for n in NE_items))
            NE_pattern = re.compile(nep, re.IGNORECASE)

            start, end = self._search_sent(NE_pattern, sent, tokens)
            assert end - start == len(NE_items)
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        NE_items)
            alignment[cur_var].append(span)
            for k, v in amr[cur_var].items():
                if isinstance(v[0], StrLiteral):
                    self.remove_aligned_concepts(cur_var, k, v[0],
                                                 unmatched_vars, triples)

        elif rule_type in ["DateEntity", "haveOrgRole91", "RateEntity"]:
            EN_items = []
            EN_spans = []
            # Align every child first; the entity span runs from the first
            # child's start to the last child's end.
            for k, v in amr[cur_var].items():
                vconcept = amr.node_to_concepts[
                    v[0]] if v[0] in amr.node_to_concepts else v[0]
                EN_items.append(vconcept)
                success, sent, tokens = self.align_single_concept(
                    sent, tokens, v[0], amr, alignment, unmatched_vars,
                    triples)

                sp = alignment[v[0]][-1]
                sp.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
                EN_spans.append(sp)
                self.remove_aligned_concepts(cur_var, k, v[0],
                                             unmatched_vars, triples)
            #print NE_spans,alignment
            start = EN_spans[0].start
            end = EN_spans[-1].end
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        EN_items)
            span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
            alignment[cur_var].append(span)

        elif rule_type == "QuantityEntity":
            quantity = ''
            unit = ''
            unit_var = None
            q_success = False
            u_success = False

            for k, v in amr[cur_var].items():
                if k == 'quant':
                    quantity = v[0]
                    q_success, sent, tokens = self.align_single_concept(
                        sent, tokens, quantity, amr, alignment,
                        unmatched_vars, triples)
                elif k == 'unit':
                    unit_var = v[0]
                    unit = amr.node_to_concepts[v[0]]
                    u_success, sent, tokens = self.align_single_concept(
                        sent, tokens, unit_var, amr, alignment,
                        unmatched_vars, triples)
                else:
                    pass

            if q_success and u_success:
                #QTY_pattern = r'(%s|%s)\s+(%s)s?' % (quantity,english_number(int(quantity)),unit)
                #QTY_items = [quantity,unit]
                #start,end = self._search_sent(QTY_pattern,QTY_items,sent,tokens)
                #assert end - start == len(QTY_items)
                quantity_span = alignment[quantity][-1]
                unit_span = alignment[unit_var][0]
                start = quantity_span.start if quantity_span.start < unit_span.end else unit_span.start
                end = unit_span.end if quantity_span.start < unit_span.end else quantity_span.end
                while not (
                        end - len(quantity_span.words) -
                        len(unit_span.words) - start < 2
                ):  # wrong match more than one quantity to map in sentence
                    alignment[quantity].pop()
                    q_success, sent, tokens = self.align_single_concept(
                        sent,
                        tokens,
                        quantity,
                        amr,
                        alignment,
                        unmatched_vars,
                        triples,
                        NEXT=True)  # redo it on updated sentence
                    quantity_span = alignment[quantity][-1]
                    start = quantity_span.start
                #assert start == end - 2
                span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                            [quantity, unit])
                self.remove_aligned_concepts(cur_var, 'quant', quantity,
                                             unmatched_vars, triples)
                alignment[cur_var].append(span)
            elif q_success and not u_success:  # does not have unit or unit cannot be aligned
                quantity_span = alignment[quantity][0]
                start = quantity_span.start
                end = quantity_span.end
                span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                            [quantity])
                self.remove_aligned_concepts(cur_var, 'quant', quantity,
                                             unmatched_vars, triples)
                alignment[cur_var].append(span)
                #self.remove_aligned_concepts(unmatched_vars,amr[cur_var].items())
            elif not q_success and u_success:
                unit_span = alignment[unit_var][0]
                span = Span(unit_span.start, unit_span.end,
                            Aligner.ENTITY_TAG_TABLE[rule_type], [unit])
                self.remove_aligned_concepts(cur_var, 'unit', unit_var,
                                             unmatched_vars, triples)
                alignment[cur_var].append(span)
            else:
                # Neither part aligned: fall back to the generic rule below.
                rule_type = 'SingleConcept'
        elif rule_type == "Number":
            # The string below is a no-op expression statement holding an
            # earlier, disabled implementation.
            ''' aligned = False num = [cur_var] num.extend(english_number(int(cur_var)).split('|')) for i,token in tokens: if token.lower() in num: aligned = True break if aligned: span = Span(i,i+1,Aligner.ENTITY_TAG_TABLE[rule_type],[token]) alignment[cur_var].append(span) else: print >> sys.stderr, 'Variable/Concept %s/%s cannot be aligned'%(cur_var,cur_concept) update = False '''
            # Build (name, regex) alternatives for the surface forms of the
            # number: clock time, digits, English words, ordinal, rounded.
            if re.match('[0-9]+:[0-9]+', cur_concept):
                num = [('time', '(\\s|^)(' + cur_concept + ')(\\s|&)'),
                       ('english',
                        '(\\s|^)(' + to_time(cur_concept) + ')(\\s|&)')]
            else:
                num = [
                    ('digit', '(\\s|^)(' + cur_concept + '|' +
                     format_num(cur_concept) + ')(\\s|&)'),
                    ('string', '(\\s|^)(' +
                     english_number(int(cur_concept)) + ')(\\s|&)'),
                    ('order',
                     '(\\s|^)(' + to_order(cur_concept) + ')(\\s|&)'),
                    ('round',
                     '(\\s|^)(' + to_round(int(cur_concept)) + ')(\\s|&)')
                ]
            NUM_pattern = self._compile_regex_rule(num)
            #print NUM_pattern.pattern
            try:
                start, end = self._search_sent(NUM_pattern, sent, tokens)
                span = Span(
                    start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                    [w for i, w in tokens if i in range(start, end)])
                alignment[cur_var].append(span)
            except Exception as e:
                # Any failure to locate the number is reported on stderr and
                # treated as "not aligned".
                update = False
                print >> sys.stderr, e
                #raw_input('CONTINUE')

        elif rule_type == 'multiple':
            op1 = amr[cur_var]['op1'][0]
            success, sent, tokens = self.align_single_concept(
                sent, tokens, op1, amr, alignment, unmatched_vars, triples)
            if success:
                span = alignment[op1][0]
                alignment[cur_var].append(span)
                self.remove_aligned_concepts(cur_var, 'op1', op1,
                                             unmatched_vars, triples)
            else:
                update = False

        elif rule_type in [
                "person", "picture", "country", "state", "city", "desert",
                "organization"
        ]:
            if 'name' in amr[cur_var]:
                # A named entity shares the span of its name node.
                k_var = amr[cur_var]['name'][0]
                success, sent, tokens = self.align_single_concept(
                    sent, tokens, k_var, amr, alignment, unmatched_vars,
                    triples)
                span = alignment[k_var][0]
                span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type +
                                                             '-name'])
                alignment[cur_var].append(span)
            else:
                ind, span = self.try_align_as_single_concept(
                    cur_var, cur_concept, amr, alignment, tokens,
                    unmatched_vars, triples)
                if ind:
                    pass
                elif 'ARG0-of' in amr[cur_var]:
                    k_var = amr[cur_var]['ARG0-of'][0]
                    success, sent, tokens = self.align_single_concept(
                        sent, tokens, k_var, amr, alignment, unmatched_vars,
                        triples)
                    if success:
                        span = alignment[k_var][0]
                        span.set_entity_tag(
                            Aligner.ENTITY_TAG_TABLE[rule_type])
                        alignment[cur_var].append(span)
                    else:
                        update = False
                else:
                    update = False

        elif rule_type == "NegPolarity":
            aligned = False
            for i, token in tokens:
                if token.lower() in Aligner.neg_polarity:
                    aligned = True
                    break
            if aligned:
                span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE[rule_type],
                            [token])
                alignment[cur_var].append(span)
            else:
                print >> sys.stderr, 'Variable/Concept %s/%s cannot be aligned' % (
                    cur_var, cur_concept)
                update = False

        elif rule_type == "thing":
            if 'ARG1-of' in amr[cur_var]:
                k_var = amr[cur_var]['ARG1-of'][0]
                success, sent, tokens = self.align_single_concept(
                    sent, tokens, k_var, amr, alignment, unmatched_vars,
                    triples)
                if success:
                    span = alignment[k_var][0]
                    span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
                    alignment[cur_var].append(span)
                else:
                    update = False
            else:
                rule_type = 'SingleConcept'

        elif rule_type == 'OrdinalEntity':
            val = amr[cur_var]['value'][0]
            success, sent, tokens = self.align_single_concept(
                sent, tokens, val, amr, alignment, unmatched_vars, triples)
            self.remove_aligned_concepts(cur_var, 'value', val,
                                         unmatched_vars, triples)
            span = alignment[val][0]
            span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
            alignment[cur_var].append(span)

        elif rule_type == 'RelativePosition':
            if 'direction' in amr[cur_var]:
                dir_var = amr[cur_var]['direction'][0]
                if amr.node_to_concepts[dir_var] == 'away':
                    # "away" is aligned to the token "from".
                    aligned = False
                    for i, tok in tokens:
                        if tok.lower() == 'from':
                            aligned = True
                            break
                    if aligned:
                        span = Span(i, i + 1,
                                    Aligner.ENTITY_TAG_TABLE[rule_type],
                                    [tok])
                        alignment[cur_var].append(span)
                        alignment[dir_var].append(span)
                    else:
                        print >> sys.stderr, 'Variable/Concept %s/%s cannot be aligned' % (
                            cur_var, cur_concept)
                        update = False
                else:
                    rule_type = 'SingleConcept'
            else:
                rule_type = 'SingleConcept'

        elif self.is_ago(cur_var, cur_concept, amr):
            k_var = amr[cur_var]['op1'][0]
            aligned = False
            for i, tok in tokens:
                if tok.lower() == 'ago':
                    aligned = True
                    break
            if aligned:
                span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE['ago'], [tok])
                alignment[cur_var].append(span)
                alignment[k_var].append(span)
            else:
                print >> sys.stderr, '(%s/%s) :op1 (%s/%s) cannot be aligned' % (
                    cur_var, cur_concept, k_var, amr.node_to_concepts[k_var])
                update = False

        elif self.is_why_question(cur_var, amr):
            arg0_var = amr[cur_var]['ARG0'][0]
            aligned = False
            for i, tok in tokens:
                if tok.lower() == 'why':
                    aligned = True
                    break
            if aligned:
                span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE['cause'],
                            [tok])
                alignment[cur_var].append(span)
                alignment[arg0_var].append(span)
            else:
                print >> sys.stderr, '(%s/%s) :op1 (%s/%s) cannot be aligned' % (
                    cur_var, cur_concept, arg0_var,
                    amr.node_to_concepts[arg0_var])
                update = False
        else:
            pass

        # Generic fallback, also reached when a rule above reset rule_type.
        if rule_type == "SingleConcept":
            update, span = self.try_align_as_single_concept(
                cur_var, cur_concept, amr, alignment, tokens, unmatched_vars,
                triples)
        elif cur_var in alignment:
            pass
        else:
            print >> sys.stderr, 'Can not find type of concept %s / %s' % (
                cur_var, cur_concept)

        # update
        #print cur_concept,rule_type
        if update:
            # Drop the tokens covered by the span so they cannot be matched
            # again, then rebuild the sentence string.
            tokens = [(i, tk) for i, tk in tokens
                      if i not in range(span.start, span.end)]
            sent = ' '.join(x for i, x in tokens)
            if self.verbose > 2:
                # NOTE(review): `sentence` is not defined in this method and
                # span is indexed as span[0]/span[1] here while everything
                # else uses span.start/span.end -- this branch looks copied
                # from span_align and would fail when verbose > 2; confirm.
                print >> sys.stderr, "Concept '%s' Matched to span '%s' " % (
                    cur_concept, ' '.join(
                        w for i, w in enumerate(sentence.split())
                        if i + 1 in range(span[0], span[1])))
                print(sent)
                print(alignment)

                #raw_input('ENTER to continue')

        return update, sent, tokens
def span_align(self, sentence, amr):
    '''
    Use rules to align AMR concepts to sentence spans.

    :param sentence: whitespace-separated sentence string; tokens are
        numbered from 1 in the order returned by ``sentence.split()``.
    :param amr: AMR graph object; this method reads ``amr.bfs()[0]``,
        ``amr.node_to_concepts`` and ``amr[var].items()``.
    :return: ``defaultdict(list)`` mapping each aligned variable to the
        ``Span`` objects found for it; the key ``'root'`` is set to ``0``.
    :raises Exception: when the matched rule type has no handler here.
    :raises AssertionError: when a name/quantity match does not cover
        exactly as many tokens as it has items.
    '''
    sent = sentence[:]
    alignment = defaultdict(list)
    alignment['root'] = 0
    # Each token keeps its original 1-based position, so span offsets stay
    # meaningful after aligned tokens are dropped from the working list.
    tokens = [(i + 1, x) for i, x in enumerate(sent.split())]
    unmatched_vars = list(
        set(var for var in amr.bfs()[0] if not isinstance(var, StrLiteral)))

    while unmatched_vars:
        cur = unmatched_vars.pop(0)
        if cur in amr.node_to_concepts:
            cur_concept = amr.node_to_concepts[cur]
        else:
            cur_concept = cur

        match = self.concept_patterns.match(cur_concept)
        if not match:
            # No rule pattern applies to this concept: leave it unaligned.
            continue

        rule_type = match.lastgroup
        span = None
        update = True

        if rule_type == "NameEntity":
            NE_items = [v[0] for k, v in amr[cur].items()]
            NE_pattern = re.compile(r"\s".join(NE_items), re.IGNORECASE)
            start, end = self._search_sent(NE_pattern, sent, tokens)
            assert end - start == len(NE_items)
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        NE_items)
            alignment[cur].append(span)

        elif rule_type == "QuantityEntity":
            quantity = ''
            unit = ''
            for k, v in amr[cur].items():
                if k == 'quant':
                    quantity = v[0]
                elif k == 'unit':
                    unit = amr.node_to_concepts[v[0]]
            if quantity and unit:
                QTY_pattern = re.compile(
                    r'(%s|%s)\s+(%s)s?'
                    % (quantity, english_number(int(quantity)), unit),
                    re.IGNORECASE)
                QTY_items = [quantity, unit]
                start, end = self._search_sent(QTY_pattern, sent, tokens)
                assert end - start == len(QTY_items)
                span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                            QTY_items)
                alignment[cur].append(span)
                self.remove_aligned_concepts(unmatched_vars,
                                             amr[cur].items())
            else:
                # Without both quantity and unit no span is built; mark the
                # step as failed instead of dereferencing ``span = None``
                # in the update step below.
                update = False

        elif rule_type == "NegPolarity":
            aligned = False
            for i, token in tokens:
                if token.lower() in Aligner.neg_polarity:
                    aligned = True
                    break
            if aligned:
                span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE[rule_type],
                            [token])
                alignment[cur].append(span)
            else:
                update = False

        elif rule_type == "SingleConcept":
            # Split a trailing "-NN" sense suffix from the concept name.
            tmp = cur_concept.rsplit('-', 1)
            sense = None
            if len(tmp) == 2:
                sense = tmp[1]
            cur_concept = tmp[0].lower()

            for idx, token in tokens:
                t = token.lower()
                # Matchers are tried from strictest to loosest; ``or``
                # short-circuits, so later matchers only run when the
                # earlier ones fail.
                if (t == cur_concept
                        or self.fuzzy_match(t, cur_concept,
                                            Aligner.fuzzy_max_len)
                        or self.is_neg_form(t, cur_concept)
                        or self.WN_lemma_match(t, cur_concept, sense)
                        or self.is_spec_form(t, cur_concept)):
                    span = Span(idx, idx + 1,
                                Aligner.ENTITY_TAG_TABLE[rule_type], [t])
                    break

            if span:
                alignment[cur].append(span)
            else:
                sys.stderr.write(
                    'Variable/Concept %s/%s cannot be aligned'
                    % (cur, cur_concept) + '\n')
                update = False

        else:
            raise Exception('Can not find type of concept %s / %s' %
                            (cur, cur_concept))

        # Remove the tokens that were just aligned from the working sentence.
        if update:
            tokens = [(i, tk) for i, tk in tokens
                      if not (span.start <= i < span.end)]
            sent = ' '.join(x for i, x in tokens)
            if self.verbose > 2:
                matched = ' '.join(
                    w for i, w in enumerate(sentence.split())
                    if span.start <= i + 1 < span.end)
                sys.stderr.write(
                    "Concept '%s' Matched to span '%s' "
                    % (cur_concept, matched) + '\n')
                print(sent)
                print(alignment)

    return alignment
def setUp(self):
    """Create the Span fixtures and expected values used by the tests.

    Builds five spans: an empty one, a plain test span, a span filled with
    units whose open and close prices are equal, a span for the zero-value
    delta cases, and a span with low/high/close prices for range checks.
    """
    # Spans that get no units here.
    self.empty_span_code = 'empty_span'
    self.emptySpan = Span(self.empty_span_code)
    self.span_code = 'test_span'
    self.testSpan = Span(self.span_code)

    # Units grouped under five labels, three prices each.
    self.unitDeltaTestsSpan = Span('unit_delta_tests_span')
    self.unit_label_one = 'unit_one'
    self.unit_label_two = 'unit_two'
    self.unit_label_three = 'unit_three'
    self.unit_label_four = 'unit_four'
    self.unit_label_five = 'unit_five'
    self.unit_delta_test_list_one = [0.3, 13.4, 0.8]
    self.unit_delta_test_list_two = [2.8, 13.7, 0.9]
    self.unit_delta_test_list_three = [10.9, 14.4, 15.3]
    self.unit_delta_test_list_four = [15.3, 16.3, 17.3]
    self.unit_delta_test_list_five = [37.3, 16.3, 34.1]
    self.unit_dict = {
        self.unit_label_one: self.unit_delta_test_list_one,
        self.unit_label_two: self.unit_delta_test_list_two,
        self.unit_label_three: self.unit_delta_test_list_three,
        self.unit_label_four: self.unit_delta_test_list_four,
        self.unit_label_five: self.unit_delta_test_list_five,
    }
    self.expected_delta_tests_units_as_list = []
    for unit_label, unit_prices in self.unit_dict.items():
        for unit_price in unit_prices:
            # Every unit is added with the same open and close price; the
            # expected list records prices in the order they were added.
            self.unitDeltaTestsSpan.addSpanUnit(
                [unit_label], close_price=unit_price, open_price=unit_price)
            self.expected_delta_tests_units_as_list.append(unit_price)

    # Expected extremes for the unit-delta span.
    self.max_unit_delta = 4.4
    self.max_unit_delta_label = self.unit_label_three
    self.min_unit_delta = -3.2
    self.min_unit_delta_label = self.unit_label_five
    self.max_unit_delta_percentage = 1.66667
    self.max_unit_delta_percentage_label = self.unit_label_one
    self.min_unit_delta_percentage = -0.678571429
    self.min_unit_delta_percentage_label = self.unit_label_two

    # One span holding three labelled groups of zero-related values.
    self.zero_tests_span_code = 'zero_tests'
    self.zeroTestsSpan = Span(self.zero_tests_span_code)
    self.zero_delta_span_code = 'zero_delta'
    self.zero_delta_span_list = [0.0, 3.1, 4.4, 0.0]
    self.zero_delta_non_zero_close_span_code = 'zero_delta_non_zero_close'
    self.zero_delta_non_zero_close_span_list = [0.0, 3.1, 4.4, 2.0]
    self.zero_average_test_span_code = 'zero_span_close_average'
    self.zero_average_test_span_list = [0.0, -1.0, -1.0, 2.0]
    self.expected_zero_tests_units_as_list = []
    zero_groups = (
        (self.zero_delta_span_code, self.zero_delta_span_list),
        (self.zero_delta_non_zero_close_span_code,
         self.zero_delta_non_zero_close_span_list),
        (self.zero_average_test_span_code, self.zero_average_test_span_list),
    )
    for group_code, group_values in zero_groups:
        for group_value in group_values:
            self.zeroTestsSpan.addSpanUnit(
                [group_code], group_value, group_value)
            self.expected_zero_tests_units_as_list.append(group_value)

    # Span with [low, high, close] triples; two rows carry a None price.
    self.rangeTestSpan = Span('range_test_span')
    self.range_test_prices = [[5.6, 7.6, 6.6],
                              [3.6, 5.3, 4.4],
                              [10.1, 15.9, 13.0],
                              [1.3, 7.6, 5.1],
                              [0.0, None, 3.3],
                              [None, 100.0, 3.3]]
    self.expected_span_range = 14.6
    self.range_test_span_average_close_price = 5.95
    expected_ratio = (self.expected_span_range
                      / self.range_test_span_average_close_price)
    self.expected_span_range_to_close_price_ratio = round(expected_ratio, 7)
    for low_price, high_price, close_price in self.range_test_prices:
        self.rangeTestSpan.addSpanUnit(low_price=low_price,
                                       high_price=high_price,
                                       close_price=close_price)
def align_single_concept(self, sent, tokens, cur_var, amr, alignment,
                         unmatched_vars, triples, NEXT=False):
    '''
    Align a single AMR variable/concept to a span of the sentence.

    The rule to apply is chosen from the named group matched by
    ``self.concept_patterns``; several rules recurse into this method to
    align child concepts first and then reuse or merge their spans.

    :param sent: current working sentence (already-aligned tokens removed).
    :param tokens: list of ``(position, word)`` pairs still unaligned.
    :param cur_var: AMR variable (or literal) to align.
    :param amr: AMR graph; read through ``amr.node_to_concepts`` and
        ``amr[var]``.
    :param alignment: mapping variable -> list of spans; updated in place.
    :param unmatched_vars: passed through to ``remove_aligned_concepts``.
    :param triples: passed through to ``remove_aligned_concepts``.
    :param NEXT: when true, align again even if ``cur_var`` already has an
        alignment (used to look for another occurrence of a quantity).
    :return: ``(update, sent, tokens)`` -- whether a span was aligned, and
        the sentence/token list with the aligned tokens removed.
    :raises AssertionError: when a name match does not cover exactly as
        many tokens as the name has parts.
    '''
    if cur_var in amr.node_to_concepts:
        cur_concept = amr.node_to_concepts[cur_var]
    else:
        cur_concept = cur_var

    # Already aligned. Literal-like values are always re-aligned.
    if (cur_var in alignment and not NEXT
            and not isinstance(cur_var, (StrLiteral, Quantity, Polarity))):
        return True, sent, tokens

    match = self.concept_patterns.match(cur_concept)
    if not match:
        # Previously ``update`` was never bound on this path and the
        # function failed with a NameError; report and return a failure.
        sys.stderr.write('Can not find type of concept %s / %s'
                         % (cur_var, cur_concept) + '\n')
        return False, sent, tokens

    rule_type = match.lastgroup
    span = None
    update = True

    if rule_type == "NameEntity":
        NE_items = [v[0] for k, v in amr[cur_var].items()
                    if isinstance(v[0], StrLiteral)]
        # Second alternative matches each name part by its first four
        # characters (parts of up to three characters are used whole).
        nep = r'%s|%s' % (
            r'\s'.join(NE_items),
            r'\s'.join(n[:4] if len(n) > 3 else n for n in NE_items))
        NE_pattern = re.compile(nep, re.IGNORECASE)
        start, end = self._search_sent(NE_pattern, sent, tokens)
        assert end - start == len(NE_items)
        span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type], NE_items)
        alignment[cur_var].append(span)
        for k, v in amr[cur_var].items():
            if isinstance(v[0], StrLiteral):
                self.remove_aligned_concepts(cur_var, k, v[0],
                                             unmatched_vars, triples)

    elif rule_type in ["DateEntity", "haveOrgRole91", "RateEntity"]:
        EN_items = []
        EN_spans = []
        for k, v in amr[cur_var].items():
            if v[0] in amr.node_to_concepts:
                vconcept = amr.node_to_concepts[v[0]]
            else:
                vconcept = v[0]
            EN_items.append(vconcept)
            success, sent, tokens = self.align_single_concept(
                sent, tokens, v[0], amr, alignment, unmatched_vars, triples)
            sp = alignment[v[0]][-1]
            sp.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
            EN_spans.append(sp)
            self.remove_aligned_concepts(cur_var, k, v[0],
                                         unmatched_vars, triples)
        # The entity span runs from the first child's start to the last
        # child's end, in the iteration order of amr[cur_var].
        start = EN_spans[0].start
        end = EN_spans[-1].end
        span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type], EN_items)
        span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
        alignment[cur_var].append(span)

    elif rule_type == "QuantityEntity":
        quantity = ''
        unit = ''
        unit_var = None
        q_success = False
        u_success = False
        for k, v in amr[cur_var].items():
            if k == 'quant':
                quantity = v[0]
                q_success, sent, tokens = self.align_single_concept(
                    sent, tokens, quantity, amr, alignment,
                    unmatched_vars, triples)
            elif k == 'unit':
                unit_var = v[0]
                unit = amr.node_to_concepts[v[0]]
                u_success, sent, tokens = self.align_single_concept(
                    sent, tokens, unit_var, amr, alignment,
                    unmatched_vars, triples)

        if q_success and u_success:
            quantity_span = alignment[quantity][-1]
            unit_span = alignment[unit_var][0]
            if quantity_span.start < unit_span.end:
                start, end = quantity_span.start, unit_span.end
            else:
                start, end = unit_span.start, quantity_span.end
            # Wrong match: more than one quantity to map in the sentence.
            # Drop the quantity alignment and retry on the updated sentence
            # until quantity and unit are (almost) adjacent.
            while not (end - len(quantity_span.words)
                       - len(unit_span.words) - start < 2):
                alignment[quantity].pop()
                q_success, sent, tokens = self.align_single_concept(
                    sent, tokens, quantity, amr, alignment,
                    unmatched_vars, triples, NEXT=True)
                quantity_span = alignment[quantity][-1]
                start = quantity_span.start
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        [quantity, unit])
            self.remove_aligned_concepts(cur_var, 'quant', quantity,
                                         unmatched_vars, triples)
            alignment[cur_var].append(span)
        elif q_success and not u_success:
            # Does not have a unit, or the unit cannot be aligned.
            quantity_span = alignment[quantity][0]
            start = quantity_span.start
            end = quantity_span.end
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        [quantity])
            self.remove_aligned_concepts(cur_var, 'quant', quantity,
                                         unmatched_vars, triples)
            alignment[cur_var].append(span)
        elif not q_success and u_success:
            unit_span = alignment[unit_var][0]
            span = Span(unit_span.start, unit_span.end,
                        Aligner.ENTITY_TAG_TABLE[rule_type], [unit])
            self.remove_aligned_concepts(cur_var, 'unit', unit_var,
                                         unmatched_vars, triples)
            alignment[cur_var].append(span)
        else:
            # Neither part aligned: fall back to the single-concept rule.
            rule_type = 'SingleConcept'

    elif rule_type == "Number":
        # NOTE(review): the trailing '&' in these patterns looks like it was
        # meant to be '$' (end of string) -- confirm before changing.
        if re.match('[0-9]+:[0-9]+', cur_concept):
            num = [('time', '(\\s|^)(' + cur_concept + ')(\\s|&)'),
                   ('english',
                    '(\\s|^)(' + to_time(cur_concept) + ')(\\s|&)')]
        else:
            num = [('digit',
                    '(\\s|^)(' + cur_concept + '|'
                    + format_num(cur_concept) + ')(\\s|&)'),
                   ('string',
                    '(\\s|^)(' + english_number(int(cur_concept))
                    + ')(\\s|&)'),
                   ('order',
                    '(\\s|^)(' + to_order(cur_concept) + ')(\\s|&)'),
                   ('round',
                    '(\\s|^)(' + to_round(int(cur_concept)) + ')(\\s|&)')]
        NUM_pattern = self._compile_regex_rule(num)
        try:
            start, end = self._search_sent(NUM_pattern, sent, tokens)
            span = Span(start, end, Aligner.ENTITY_TAG_TABLE[rule_type],
                        [w for i, w in tokens if i in range(start, end)])
            alignment[cur_var].append(span)
        except Exception as e:
            # Best effort: a failed number search only marks this concept
            # as unaligned.
            update = False
            sys.stderr.write('%s\n' % (e,))

    elif rule_type == 'multiple':
        op1 = amr[cur_var]['op1'][0]
        success, sent, tokens = self.align_single_concept(
            sent, tokens, op1, amr, alignment, unmatched_vars, triples)
        if success:
            span = alignment[op1][0]
            alignment[cur_var].append(span)
            self.remove_aligned_concepts(cur_var, 'op1', op1,
                                         unmatched_vars, triples)
        else:
            update = False

    elif rule_type in ["person", "picture", "country", "state", "city",
                       "desert", "organization"]:
        if 'name' in amr[cur_var]:
            k_var = amr[cur_var]['name'][0]
            success, sent, tokens = self.align_single_concept(
                sent, tokens, k_var, amr, alignment, unmatched_vars, triples)
            span = alignment[k_var][0]
            span.set_entity_tag(
                Aligner.ENTITY_TAG_TABLE[rule_type + '-name'])
            alignment[cur_var].append(span)
        else:
            ind, span = self.try_align_as_single_concept(
                cur_var, cur_concept, amr, alignment, tokens,
                unmatched_vars, triples)
            if ind:
                pass
            elif 'ARG0-of' in amr[cur_var]:
                k_var = amr[cur_var]['ARG0-of'][0]
                success, sent, tokens = self.align_single_concept(
                    sent, tokens, k_var, amr, alignment,
                    unmatched_vars, triples)
                if success:
                    span = alignment[k_var][0]
                    span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
                    alignment[cur_var].append(span)
                else:
                    update = False
            else:
                update = False

    elif rule_type == "NegPolarity":
        aligned = False
        for i, token in tokens:
            if token.lower() in Aligner.neg_polarity:
                aligned = True
                break
        if aligned:
            span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE[rule_type],
                        [token])
            alignment[cur_var].append(span)
        else:
            sys.stderr.write('Variable/Concept %s/%s cannot be aligned'
                             % (cur_var, cur_concept) + '\n')
            update = False

    elif rule_type == "thing":
        if 'ARG1-of' in amr[cur_var]:
            k_var = amr[cur_var]['ARG1-of'][0]
            success, sent, tokens = self.align_single_concept(
                sent, tokens, k_var, amr, alignment, unmatched_vars, triples)
            if success:
                span = alignment[k_var][0]
                span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
                alignment[cur_var].append(span)
            else:
                update = False
        else:
            rule_type = 'SingleConcept'

    elif rule_type == 'OrdinalEntity':
        val = amr[cur_var]['value'][0]
        success, sent, tokens = self.align_single_concept(
            sent, tokens, val, amr, alignment, unmatched_vars, triples)
        self.remove_aligned_concepts(cur_var, 'value', val,
                                     unmatched_vars, triples)
        span = alignment[val][0]
        span.set_entity_tag(Aligner.ENTITY_TAG_TABLE[rule_type])
        alignment[cur_var].append(span)

    elif rule_type == 'RelativePosition':
        if 'direction' in amr[cur_var]:
            dir_var = amr[cur_var]['direction'][0]
            if amr.node_to_concepts[dir_var] == 'away':
                aligned = False
                for i, tok in tokens:
                    if tok.lower() == 'from':
                        aligned = True
                        break
                if aligned:
                    span = Span(i, i + 1,
                                Aligner.ENTITY_TAG_TABLE[rule_type], [tok])
                    alignment[cur_var].append(span)
                    alignment[dir_var].append(span)
                else:
                    sys.stderr.write(
                        'Variable/Concept %s/%s cannot be aligned'
                        % (cur_var, cur_concept) + '\n')
                    update = False
            else:
                rule_type = 'SingleConcept'
        else:
            rule_type = 'SingleConcept'

    elif self.is_ago(cur_var, cur_concept, amr):
        k_var = amr[cur_var]['op1'][0]
        aligned = False
        for i, tok in tokens:
            if tok.lower() == 'ago':
                aligned = True
                break
        if aligned:
            span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE['ago'], [tok])
            alignment[cur_var].append(span)
            alignment[k_var].append(span)
        else:
            sys.stderr.write(
                '(%s/%s) :op1 (%s/%s) cannot be aligned'
                % (cur_var, cur_concept, k_var, amr.node_to_concepts[k_var])
                + '\n')
            update = False

    elif self.is_why_question(cur_var, amr):
        arg0_var = amr[cur_var]['ARG0'][0]
        aligned = False
        for i, tok in tokens:
            if tok.lower() == 'why':
                aligned = True
                break
        if aligned:
            span = Span(i, i + 1, Aligner.ENTITY_TAG_TABLE['cause'], [tok])
            alignment[cur_var].append(span)
            alignment[arg0_var].append(span)
        else:
            sys.stderr.write(
                '(%s/%s) :op1 (%s/%s) cannot be aligned'
                % (cur_var, cur_concept, arg0_var,
                   amr.node_to_concepts[arg0_var])
                + '\n')
            update = False

    # Several rules above fall back by resetting rule_type to SingleConcept.
    if rule_type == "SingleConcept":
        update, span = self.try_align_as_single_concept(
            cur_var, cur_concept, amr, alignment, tokens,
            unmatched_vars, triples)
    elif cur_var not in alignment:
        sys.stderr.write('Can not find type of concept %s / %s'
                         % (cur_var, cur_concept) + '\n')

    # A rule type with no handler leaves ``span`` unset; report failure
    # instead of dereferencing None below.
    if update and span is None:
        update = False

    # Remove the tokens that were just aligned from the working sentence.
    if update:
        tokens = [(i, tk) for i, tk in tokens
                  if not (span.start <= i < span.end)]
        sent = ' '.join(x for i, x in tokens)
        if self.verbose > 2:
            # The original sentence is not available in this method (the old
            # code read an undefined name ``sentence``), so the matched text
            # is taken from the span's own words.
            matched = ' '.join('%s' % w for w in span.words)
            sys.stderr.write("Concept '%s' Matched to span '%s' "
                             % (cur_concept, matched) + '\n')
            print(sent)
            print(alignment)

    return update, sent, tokens
class TestSpan(unittest.TestCase):
    """Unit tests for ``Span``: unit bookkeeping, deltas and price ranges."""

    def tearDown(self):
        self.unitDeltaTestsSpan = None
        self.testSpan = None

    def setUp(self):
        """Create the Span fixtures and expected values used by the tests."""
        self.empty_span_code = 'empty_span'
        self.emptySpan = Span(self.empty_span_code)
        self.span_code = 'test_span'
        self.testSpan = Span(self.span_code)

        # Span whose units are grouped under five labels, three prices each.
        self.unitDeltaTestsSpan = Span('unit_delta_tests_span')
        self.unit_label_one = 'unit_one'
        self.unit_delta_test_list_one = [0.3, 13.4, 0.8]
        self.unit_label_two = 'unit_two'
        self.unit_delta_test_list_two = [2.8, 13.7, 0.9]
        self.unit_label_three = 'unit_three'
        self.unit_delta_test_list_three = [10.9, 14.4, 15.3]
        self.unit_label_four = 'unit_four'
        self.unit_delta_test_list_four = [15.3, 16.3, 17.3]
        self.unit_label_five = 'unit_five'
        self.unit_delta_test_list_five = [37.3, 16.3, 34.1]
        self.unit_dict = {
            self.unit_label_one: self.unit_delta_test_list_one,
            self.unit_label_two: self.unit_delta_test_list_two,
            self.unit_label_three: self.unit_delta_test_list_three,
            self.unit_label_four: self.unit_delta_test_list_four,
            self.unit_label_five: self.unit_delta_test_list_five,
        }
        self.expected_delta_tests_units_as_list = []
        for label, list_of_prices in self.unit_dict.items():
            for price in list_of_prices:
                self.unitDeltaTestsSpan.addSpanUnit(
                    [label], close_price=price, open_price=price)
                self.expected_delta_tests_units_as_list.append(price)
        self.max_unit_delta = 4.4
        self.max_unit_delta_label = self.unit_label_three
        self.min_unit_delta = -3.2
        self.min_unit_delta_label = self.unit_label_five
        self.max_unit_delta_percentage = 1.66667
        self.max_unit_delta_percentage_label = self.unit_label_one
        self.min_unit_delta_percentage = -0.678571429
        self.min_unit_delta_percentage_label = self.unit_label_two

        # One span holding three labelled groups of zero-related values.
        self.zero_tests_span_code = 'zero_tests'
        self.zeroTestsSpan = Span(self.zero_tests_span_code)
        self.zero_delta_span_code = 'zero_delta'
        self.zero_delta_span_list = [0.0, 3.1, 4.4, 0.0]
        self.expected_zero_tests_units_as_list = []
        for value in self.zero_delta_span_list:
            self.zeroTestsSpan.addSpanUnit(
                [self.zero_delta_span_code], value, value)
            self.expected_zero_tests_units_as_list.append(value)
        self.zero_delta_non_zero_close_span_code = 'zero_delta_non_zero_close'
        self.zero_delta_non_zero_close_span_list = [0.0, 3.1, 4.4, 2.0]
        for value in self.zero_delta_non_zero_close_span_list:
            self.zeroTestsSpan.addSpanUnit(
                [self.zero_delta_non_zero_close_span_code], value, value)
            self.expected_zero_tests_units_as_list.append(value)
        self.zero_average_test_span_code = 'zero_span_close_average'
        self.zero_average_test_span_list = [0.0, -1.0, -1.0, 2.0]
        for value in self.zero_average_test_span_list:
            self.zeroTestsSpan.addSpanUnit(
                [self.zero_average_test_span_code], value, value)
            self.expected_zero_tests_units_as_list.append(value)

        # Span with [low, high, close] triples; two rows carry a None price.
        self.rangeTestSpan = Span('range_test_span')
        self.range_test_prices = [[5.6, 7.6, 6.6],
                                  [3.6, 5.3, 4.4],
                                  [10.1, 15.9, 13.0],
                                  [1.3, 7.6, 5.1],
                                  [0.0, None, 3.3],
                                  [None, 100.0, 3.3]]
        self.expected_span_range = 14.6
        self.range_test_span_average_close_price = 5.95
        self.expected_span_range_to_close_price_ratio = round(
            self.expected_span_range
            / self.range_test_span_average_close_price, 7)
        for open_and_close_price in self.range_test_prices:
            low_price, high_price, close_price = open_and_close_price
            self.rangeTestSpan.addSpanUnit(low_price=low_price,
                                           high_price=high_price,
                                           close_price=close_price)

    def _assertUnitMatches(self, unit, open_price, close_price, high_price,
                           low_price, delta, delta_percentage):
        """Assert that ``unit`` carries exactly the given prices and deltas."""
        self.assertEqual(open_price, unit.open_price)
        self.assertEqual(close_price, unit.close_price)
        self.assertEqual(high_price, unit.high_price)
        self.assertEqual(low_price, unit.low_price)
        self.assertEqual(delta, unit.delta)
        self.assertEqual(delta_percentage, unit.delta_percentage)

    def test_getSpanPriceRangeToAveragePriceRatio(self):
        test_span_price_range_to_average_close_price = round(
            self.rangeTestSpan.getSpanPriceRangeToAveragePriceRatio(), 7)
        self.assertEqual(test_span_price_range_to_average_close_price,
                         self.expected_span_range_to_close_price_ratio)
        test_empty_span_ratio = (
            self.emptySpan.getSpanPriceRangeToAveragePriceRatio())
        self.assertIsNone(test_empty_span_ratio)
        test_span_with_no_high_low_prices_ratio = (
            self.testSpan.getSpanPriceRangeToAveragePriceRatio())
        self.assertIsNone(test_span_with_no_high_low_prices_ratio)

    def test_getSpanPriceRange(self):
        test_span_range = self.rangeTestSpan.getSpanPriceRange()
        self.assertEqual(test_span_range, self.expected_span_range)
        test_empty_span_range = self.emptySpan.getSpanPriceRange()
        self.assertIsNone(test_empty_span_range)
        test_span_with_no_low_high_range = self.testSpan.getSpanPriceRange()
        self.assertIsNone(test_span_with_no_low_high_range)

    def test_getUnitFieldValuesAsList(self):
        test_units_as_list = (
            self.unitDeltaTestsSpan.getUnitFieldValuesAsList('close_price'))
        self.assertEqual(test_units_as_list,
                         self.expected_delta_tests_units_as_list)
        test_zero_span_units_as_list = (
            self.zeroTestsSpan.getUnitFieldValuesAsList('close_price'))
        self.assertEqual(test_zero_span_units_as_list,
                         self.expected_zero_tests_units_as_list)

    def test_getUnitsCount(self):
        self.assertEqual(self.zeroTestsSpan.getUnitsCount(), 12)
        self.assertEqual(self.emptySpan.getUnitsCount(), 0)
        self.assertEqual(self.unitDeltaTestsSpan.getUnitsCount(), 15)

    def test_getSpanDelta(self):
        test_zero_delta_span_delta = self.zeroTestsSpan.getSpanDelta(
            self.zero_delta_span_code)
        self.assertEqual(test_zero_delta_span_delta, 0.0)
        test_zero_delta_span_delta_percentage = (
            self.zeroTestsSpan.getSpanDelta(self.zero_delta_span_code,
                                            get_percentage_delta=True))
        self.assertEqual(test_zero_delta_span_delta_percentage, 0.0)
        test_zero_delta_non_zero_close_span_delta = (
            self.zeroTestsSpan.getSpanDelta(
                self.zero_delta_non_zero_close_span_code))
        self.assertEqual(test_zero_delta_non_zero_close_span_delta, 2.0)
        test_zero_delta_non_zero_close_span_delta_percentage = (
            self.zeroTestsSpan.getSpanDelta(
                self.zero_delta_non_zero_close_span_code,
                get_percentage_delta=True))
        self.assertEqual(test_zero_delta_non_zero_close_span_delta_percentage,
                         float("inf"))

    def test_getMaxUnitDelta(self):
        test_max_unit_delta = self.unitDeltaTestsSpan.getMaxUnitDelta()
        test_max_unit_delta_value = test_max_unit_delta['delta']
        test_max_unit_delta_value_label = test_max_unit_delta['label']
        self.assertEqual(test_max_unit_delta_value, self.max_unit_delta)
        self.assertEqual(test_max_unit_delta_value_label,
                         self.max_unit_delta_label)
        test_max_unit_delta_percentage = (
            self.unitDeltaTestsSpan.getMaxUnitDelta(True))
        test_max_unit_delta_percentage_value = round(
            test_max_unit_delta_percentage['delta'], 5)
        test_max_unit_delta_percentage_value_label = (
            test_max_unit_delta_percentage['label'])
        self.assertEqual(test_max_unit_delta_percentage_value,
                         self.max_unit_delta_percentage)
        self.assertEqual(test_max_unit_delta_percentage_value_label,
                         self.max_unit_delta_percentage_label)

    def test_getMaxUnitDeltaValue(self):
        test_max_unit_delta_value_explicit = (
            self.unitDeltaTestsSpan.getMaxUnitDeltaValue())
        self.assertEqual(test_max_unit_delta_value_explicit,
                         self.max_unit_delta)
        test_max_unit_delta_percentage_value_explicit = round(
            self.unitDeltaTestsSpan.getMaxUnitDeltaValue(True), 5)
        self.assertEqual(test_max_unit_delta_percentage_value_explicit,
                         self.max_unit_delta_percentage)

    def test_getMinUnitDelta(self):
        test_min_unit_delta = self.unitDeltaTestsSpan.getMinUnitDelta()
        test_min_unit_delta_value = round(test_min_unit_delta['delta'], 2)
        test_min_unit_delta_value_label = test_min_unit_delta['label']
        self.assertEqual(test_min_unit_delta_value, self.min_unit_delta)
        self.assertEqual(test_min_unit_delta_value_label,
                         self.min_unit_delta_label)
        test_min_unit_delta_percentage = (
            self.unitDeltaTestsSpan.getMinUnitDelta(True))
        test_min_unit_delta_percentage_value = round(
            test_min_unit_delta_percentage['delta'], 9)
        test_min_unit_delta_percentage_value_label = (
            test_min_unit_delta_percentage['label'])
        self.assertEqual(test_min_unit_delta_percentage_value,
                         self.min_unit_delta_percentage)
        self.assertEqual(test_min_unit_delta_percentage_value_label,
                         self.min_unit_delta_percentage_label)

    def test_getMinUnitDeltaValue(self):
        test_min_unit_delta_value_explicit = round(
            self.unitDeltaTestsSpan.getMinUnitDeltaValue(), 2)
        self.assertEqual(test_min_unit_delta_value_explicit,
                         self.min_unit_delta)
        test_min_unit_delta_percentage_value_explicit = round(
            self.unitDeltaTestsSpan.getMinUnitDeltaValue(True), 9)
        self.assertEqual(test_min_unit_delta_percentage_value_explicit,
                         self.min_unit_delta_percentage)

    def test_spanInit(self):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            self.span_code, self.testSpan.code,
            'The code returned from a Span object does not match the code '
            'used to initialize the object')

    def test_addSpanUnit(self):
        span_delta = self.emptySpan.getSpanDelta()
        self.assertIsNone(span_delta)

        first_unit_label = 'first_unit'
        second_unit_label = 'second_unit'
        third_unit_label = 'third_unit'
        span_test_one_label = 'span_test_one'
        span_test_two_label = 'span_test_two'

        expected_first_unit_labels = [first_unit_label, span_test_one_label]
        expected_first_unit_open_price = 100.0
        expected_first_unit_close_price = 95.0
        expected_first_unit_high_price = 110.0
        expected_first_unit_low_price = 900.0
        expected_first_unit_delta = 5
        expected_first_unit_delta_percentage = .04

        expected_second_unit_labels = [second_unit_label, span_test_two_label,
                                       span_test_one_label]
        expected_second_unit_open_price = 95.0
        expected_second_unit_close_price = 93.0
        expected_second_unit_high_price = 98.0
        expected_second_unit_low_price = 90.0
        expected_second_unit_delta = -2
        expected_second_unit_delta_percentage = -0.021052632

        self.testSpan.addSpanUnit(expected_first_unit_labels,
                                  expected_first_unit_close_price,
                                  expected_first_unit_open_price,
                                  expected_first_unit_high_price,
                                  expected_first_unit_low_price,
                                  expected_first_unit_delta,
                                  expected_first_unit_delta_percentage)
        self.testSpan.addSpanUnit(expected_second_unit_labels,
                                  expected_second_unit_close_price,
                                  expected_second_unit_open_price,
                                  expected_second_unit_high_price,
                                  expected_second_unit_low_price,
                                  expected_second_unit_delta,
                                  expected_second_unit_delta_percentage)

        # Test the return for the first unit label.
        test_first_unit_label_dict = self.testSpan.getSpanUnitsByLabel(
            first_unit_label)
        self.assertEqual(len(test_first_unit_label_dict), 1)
        # list(...) because dict views cannot be indexed on Python 3.
        first_label_items = list(test_first_unit_label_dict.items())
        test_first_span_unit_index, testFirstSpanUnit = first_label_items[0]
        self.assertEqual(test_first_span_unit_index, 0)
        self._assertUnitMatches(testFirstSpanUnit,
                                expected_first_unit_open_price,
                                expected_first_unit_close_price,
                                expected_first_unit_high_price,
                                expected_first_unit_low_price,
                                expected_first_unit_delta,
                                expected_first_unit_delta_percentage)

        # Test retrieving a unit which has not been added.
        testThirdSpanUnit = self.testSpan.getSpanUnitsByLabel(
            third_unit_label)
        self.assertIsNone(testThirdSpanUnit)

        expected_third_unit_open_price = 93.0
        expected_third_unit_close_price = 88.0
        expected_third_unit_high_price = 97.0
        expected_third_unit_low_price = 86.0
        expected_third_unit_delta = -5
        expected_third_unit_delta_percentage = -0.053763441
        expected_close_price_average = 92.0
        expected_third_unit_labels = [third_unit_label, span_test_two_label]
        self.testSpan.addSpanUnit(expected_third_unit_labels,
                                  expected_third_unit_close_price,
                                  expected_third_unit_open_price,
                                  expected_third_unit_high_price,
                                  expected_third_unit_low_price,
                                  expected_third_unit_delta,
                                  expected_third_unit_delta_percentage)

        # Test the return for the span test one label.
        test_span_one_unit_label_dict = self.testSpan.getSpanUnitsByLabel(
            span_test_one_label)
        self.assertEqual(len(test_span_one_unit_label_dict), 2)
        span_one_items = list(test_span_one_unit_label_dict.items())
        test_span_one_first_unit_index, testSpanOneFirstUnit = (
            span_one_items[0])
        self.assertEqual(test_span_one_first_unit_index, 0)
        self._assertUnitMatches(testSpanOneFirstUnit,
                                expected_first_unit_open_price,
                                expected_first_unit_close_price,
                                expected_first_unit_high_price,
                                expected_first_unit_low_price,
                                expected_first_unit_delta,
                                expected_first_unit_delta_percentage)
        test_span_one_second_unit_index, testSpanOneSecondUnit = (
            span_one_items[1])
        self.assertEqual(test_span_one_second_unit_index, 1)
        self._assertUnitMatches(testSpanOneSecondUnit,
                                expected_second_unit_open_price,
                                expected_second_unit_close_price,
                                expected_second_unit_high_price,
                                expected_second_unit_low_price,
                                expected_second_unit_delta,
                                expected_second_unit_delta_percentage)

        # Test the return for the span test two label.
        test_span_two_unit_label_dict = self.testSpan.getSpanUnitsByLabel(
            span_test_two_label)
        self.assertEqual(len(test_span_two_unit_label_dict), 2)
        span_two_items = list(test_span_two_unit_label_dict.items())
        test_span_two_first_unit_index, testSpanTwoFirstUnit = (
            span_two_items[0])
        self.assertEqual(test_span_two_first_unit_index, 1)
        self._assertUnitMatches(testSpanTwoFirstUnit,
                                expected_second_unit_open_price,
                                expected_second_unit_close_price,
                                expected_second_unit_high_price,
                                expected_second_unit_low_price,
                                expected_second_unit_delta,
                                expected_second_unit_delta_percentage)
        test_span_two_second_unit_index, testSpanTwoSecondUnit = (
            span_two_items[1])
        self.assertEqual(test_span_two_second_unit_index, 2)
        self._assertUnitMatches(testSpanTwoSecondUnit,
                                expected_third_unit_open_price,
                                expected_third_unit_close_price,
                                expected_third_unit_high_price,
                                expected_third_unit_low_price,
                                expected_third_unit_delta,
                                expected_third_unit_delta_percentage)

        test_close_price_average = self.testSpan.getSpanCloseAverage()
        self.assertEqual(expected_close_price_average,
                         test_close_price_average)

        # Test Span Deltas.
        expected_span_delta = -12.0
        test_span_delta = self.testSpan.getSpanDelta()
        self.assertEqual(expected_span_delta, test_span_delta)
        expected_span_delta_percentage = (
            expected_span_delta / expected_first_unit_open_price)
        test_span_delta_percentage = self.testSpan.getSpanDelta(
            get_percentage_delta=True)
        self.assertEqual(expected_span_delta_percentage,
                         test_span_delta_percentage)

        # Test Span Deltas with unit label.
        test_span_test_two_label_delta = self.testSpan.getSpanDelta(
            span_test_two_label)
        expected_span_test_two_label_delta = -7.0
        self.assertEqual(expected_span_test_two_label_delta,
                         test_span_test_two_label_delta)
        test_span_test_two_label_delta_percentage = round(
            self.testSpan.getSpanDelta(span_test_two_label,
                                       get_percentage_delta=True), 6)
        expected_span_test_two_label_delta_percentage = round(
            expected_span_test_two_label_delta
            / expected_second_unit_open_price, 6)
        self.assertEqual(expected_span_test_two_label_delta_percentage,
                         test_span_test_two_label_delta_percentage)

        test_span_test_one_label_delta = self.testSpan.getSpanDelta(
            span_test_one_label)
        expected_span_test_one_label_delta = -7.0
        self.assertEqual(expected_span_test_one_label_delta,
                         test_span_test_one_label_delta)
        test_span_test_one_label_delta_percentage = round(
            self.testSpan.getSpanDelta(span_test_one_label,
                                       get_percentage_delta=True), 6)
        expected_span_test_one_label_delta_percentage = round(
            expected_span_test_one_label_delta
            / expected_first_unit_open_price, 6)
        self.assertEqual(expected_span_test_one_label_delta_percentage,
                         test_span_test_one_label_delta_percentage)