def getTransition(self, stack, buff, leftmostChildren, rightmostChildren, arcs, labeled):
    """This function should return a Transition object representing the correct
    action to take according to the oracle."""
    if len(stack) > 1:
        top = stack[-1]
        pre_top = stack[-2]
        rmc_top = rightmostChildren.get(p.get_id(top), -1)
        rmc_pre_top = rightmostChildren.get(p.get_id(pre_top), -1)
        lmc_top = leftmostChildren.get(p.get_id(top), p.INFINITY)
        lmc_pre_top = leftmostChildren.get(p.get_id(pre_top), p.INFINITY)
        if (p.get_head(pre_top) == p.get_id(top)
                and self.is_removable(pre_top, arcs, lmc_pre_top, rmc_pre_top)):
            if labeled:
                return Transition(Transition.LeftArc, p.get_deprel(pre_top))
            else:
                return Transition(Transition.LeftArc, None)
        elif (p.get_head(top) == p.get_id(pre_top)
              and self.is_removable(top, arcs, lmc_top, rmc_top)):
            if labeled:
                return Transition(Transition.RightArc, p.get_deprel(top))
            else:
                return Transition(Transition.RightArc, None)
        else:
            return Transition(Transition.Shift, None)
    else:
        if len(buff) >= 1:
            return Transition(Transition.Shift, None)
        else:
            return None
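# The oracle above consults the gold heads (via p.get_head) and only emits an arc once
# the dependent has collected all of its own children (is_removable). Below is a
# standalone toy sketch of the same arc-standard decision rule, using plain dicts
# instead of the repo's p/Transition helpers; every name here is an illustrative
# assumption, not part of this codebase.
def _toy_oracle(stack, buff, gold_heads, built_arcs):
    """stack/buff hold token ids; gold_heads maps child id -> gold head id."""
    if len(stack) > 1:
        top, pre_top = stack[-1], stack[-2]
        # Left-arc: pre_top's gold head is top and pre_top already has all its children.
        if gold_heads.get(pre_top) == top and all(
                built_arcs.get(c) == pre_top for c in gold_heads if gold_heads[c] == pre_top):
            return 'left_arc'
        # Right-arc: top's gold head is pre_top and top already has all its children.
        if gold_heads.get(top) == pre_top and all(
                built_arcs.get(c) == top for c in gold_heads if gold_heads[c] == top):
            return 'right_arc'
    return 'shift' if buff else None

# "the cat sleeps": the(1)->cat(2), cat(2)->sleeps(3), sleeps(3)->ROOT(0)
_stack, _buff, _built = [0], [1, 2, 3], {}
_gold = {1: 2, 2: 3, 3: 0}
while True:
    _action = _toy_oracle(_stack, _buff, _gold, _built)
    if _action is None:
        break
    if _action == 'shift':
        _stack.append(_buff.pop(0))
    elif _action == 'left_arc':
        _built[_stack[-2]] = _stack[-1]
        del _stack[-2]
    else:
        _built[_stack[-1]] = _stack[-2]
        _stack.pop()
print(_built)  # {1: 2, 2: 3, 3: 0} -- the toy oracle reproduces the gold arcs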
def __prepare_for_first_contour_connecting(self):
    # cv2.imshow("first", self.cv_image)
    dilated_image = Helper.dilate_image(self.cv_image)
    # cv2.imshow("first dilate", dilated_image)
    # self.steps.append(("con_dilated", dilated_image))
    dilated_mask = Helper.convert_image_to_mask(dilated_image)
    # cv2.imshow("mask", dilated_mask)
    self.steps.append(("con_gray", dilated_mask))
    gray_denoised_image = cv2.fastNlMeansDenoising(dilated_mask, None, 5, 7, 21)
    self.steps.append(("con_denoised", gray_denoised_image))
    # cv2.imshow("denoised", gray_denoised_image)
    threshold_image = cv2.adaptiveThreshold(gray_denoised_image, 255,
                                            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                            cv2.THRESH_BINARY_INV, 11, 2)
    self.steps.append(("con_threshold", threshold_image))
    # cv2.imshow("threshold", threshold_image)
    # ERODED = cv2.erode(threshold_image, (2,2), iterations=1)
    # cv2.imshow("ERODED", ERODED)
    # DILATED = cv2.dilate(ERODED, (2,2), iterations=1)
    # cv2.imshow("DILATED", DILATED)
    # DENOISED = cv2.fastNlMeansDenoising(threshold_image, None, 5, 7, 21)
    # cv2.imshow("DENOISED", DENOISED)
    prepared_mask = cv2.bitwise_and(threshold_image, threshold_image,
                                    mask=self.image_masks.topo_mask)
    self.steps.append(("con_mask", prepared_mask))
    cv2.imshow('prepared', prepared_mask)
    return prepared_mask
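# For reference, a self-contained sketch of the same preparation pipeline on a synthetic
# image. The Helper.dilate_image / convert_image_to_mask calls above are approximated
# with plain OpenCV, and topo_mask is replaced by an all-white mask; the denoise and
# adaptive-threshold parameter values are copied from the method above.
import cv2
import numpy as np

_img = np.full((100, 100, 3), 255, dtype=np.uint8)
cv2.line(_img, (10, 10), (90, 90), (0, 0, 0), 1)               # a thin "contour line"
_gray = cv2.cvtColor(_img, cv2.COLOR_BGR2GRAY)                 # stand-in for convert_image_to_mask
_denoised = cv2.fastNlMeansDenoising(_gray, None, 5, 7, 21)
_thresh = cv2.adaptiveThreshold(_denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY_INV, 11, 2)
_topo_mask = np.full(_gray.shape, 255, dtype=np.uint8)         # stand-in for image_masks.topo_mask
_prepared = cv2.bitwise_and(_thresh, _thresh, mask=_topo_mask)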
def get_input_offset_token(self, token, input_sentence, input_offset):
    # CONFIRM THIS!!!
    if h.get_id(token) + input_offset <= 0:  # To prevent wraparound
        return None
    # No +1 since the token id starts from 1 instead of 0
    return self.try_get_token(input_sentence, -(h.get_id(token) + input_offset))
def get_all_siblings(self, token, input_sentence, arcs):
    if arcs.get(h.get_id(token), None) is None:
        return []
    all_siblings = []
    for word in input_sentence:
        if (h.get_id(word) != 0 and h.get_id(word) != h.get_id(token)
                and arcs.get(h.get_id(word), None) is not None
                and arcs[h.get_id(word)] == arcs[h.get_id(token)]):
            all_siblings += [word]
    return all_siblings
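# A toy illustration of the sibling relation used above: arcs maps child id to head id,
# so the siblings of a token are the other children of its head (ids and arcs below are
# made up for the example).
_arcs = {2: 5, 3: 5, 4: 1, 6: 5}
_token_id = 3
_siblings = [c for c, head in _arcs.items() if c != _token_id and head == _arcs[_token_id]]
print(sorted(_siblings))  # [2, 6]; get_right_sibling would then pick 6, the nearest one to the right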
def __connect_contours_by_distances(self, mask, distances, min_contour_area):
    contour_connector = ContourConnector(mask)
    for distance in distances:
        contour_connector.connect_contours_within_distance(distance)
        Helper.reduce_image_contours(contour_connector.connected_contours_mask,
                                     min_contour_area)
    return contour_connector.connected_contours_mask
def output(self, sentence):
    for token in sentence:
        head = self.arcs.get(h.get_id(token), '0')
        label = self.labels.get(h.get_id(token), '_')
        label = label if label is not None else '_'
        token[6] = str(head)
        token[7] = str(label)
        print '\t'.join(token)
    print
def initialize(self, sentence):
    self.root = ['0', 'ROOT', 'ROOT', 'ROOT', 'ROOT', 'ROOT', '-1', 'ROOT', 'ROOT', 'ROOT']
    self.buff = [self.root] + list(reversed(sentence))
    self.stack = list()
    self.arcs = {}  # arcs is actually a mapping from child to parent (child's head)
    self.labels = {}
    self.transitions = list()
    self.leftmostChildren = h.get_leftmost_children(sentence)    # map from parent to leftmost child
    self.rightmostChildren = h.get_rightmost_children(sentence)  # map from parent to rightmost child
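# The parser treats every token as a 10-field CoNLL-style row, the same layout as
# self.root above. The field indices relied on elsewhere in this file, shown on a toy
# row (the usual CoNLL-X column order is assumed here):
_token = ['1', 'cat', 'cat', 'NN', 'NN', '_', '2', 'nsubj', '_', '_']
_token_id, _form, _pos = _token[0], _token[1], _token[3]   # id, word form, POS (s[3]/b[3] in extract_features)
_head, _deprel = _token[6], _token[7]                      # the two fields output() overwrites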
def __prepare_for_second_contour_connecting(self, mask):
    reduced_mask = Helper.reduce_image_contours(mask, 1)
    self.steps.append(("con_reduced", reduced_mask))
    # cv2.imshow("reduced", reduced_mask)
    dilated_mask = Helper.dilate_image(reduced_mask)
    # self.steps.append(("con_dilated", dilated_mask))
    # cv2.imshow("second dilated", dilated_mask)
    # Helper.show_images_and_wait([])
    return dilated_mask
def __generate_general_color_lines_mask(self, low_range, high_range):
    color_range = self.__get_image_in_range_from_hsv(low_range, high_range)
    filled_contours = self.__get_filled_contours_from_image(color_range)
    contour_mask = Helper.convert_image_to_contour_mask(filled_contours)
    dilated = Helper.dilate_image(contour_mask, array=(2, 2))
    mask = Helper.reduce_image_contours(dilated, 6, line_thickness=cv2.FILLED)
    return mask
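# A self-contained sketch of the in-range -> filled-contour -> dilate pattern that the
# mask generators below rely on, written with raw OpenCV on a synthetic image. The HSV
# range and sizes are made up, and the OpenCV >= 4 return signature of findContours is
# assumed; the repo's Helper.* wrappers are approximated, not reproduced.
import cv2
import numpy as np

_img = np.zeros((100, 100, 3), dtype=np.uint8)
cv2.circle(_img, (50, 50), 20, (0, 0, 255), -1)                 # a solid red blob (BGR)
_hsv = cv2.cvtColor(_img, cv2.COLOR_BGR2HSV)
_in_range = cv2.inRange(_hsv, np.array([0, 120, 120]), np.array([10, 255, 255]))
_contours, _ = cv2.findContours(_in_range, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
_mask = np.zeros_like(_in_range)
cv2.drawContours(_mask, _contours, -1, 255, cv2.FILLED)         # filled contours
_mask = cv2.dilate(_mask, np.ones((2, 2), np.uint8))            # mirrors dilate_image(array=(2, 2))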
def __generate_green_mask(self):
    green_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_green,
                                                     MaskGenerator.high_green)
    filled_green_contours = self.__get_filled_contours_from_image(green_range)
    green_mask = Helper.convert_image_to_mask(filled_green_contours)
    green_mask_reduced = Helper.reduce_image_contours(green_mask, 200,
                                                      line_thickness=cv2.FILLED)
    return green_mask_reduced
def get_all_children(self, token, input_sentence, arcs):
    all_children = {}
    head_id = h.get_id(token)
    for tail in arcs.keys():
        if arcs[tail] == head_id:
            child_token = self.try_get_token(input_sentence, -(tail))
            if child_token is not None:
                all_children[h.get_id(child_token)] = child_token
            else:
                print >> sys.stderr, 'Non-existent child, should NOT happen!!!'
    return all_children
def __generate_black_mask(self):
    black_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_black,
                                                     MaskGenerator.high_black)
    filled_contours = self.__get_filled_contours_from_image(black_range)
    contours_mask = Helper.convert_image_to_mask(filled_contours)
    dilated = Helper.dilate_image(contours_mask, array=self.dilate_array)
    black_mask = Helper.reduce_image_contours(dilated, 6, line_thickness=cv2.FILLED)
    return black_mask
def __generate_blue_mask(self):
    blue_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_blue,
                                                    MaskGenerator.high_blue)
    self.image_masks.steps.append(("blue_range", blue_range))
    filled_blue_contours = self.__get_filled_contours_from_image(blue_range)
    self.image_masks.steps.append(("blue_filled", filled_blue_contours))
    blue_mask = Helper.convert_image_to_mask(filled_blue_contours)
    dilated = Helper.dilate_image(blue_mask, array=(2, 2))
    self.image_masks.steps.append(("blue_dilated", dilated))
    blue_mask = Helper.reduce_image_contours(dilated, 15, line_thickness=cv2.FILLED)
    return blue_mask
def __generate_red_mask(self):
    red_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_red,
                                                   MaskGenerator.high_red)
    # cv2.imshow("red range", red_range)
    filled_contours = self.__get_filled_contours_from_image(red_range)
    # cv2.imshow("filled contours", filled_contours)
    contours_mask = Helper.convert_image_to_mask(filled_contours)
    # cv2.imshow("contours mask", contours_mask)
    dilated = Helper.dilate_image(contours_mask, array=self.dilate_array)
    # cv2.imshow("dilated", dilated)
    reduced = Helper.reduce_image_contours(dilated, 6, line_thickness=cv2.FILLED)
    # cv2.imshow("red range", red_range)
    return reduced
def __get_min_contour_dist(self):
    angle = Helper.convert_grade_to_angle(self.user_settings.max_grade)
    min_feet_dist = self.user_settings.get_contour_interval_dist() / math.tan(math.radians(angle))
    min_pixel_dist = int(min_feet_dist / self.user_settings.get_feet_per_pixel()) + 1
    return min_pixel_dist
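# A worked example of the distance formula above, with made-up settings: a 40 ft contour
# interval, 10 ft per pixel, and a 30 % maximum grade. The grade-to-angle conversion is
# assumed to be atan(grade / 100), matching the percent-slope convention.
import math

_contour_interval_ft = 40.0
_feet_per_pixel = 10.0
_max_grade_pct = 30.0
_angle_deg = math.degrees(math.atan(_max_grade_pct / 100.0))
_min_feet = _contour_interval_ft / math.tan(math.radians(_angle_deg))   # ~133.3 ft
_min_pixels = int(_min_feet / _feet_per_pixel) + 1
print(_min_pixels)  # 14: contour lines closer than ~14 px imply a grade steeper than 30 %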
def is_removable(self, word, arcs, leftmost_child, rightmost_child):
    # Check that this condition is correct
    if ((leftmost_child != p.INFINITY and not arcs.get(leftmost_child, False))
            or (rightmost_child != -1 and not arcs.get(rightmost_child, False))):
        return False
    if p.is_root(word):
        return False
    return True
def get_model7_params(self, stack, buff, input_sentence, arcs, labels, tType,
                      feat_type, source_type, source_offset=0, input_offset=0,
                      head_multiplier=0, left_rightmost_multiplier=0,
                      left_right_sibling_specifier=0, suffix_len=0):
    # Described here: http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf
    assert feat_type in [self.DEP_FEAT, self.POS_FEAT, self.LEX_FEAT], "Invalid feat_type specified"
    assert source_type in [self.BUFF_SOURCE, self.STACK_SOURCE, self.INPUT_SOURCE], "Invalid source type specified"
    assert source_offset >= 0, "Invalid source_offset"
    assert head_multiplier >= 0, "Invalid head multiplier specified"
    # Reverse the input sentence if it isn't already reversed by get_source
    rev_input_sentence = input_sentence[::-1]
    source = self.get_source(stack, buff, input_sentence, source_type)
    token = self.try_get_token(source, -(source_offset + 1))
    if token is None:
        return None
    if input_offset != 0:
        token = self.get_input_offset_token(token, rev_input_sentence, input_offset)
    if token is None:
        return None
    token = self.get_head_offset_token(token, rev_input_sentence, head_multiplier, arcs)
    if token is None:
        return None
    token = self.get_left_rightmost_child(token, rev_input_sentence, arcs,
                                          left_rightmost_multiplier)
    if token is None:
        return None
    token = self.get_left_right_sibling(token, rev_input_sentence, arcs,
                                        left_right_sibling_specifier)
    if token is None:
        return None
    ret_str = ('transition=%d,feat_type=%d,source_type=%d,source_offset=%d,'
               'input_offset=%d,head_multiplier=%d,left_rightmost_multiplier=%d,'
               'left_right_sibling_specifier=%d') % (
                   tType, feat_type, source_type, source_offset, input_offset,
                   head_multiplier, left_rightmost_multiplier,
                   left_right_sibling_specifier)
    if feat_type == self.LEX_FEAT:
        # The suffix length can be specified via argument
        lex_feat = h.get_word(token)
        if suffix_len > 0:
            ret_str += 'lex_feat=%s' % (lex_feat[-suffix_len:])
        else:
            ret_str += 'lex_feat=%s' % (lex_feat)
    elif feat_type == self.DEP_FEAT:
        dep_feat = labels.get(h.get_id(token), None)
        if dep_feat is not None:
            ret_str += 'dep_feat=%s' % (dep_feat)
        else:
            return None
    elif feat_type == self.POS_FEAT:
        pos_feat = h.get_postag(token)
        ret_str += 'pos_feat=%s' % (pos_feat)
    else:
        return None
    return ret_str
def get_head_offset_token(self, token, input_sentence, head_multiplier, arcs):
    while token is not None and head_multiplier > 0:
        head_multiplier -= 1
        token_id = h.get_id(token)
        head_id = arcs.get(token_id, None)
        if head_id is None:
            token = None
        else:
            token = self.try_get_token(input_sentence, -head_id)
    return token
def __get_grade_for_pixel_distance(self, pixel_dist):
    feet_dist = pixel_dist * self.user_settings.get_feet_per_pixel()
    theta = math.atan(self.user_settings.get_contour_interval_dist() / feet_dist)
    angle = math.degrees(theta)
    grade = Helper.convert_angle_to_grade(angle)
    return grade
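# The inverse of the minimum-distance calculation above: the grade implied by two
# contour lines 14 px apart, reusing the same made-up settings, with the angle-to-grade
# conversion assumed to be tan(angle) * 100.
import math

_feet_dist = 14 * 10.0                          # pixel distance * feet per pixel
_theta = math.atan(40.0 / _feet_dist)           # rise is the contour interval
_grade_pct = math.tan(_theta) * 100.0
print(round(_grade_pct, 1))                     # 28.6 %, just under the 30 % limit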
def execute_transition(self, transition):
    """This function should take a transition object and apply it to the current
    parser state. It need not return anything."""
    self.transitions.append(transition.transitionType)
    if transition.transitionType == Transition.Shift:
        self.stack.append(self.buff.pop())
    elif transition.transitionType == Transition.LeftArc:
        top = self.stack.pop()
        top_id = h.get_id(top)
        pre_top = self.stack.pop()
        pre_top_id = h.get_id(pre_top)
        self.stack.append(top)
        self.arcs[pre_top_id] = top_id
        self.labels[pre_top_id] = transition.label
    else:
        top = self.stack.pop()
        top_id = h.get_id(top)
        pre_top = self.stack[-1]
        pre_top_id = h.get_id(pre_top)
        self.arcs[top_id] = pre_top_id
        self.labels[top_id] = transition.label
def get_leftmost_child(self, token, input_sentence, arcs, leftmost_multiplier):
    assert leftmost_multiplier < 0, "Invalid leftmost_multiplier passed"
    while token is not None and leftmost_multiplier < 0:
        leftmost_multiplier += 1
        all_children = self.get_all_children(token, input_sentence, arcs)
        if len(all_children) == 0:
            return None
        min_candidate = min(all_children.keys())
        if min_candidate < h.get_id(token):
            token = all_children[min_candidate]
        else:
            return None
    return token
def get_rightmost_child(self, token, input_sentence, arcs, rightmost_multiplier):
    assert rightmost_multiplier > 0, "Invalid rightmost_multiplier passed"
    while token is not None and rightmost_multiplier > 0:
        rightmost_multiplier -= 1
        all_children = self.get_all_children(token, input_sentence, arcs)
        if len(all_children) == 0:
            return None
        max_candidate = max(all_children.keys())
        if max_candidate > h.get_id(token):
            token = all_children[max_candidate]
        else:
            return None
    return token
def get_right_sibling(self, token, input_sentence, arcs, right_sibling_multiplier):
    assert right_sibling_multiplier > 0, "Invalid right sibling multiplier"
    all_siblings = self.get_all_siblings(token, input_sentence, arcs)
    while token is not None and right_sibling_multiplier > 0:
        right_sibling_multiplier -= 1
        if len(all_siblings) == 0:
            return None
        min_dist = h.INFINITY
        nearest_sibling = None
        for sibling in all_siblings:
            if (h.get_id(sibling) > h.get_id(token)
                    and abs(h.get_id(sibling) - h.get_id(token)) < min_dist):
                min_dist = abs(h.get_id(sibling) - h.get_id(token))
                nearest_sibling = sibling
        # NOTE: we should NOT keep cycling between siblings here, since each step
        # moves strictly to the right.
        token = nearest_sibling
    return token
def __get_sub_image(self):
    rows, cols, chan = self.image.shape
    sub_image = self.image[
        int(self.__bottom_thresh * rows):rows,                            # bottom rows
        int(self.__left_thresh * cols):int(self.__right_thresh * cols)    # middle columns
    ]
    sub_image = cv2.resize(sub_image, None, fx=self.__resize_factor,
                           fy=self.__resize_factor, interpolation=cv2.INTER_LINEAR)
    sub_image = Helper.convert_image_to_mask(sub_image)
    # NOTE: the denoised and thresholded images below are computed but currently unused;
    # the grayscale sub_image is what gets returned.
    gray_denoised_image = cv2.fastNlMeansDenoising(sub_image, None, 5, 7, 21)
    threshold_image = cv2.threshold(gray_denoised_image, 225, 255, cv2.THRESH_BINARY_INV)[1]
    return sub_image
def extract_features(self, transition, stack, buff, labels, previous_transitions, arcs, input_sentence):
    features = defaultdict(float)
    # tType = transition.transitionType
    tType = -1  # Dummy value since this is not encoded in the feature for SVM
    label = 'dummy_label'  # Dummy label since this is not encoded in the feature for SVM
    # Model7 features as described in http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf
    feat21_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 0, 0, -1)  # pos for pre-top's lmc
    feat31_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 0, 0, 1)  # pos for pre-top's rmc
    feat41_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, 0, 0, -1)  # pos for top's lmc
    feat42_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, 0, 0, 1)  # pos for top's rmc
    feat5_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 1)  # lex for pre-top
    feat6_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 0)  # lex for top
    feat71_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.BUFF_SOURCE)  # pos for next buffer item
    feat76_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.BUFF_SOURCE, 1)  # pos for next-next buffer item
    feat10_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 1)  # pos for word after pre-top in input
    feat11_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, -1)  # pos for word before pre-top in input
    feat12_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 1, 1)  # dep for word after pre-top in input
    feat13_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 1, -1)  # dep for word before pre-top in input
    feat14_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.LEX_FEAT, self.STACK_SOURCE, 0, 1)  # lex for word after top in input
    feat16_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, 1)  # pos for word after top in input
    feat17_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, -1)  # pos for word before top in input
    feat18_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 0, 1)  # dep for word after top in input
    feat19_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 0, -1)  # dep for word before top in input
    pre_top_pos = self.get_model7_params(stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1)
    top_pos = self.get_model7_params(stack, buff, input_sentence, arcs, labels,
                                     tType, self.POS_FEAT, self.STACK_SOURCE, 0)
    cfeat_11 = self.compose_feats(features, [pre_top_pos, top_pos])  # pos of both pre-top and top
    cfeat_12 = self.compose_feats(features, [top_pos, feat71_model7])  # pos of top and next buff
    cfeat_13 = self.compose_feats(features, [top_pos, feat71_model7, feat76_model7])  # pos for top, next and next-next
    cfeat_17 = self.compose_feats(features, [pre_top_pos, top_pos, feat41_model7])  # pos for pre-top, top and top's lmc
    cfeat_175 = self.compose_feats(features, [pre_top_pos, top_pos, feat42_model7])  # pos for pre-top, top and top's rmc
    # Top three POS tags from the stack
    for i in range(3):  # was originally 2
        if i >= len(stack):
            break
        s = stack[-(i + 1)]
        pos = s[3]
        features['transition=%d,s%d.pos=%s' % (tType, i, pos)] = 1
    # Next two POS tags from the buffer
    for i in range(2):
        if i >= len(buff):
            break
        b = buff[-(i + 1)]
        pos = b[3]
        features['transition=%d,b%d.pos=%s' % (tType, i, pos)] = 1
    # Previous transition type
    if len(previous_transitions) > 0:
        prev = previous_transitions[-1]
        features['transition=%d,prev_transition=%d' % (tType, prev)] = 1
    else:
        features['transition=%d,prev_transition=None' % (tType)] = 1
    # We don't care about the labeled case, and transition should not be passed in for SVM
    if self.labeled and transition is not None:
        # Action and label pair
        features['transition=%d,label=%s' % (transition.transitionType, transition.label)] = 1
        # Label bias
        features['label=%s' % (transition.label)] = 1
    # Features based on http://dl.acm.org/citation.cfm?id=2002777
    # Distance function
    if len(stack) > 0 and len(buff) > 0:
        dist = h.get_id(stack[-1]) - h.get_id(buff[-1])
        if dist < 0:
            features['transition=%d,neg_dist=' % (tType)] = dist
        else:
            features['transition=%d,pos_dist=' % (tType)] = dist
    # Valency function
    if len(stack) > 1:
        [left_valency, right_valency] = self.get_valency(arcs, h.get_id(stack[-1]))
        left_val_feat = 'transition=%d,head_left_valency=%d' % (tType, left_valency)
        features[left_val_feat + top_pos] = 1
        features[left_val_feat] = 1
        right_val_feat = 'transition=%d,head_right_valency=%d' % (tType, right_valency)
        features[right_val_feat] = 1
        features[right_val_feat + top_pos] = 1
    return features
def extract_features(self, transition, stack, buff, labels, previous_transitions, arcs, input_sentence):
    features = defaultdict(float)
    tType = transition.transitionType
    label = transition.label
    # Model7 features as described in http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf
    feat1_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.DEP_FEAT, self.STACK_SOURCE, 1)  # dep for pre-top
    feat2_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.DEP_FEAT, self.STACK_SOURCE, 1, 0, 0, -1)  # dep for pre-top's lmc
    feat21_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 0, 0, -1)  # pos for pre-top's lmc
    feat3_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.DEP_FEAT, self.STACK_SOURCE, 1, 0, 0, 1)  # dep for pre-top's rmc
    feat31_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 0, 0, 1)  # pos for pre-top's rmc
    feat4_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.DEP_FEAT, self.STACK_SOURCE, 0, 0, 0, -1)  # dep for top's lmc
    feat41_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, 0, 0, -1)  # pos for top's lmc
    feat5_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 1)  # lex for pre-top
    feat6_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 0)  # lex for top
    feat7_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.BUFF_SOURCE)  # lex for next buffer item
    feat71_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.BUFF_SOURCE)  # pos for next buffer item
    feat75_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.LEX_FEAT, self.BUFF_SOURCE, 1)  # lex for next-next buffer item
    feat76_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.BUFF_SOURCE, 1)  # pos for next-next buffer item
    feat8_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 1, 1)  # lex for word after pre-top in input
    feat9_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                        tType, self.LEX_FEAT, self.STACK_SOURCE, 1, -1)  # lex for word before pre-top in input
    feat10_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, 1)  # pos for word after pre-top in input
    feat11_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1, -1)  # pos for word before pre-top in input
    feat12_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 1, 1)  # dep for word after pre-top in input
    feat13_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 1, -1)  # dep for word before pre-top in input
    feat14_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.LEX_FEAT, self.STACK_SOURCE, 0, 1)  # lex for word after top in input
    feat15_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.LEX_FEAT, self.STACK_SOURCE, 0, -1)  # lex for word before top in input
    feat16_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, 1)  # pos for word after top in input
    feat17_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 0, -1)  # pos for word before top in input
    feat18_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 0, 1)  # dep for word after top in input
    feat19_model7 = self.add_model7_feat(features, stack, buff, input_sentence, arcs, labels,
                                         tType, self.DEP_FEAT, self.STACK_SOURCE, 0, -1)  # dep for word before top in input
    pre_top_pos = self.get_model7_params(stack, buff, input_sentence, arcs, labels,
                                         tType, self.POS_FEAT, self.STACK_SOURCE, 1)
    top_pos = self.get_model7_params(stack, buff, input_sentence, arcs, labels,
                                     tType, self.POS_FEAT, self.STACK_SOURCE, 0)
    cfeat_1 = self.compose_feats(features, [feat5_model7, pre_top_pos])  # lex_pos of pre-top
    cfeat_2 = self.compose_feats(features, [feat6_model7, top_pos])  # lex_pos for top
    cfeat_3 = self.compose_feats(features, [feat7_model7, feat71_model7])  # lex_pos for next buffer item
    cfeat_4 = self.compose_feats(features, [feat75_model7, feat76_model7])  # lex_pos for next-next buffer item
    cfeat_5 = self.compose_feats(features, [cfeat_1, cfeat_2])  # lex_pos for both pre-top and top
    cfeat_6 = self.compose_feats(features, [cfeat_1, feat6_model7])  # lex_pos of pre-top with lex of top
    cfeat_7 = self.compose_feats(features, [feat5_model7, cfeat_2])  # lex of pre-top with lex_pos of top
    cfeat_8 = self.compose_feats(features, [cfeat_1, top_pos])  # lex_pos of pre-top with pos of top
    cfeat_9 = self.compose_feats(features, [pre_top_pos, cfeat_2])  # pos of pre-top with lex_pos of top
    cfeat_10 = self.compose_feats(features, [feat5_model7, feat6_model7])  # lex of both pre-top and top
    cfeat_11 = self.compose_feats(features, [pre_top_pos, top_pos])  # pos of both pre-top and top
    cfeat_12 = self.compose_feats(features, [top_pos, feat71_model7])  # pos of top and next buff
    cfeat_13 = self.compose_feats(features, [top_pos, feat71_model7, feat76_model7])  # pos for top, next and next-next
    cfeat_14 = self.compose_feats(features, [pre_top_pos, top_pos, feat71_model7])  # pos for pre-top, top and next
    cfeat_15 = self.compose_feats(features, [pre_top_pos, feat21_model7, top_pos])  # pos for pre-top, pre-top's lmc and top
    cfeat_16 = self.compose_feats(features, [pre_top_pos, feat31_model7, top_pos])  # pos for pre-top, pre-top's rmc and top
    cfeat_17 = self.compose_feats(features, [pre_top_pos, top_pos, feat41_model7])  # pos for pre-top, top and top's lmc
    # Top three POS tags from the stack
    for i in range(3):  # was originally 2
        if i >= len(stack):
            break
        s = stack[-(i + 1)]
        pos = s[3]
        features['transition=%d,s%d.pos=%s' % (tType, i, pos)] = 1
    # Next three POS tags from the buffer
    for i in range(3):
        if i >= len(buff):
            break
        b = buff[-(i + 1)]
        pos = b[3]
        features['transition=%d,b%d.pos=%s' % (tType, i, pos)] = 1
    # Previous transition type
    if len(previous_transitions) > 0:
        prev = previous_transitions[-1]
        features['transition=%d,prev_transition=%d' % (tType, prev)] = 1
    else:
        features['transition=%d,prev_transition=None' % (tType)] = 1
    # Bias feature
    features['transition=%d' % (transition.transitionType)] = 1
    if self.labeled:
        # Action and label pair
        features['transition=%d,label=%s' % (transition.transitionType, transition.label)] = 1
        # Label bias
        features['label=%s' % (transition.label)] = 1
    # Features based on http://dl.acm.org/citation.cfm?id=2002777
    # Distance function
    if len(stack) > 0 and len(buff) > 0:
        dist = h.get_id(stack[-1]) - h.get_id(buff[-1])
        if dist < 0:
            features['transition=%d,neg_dist=' % (tType)] = dist
        else:
            features['transition=%d,pos_dist=' % (tType)] = dist
    # Valency function
    if len(stack) > 1:
        if tType == Transition.LeftArc:
            # For a left arc the head is the stack top
            [left_valency, right_valency] = self.get_valency(arcs, h.get_id(stack[-1]))
            features['transition=%d,head_left_valency=%d' % (tType, left_valency)] = 1
            features['transition=%d,head_right_valency=%d' % (tType, right_valency)] = 1
        elif tType == Transition.RightArc:
            # For a right arc the head is the token below the top
            [left_valency, right_valency] = self.get_valency(arcs, h.get_id(stack[-2]))
            features['transition=%d,head_left_valency=%d' % (tType, left_valency)] = 1
            features['transition=%d,head_right_valency=%d' % (tType, right_valency)] = 1
    return features
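# A minimal illustration of the representation both extract_features variants build:
# a sparse, string-keyed indicator map in a defaultdict(float). The key formats match
# the templates above; the concrete transition id and POS values are made up.
from collections import defaultdict

_features = defaultdict(float)
_features['transition=0,s0.pos=NN'] = 1              # POS of the stack top
_features['transition=0,b0.pos=VB'] = 1              # POS of the next buffer token
_features['transition=0,prev_transition=None'] = 1   # no previous transition yet
print(dict(_features))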
def test_helper():
    df = pd.DataFrame([[1, 2, 3], [2, 2, 3], [3, 3, 3]])
    helper = Helper()
    assert isinstance(df, pd.DataFrame)
    assert isinstance(helper, Helper)