Example #1
0
 def getTransition(self, stack, buff, leftmostChildren, rightmostChildren, arcs, labeled):
     """Return the oracle's Transition for the current stack/buffer state.

     With at least two stack items: LeftArc when the second item's head is
     the top item and the second item is removable; otherwise RightArc when
     the top item's head is the second item and the top item is removable;
     otherwise Shift.  With fewer than two stack items the result is Shift
     while the buffer is non-empty and None once it is empty.  Arc
     transitions carry the dependent's deprel only when ``labeled`` is truthy.
     """
     if len(stack) <= 1:
         # Nothing to attach yet: shift while there is input, else stop.
         if len(buff) >= 1:
             return Transition(Transition.Shift, None)
         return None

     upper, lower = stack[-1], stack[-2]
     upper_id = p.get_id(upper)
     lower_id = p.get_id(lower)
     # Missing entries default to -1 (rightmost) / p.INFINITY (leftmost),
     # the sentinels that is_removable compares against.
     upper_rmc = rightmostChildren.get(upper_id, -1)
     lower_rmc = rightmostChildren.get(lower_id, -1)
     upper_lmc = leftmostChildren.get(upper_id, p.INFINITY)
     lower_lmc = leftmostChildren.get(lower_id, p.INFINITY)

     if p.get_head(lower) == upper_id and self.is_removable(lower, arcs, lower_lmc, lower_rmc):
         arc_type, dependent = Transition.LeftArc, lower
     elif p.get_head(upper) == lower_id and self.is_removable(upper, arcs, upper_lmc, upper_rmc):
         arc_type, dependent = Transition.RightArc, upper
     else:
         return Transition(Transition.Shift, None)

     return Transition(arc_type, p.get_deprel(dependent) if labeled else None)
	def __prepare_for_first_contour_connecting(self):
		"""Build the mask used for the first contour-connecting pass.

		Pipeline: dilate ``self.cv_image``, convert it to a mask, denoise it,
		apply an inverted-binary adaptive Gaussian threshold, then keep only
		the area selected by ``self.image_masks.topo_mask``.  The mask,
		denoised, threshold and final images are appended to ``self.steps``.
		The final mask is also shown in a window named 'prepared'.
		"""
		gray = Helper.convert_image_to_mask(Helper.dilate_image(self.cv_image))
		self.steps.append(("con_gray", gray))

		denoised = cv2.fastNlMeansDenoising(gray, None, 5, 7, 21)
		self.steps.append(("con_denoised", denoised))

		binary = cv2.adaptiveThreshold(
			denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
			cv2.THRESH_BINARY_INV, 11, 2)
		self.steps.append(("con_threshold", binary))

		masked = cv2.bitwise_and(binary, binary, mask=self.image_masks.topo_mask)
		self.steps.append(("con_mask", masked))

		# NOTE(review): this is the only imshow call left enabled in this
		# method; it looks like leftover debugging -- confirm it is wanted.
		cv2.imshow('prepared', masked)
		return masked
Example #3
0
 def getTransition(self, stack, buff, leftmostChildren, rightmostChildren,
                   arcs, labeled):
     """Return the oracle's Transition for the current stack/buffer state.

     With at least two stack items: LeftArc when the second item's head is
     the top item and the second item is removable; otherwise RightArc when
     the top item's head is the second item and the top item is removable;
     otherwise Shift.  With fewer than two stack items the result is Shift
     while the buffer is non-empty and None once it is empty.  Arc
     transitions carry the dependent's deprel only when ``labeled`` is truthy.
     """
     if len(stack) <= 1:
         # Nothing to attach yet: shift while there is input, else stop.
         if len(buff) >= 1:
             return Transition(Transition.Shift, None)
         return None

     upper, lower = stack[-1], stack[-2]
     upper_id = p.get_id(upper)
     lower_id = p.get_id(lower)
     # Missing entries default to -1 (rightmost) / p.INFINITY (leftmost),
     # the sentinels that is_removable compares against.
     upper_rmc = rightmostChildren.get(upper_id, -1)
     lower_rmc = rightmostChildren.get(lower_id, -1)
     upper_lmc = leftmostChildren.get(upper_id, p.INFINITY)
     lower_lmc = leftmostChildren.get(lower_id, p.INFINITY)

     if p.get_head(lower) == upper_id and self.is_removable(
             lower, arcs, lower_lmc, lower_rmc):
         arc_type, dependent = Transition.LeftArc, lower
     elif p.get_head(upper) == lower_id and self.is_removable(
             upper, arcs, upper_lmc, upper_rmc):
         arc_type, dependent = Transition.RightArc, upper
     else:
         return Transition(Transition.Shift, None)

     return Transition(arc_type, p.get_deprel(dependent) if labeled else None)
Example #4
0
 def get_input_offset_token(self, token, input_sentence,
                            input_offset):
     """Return the token whose id is ``token``'s id plus ``input_offset``.

     Returns None when that target id is zero or negative, so the negated id
     never becomes a non-negative index that would wrap around.  Otherwise
     the lookup is delegated to ``self.try_get_token`` with the negated id.
     """
     # TODO(review): the original author flagged this logic as needing
     # confirmation.
     target_id = h.get_id(token) + input_offset
     if target_id <= 0:
         return None
     # Token ids start at 1, so -target_id is used directly (no extra +1).
     return self.try_get_token(input_sentence, -target_id)
Example #5
0
 def get_all_siblings(self, token, input_sentence, arcs):
     """Collect the words of ``input_sentence`` that share ``token``'s head.

     ``arcs`` is looked up by word id and yields that word's head.  Returns an
     empty list when ``token`` has no head recorded.  Words with id 0, the
     token itself and words without a recorded head are never included.
     """
     own_id = h.get_id(token)
     if arcs.get(own_id, None) is None:
         return []

     siblings = []
     for candidate in input_sentence:
         candidate_id = h.get_id(candidate)
         if candidate_id == 0 or candidate_id == own_id:
             continue
         if arcs.get(candidate_id, None) is None:
             continue
         if arcs[candidate_id] == arcs[own_id]:
             siblings.append(candidate)
     return siblings
	def __connect_contours_by_distances(self, mask, distances, min_contour_area):
		"""Connect the contours of ``mask`` once per entry of ``distances``.

		A ``ContourConnector`` is built from ``mask``; for every distance, in
		the order given, contours within that distance are connected and
		``Helper.reduce_image_contours`` is called on the connector's mask
		with ``min_contour_area``.  Returns the connector's
		``connected_contours_mask``.
		"""
		connector = ContourConnector(mask)

		for max_gap in distances:
			connector.connect_contours_within_distance(max_gap)
			# NOTE(review): the return value is discarded here, whereas other
			# callers use the returned image -- confirm that
			# reduce_image_contours also modifies its argument in place.
			Helper.reduce_image_contours(connector.connected_contours_mask, min_contour_area)

		return connector.connected_contours_mask
Example #7
0
 def output(self, sentence):
     """Print ``sentence`` to stdout, one tab-joined token per line.

     Each token is modified in place: field 6 becomes the head recorded in
     ``self.arcs`` ('0' when absent) and field 7 the label recorded in
     ``self.labels`` ('_' when absent or None).  A blank line is printed
     after the last token.
     """
     for token in sentence:
         token_id = h.get_id(token)
         head = self.arcs.get(token_id, '0')
         label = self.labels.get(token_id, '_')
         if label is None:
             label = '_'
         token[6] = str(head)
         token[7] = str(label)
         # A parenthesised single argument prints identically whether print
         # is a statement (Python 2) or a function (Python 3).
         print('\t'.join(token))
     # print('') emits just the newline on both Python 2 and Python 3.
     print('')
Example #8
0
 def initialize(self, sentence):
     """Reset the parser state for a new ``sentence``.

     The buffer holds the artificial ROOT token followed by the sentence in
     reverse order; the stack, arcs, labels and transition history start
     empty.  The leftmost/rightmost child maps come from
     ``h.get_leftmost_children`` and ``h.get_rightmost_children``.
     """
     # Ten-field ROOT token: '0' in field 0, '-1' in field 6, 'ROOT' elsewhere.
     self.root = ['0'] + ['ROOT'] * 5 + ['-1'] + ['ROOT'] * 3
     reversed_words = list(reversed(sentence))
     self.buff = [self.root] + reversed_words
     self.stack = []
     # arcs maps a child to its parent (the child's head).
     self.arcs = dict()
     self.labels = dict()
     self.transitions = []
     # Maps from a parent to its leftmost / rightmost child.
     self.leftmostChildren = h.get_leftmost_children(sentence)
     self.rightmostChildren = h.get_rightmost_children(sentence)
	def __prepare_for_second_contour_connecting(self, mask):
		"""Reduce and then dilate ``mask`` ahead of the second connecting pass.

		``Helper.reduce_image_contours`` is applied with argument 1; its result
		is recorded in ``self.steps`` as "con_reduced" and then dilated with
		``Helper.dilate_image``.  Returns the dilated mask.
		"""
		shrunk = Helper.reduce_image_contours(mask, 1)
		self.steps.append(("con_reduced", shrunk))
		return Helper.dilate_image(shrunk)
Example #10
0
 def output(self, sentence):
     """Print ``sentence`` to stdout, one tab-joined token per line.

     Each token is modified in place: field 6 becomes the head recorded in
     ``self.arcs`` ('0' when absent) and field 7 the label recorded in
     ``self.labels`` ('_' when absent or None).  A blank line is printed
     after the last token.
     """
     for token in sentence:
         token_id = h.get_id(token)
         head = self.arcs.get(token_id, '0')
         label = self.labels.get(token_id, '_')
         if label is None:
             label = '_'
         token[6] = str(head)
         token[7] = str(label)
         # A parenthesised single argument prints identically whether print
         # is a statement (Python 2) or a function (Python 3).
         print('\t'.join(token))
     # print('') emits just the newline on both Python 2 and Python 3.
     print('')
    def __generate_general_color_lines_mask(self, low_range, high_range):
        """Build a line mask for the HSV range ``low_range``..``high_range``.

        Steps: select the in-range image, fill its contours, convert the result
        to a contour mask, dilate with ``array=(2, 2)`` and finally run
        ``Helper.reduce_image_contours`` with argument 6 and filled lines.
        """
        in_range = self.__get_image_in_range_from_hsv(low_range, high_range)
        filled = self.__get_filled_contours_from_image(in_range)
        thickened = Helper.dilate_image(
            Helper.convert_image_to_contour_mask(filled), array=(2, 2))
        return Helper.reduce_image_contours(thickened,
                                            6,
                                            line_thickness=cv2.FILLED)
    def __generate_green_mask(self):
        """Build the mask for the green HSV range of ``MaskGenerator``.

        Steps: select the image between ``MaskGenerator.low_green`` and
        ``MaskGenerator.high_green``, fill its contours, convert to a mask and
        run ``Helper.reduce_image_contours`` with argument 200 and filled lines.
        """
        in_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_green,
                                                      MaskGenerator.high_green)
        filled = self.__get_filled_contours_from_image(in_range)
        mask = Helper.convert_image_to_mask(filled)
        return Helper.reduce_image_contours(mask,
                                            200,
                                            line_thickness=cv2.FILLED)
Example #13
0
 def get_all_children(self, token, input_sentence, arcs):
     """Return ``{child_id: child_token}`` for every word headed by ``token``.

     ``arcs`` maps a child id to its head id.  Each matching child id is
     resolved with ``self.try_get_token(input_sentence, -child_id)``; a child
     that cannot be resolved is reported on stderr and left out.
     """
     head_id = h.get_id(token)
     children = {}
     for tail, tail_head in arcs.items():
         if tail_head == head_id:
             child_token = self.try_get_token(input_sentence, -(tail))
             if child_token is not None:
                 children[h.get_id(child_token)] = child_token
             else:
                 # sys.stderr.write works on both Python 2 and 3, unlike the
                 # Python 2-only ``print >>`` statement.
                 sys.stderr.write('Non-existent child, should NOT happen!!!\n')
     return children
Example #14
0
 def get_all_children(self, token, input_sentence, arcs):
     """Return ``{child_id: child_token}`` for every word headed by ``token``.

     ``arcs`` maps a child id to its head id.  Each matching child id is
     resolved with ``self.try_get_token(input_sentence, -child_id)``; a child
     that cannot be resolved is reported on stderr and left out.
     """
     head_id = h.get_id(token)
     children = {}
     for tail, tail_head in arcs.items():
         if tail_head == head_id:
             child_token = self.try_get_token(input_sentence, -(tail))
             if child_token is not None:
                 children[h.get_id(child_token)] = child_token
             else:
                 # sys.stderr.write works on both Python 2 and 3, unlike the
                 # Python 2-only ``print >>`` statement.
                 sys.stderr.write('Non-existent child, should NOT happen!!!\n')
     return children
    def __generate_black_mask(self):
        """Build the mask for the black HSV range of ``MaskGenerator``.

        Steps: select the image between ``MaskGenerator.low_black`` and
        ``MaskGenerator.high_black``, fill its contours, convert to a mask,
        dilate with ``self.dilate_array`` and run
        ``Helper.reduce_image_contours`` with argument 6 and filled lines.
        """
        in_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_black,
                                                      MaskGenerator.high_black)
        mask = Helper.convert_image_to_mask(
            self.__get_filled_contours_from_image(in_range))
        thickened = Helper.dilate_image(mask, array=self.dilate_array)
        return Helper.reduce_image_contours(thickened,
                                            6,
                                            line_thickness=cv2.FILLED)
    def __generate_blue_mask(self):
        """Build the mask for the blue HSV range of ``MaskGenerator``.

        Steps: select the image between ``MaskGenerator.low_blue`` and
        ``MaskGenerator.high_blue``, fill its contours, convert to a mask,
        dilate with ``array=(2, 2)`` and run ``Helper.reduce_image_contours``
        with argument 15 and filled lines.  The range, filled and dilated
        images are appended to ``self.image_masks.steps``.
        """
        in_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_blue,
                                                      MaskGenerator.high_blue)
        self.image_masks.steps.append(("blue_range", in_range))

        filled = self.__get_filled_contours_from_image(in_range)
        self.image_masks.steps.append(("blue_filled", filled))

        thickened = Helper.dilate_image(Helper.convert_image_to_mask(filled),
                                        array=(2, 2))
        self.image_masks.steps.append(("blue_dilated", thickened))

        return Helper.reduce_image_contours(thickened,
                                            15,
                                            line_thickness=cv2.FILLED)
Example #17
0
 def initialize(self, sentence):
     """Reset the parser state for a new ``sentence``.

     The buffer holds the artificial ROOT token followed by the sentence in
     reverse order; the stack, arcs, labels and transition history start
     empty.  The leftmost/rightmost child maps come from
     ``h.get_leftmost_children`` and ``h.get_rightmost_children``.
     """
     # Ten-field ROOT token: '0' in field 0, '-1' in field 6, 'ROOT' elsewhere.
     self.root = ['0'] + ['ROOT'] * 5 + ['-1'] + ['ROOT'] * 3
     reversed_words = list(reversed(sentence))
     self.buff = [self.root] + reversed_words
     self.stack = []
     # arcs maps a child to its parent (the child's head).
     self.arcs = dict()
     self.labels = dict()
     self.transitions = []
     # Maps from a parent to its leftmost / rightmost child.
     self.leftmostChildren = h.get_leftmost_children(sentence)
     self.rightmostChildren = h.get_rightmost_children(sentence)
    def __generate_red_mask(self):
        """Build the mask for the red HSV range of ``MaskGenerator``.

        Steps: select the image between ``MaskGenerator.low_red`` and
        ``MaskGenerator.high_red``, fill its contours, convert to a mask,
        dilate with ``self.dilate_array`` and run
        ``Helper.reduce_image_contours`` with argument 6 and filled lines.
        """
        in_range = self.__get_image_in_range_from_hsv(MaskGenerator.low_red,
                                                      MaskGenerator.high_red)
        mask = Helper.convert_image_to_mask(
            self.__get_filled_contours_from_image(in_range))
        thickened = Helper.dilate_image(mask, array=self.dilate_array)
        return Helper.reduce_image_contours(thickened,
                                            6,
                                            line_thickness=cv2.FILLED)
    def __get_min_contour_dist(self):
        """Return the pixel distance derived from the user's maximum grade.

        The maximum grade is converted to an angle; the contour interval
        divided by the tangent of that angle gives a distance in feet, which is
        divided by feet-per-pixel, truncated with ``int`` and incremented by 1.
        """
        settings = self.user_settings
        slope_angle = Helper.convert_grade_to_angle(settings.max_grade)
        # NOTE(review): an angle whose tangent is 0 would make this division
        # fail -- confirm max_grade can never produce that.
        run_in_feet = settings.get_contour_interval_dist() / math.tan(
            math.radians(slope_angle))
        return int(run_in_feet / settings.get_feet_per_pixel()) + 1
Example #20
0
 def is_removable(self, word, arcs, leftmost_child, rightmost_child):
     """Tell whether ``word`` may be removed from the stack.

     Returns False when ``word`` has a leftmost child (anything other than
     ``p.INFINITY``) or a rightmost child (anything other than -1) for which
     ``arcs`` holds no truthy entry, and also when ``word`` is the root.
     Returns True otherwise.
     """
     # TODO(review): the original author asked to check whether the
     # truthiness test on arcs.get(...) is the correct condition.
     if leftmost_child != p.INFINITY and not arcs.get(leftmost_child, False):
         return False
     if rightmost_child != -1 and not arcs.get(rightmost_child, False):
         return False
     if p.is_root(word):
         return False
     return True
Example #21
0
    def get_model7_params(self, stack, buff, input_sentence, arcs, labels, tType, feat_type, source_type, source_offset = 0, input_offset = 0, head_multiplier = 0, left_rightmost_multiplier = 0, left_right_sibling_specifier = 0, suffix_len = 0):
        """Build the string describing one "Model 7" feature, or return None.

        The feature addressing scheme is described in
        http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf.

        A token is picked from the chosen source at ``source_offset`` and then
        moved, in this order, by the input offset (only when non-zero), the
        head multiplier, the left/rightmost-child multiplier and the sibling
        specifier.  None is returned as soon as any step yields no token, when
        a DEP feature has no label recorded for the token, or for an
        unrecognised ``feat_type``.  Otherwise the result is the parameter
        prefix followed by the lexical (optionally only the last
        ``suffix_len`` characters), dependency or part-of-speech value.

        Raises AssertionError for an invalid ``feat_type`` or ``source_type``
        or a negative ``source_offset`` / ``head_multiplier``.
        """
        assert feat_type in [self.DEP_FEAT, self.POS_FEAT, self.LEX_FEAT], "Invalid feat_type specified"
        assert source_type in [self.BUFF_SOURCE, self.STACK_SOURCE, self.INPUT_SOURCE], "Invalid source type specified"
        assert source_offset >= 0, "Invalid source_offset"
        assert head_multiplier >= 0, "Invalid head multiplier specified"

        # The navigation helpers receive the sentence in reverse order.
        reversed_sentence = input_sentence[::-1]
        source = self.get_source(stack, buff, input_sentence, source_type)

        token = self.try_get_token(source, -(source_offset + 1))
        if token is not None and input_offset != 0:
            token = self.get_input_offset_token(token, reversed_sentence, input_offset)
        if token is not None:
            token = self.get_head_offset_token(token, reversed_sentence, head_multiplier, arcs)
        if token is not None:
            token = self.get_left_rightmost_child(token, reversed_sentence, arcs, left_rightmost_multiplier)
        if token is not None:
            token = self.get_left_right_sibling(token, reversed_sentence, arcs, left_right_sibling_specifier)
        if token is None:
            return None

        prefix = 'transition=%d,feat_type=%d,source_type=%d,source_offset=%d,input_offset=%d,head_multiplier=%d,left_rightmost_multiplier=%d,left_right_sibling_specifier=%d' % (tType, feat_type, source_type, source_offset, input_offset, head_multiplier , left_rightmost_multiplier, left_right_sibling_specifier)

        if feat_type == self.LEX_FEAT:
            word = h.get_word(token)
            if suffix_len > 0:
                # Only the last suffix_len characters of the word are used.
                word = word[-suffix_len:]
            return prefix + 'lex_feat=%s' %(word)
        if feat_type == self.DEP_FEAT:
            dep_label = labels.get(h.get_id(token), None)
            if dep_label is None:
                return None
            return prefix + 'dep_feat=%s' %(dep_label)
        if feat_type == self.POS_FEAT:
            return prefix + 'pos_feat=%s' % (h.get_postag(token))
        return None
Example #22
0
 def get_head_offset_token(self, token, input_sentence, head_multiplier, arcs):
     """Follow the head link from ``token`` up to ``head_multiplier`` times.

     Each step looks the current token's id up in ``arcs`` and resolves the
     head with ``self.try_get_token(input_sentence, -head_id)``.  Returns None
     as soon as a token has no head recorded or cannot be resolved, and the
     token reached otherwise (``token`` itself when no step is taken).
     """
     current = token
     steps_left = head_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         parent_id = arcs.get(h.get_id(current), None)
         if parent_id is None:
             current = None
         else:
             # NOTE(review): a head id of 0 makes the index -0 == 0 --
             # confirm try_get_token handles that as intended.
             current = self.try_get_token(input_sentence, -parent_id)
     return current
    def __get_grade_for_pixel_distance(self, pixel_dist):
        """Return the grade for two contours ``pixel_dist`` pixels apart.

        The pixel distance is scaled by feet-per-pixel; the arctangent of the
        contour interval over that distance, in degrees, is passed to
        ``Helper.convert_angle_to_grade``.
        """
        settings = self.user_settings
        run_in_feet = pixel_dist * settings.get_feet_per_pixel()
        # NOTE(review): a pixel_dist of 0 gives a zero divisor here -- confirm
        # callers never pass 0.
        rise_over_run = settings.get_contour_interval_dist() / run_in_feet
        slope_angle = math.degrees(math.atan(rise_over_run))
        return Helper.convert_angle_to_grade(slope_angle)
Example #24
0
 def get_head_offset_token(self, token, input_sentence, head_multiplier,
                           arcs):
     """Follow the head link from ``token`` up to ``head_multiplier`` times.

     Each step looks the current token's id up in ``arcs`` and resolves the
     head with ``self.try_get_token(input_sentence, -head_id)``.  Returns None
     as soon as a token has no head recorded or cannot be resolved, and the
     token reached otherwise (``token`` itself when no step is taken).
     """
     current = token
     steps_left = head_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         parent_id = arcs.get(h.get_id(current), None)
         if parent_id is None:
             current = None
         else:
             # NOTE(review): a head id of 0 makes the index -0 == 0 --
             # confirm try_get_token handles that as intended.
             current = self.try_get_token(input_sentence, -parent_id)
     return current
Example #25
0
 def execute_transition(self, transition):
     """Apply ``transition`` to the parser state; nothing is returned.

     The transition type is appended to ``self.transitions``.  Shift moves the
     last buffer item onto the stack.  LeftArc removes the second stack item
     and records the top item as its head.  Every other type is handled as a
     RightArc: the top item is popped and the new top is recorded as its
     head.  Both arc types store ``transition.label`` for the dependent.
     """
     kind = transition.transitionType
     self.transitions.append(kind)

     if kind == Transition.Shift:
         self.stack.append(self.buff.pop())
         return

     is_left_arc = kind == Transition.LeftArc
     top = self.stack.pop()
     top_id = h.get_id(top)
     if is_left_arc:
         # The item below the top is the dependent; the top stays on the stack.
         dependent_id = h.get_id(self.stack.pop())
         self.stack.append(top)
         head_id = top_id
     else:
         # The popped top is the dependent; the item now on top is its head.
         dependent_id = top_id
         head_id = h.get_id(self.stack[-1])

     self.arcs[dependent_id] = head_id
     self.labels[dependent_id] = transition.label
Example #26
0
 def execute_transition(self, transition):
     """Apply ``transition`` to the parser state; nothing is returned.

     The transition type is appended to ``self.transitions``.  Shift moves the
     last buffer item onto the stack.  LeftArc removes the second stack item
     and records the top item as its head.  Every other type is handled as a
     RightArc: the top item is popped and the new top is recorded as its
     head.  Both arc types store ``transition.label`` for the dependent.
     """
     kind = transition.transitionType
     self.transitions.append(kind)

     if kind == Transition.Shift:
         self.stack.append(self.buff.pop())
         return

     is_left_arc = kind == Transition.LeftArc
     top = self.stack.pop()
     top_id = h.get_id(top)
     if is_left_arc:
         # The item below the top is the dependent; the top stays on the stack.
         dependent_id = h.get_id(self.stack.pop())
         self.stack.append(top)
         head_id = top_id
     else:
         # The popped top is the dependent; the item now on top is its head.
         dependent_id = top_id
         head_id = h.get_id(self.stack[-1])

     self.arcs[dependent_id] = head_id
     self.labels[dependent_id] = transition.label
Example #27
0
 def get_leftmost_child(self, token, input_sentence, arcs, leftmost_multiplier):
     """Descend to the leftmost child ``-leftmost_multiplier`` times.

     ``leftmost_multiplier`` must be negative (AssertionError otherwise).  At
     each step the child with the smallest id is taken, provided that id is
     smaller than the current token's id.  Returns None when a token has no
     children or no child to its left, otherwise the token reached.
     """
     assert leftmost_multiplier < 0, "Invalid leftmost_multiplier passed"
     current = token
     steps_left = -leftmost_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         children = self.get_all_children(current, input_sentence, arcs)
         if not children:
             return None
         lowest_id = min(children)
         if lowest_id >= h.get_id(current):
             return None
         current = children[lowest_id]
     return current
Example #28
0
 def get_rightmost_child(self, token, input_sentence, arcs, rightmost_multiplier):
     """Descend to the rightmost child ``rightmost_multiplier`` times.

     ``rightmost_multiplier`` must be positive (AssertionError otherwise).  At
     each step the child with the largest id is taken, provided that id is
     larger than the current token's id.  Returns None when a token has no
     children or no child to its right, otherwise the token reached.
     """
     assert rightmost_multiplier > 0, "Invalid rightmost_multiplier passed"
     current = token
     steps_left = rightmost_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         children = self.get_all_children(current, input_sentence, arcs)
         if not children:
             return None
         highest_id = max(children)
         if highest_id <= h.get_id(current):
             return None
         current = children[highest_id]
     return current
Example #29
0
 def get_all_siblings(self, token, input_sentence, arcs):
     """Collect the words of ``input_sentence`` that share ``token``'s head.

     ``arcs`` is looked up by word id and yields that word's head.  Returns an
     empty list when ``token`` has no head recorded.  Words with id 0, the
     token itself and words without a recorded head are never included.
     """
     own_id = h.get_id(token)
     if arcs.get(own_id, None) is None:
         return []

     siblings = []
     for candidate in input_sentence:
         candidate_id = h.get_id(candidate)
         if candidate_id == 0 or candidate_id == own_id:
             continue
         if arcs.get(candidate_id, None) is None:
             continue
         if arcs[candidate_id] == arcs[own_id]:
             siblings.append(candidate)
     return siblings
Example #30
0
 def get_leftmost_child(self, token, input_sentence, arcs,
                        leftmost_multiplier):
     """Descend to the leftmost child ``-leftmost_multiplier`` times.

     ``leftmost_multiplier`` must be negative (AssertionError otherwise).  At
     each step the child with the smallest id is taken, provided that id is
     smaller than the current token's id.  Returns None when a token has no
     children or no child to its left, otherwise the token reached.
     """
     assert leftmost_multiplier < 0, "Invalid leftmost_multiplier passed"
     current = token
     steps_left = -leftmost_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         children = self.get_all_children(current, input_sentence, arcs)
         if not children:
             return None
         lowest_id = min(children)
         if lowest_id >= h.get_id(current):
             return None
         current = children[lowest_id]
     return current
Example #31
0
 def get_rightmost_child(self, token, input_sentence, arcs,
                         rightmost_multiplier):
     """Descend to the rightmost child ``rightmost_multiplier`` times.

     ``rightmost_multiplier`` must be positive (AssertionError otherwise).  At
     each step the child with the largest id is taken, provided that id is
     larger than the current token's id.  Returns None when a token has no
     children or no child to its right, otherwise the token reached.
     """
     assert rightmost_multiplier > 0, "Invalid rightmost_multiplier passed"
     current = token
     steps_left = rightmost_multiplier
     while current is not None and steps_left > 0:
         steps_left -= 1
         children = self.get_all_children(current, input_sentence, arcs)
         if not children:
             return None
         highest_id = max(children)
         if highest_id <= h.get_id(current):
             return None
         current = children[highest_id]
     return current
Example #32
0
 def get_right_sibling(self, token, input_sentence, arcs, right_sibling_multiplier):
     """Step to the nearest right-hand sibling ``right_sibling_multiplier`` times.

     ``right_sibling_multiplier`` must be positive (AssertionError otherwise).
     The sibling list is computed once, for the starting ``token``.  Each step
     picks the sibling with the smallest id that is still larger than the
     current token's id.  Returns None when there are no siblings or none to
     the right, otherwise the token reached.
     """
     assert right_sibling_multiplier > 0, "Invalid right sibling multiplier"
     siblings = self.get_all_siblings(token, input_sentence, arcs)
     current = token
     hops_left = right_sibling_multiplier
     while current is not None and hops_left > 0:
         hops_left -= 1
         if not siblings:
             return None
         current_id = h.get_id(current)
         best_gap = h.INFINITY
         closest = None
         for sibling in siblings:
             sibling_id = h.get_id(sibling)
             if sibling_id <= current_id:
                 continue
             gap = abs(sibling_id - current_id)
             if gap < best_gap:
                 best_gap = gap
                 closest = sibling
         # Only strictly larger ids are accepted, so the walk cannot cycle
         # back to a sibling it has already visited.
         current = closest
     return current
    def __get_sub_image(self):
        """Crop, resize and mask the region of interest of ``self.image``.

        The crop keeps the rows from ``__bottom_thresh * rows`` down to the
        last row and the columns from ``__left_thresh * cols`` to
        ``__right_thresh * cols``.  It is then scaled by ``__resize_factor``
        in both directions with linear interpolation and passed through
        ``Helper.convert_image_to_mask``, whose result is returned.
        """
        # Unpacking three values means the image array must be 3-dimensional.
        rows, cols, _ = self.image.shape

        first_row = int(self.__bottom_thresh * rows)  # bottom rows
        first_col = int(self.__left_thresh * cols)  # middle columns
        last_col = int(self.__right_thresh * cols)
        sub_image = self.image[first_row:rows, first_col:last_col]

        sub_image = cv2.resize(sub_image,
                               None,
                               fx=self.__resize_factor,
                               fy=self.__resize_factor,
                               interpolation=cv2.INTER_LINEAR)

        # NOTE(review): a denoise + inverted-binary threshold used to run
        # here, but its result was never used or returned, so that dead work
        # is gone.  If the thresholded image was meant to be the return
        # value, reintroduce it deliberately.
        return Helper.convert_image_to_mask(sub_image)
Example #34
0
 def get_right_sibling(self, token, input_sentence, arcs,
                       right_sibling_multiplier):
     """Step to the nearest right-hand sibling ``right_sibling_multiplier`` times.

     ``right_sibling_multiplier`` must be positive (AssertionError otherwise).
     The sibling list is computed once, for the starting ``token``.  Each step
     picks the sibling with the smallest id that is still larger than the
     current token's id.  Returns None when there are no siblings or none to
     the right, otherwise the token reached.
     """
     assert right_sibling_multiplier > 0, "Invalid right sibling multiplier"
     siblings = self.get_all_siblings(token, input_sentence, arcs)
     current = token
     hops_left = right_sibling_multiplier
     while current is not None and hops_left > 0:
         hops_left -= 1
         if not siblings:
             return None
         current_id = h.get_id(current)
         best_gap = h.INFINITY
         closest = None
         for sibling in siblings:
             sibling_id = h.get_id(sibling)
             if sibling_id <= current_id:
                 continue
             gap = abs(sibling_id - current_id)
             if gap < best_gap:
                 best_gap = gap
                 closest = sibling
         # Only strictly larger ids are accepted, so the walk cannot cycle
         # back to a sibling it has already visited.
         current = closest
     return current
Example #35
0
 def get_input_offset_token(self, token, input_sentence, input_offset):
     """Return the token whose id is ``token``'s id plus ``input_offset``.

     Returns None when that target id is zero or negative, so the negated id
     never becomes a non-negative index that would wrap around.  Otherwise
     the lookup is delegated to ``self.try_get_token`` with the negated id.
     """
     target_id = h.get_id(token) + input_offset
     if target_id <= 0:
         return None
     # Token ids start at 1, so -target_id is used directly (no extra +1).
     return self.try_get_token(input_sentence, -target_id)
Example #36
0
    def get_model7_params(
        self,
        stack,
        buff,
        input_sentence,
        arcs,
        labels,
        tType,
        feat_type,
        source_type,
        source_offset=0,
        input_offset=0,
        head_multiplier=0,
        left_rightmost_multiplier=0,
        left_right_sibling_specifier=0,
        suffix_len=0
    ):
        """Build the string describing one "Model 7" feature, or return None.

        The feature addressing scheme is described in
        http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf.

        A token is picked from the chosen source at ``source_offset`` and then
        moved, in this order, by the input offset (only when non-zero), the
        head multiplier, the left/rightmost-child multiplier and the sibling
        specifier.  None is returned as soon as any step yields no token, when
        a DEP feature has no label recorded for the token, or for an
        unrecognised ``feat_type``.  Otherwise the result is the parameter
        prefix followed by the lexical (optionally only the last
        ``suffix_len`` characters), dependency or part-of-speech value.

        Raises AssertionError for an invalid ``feat_type`` or ``source_type``
        or a negative ``source_offset`` / ``head_multiplier``.
        """
        assert feat_type in [self.DEP_FEAT, self.POS_FEAT,
                             self.LEX_FEAT], "Invalid feat_type specified"
        assert source_type in [
            self.BUFF_SOURCE, self.STACK_SOURCE, self.INPUT_SOURCE
        ], "Invalid source type specified"
        assert source_offset >= 0, "Invalid source_offset"
        assert head_multiplier >= 0, "Invalid head multiplier specified"

        # The navigation helpers receive the sentence in reverse order.
        reversed_sentence = input_sentence[::-1]
        source = self.get_source(stack, buff, input_sentence, source_type)

        token = self.try_get_token(source, -(source_offset + 1))
        if token is not None and input_offset != 0:
            token = self.get_input_offset_token(token, reversed_sentence,
                                                input_offset)
        if token is not None:
            token = self.get_head_offset_token(token, reversed_sentence,
                                               head_multiplier, arcs)
        if token is not None:
            token = self.get_left_rightmost_child(token, reversed_sentence,
                                                  arcs,
                                                  left_rightmost_multiplier)
        if token is not None:
            token = self.get_left_right_sibling(token, reversed_sentence,
                                                arcs,
                                                left_right_sibling_specifier)
        if token is None:
            return None

        prefix = 'transition=%d,feat_type=%d,source_type=%d,source_offset=%d,input_offset=%d,head_multiplier=%d,left_rightmost_multiplier=%d,left_right_sibling_specifier=%d' % (
            tType, feat_type, source_type, source_offset, input_offset,
            head_multiplier, left_rightmost_multiplier,
            left_right_sibling_specifier)

        if feat_type == self.LEX_FEAT:
            word = h.get_word(token)
            if suffix_len > 0:
                # Only the last suffix_len characters of the word are used.
                word = word[-suffix_len:]
            return prefix + 'lex_feat=%s' % (word)
        if feat_type == self.DEP_FEAT:
            dep_label = labels.get(h.get_id(token), None)
            if dep_label is None:
                return None
            return prefix + 'dep_feat=%s' % (dep_label)
        if feat_type == self.POS_FEAT:
            return prefix + 'pos_feat=%s' % (h.get_postag(token))
        return None
Example #37
0
    def extract_features(self, transition, stack, buff, labels,
                         previous_transitions, arcs, input_sentence):
        features = defaultdict(float)
        #tType = transition.transitionType
        tType = -1  #Dummy value since this is not encoded in the feature for SVM
        label = 'dummy_label'  #Dummy label since this is not encoded in the feature for SVM

        #Model7 features as described in http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf

        feat21_model7 = self.add_model7_feat(features, stack, buff,
                                             input_sentence, arcs, labels,
                                             tType, self.POS_FEAT,
                                             self.STACK_SOURCE, 1, 0, 0,
                                             -1)  #pos for pre-top's lmc
        feat31_model7 = self.add_model7_feat(features, stack, buff,
                                             input_sentence, arcs, labels,
                                             tType, self.POS_FEAT,
                                             self.STACK_SOURCE, 1, 0, 0,
                                             1)  #pos or pre-top's rmc
        feat41_model7 = self.add_model7_feat(features, stack, buff,
                                             input_sentence, arcs, labels,
                                             tType, self.POS_FEAT,
                                             self.STACK_SOURCE, 0, 0, 0,
                                             -1)  #pos for top's lmc
        feat42_model7 = self.add_model7_feat(features, stack, buff,
                                             input_sentence, arcs, labels,
                                             tType, self.POS_FEAT,
                                             self.STACK_SOURCE, 0, 0, 0,
                                             1)  #pos for top's rmc
        feat5_model7 = self.add_model7_feat(features, stack, buff,
                                            input_sentence, arcs, labels,
                                            tType, self.LEX_FEAT,
                                            self.STACK_SOURCE,
                                            1)  #lex for pre-top
        feat6_model7 = self.add_model7_feat(features, stack, buff,
                                            input_sentence, arcs, labels,
                                            tType, self.LEX_FEAT,
                                            self.STACK_SOURCE, 0)  #lex for top

        feat71_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.POS_FEAT, self.BUFF_SOURCE)  #pos for next buffer item

        feat76_model7 = self.add_model7_feat(features, stack, buff,
                                             input_sentence, arcs, labels,
                                             tType, self.POS_FEAT,
                                             self.BUFF_SOURCE,
                                             1)  #pos for next-next buffer item

        feat10_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.POS_FEAT, self.STACK_SOURCE, 1,
            1)  #pos for word after pre-top in input
        feat11_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.POS_FEAT, self.STACK_SOURCE, 1,
            -1)  #pos for word before pre-top in input

        feat12_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.DEP_FEAT, self.STACK_SOURCE, 1,
            1)  #dep for word after pre-top in input
        feat13_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.DEP_FEAT, self.STACK_SOURCE, 1,
            -1)  #dep for word before pre-top in input

        feat14_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.LEX_FEAT, self.STACK_SOURCE, 0,
            1)  #lex for word after top in input

        feat16_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.POS_FEAT, self.STACK_SOURCE, 0,
            1)  #pos for word after top in input
        feat17_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.POS_FEAT, self.STACK_SOURCE, 0,
            -1)  #pos for word before top in input

        feat18_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.DEP_FEAT, self.STACK_SOURCE, 0,
            1)  #dep for word after top in input
        feat19_model7 = self.add_model7_feat(
            features, stack, buff, input_sentence, arcs, labels, tType,
            self.DEP_FEAT, self.STACK_SOURCE, 0,
            -1)  #dep for word before top in input

        pre_top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                             labels, tType, self.POS_FEAT,
                                             self.STACK_SOURCE, 1)
        top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                         labels, tType, self.POS_FEAT,
                                         self.STACK_SOURCE, 0)

        cfeat_11 = self.compose_feats(
            features, [pre_top_pos, top_pos])  #pos of both pre_top and top

        cfeat_12 = self.compose_feats(
            features, [top_pos, feat71_model7])  #pos of top and next buff

        cfeat_13 = self.compose_feats(features,
                                      [top_pos, feat71_model7, feat76_model7
                                       ])  #pos for top next and next next

        cfeat_17 = self.compose_feats(features,
                                      [pre_top_pos, top_pos, feat41_model7
                                       ])  #pos for pre-top, top and top's lmc
        cfeat_175 = self.compose_feats(features,
                                       [pre_top_pos, top_pos, feat42_model7
                                        ])  #pos for pre-top, top and top's rmc

        # Top two POS tags from the stack
        for i in range(3):  #was originally 2
            if i >= len(stack):
                break
            s = stack[-(i + 1)]
            pos = s[3]
            features['transition=%d,s%d.pos=%s' % (tType, i, pos)] = 1

        # Next 2 POS tags from the buffer
        for i in range(2):
            if i >= len(buff):
                break
            b = buff[-(i + 1)]
            pos = b[3]
            features['transition=%d,b%d.pos=%s' % (tType, i, pos)] = 1

        # Previous transition type
        if len(previous_transitions) > 0:
            prev = previous_transitions[-1]
            features['transition=%d,prev_transition=%d' % (tType, prev)] = 1
        else:
            features['transition=%d,prev_transition=None' % (tType)] = 1

        if self.labeled and transition is not None:  #We don't care about labelled case and transition should not be passed for SVM
            # Action and label pair
            features['transition=%d,label=%s' %
                     (transition.transitionType, transition.label)] = 1
            # Label bias
            features['label=%s' % (transition.label)] = 1

        #Features based on http://dl.acm.org/citation.cfm?id=2002777
        #Distance function
        if len(stack) > 0 and len(buff) > 0:
            dist = h.get_id(stack[-1]) - h.get_id(buff[-1])
            if dist < 0:
                features['transition=%d,neg_dist=' % (tType)] = dist
            else:
                features['transition=%d,pos_dist=' % (tType)] = dist

        #Valency function
        if (len(stack) > 1):
            [left_valency,
             right_valency] = self.get_valency(arcs, h.get_id(stack[-1]))
            left_val_feat = 'transition=%d,head_left_valency=%d' % (
                tType, left_valency)
            features[left_val_feat + top_pos] = 1
            features[left_val_feat] = 1
            right_val_feat = 'transition=%d,head_right_valency=%d' % (
                tType, right_valency)
            features[right_val_feat] = 1
            features[right_val_feat + top_pos] = 1

        return features
Example #38
0
    def extract_features(self, transition, stack, buff, labels, previous_transitions, arcs, input_sentence):
        """Collect the feature map for ``transition`` in the given parser state.

        Every key written here is a string starting with the transition type
        (except the plain ``label=...`` bias).  Model7 features, as described in
        http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf, are registered via
        ``add_model7_feat`` and ``compose_feats``; the remaining features are
        written directly into the map.

        Args:
            transition: object exposing ``transitionType`` and ``label``.
            stack: parser stack; ``stack[-1]`` is used as the top item.
            buff: parser buffer; ``buff[-1]`` is used as the next item.
            labels: forwarded unchanged to the Model7 helpers.
            previous_transitions: sequence whose last element (if any) is
                formatted with ``%d`` as the previous transition type.
            arcs: forwarded to the Model7 helpers and to ``get_valency``.
            input_sentence: forwarded unchanged to the Model7 helpers.

        Returns:
            A ``defaultdict(float)`` mapping feature names to their values.
        """
        feats = defaultdict(float)
        action = transition.transitionType
        label = transition.label

        dep, pos, lex = self.DEP_FEAT, self.POS_FEAT, self.LEX_FEAT
        on_stack, on_buff = self.STACK_SOURCE, self.BUFF_SOURCE

        def emit(*address):
            # Register one Model7 feature; only the trailing address arguments vary.
            return self.add_model7_feat(feats, stack, buff, input_sentence,
                                        arcs, labels, action, *address)

        def combine(*parts):
            # Register the composition of previously produced feature values.
            return self.compose_feats(feats, list(parts))

        # Single-address Model7 features.  Calls whose result is not reused
        # below are made only for their effect on ``feats``.
        emit(dep, on_stack, 1)  # dep for pre-top
        emit(dep, on_stack, 1, 0, 0, -1)  # dep for pre-top's lmc
        pre_top_lmc_pos = emit(pos, on_stack, 1, 0, 0, -1)  # pos for pre-top's lmc
        emit(dep, on_stack, 1, 0, 0, 1)  # dep for pre-top's rmc
        pre_top_rmc_pos = emit(pos, on_stack, 1, 0, 0, 1)  # pos for pre-top's rmc
        emit(dep, on_stack, 0, 0, 0, -1)  # dep for top's lmc
        top_lmc_pos = emit(pos, on_stack, 0, 0, 0, -1)  # pos for top's lmc
        pre_top_lex = emit(lex, on_stack, 1)  # lex for pre-top
        top_lex = emit(lex, on_stack, 0)  # lex for top

        next_lex = emit(lex, on_buff)  # lex for next buffer item
        next_pos = emit(pos, on_buff)  # pos for next buffer item
        next2_lex = emit(lex, on_buff, 1)  # lex for next-next buffer item
        next2_pos = emit(pos, on_buff, 1)  # pos for next-next buffer item

        # lex, pos and dep of the word after (+1) and before (-1) in the input,
        # first around pre-top (stack index 1), then around top (stack index 0).
        for stack_index in (1, 0):
            for kind in (lex, pos, dep):
                for offset in (1, -1):
                    emit(kind, on_stack, stack_index, offset)

        pre_top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                             labels, action, pos, on_stack, 1)
        top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                         labels, action, pos, on_stack, 0)

        # Composed features, registered in a fixed order.
        pre_top_lex_pos = combine(pre_top_lex, pre_top_pos)  # lex_pos of pre-top
        top_lex_pos = combine(top_lex, top_pos)  # lex_pos of top
        combine(next_lex, next_pos)  # lex_pos of next buffer item
        combine(next2_lex, next2_pos)  # lex_pos of next-next buffer item
        combine(pre_top_lex_pos, top_lex_pos)  # lex_pos of both pre-top and top
        combine(pre_top_lex_pos, top_lex)  # lex_pos of pre-top with lex of top
        combine(pre_top_lex, top_lex_pos)  # lex of pre-top with lex_pos of top
        combine(pre_top_lex_pos, top_pos)  # lex_pos of pre-top with pos of top
        combine(pre_top_pos, top_lex_pos)  # pos of pre-top with lex_pos of top
        combine(pre_top_lex, top_lex)  # lex of both pre-top and top
        combine(pre_top_pos, top_pos)  # pos of both pre-top and top
        combine(top_pos, next_pos)  # pos of top and next buffer item
        combine(top_pos, next_pos, next2_pos)  # pos of top, next and next-next
        combine(pre_top_pos, top_pos, next_pos)  # pos of pre-top, top and next
        combine(pre_top_pos, pre_top_lmc_pos, top_pos)  # pos of pre-top, its lmc and top
        combine(pre_top_pos, pre_top_rmc_pos, top_pos)  # pos of pre-top, its rmc and top
        combine(pre_top_pos, top_pos, top_lmc_pos)  # pos of pre-top, top and top's lmc

        # Field 3 (the POS tag) of up to three stack items, top first.
        for depth in range(min(3, len(stack))):
            tag = stack[-(depth + 1)][3]
            feats['transition=%d,s%d.pos=%s' % (action, depth, tag)] = 1

        # Field 3 (the POS tag) of up to three buffer items, next item first.
        for ahead in range(min(3, len(buff))):
            tag = buff[-(ahead + 1)][3]
            feats['transition=%d,b%d.pos=%s' % (action, ahead, tag)] = 1

        # Previous transition type.
        if len(previous_transitions) == 0:
            feats['transition=%d,prev_transition=None' % (action)] = 1
        else:
            feats['transition=%d,prev_transition=%d' % (action, previous_transitions[-1])] = 1

        # Bias feature.
        feats['transition=%d' % (action)] = 1

        if self.labeled:
            # Action/label pair, then the label bias.
            feats['transition=%d,label=%s' % (action, label)] = 1
            feats['label=%s' % (label)] = 1

        # Features based on http://dl.acm.org/citation.cfm?id=2002777
        # Distance: signed id difference between stack top and next buffer item.
        if len(stack) > 0 and len(buff) > 0:
            gap = h.get_id(stack[-1]) - h.get_id(buff[-1])
            dist_key = 'transition=%d,neg_dist=' if gap < 0 else 'transition=%d,pos_dist='
            feats[dist_key % (action)] = gap

        # Valency of the would-be head: stack top for LeftArc, the item below
        # it for RightArc; no valency feature for any other transition type.
        if len(stack) <= 1:
            return feats
        if action == Transition.LeftArc:
            head_at = -1
        elif action == Transition.RightArc:
            head_at = -2
        else:
            return feats
        left_valency, right_valency = self.get_valency(arcs, h.get_id(stack[head_at]))
        feats['transition=%d,head_left_valency=%d' % (action, left_valency)] = 1
        feats['transition=%d,head_right_valency=%d' % (action, right_valency)] = 1
        return feats
Example #39
0
def test_helper():
    """Check that calling ``Helper()`` yields a pandas ``DataFrame``."""
    # The class is spelled ``pd.DataFrame``; ``pd.Dataframe`` does not exist
    # and made this test fail with AttributeError before asserting anything.
    helper = Helper()
    assert isinstance(helper, pd.DataFrame)
Example #40
0
 def is_removable(self, word, arcs, leftmost_child, rightmost_child):
     """Tell whether ``word`` may be removed given its outermost children.

     Returns False when ``leftmost_child`` is set (differs from
     ``p.INFINITY``) but has no truthy entry in ``arcs``, when
     ``rightmost_child`` is set (differs from ``-1``) but has no truthy
     entry in ``arcs``, or when ``p.is_root(word)`` holds.  Returns True
     otherwise.
     """
     # A child that exists but is missing from ``arcs`` blocks removal.
     has_left_child = leftmost_child != p.INFINITY
     if has_left_child and not arcs.get(leftmost_child, False):
         return False
     has_right_child = rightmost_child != -1
     if has_right_child and not arcs.get(rightmost_child, False):
         return False
     # The root is never removable.
     return not p.is_root(word)
Example #41
0
    def extract_features(self, transition, stack, buff, labels,
                         previous_transitions, arcs, input_sentence):
        """Collect the feature map for ``transition`` in the given parser state.

        Every key written here is a string starting with the transition type
        (except the plain ``label=...`` bias).  Model7 features, as described in
        http://stp.lingfil.uu.se/~nivre/docs/maltparser.pdf, are registered via
        ``add_model7_feat`` and ``compose_feats``; the remaining features are
        written directly into the map.

        Args:
            transition: object exposing ``transitionType`` and ``label``.
            stack: parser stack; ``stack[-1]`` is used as the top item.
            buff: parser buffer; ``buff[-1]`` is used as the next item.
            labels: forwarded unchanged to the Model7 helpers.
            previous_transitions: sequence whose last element (if any) is
                formatted with ``%d`` as the previous transition type.
            arcs: forwarded to the Model7 helpers and to ``get_valency``.
            input_sentence: forwarded unchanged to the Model7 helpers.

        Returns:
            A ``defaultdict(float)`` mapping feature names to their values.
        """
        feats = defaultdict(float)
        action = transition.transitionType
        label = transition.label

        dep, pos, lex = self.DEP_FEAT, self.POS_FEAT, self.LEX_FEAT
        on_stack, on_buff = self.STACK_SOURCE, self.BUFF_SOURCE

        def emit(*address):
            # Register one Model7 feature; only the trailing address
            # arguments vary between calls.
            return self.add_model7_feat(feats, stack, buff, input_sentence,
                                        arcs, labels, action, *address)

        def combine(*parts):
            # Register the composition of previously produced feature values.
            return self.compose_feats(feats, list(parts))

        # Single-address Model7 features.  Calls whose result is not reused
        # below are made only for their effect on ``feats``.
        emit(dep, on_stack, 1)  # dep for pre-top
        emit(dep, on_stack, 1, 0, 0, -1)  # dep for pre-top's lmc
        pre_top_lmc_pos = emit(pos, on_stack, 1, 0, 0,
                               -1)  # pos for pre-top's lmc
        emit(dep, on_stack, 1, 0, 0, 1)  # dep for pre-top's rmc
        pre_top_rmc_pos = emit(pos, on_stack, 1, 0, 0,
                               1)  # pos for pre-top's rmc
        emit(dep, on_stack, 0, 0, 0, -1)  # dep for top's lmc
        top_lmc_pos = emit(pos, on_stack, 0, 0, 0, -1)  # pos for top's lmc
        pre_top_lex = emit(lex, on_stack, 1)  # lex for pre-top
        top_lex = emit(lex, on_stack, 0)  # lex for top

        next_lex = emit(lex, on_buff)  # lex for next buffer item
        next_pos = emit(pos, on_buff)  # pos for next buffer item
        next2_lex = emit(lex, on_buff, 1)  # lex for next-next buffer item
        next2_pos = emit(pos, on_buff, 1)  # pos for next-next buffer item

        # lex, pos and dep of the word after (+1) and before (-1) in the
        # input, first around pre-top (stack index 1), then around top
        # (stack index 0).
        for stack_index in (1, 0):
            for kind in (lex, pos, dep):
                for offset in (1, -1):
                    emit(kind, on_stack, stack_index, offset)

        pre_top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                             labels, action, pos, on_stack, 1)
        top_pos = self.get_model7_params(stack, buff, input_sentence, arcs,
                                         labels, action, pos, on_stack, 0)

        # Composed features, registered in a fixed order.
        pre_top_lex_pos = combine(pre_top_lex,
                                  pre_top_pos)  # lex_pos of pre-top
        top_lex_pos = combine(top_lex, top_pos)  # lex_pos of top
        combine(next_lex, next_pos)  # lex_pos of next buffer item
        combine(next2_lex, next2_pos)  # lex_pos of next-next buffer item
        combine(pre_top_lex_pos,
                top_lex_pos)  # lex_pos of both pre-top and top
        combine(pre_top_lex_pos,
                top_lex)  # lex_pos of pre-top with lex of top
        combine(pre_top_lex,
                top_lex_pos)  # lex of pre-top with lex_pos of top
        combine(pre_top_lex_pos,
                top_pos)  # lex_pos of pre-top with pos of top
        combine(pre_top_pos,
                top_lex_pos)  # pos of pre-top with lex_pos of top
        combine(pre_top_lex, top_lex)  # lex of both pre-top and top
        combine(pre_top_pos, top_pos)  # pos of both pre-top and top
        combine(top_pos, next_pos)  # pos of top and next buffer item
        combine(top_pos, next_pos,
                next2_pos)  # pos of top, next and next-next
        combine(pre_top_pos, top_pos,
                next_pos)  # pos of pre-top, top and next
        combine(pre_top_pos, pre_top_lmc_pos,
                top_pos)  # pos of pre-top, its lmc and top
        combine(pre_top_pos, pre_top_rmc_pos,
                top_pos)  # pos of pre-top, its rmc and top
        combine(pre_top_pos, top_pos,
                top_lmc_pos)  # pos of pre-top, top and top's lmc

        # Field 3 (the POS tag) of up to three stack items, top first.
        for depth in range(min(3, len(stack))):
            tag = stack[-(depth + 1)][3]
            feats['transition=%d,s%d.pos=%s' % (action, depth, tag)] = 1

        # Field 3 (the POS tag) of up to three buffer items, next item first.
        for ahead in range(min(3, len(buff))):
            tag = buff[-(ahead + 1)][3]
            feats['transition=%d,b%d.pos=%s' % (action, ahead, tag)] = 1

        # Previous transition type.
        if len(previous_transitions) == 0:
            feats['transition=%d,prev_transition=None' % (action)] = 1
        else:
            feats['transition=%d,prev_transition=%d' %
                  (action, previous_transitions[-1])] = 1

        # Bias feature.
        feats['transition=%d' % (action)] = 1

        if self.labeled:
            # Action/label pair, then the label bias.
            feats['transition=%d,label=%s' % (action, label)] = 1
            feats['label=%s' % (label)] = 1

        # Features based on http://dl.acm.org/citation.cfm?id=2002777
        # Distance: signed id difference between stack top and next buffer
        # item.
        if len(stack) > 0 and len(buff) > 0:
            gap = h.get_id(stack[-1]) - h.get_id(buff[-1])
            if gap < 0:
                dist_key = 'transition=%d,neg_dist='
            else:
                dist_key = 'transition=%d,pos_dist='
            feats[dist_key % (action)] = gap

        # Valency of the would-be head: stack top for LeftArc, the item below
        # it for RightArc; no valency feature for any other transition type.
        if len(stack) <= 1:
            return feats
        if action == Transition.LeftArc:
            head_at = -1
        elif action == Transition.RightArc:
            head_at = -2
        else:
            return feats
        left_valency, right_valency = self.get_valency(
            arcs, h.get_id(stack[head_at]))
        feats['transition=%d,head_left_valency=%d' %
              (action, left_valency)] = 1
        feats['transition=%d,head_right_valency=%d' %
              (action, right_valency)] = 1
        return feats