def parse_sentence(self, words, pos):
    """Greedily parse one sentence with the transition model.

    The state starts with index 0 on the stack and indices
    1..len(words)-1 in the buffer.  At every step the model scores all
    transitions and the best-scoring one that passes the legality rules
    is applied, until the buffer is empty.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` holding one edge per entry of
        ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    def is_blocked(action):
        # With an empty stack only a shift can be applied.
        if len(state.stack) == 0:
            return action in {'left_arc', 'right_arc'}
        # The last buffered word may not be shifted onto a non-empty stack.
        if action == 'shift' and len(state.buffer) == 1:
            return True
        # Index 0 on top of the stack must never receive a left arc.
        return action == 'left_arc' and state.stack[-1] == 0

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape((1, 6)))[0]
        ranked = np.argsort(scores)[::-1]  # best score first

        # Walk down the ranking until a permitted transition is found.
        rank = 0
        action, rel = self.output_labels[ranked[rank]]
        while is_blocked(action):
            rank += 1
            action, rel = self.output_labels[ranked[rank]]

        if action == 'shift':
            state.shift()
        elif action == 'left_arc':
            state.left_arc(rel)
        elif action == 'right_arc':
            state.right_arc(rel)

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by repeatedly applying the best permitted transition.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        IndexError: If every scored transition is rejected by the
            legality rules (the ranking is exhausted).
    """
    state = State(range(1, len(words)))
    state.stack.append(0)
    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        # Hand the model a one-row batch.  The reshape leaves an input that
        # is already (1, n) untouched and adds the batch axis when the
        # extractor returns a flat vector.
        scores = self.model.predict(features.reshape((1, -1)))[0]
        ranked = sorted(range(len(scores)),
                        key=lambda idx: scores[idx], reverse=True)

        rank = 0
        action, label = self.output_labels[ranked[rank]]
        # Skip transitions that are not permitted in the current state:
        # arcs with an empty stack, shifting the last buffered word onto a
        # non-empty stack, and a left arc whose target would be index 0.
        while ((not state.stack and action in {"right_arc", "left_arc"})
               or (state.stack and len(state.buffer) == 1
                   and action == "shift")
               or (state.stack and state.stack[-1] == 0
                   and action == "left_arc")):
            rank += 1
            action, label = self.output_labels[ranked[rank]]

        # Dispatch on the action name that was just vetted.
        if action == "shift":
            state.shift()
        elif action == "left_arc":
            state.left_arc(label)
        elif action == "right_arc":
            state.right_arc(label)

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Run the greedy transition loop over one sentence.

    For each step a set of forbidden action names is derived from the
    current stack/buffer, and the highest-scoring transition whose action
    is not in that set is applied.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` built from ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(np.asmatrix(features)).tolist()[0]
        ranked = sorted(enumerate(scores),
                        key=lambda pair: pair[1], reverse=True)

        banned = set()
        if len(state.stack) == 0:
            # No arcs without something on the stack.
            banned |= {'left_arc', 'right_arc'}
        else:
            if len(state.buffer) == 1:
                # The last buffered word may not be shifted.
                banned.add('shift')
            if state.stack[-1] == 0:
                # Index 0 must not become the target of a left arc.
                banned.add('left_arc')

        for idx, _ in ranked:
            transition = self.output_labels[idx]
            if transition[0] not in banned:
                break

        kind = transition[0]
        if kind == 'shift':
            state.shift()
        elif kind == 'left_arc':
            state.left_arc(transition[1])
        elif kind == 'right_arc':
            state.right_arc(transition[1])
        else:
            print('error at state transition')

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence, delegating legality checks to ``check_action``.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape((1, 6)))[0].tolist()
        # Transition indices, best score first (stable for equal scores).
        ranked = sorted(range(len(scores)),
                        key=lambda idx: scores[idx], reverse=True)

        # Advance to the first candidate that self.check_action accepts.
        rank = 0
        while not self.check_action(self.output_labels[ranked[rank]][0],
                                    state):
            rank += 1

        action, dep_rel = self.output_labels[ranked[rank]]
        if dep_rel:
            if action == "left_arc":
                state.left_arc(dep_rel)
            elif action == "right_arc":
                state.right_arc(dep_rel)
        else:
            # An entry without a relation label is applied as a shift.
            state.shift()

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence using the module-level legality helpers.

    ``isLegal(state)`` and ``contains(moves)`` are each called once per
    step and the values they return are used as filter predicates;
    ``toTransition`` maps the chosen index to an (operator, label) pair.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(np.array([features]))

        move_is_legal = isLegal(state)
        legal_moves = [move for move in np.arange(NUM_CLASSES)
                       if move_is_legal(move)]

        # Indices of the first prediction row, best score first.
        ranked = reversed(np.argsort(scores)[0])
        in_legal_moves = contains(legal_moves)
        legal_ranked = [idx for idx in ranked if in_legal_moves(idx)]

        operator, label = toTransition(legal_ranked[0])
        if operator == "left_arc":
            state.left_arc(label)
        elif operator == "right_arc":
            state.right_arc(label)
        elif operator == "shift":
            state.shift()

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape(1, -1))

        # Legality rules applied below:
        #  * left/right arcs are not permitted if the stack is empty;
        #  * shifting the only word out of the buffer is illegal unless
        #    the stack is empty;
        #  * index 0 on top of the stack must never be the target of a
        #    left arc.
        for idx in scores[0].argsort()[::-1]:
            transition, label = self.output_labels[idx]
            if transition == 'shift':
                legal = not (len(state.buffer) == 1 and len(state.stack) > 0)
            else:
                legal = len(state.stack) > 0 and not (
                    transition == 'left_arc' and state.stack[-1] == 0)
            if legal:
                break

        if transition == 'shift':
            state.shift()
        elif transition == 'left_arc':
            state.left_arc(label)
        else:
            state.right_arc(label)

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(
            words, pos, state).reshape((1, -1))
        scores = self.model.predict(features)[0]

        # Rank *every* transition.  Looking only at a fixed-size prefix of
        # the ranking can leave no permitted candidate, in which case the
        # state never changes and this loop would not terminate.
        for i in np.argsort(scores)[::-1]:
            arc, dep = self.output_labels[i]
            if arc == 'shift':
                # Shifting the only buffered word is illegal unless the
                # stack is empty.
                if not state.stack or len(state.buffer) > 1:
                    state.shift()
                    break
            elif arc == 'left_arc':
                # Arcs need a non-empty stack, and index 0 must never be
                # the target of a left arc.
                if state.stack and state.stack[-1] != 0:
                    state.left_arc(dep)
                    break
            elif arc == 'right_arc':
                if state.stack:
                    state.right_arc(dep)
                    break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence using ``sort_output`` and ``is_action_permitted``.

    Each step feeds the current state representation to the model as a
    one-row batch, ranks the first output row with ``self.sort_output``
    and applies the first ranked action that
    ``self.is_action_permitted`` accepts.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        # reshape(1, -1) adds the batch axis (one sample per call).
        softmax_output = self.model.predict(features.reshape(1, -1))

        for candidate in self.sort_output(softmax_output[0]):
            action = candidate[0]
            if not self.is_action_permitted(action, state):
                continue  # try the next-best action

            kind = action[0]
            if kind == 'shift':
                state.shift()
            elif kind == 'left_arc':
                state.left_arc(action[1])
            elif kind == 'right_arc':
                state.right_arc(action[1])
            # Exactly one action per model query.
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        ValueError: If ``self.output_labels`` yields an action other than
            'shift', 'left_arc' or 'right_arc'.
        RuntimeError: If no transition is permitted in the current state.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        feature = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(
            feature.reshape(1, feature.shape[0]))[0]

        # Rank every transition, including those scored exactly 0.0:
        # dropping them could leave no permitted candidate, and then the
        # state would never change and this loop would spin forever.
        ranked = sorted(range(len(scores)),
                        key=lambda idx: scores[idx], reverse=True)

        for idx in ranked:
            action, label = self.output_labels[idx]
            if action == 'shift':
                # The last buffered word may only be shifted onto an
                # empty stack.
                if state.stack and len(state.buffer) == 1:
                    continue
                state.shift()
            elif action == 'left_arc':
                # Needs a stack, and index 0 must not be the target.
                if not state.stack or state.stack[-1] == 0:
                    continue
                state.left_arc(label)
            elif action == 'right_arc':
                if not state.stack:
                    continue
                state.right_arc(label)
            else:
                # Fail loudly instead of retrying the same state forever.
                raise ValueError(
                    "unknown transition {!r}".format(action))
            break
        else:
            raise RuntimeError("no permitted transition for current state")

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Step 1 scores all transitions for the current state; step 2 walks
    them from highest to lowest score and applies the first permitted
    one.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        # Step 1: score the transitions (flattened to a length-91 vector).
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape([1, 6])).reshape(91)

        # Step 2: highest-scoring permitted transition.
        for i in np.flipud(np.argsort(scores)):
            kind = self.output_labels[i][0]
            if kind == "shift":
                permitted = not (state.stack and len(state.buffer) == 1)
            elif kind == "left_arc":
                permitted = bool(state.stack) and state.stack[-1] != 0
            elif kind == "right_arc":
                permitted = bool(state.stack)
            else:
                permitted = False

            if not permitted:
                continue

            transition = self.output_labels[i]
            if transition[0] == "shift":
                state.shift()
            elif transition[0] == "left_arc":
                state.left_arc(transition[1])
            elif transition[0] == "right_arc":
                state.right_arc(transition[1])
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    The state starts with index 0 on the stack and indices
    1..len(words)-1 in the buffer; the loop runs until the buffer is
    empty.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        RuntimeError: If no transition is permitted in the current state.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        # NOTE(review): relies on self.extractor, self.model and
        # self.output_labels (index -> (action, label)) being set up by
        # the enclosing class -- confirm against __init__.
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape((1, -1)))[0]

        for idx in scores.argsort()[::-1]:  # best score first
            action, label = self.output_labels[idx]
            if action == 'shift':
                # The last buffered word may only be shifted onto an
                # empty stack.
                if state.stack and len(state.buffer) == 1:
                    continue
                state.shift()
            elif action == 'left_arc':
                # Arcs need a non-empty stack; index 0 must never be the
                # target of a left arc.
                if not state.stack or state.stack[-1] == 0:
                    continue
                state.left_arc(label)
            elif action == 'right_arc':
                if not state.stack:
                    continue
                state.right_arc(label)
            else:
                continue
            break
        else:
            # Without a state change the outer loop could never finish.
            raise RuntimeError("no permitted transition for current state")

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Standard greedy transition loop over one sentence.

    The initial state has only index 0 on the stack and all other word
    indices in the buffer.  For each step the set of permitted action
    names is computed first, then the best-scoring transition whose
    action is in that set is applied.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        if not state.stack:
            # Nothing to attach to: only shifting is possible.
            allowed = ['shift']
        else:
            allowed = ['right_arc']
            if state.stack[-1] != 0:
                # Index 0 on top of the stack may not be a left-arc target.
                allowed.append('left_arc')
            if len(state.buffer) > 1:
                allowed.append('shift')

        features = self.extractor.get_input_representation(
            words, pos, state).reshape((1, 6))
        scores = self.model.predict(features)[0]

        for i in list(np.argsort(scores)[::-1]):
            action, label = self.output_labels[i]
            if action not in allowed:
                continue
            if action == 'shift':
                state.shift()
            elif action == 'left_arc':
                state.left_arc(label)
            else:
                state.right_arc(label)
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Intermediate values of each step are stored on the instance
    (``input_representation``, ``prediction``, ``prediction_list``,
    ``output_proabilities``, ``predicted_action``).

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        self.input_representation = self.extractor.get_input_representation(
            words, pos, state)
        self.input_representation = np.reshape(
            self.input_representation, (-1, 6))
        self.prediction = self.model.predict(self.input_representation)
        self.prediction_list = self.prediction[0].tolist()

        # (probability, index) pairs, highest probability first.
        self.output_proabilities = sorted(
            ((prob, idx) for idx, prob in enumerate(self.prediction_list)),
            key=lambda pair: pair[0], reverse=True)

        for _, idx in self.output_proabilities:
            entry = self.output_labels.get(idx)
            self.predicted_action = entry[0]
            stack_empty = len(state.stack) == 0

            if self.predicted_action == 'left_arc':
                # Needs a stack; index 0 must never be a left-arc target.
                if stack_empty or state.stack[-1] == 0:
                    continue
                state.left_arc(entry[1])
            elif self.predicted_action == 'right_arc':
                if stack_empty:
                    continue
                state.right_arc(entry[1])
            elif self.predicted_action == 'shift':
                # Shifting the only buffered word is illegal unless the
                # stack is empty.
                if len(state.buffer) == 1 and not stack_empty:
                    continue
                state.shift()
            else:
                continue
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy parse that ranks only transitions with non-zero probability.

    Transition ids are resolved through ``self.uniquepairs``.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        predict_probs = self.model.predict(features.reshape(1, 6))[0]

        # Keep strictly positive probabilities, highest first.
        positive = {i: prob for i, prob in enumerate(predict_probs)
                    if prob > 0.000000000e+00}
        ranked = sorted(positive.items(), key=lambda kv: -kv[1])

        for transitionid, _ in ranked:
            pair = self.uniquepairs[transitionid]
            action = pair[0]
            deprel = pair[1]

            if action == 'shift':
                # Shift is taken when the stack is empty with one word
                # left, when more than one word is buffered, or when it is
                # the only candidate at all.
                if ((len(state.buffer) == 1 and not state.stack)
                        or len(state.buffer) > 1
                        or len(ranked) == 1):
                    state.shift()
                    break
            elif action == 'left_arc':
                if state.stack and state.stack[-1] != 0:
                    state.left_arc(deprel)
                    break
            else:
                if state.stack:
                    state.right_arc(deprel)
                    break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy parse that interprets output indices by fixed ranges.

    Index 90 is applied as a shift, indices 45..89 as right arcs and all
    remaining indices as left arcs.
    NOTE(review): the value passed to the arc calls is
    ``self.output_labels[i]`` / ``self.output_labels[i - 45]`` -- confirm
    that this layout matches how ``output_labels`` is built.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        soft_acts = self.model.predict(features.reshape(-1, 6))
        row = soft_acts[0]
        ranked = sorted(range(len(row)),
                        key=lambda idx: row[idx], reverse=True)

        for i in ranked:
            if i == 90:
                # Shift: not allowed for the last buffered word unless the
                # stack is empty.
                if len(state.buffer) > 1 or state.stack == []:
                    state.shift()
                    break
                continue
            if 45 <= i < 90:
                if state.stack == []:
                    continue
                state.right_arc(self.output_labels[i - 45])
                break
            # Left arc: skipped when the stack holds fewer than two items.
            if state.stack == [] or len(state.stack) == 1:
                continue
            state.left_arc(self.output_labels[i])
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        ValueError: If ``self.output_labels`` yields an action other than
            'shift', 'left_arc' or 'right_arc'.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(
            words, pos, state).reshape((1, 6))
        ranked = self.model.predict(features)[0].argsort()[::-1]

        for i in ranked:
            transition, label = self.output_labels[i]
            if transition == 'shift':
                # The last buffered word may only be shifted onto an
                # empty stack.
                if state.stack and len(state.buffer) <= 1:
                    continue
                state.shift()
            elif transition == 'left_arc':
                # Needs a stack; index 0 must never be a left-arc target.
                if not state.stack or state.stack[-1] == 0:
                    continue
                state.left_arc(label)
            elif transition == 'right_arc':
                if not state.stack:
                    continue
                state.right_arc(label)
            else:
                raise ValueError('transition error')
            break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape((-1, 6)))

        # One sample is fed per step, so only the first output row is
        # ranked (best score first).
        for j in np.argsort(scores[0])[::-1]:
            action, label = self.output_labels.get(j)
            if action == 'shift':
                # The last buffered word may only be shifted onto an
                # empty stack.
                if len(state.buffer) == 1 and len(state.stack) > 0:
                    continue
                state.shift()
                break
            elif action == 'left_arc':
                # The restriction is on the arc's *target*: index 0 on top
                # of the stack must never become a dependent.  The relation
                # label plays no part in that rule.
                if len(state.stack) == 0 or state.stack[-1] == 0:
                    continue
                state.left_arc(label)
                break
            elif action == 'right_arc':
                if len(state.stack) == 0:
                    continue
                state.right_arc(label)
                break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        IndexError: If every scored transition is rejected by the
            legality rules (the ranking is exhausted).
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    def illegal(transition):
        # Arcs require something on the stack.
        if len(state.stack) == 0:
            return transition in {"right_arc", "left_arc"}
        # The last buffered word may not be shifted onto a non-empty stack.
        if transition == "shift" and len(state.buffer) == 1:
            return True
        # Index 0 on top of the stack must never be a left-arc target.
        return transition == "left_arc" and state.stack[-1] == 0

    while state.buffer:
        single_vector = self.extractor.get_input_representation(
            words, pos, state)
        # Feed a one-row batch: reshape is a no-op for an input that is
        # already (1, n) and adds the batch axis for a flat vector.
        possible_actions = self.model.predict(
            single_vector.reshape((1, -1)))[0]
        sorted_actions = sorted(
            range(len(possible_actions)),
            key=lambda idx: possible_actions[idx], reverse=True)

        j = 0
        transition, label = self.output_labels[sorted_actions[j]]
        while illegal(transition):
            j += 1
            transition, label = self.output_labels[sorted_actions[j]]

        # Dispatch on the action name that was just vetted.
        if transition == "shift":
            state.shift()
        elif transition == "left_arc":
            state.left_arc(label)
        elif transition == "right_arc":
            state.right_arc(label)

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Candidates are ordered by (probability, index) descending, so equal
    probabilities are tried from the higher index down.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        feature = self.extractor.get_input_representation(words, pos, state)
        actions = self.model.predict(np.vstack([feature]))
        row = actions.tolist()[0]
        ranked = sorted(zip(row, range(len(row))), reverse=True)

        label = None
        for _, i in ranked:
            label = self.output_labels[i]
            kind = label[0]
            if state.stack:
                # No shifting the last buffered word, and no left arc onto
                # index 0, while the stack is non-empty.
                illegal = ((kind == 'shift' and len(state.buffer) == 1)
                           or (kind == 'left_arc' and state.stack[-1] == 0))
            else:
                # Arcs are impossible with an empty stack.
                illegal = kind == 'left_arc' or kind == 'right_arc'
            if not illegal:
                break

        if label[0] == "shift":
            state.shift()
        elif label[0] == "left_arc":
            state.left_arc(label[1])
        elif label[0] == "right_arc":
            state.right_arc(label[1])

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse of a single sentence.

    Illegal cases that are skipped:
      1. shifting the only buffered word while the stack is non-empty;
      2. any arc while the stack is empty;
      3. a left arc whose target would be index 0.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        possible_actions = self.model.predict(np.vstack([features]))
        # (index, probability) pairs, highest probability first.
        ranked = sorted(enumerate(possible_actions[0]),
                        key=lambda pair: pair[1], reverse=True)

        for idx, _ in ranked:
            transition, label = self.output_labels[idx]
            has_stack = state.stack != []

            if transition == 'shift':
                if len(state.buffer) == 1 and has_stack:
                    continue  # case 1
                state.shift()
                break
            if (transition == 'left_arc' and has_stack
                    and state.stack[-1] != 0):
                state.left_arc(label)
                break
            if transition == 'right_arc' and has_stack:
                state.right_arc(label)
                break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        # Extract the state representation once per step and wrap it in a
        # one-row batch for the model.
        feature = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(np.array([feature]))

        for i in np.argsort(scores[0])[::-1]:  # best score first
            rel, label = self.output_labels[i]
            if rel == "shift":
                # The last buffered word may only be shifted onto an
                # empty stack.
                if len(state.buffer) == 1 and len(state.stack) > 0:
                    continue
                state.shift()
                break
            elif rel == "left_arc":
                # Needs a stack; index 0 must never be a left-arc target.
                if len(state.stack) == 0 or state.stack[-1] == 0:
                    continue
                state.left_arc(label)
                break
            elif rel == "right_arc":
                if len(state.stack) == 0:
                    continue
                state.right_arc(label)
                break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Parse a sentence by applying the best permitted transition per step.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        feature = self.extractor.get_input_representation(words, pos, state)
        pred = self.model.predict(np.array([feature]))

        # Negating the scores makes argsort return best-first indices.
        for i in np.argsort(-pred, axis=1)[0]:
            key, val = self.output_labels[i]
            if key == 'shift':
                # The buffer is non-empty here; its last word may only be
                # shifted when the stack is empty.
                if len(state.buffer) > 1 or len(state.stack) == 0:
                    state.shift()
                    break
            elif key == 'left_arc':
                # The rule concerns the arc's target: index 0 on top of
                # the stack must never become a dependent, whatever the
                # relation label is.
                if len(state.stack) > 0 and state.stack[-1] != 0:
                    state.left_arc(val)
                    break
            elif key == 'right_arc':
                if len(state.stack) != 0:
                    state.right_arc(val)
                    break

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy parse that masks forbidden scores and takes the arg-max.

    With an empty stack the step is always a shift.  Otherwise the score
    at index 0 is zeroed when at most one word is buffered, left-arc
    scores are zeroed while index 0 is on top of the stack, and the
    highest remaining score wins.
    NOTE(review): zeroing index 0 presumes that index is the shift
    transition -- confirm against ``output_labels``.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        scores = self.model.predict(features.reshape(1, 6))[0]

        if len(state.stack) == 0:
            state.shift()
            continue
        if len(state.buffer) <= 1:
            scores[0] = 0.0

        zero_on_top = state.stack[-1] == 0
        best = 0
        for i, score in enumerate(scores):
            if zero_on_top and self.output_labels[i][0] == "left_arc":
                scores[i] = 0.0
                continue
            if score > scores[best]:
                best = i

        pair = self.output_labels[best]
        if pair[0] == "left_arc":
            state.left_arc(pair[1])
        elif pair[0] == "shift":
            state.shift()
        else:
            state.right_arc(pair[1])

    result = DependencyStructure()
    for parent, child, relation in state.deps:
        result.add_deprel(
            DependencyEdge(child, words[child], pos[child], parent, relation))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse that edits the state by hand.

    Instead of calling transition methods on the state, this version
    manipulates ``state.stack``, ``state.buffer`` and ``state.deps``
    directly.  The last element of each list is used as its "top"/"front".

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)
    while state.buffer:
        pass
        # Representation of the current state, fed to the model as a
        # one-row batch.
        cur_input = self.extractor.get_input_representation(
            words, pos, state)
        output = self.model.predict(cur_input.reshape(1, -1))
        # Transition indices in descending score order.
        sorted_index = np.argsort(output[0])[::-1]
        # Apply the first candidate that is valid for the current state;
        # invalid candidates fall through to the next one.
        for high_index in sorted_index:
            action = self.output_labels[high_index]
            if action[0] in ['right_arc', 'left_arc']:
                relation = action[1]
                # Arcs are only considered with a non-empty stack; with an
                # empty stack nothing happens and the loop moves on.
                if len(state.stack) != 0:
                    if action[0] == 'left_arc':
                        parent = state.buffer[-1]
                        child = state.stack[-1]
                        # Index 0 must not become the child of a left arc.
                        if child == 0:
                            continue
                        state.stack.pop(-1)
                    else:
                        parent = state.stack[-1]
                        child = state.buffer[-1]
                        state.deps.add((parent, child, relation))
                        # Drop the child from the buffer and move the
                        # parent from the stack back onto the buffer.
                        state.buffer.pop(-1)
                        state.buffer.append(state.stack.pop(-1))
                    # NOTE(review): `dep` is never read, and for right arcs
                    # the same tuple was already added above (a repeat add
                    # is harmless if `deps` is a set -- confirm).
                    dep = (parent, child, relation)
                    state.deps.add((parent, child, relation))
                    break
            else:
                # Any non-arc action is treated as a shift.  Shifting the
                # last buffered word is skipped while the stack is
                # non-empty.
                if (len(state.stack) != 0) and (len(state.buffer) == 1):
                    continue
                else:
                    state.stack.append(state.buffer.pop(-1))
                    break
    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedy parse using a locally built transition table.

    The table maps index 0 to shift, indices 1..45 to left arcs and
    indices 46..90 to right arcs, in the order of ``dep_relations``.
    NOTE(review): this table is used instead of ``self.output_labels``;
    its index layout must match the one the model was trained with --
    confirm.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.

    Raises:
        IndexError: If no scored transition is permitted in the current
            state.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    # The table does not depend on the parser state, so it is built once
    # rather than on every step.
    dep_relations = [
        'tmod', 'vmod', 'csubjpass', 'rcmod', 'ccomp', 'poss', 'parataxis',
        'appos', 'dep', 'iobj', 'pobj', 'mwe', 'quantmod', 'acomp',
        'number', 'csubj', 'root', 'auxpass', 'prep', 'mark', 'expl', 'cc',
        'npadvmod', 'prt', 'nsubj', 'advmod', 'conj', 'advcl', 'punct',
        'aux', 'pcomp', 'discourse', 'nsubjpass', 'predet', 'cop',
        'possessive', 'nn', 'xcomp', 'preconj', 'num', 'amod', 'dobj',
        'neg', 'dt', 'det'
    ]
    transitions = [('shift', None)]
    transitions += [('left_arc', dep) for dep in dep_relations]
    transitions += [('right_arc', dep) for dep in dep_relations]

    while state.buffer:
        in_rep = np.reshape(
            self.extractor.get_input_representation(words, pos, state),
            (-1, 6))
        probs = self.model.predict(in_rep).flatten()

        # (probability, index) pairs, highest first; equal probabilities
        # are tried from the higher index down.
        ranked = sorted(((prob, idx) for idx, prob in enumerate(probs)),
                        reverse=True)

        for _, idx in ranked:
            action, label = transitions[idx]
            if action == 'shift':
                # The last buffered word may only be shifted onto an
                # empty stack.
                if len(state.stack) > 0 and len(state.buffer) == 1:
                    continue
                state.shift()
            elif action == 'left_arc':
                # Left arcs are skipped unless the stack holds at least
                # two items.
                if len(state.stack) <= 1:
                    continue
                state.left_arc(label)
            else:
                if len(state.stack) == 0:
                    continue
                state.right_arc(label)
            break
        else:
            # Running out of candidates surfaces as IndexError, the error
            # an out-of-range walk through the ranking produces.
            raise IndexError("no permitted transition for current state")

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedy transition-based parse that edits the state by hand.

    Transitions are applied by reading and rebinding ``state.stack`` and
    ``state.buffer`` (via slicing) and adding tuples to ``state.deps``
    directly.  The last element of each list is used as its "top"/"front".

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)
    count = 0  # NOTE(review): never read in this function
    while state.buffer:
        nn_input = self.extractor.get_input_representation(
            words, pos, state)
        # expand_dims adds the leading batch axis for a single sample.
        out = self.model.predict(np.expand_dims(nn_input, axis=0))
        output = out[0]
        # Pair every transition with its score, best score first.
        pairs = []
        for i in range(0, len(output)):
            pairs.append((self.output_labels[i], output[i]))
        pairs.sort(key=lambda tup: tup[1], reverse=True)
        # Apply the first transition that passes the checks below.
        for i in range(0, len(pairs)):
            if pairs[i][0][0] == 'shift':
                if len(state.buffer) == 1 and len(state.stack) == 0:
                    state.stack.append(state.buffer[-1])
                    state.buffer = state.buffer[:-1]
                    break
                elif len(state.buffer) == 1 and len(state.stack) > 0:
                    # The last buffered word is not shifted onto a
                    # non-empty stack.
                    continue
                else:
                    state.stack.append(state.buffer[-1])
                    state.buffer = state.buffer[:-1]
                    break
            elif pairs[i][0][0] == 'left_arc':
                if len(state.stack) == 0 or len(state.buffer) == 0:
                    continue
                target = state.stack[-1]
                head = state.buffer[-1]
                # Index 0 must not become the target of a left arc.
                if target == 0:
                    continue
                else:
                    # The last element of the label tuple is the relation.
                    state.deps.add((head, target, pairs[i][0][-1]))
                    state.stack = state.stack[:-1]
                    break
            elif pairs[i][0][0] == 'right_arc':
                if len(state.stack) == 0 or len(state.buffer) == 0:
                    continue
                target = state.buffer[-1]
                head = state.stack[-1]
                # Index 0 may only head a right arc once a single word is
                # left in the buffer.
                if head == 0 and len(state.buffer) > 1:
                    continue
                if target == 0:
                    continue
                else:
                    state.deps.add((head, target, pairs[i][0][-1]))
                    # Replace the target at the buffer front with the head
                    # taken from the stack.
                    state.buffer = state.buffer[:-1]
                    state.buffer.append(state.stack[-1])
                    state.stack = state.stack[:-1]
                    break
    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedy parse that builds its own model input and edits the state.

    The model input is six vocabulary ids: the top three stack items
    followed by the next three buffer items, each padded with a
    '<NULL>' slot.  A token missing from ``self.extractor.word_vocab`` is
    represented by its bracketed POS tag, and by id 2 if that is missing
    too.

    The caller's ``words`` and ``pos`` are copied before the '<ROOT>' and
    '<NULL>' markers are written, so repeated calls with the same lists
    see the same sentence.

    Args:
        words: Sentence tokens, indexed by position.
        pos: POS tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one edge per entry of ``state.deps``.
        NOTE(review): the edges carry the bracketed POS form (e.g.
        ``<NN>``) because the rewritten tag list is used -- confirm that
        this is intended.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    # Private copies: everything below writes to these, not to the
    # arguments.
    words = list(words)
    pos = list(pos)
    pos[0] = '<ROOT>'
    words[0] = '<ROOT>'
    words.append('<NULL>')
    pos.append('<NULL>')
    pos = [tag if "<" in tag else "<" + tag + ">" for tag in pos]

    null_index = len(words) - 1
    vocab = self.extractor.word_vocab

    while state.buffer:
        # Top three of stack and buffer (top first), padded to length 3.
        stack_part = state.stack[-3:][::-1]
        stack_part = stack_part + [null_index] * (3 - len(stack_part))
        buffer_part = state.buffer[-3:][::-1]
        buffer_part = buffer_part + [null_index] * (3 - len(buffer_part))

        tokens = [words[i] if words[i] in vocab else pos[i]
                  for i in stack_part + buffer_part]
        s = [vocab[tok] if tok in vocab else 2 for tok in tokens]

        pred = self.model.predict(np.reshape(s, (1, 6)))[0]
        action = np.argmax(pred)
        # NOTE(review): index 0 is treated as the shift transition and
        # vocabulary id 3 as '<ROOT>' -- confirm both against the
        # extractor.
        if len(state.stack) == 0:
            action = 0
        if len(state.buffer) == 1 and len(state.stack) > 0 and action == 0:
            # Shifting the last buffered word is ruled out here.
            pred[0] = 0
            action = np.argmax(pred)
        if s[0] == 3:
            # Mask every left arc while id 3 is on top of the stack.
            pred = [
                pred[i] if not 'left' in self.output_labels[i][0] else 0
                for i in range(len(pred))
            ]
            action = np.argmax(pred)

        action = self.output_labels[action]
        if action[0] == 'shift':
            state.stack.append(state.buffer[-1])
            del state.buffer[-1]
        elif action[0] == 'right_arc':
            state.deps.add((state.stack[-1], state.buffer[-1], action[1]))
            if action[1] == 'root':
                # A 'root' arc consumes both the head and the dependent.
                del state.stack[-1]
                del state.buffer[-1]
            else:
                # The head replaces the dependent at the buffer front.
                state.buffer[-1] = state.stack[-1]
                del state.stack[-1]
        else:
            state.deps.add((state.buffer[-1], state.stack[-1], action[1]))
            del state.stack[-1]

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedily parse one sentence with the trained transition model.

    The parser state starts with token 0 on the stack and tokens
    ``1 .. len(words) - 1`` in the buffer.  At each step the model scores
    every entry of ``self.output_labels`` and the highest-scoring *legal*
    transition is applied:

    * arcs require a non-empty stack;
    * the last buffer item may only be shifted onto an empty stack;
    * token 0 on top of the stack may not be the dependent of a left arc.

    Args:
        words: Sequence of tokens; index 0 is the entry placed on the stack
            before parsing starts.
        pos: Sequence of part-of-speech tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one ``DependencyEdge`` per
        ``(head, dependent, relation)`` triple collected in ``state.deps``.

    Raises:
        RuntimeError: If no scored transition is legal in the current state.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = np.reshape(
            self.extractor.get_input_representation(words, pos, state),
            (-1, 6))
        scores = self.model.predict(features).flatten()

        # Walk label indices from best to worst.  Ranking by index (rather
        # than looking a probability value up again) keeps candidates with
        # equal scores distinct.
        for idx in np.argsort(scores)[::-1]:
            transition = self.output_labels[idx]
            action = transition[0]

            if action == 'shift':
                if state.stack and len(state.buffer) == 1:
                    continue
                state.shift()
            elif action == 'left_arc':
                if not state.stack or state.stack[-1] == 0:
                    continue
                state.left_arc(transition[1])
            elif action == 'right_arc':
                if not state.stack:
                    continue
                state.right_arc(transition[1])
            else:
                continue
            break
        else:
            # Nothing was applied, so the state would never change again.
            raise RuntimeError(
                'no legal transition for the current parser state')

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result
def parse_sentence(self, words, pos):
    """Greedily parse one sentence with the trained transition model.

    Starting with token 0 on the stack and tokens ``1 .. len(words) - 1``
    in the buffer, the model is queried once per step and its candidates
    are tried from the highest score down.  A candidate is skipped when

    * it is an arc and the stack is empty,
    * it is a shift of the only remaining buffer item while the stack is
      not empty, or
    * it is a left arc while token 0 is on top of the stack.

    The first candidate that is not skipped is applied.

    Args:
        words: Sequence of tokens; index 0 is the entry placed on the stack
            before parsing starts.
        pos: Sequence of part-of-speech tags aligned with ``words``.

    Returns:
        A ``DependencyStructure`` with one ``DependencyEdge`` per
        ``(head, dependent, relation)`` triple collected in ``state.deps``.
    """
    state = State(range(1, len(words)))
    state.stack.append(0)

    while state.buffer:
        features = self.extractor.get_input_representation(words, pos, state)
        # Shape the features as a single-row batch for the model.
        batch = features.transpose().reshape(1, -1)
        scores = self.model.predict(batch)
        ascending = np.argsort(list(scores[0]))

        # Reverse the ascending ranking to visit the best candidate first.
        for label_idx in ascending[::-1]:
            candidate = self.output_labels[label_idx]
            kind = candidate[0]
            has_stack = len(state.stack) != 0

            if not has_stack and kind in ("left_arc", "right_arc"):
                continue
            if has_stack and len(state.buffer) == 1 and kind == "shift":
                continue
            if has_stack and state.stack[-1] == 0 and kind == "left_arc":
                continue

            if kind == "left_arc":
                state.left_arc(candidate[1])
            elif kind == "right_arc":
                state.right_arc(candidate[1])
            else:
                state.shift()
            break

    result = DependencyStructure()
    for p, c, r in state.deps:
        result.add_deprel(DependencyEdge(c, words[c], pos[c], p, r))
    return result