def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: for each of the 18 tokens an interleaved (word id, POS id)
    pair, followed by 12 arc-label ids for the child tokens.
    """
    # Top three tokens of the stack and of the buffer.
    stack_top = [configuration.get_stack(i) for i in range(3)]
    buffer_top = [configuration.get_buffer(i) for i in range(3)]

    # First/second leftmost and rightmost children of the top two stack tokens.
    children = []
    for s in stack_top[:2]:
        for depth in (1, 2):
            children.append(configuration.get_left_child(s, depth))
            children.append(configuration.get_right_child(s, depth))

    # Leftmost-of-leftmost and rightmost-of-rightmost grandchildren.
    grandchildren = []
    for s in stack_top[:2]:
        grandchildren.append(
            configuration.get_left_child(configuration.get_left_child(s, 1), 1))
        grandchildren.append(
            configuration.get_right_child(configuration.get_right_child(s, 1), 1))

    # Reorder children to match the original layout:
    # lc1(s1), rc1(s1), lc2(s1), rc2(s1), lc1(s2), rc1(s2), lc2(s2), rc2(s2)
    ordered_children = [children[0], children[2], children[1], children[3],
                        children[4], children[6], children[5], children[7]]
    tokens = stack_top + buffer_top + ordered_children + grandchildren

    features = []
    for tok in tokens:
        features.append(vocabulary.get_word_id(configuration.get_word(tok)))
        features.append(vocabulary.get_pos_id(configuration.get_pos(tok)))
    for tok in tokens[6:]:
        features.append(vocabulary.get_label_id(configuration.get_label(tok)))

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 POS-tag ids, then 18 word ids, then 12 arc-label ids.
    Core tokens are ordered buffer-first: b1, b2, b3, s1, s2, s3.
    """
    # Buffer tokens first, then stack tokens (this implementation's order).
    core = [configuration.get_buffer(i) for i in range(3)]
    core += [configuration.get_stack(i) for i in range(3)]

    # First/second leftmost and rightmost children of the top two stack tokens.
    children = []
    for i in range(2):
        top = configuration.get_stack(i)
        for depth in (1, 2):
            children.append(configuration.get_left_child(top, depth))
            children.append(configuration.get_right_child(top, depth))

    # Leftmost-of-leftmost / rightmost-of-rightmost grandchildren.
    for i in range(2):
        top = configuration.get_stack(i)
        children.append(configuration.get_left_child(
            configuration.get_left_child(top, 1), 1))
        children.append(configuration.get_right_child(
            configuration.get_right_child(top, 1), 1))

    all_tokens = core + children
    pos_ids = [vocabulary.get_pos_id(configuration.get_pos(t)) for t in all_tokens]
    word_ids = [vocabulary.get_word_id(configuration.get_word(t)) for t in all_tokens]
    label_ids = [vocabulary.get_label_id(configuration.get_label(t)) for t in children]

    features = pos_ids + word_ids + label_ids
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids
    (labels only for the child tokens, i.e. positions 6..17).
    """
    # s1..s3 then b1..b3.
    tokens = [configuration.get_stack(i) for i in range(3)]
    tokens += [configuration.get_buffer(i) for i in range(3)]

    # For each of the top two stack tokens: lc1, rc1, lc2, rc2,
    # lc1(lc1), rc1(rc1) — in that order.
    for i in range(2):
        s = configuration.get_stack(i)
        lc1 = configuration.get_left_child(s, 1)
        rc1 = configuration.get_right_child(s, 1)
        tokens += [
            lc1,
            rc1,
            configuration.get_left_child(s, 2),
            configuration.get_right_child(s, 2),
            configuration.get_left_child(lc1, 1),
            configuration.get_right_child(rc1, 1),
        ]

    word_ids = [vocabulary.get_word_id(configuration.get_word(t)) for t in tokens]
    pos_ids = [vocabulary.get_pos_id(configuration.get_pos(t)) for t in tokens]
    label_ids = [vocabulary.get_label_id(configuration.get_label(t))
                 for t in tokens[6:18]]

    features = word_ids + pos_ids + label_ids
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    """
    stack_tokens = [configuration.get_stack(i) for i in range(3)]
    buffer_tokens = [configuration.get_buffer(i) for i in range(3)]

    # Children of the top two stack tokens: lc1, rc1, lc2, rc2 per token,
    # followed by the lc1(lc1) / rc1(rc1) grandchildren per token.
    child_tokens = []
    for s in stack_tokens[:2]:
        for depth in (1, 2):
            child_tokens.append(configuration.get_left_child(s, depth))
            child_tokens.append(configuration.get_right_child(s, depth))
    for s in stack_tokens[:2]:
        child_tokens.append(configuration.get_left_child(
            configuration.get_left_child(s, 1), 1))
        child_tokens.append(configuration.get_right_child(
            configuration.get_right_child(s, 1), 1))

    tokens = stack_tokens + buffer_tokens + child_tokens
    features = [vocabulary.get_word_id(configuration.get_word(t)) for t in tokens]
    features += [vocabulary.get_pos_id(configuration.get_pos(t)) for t in tokens]
    features += [vocabulary.get_label_id(configuration.get_label(t))
                 for t in child_tokens]

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids (Sw), then 18 POS-tag ids (St), then
    12 arc-label ids (Sl) for the child tokens.
    """
    word_feats, pos_feats, label_feats = [], [], []

    def record(token, with_label):
        # Append word id and POS id for one token; arc-label id only for
        # child tokens (the stack/buffer tokens carry no label feature).
        word_feats.append(vocabulary.get_word_id(configuration.get_word(token)))
        pos_feats.append(vocabulary.get_pos_id(configuration.get_pos(token)))
        if with_label:
            label_feats.append(
                vocabulary.get_label_id(configuration.get_label(token)))

    # s1, s2, s3 then b1, b2, b3.
    for i in range(3):
        record(configuration.get_stack(i), False)
    for i in range(3):
        record(configuration.get_buffer(i), False)

    # lc1(si), rc1(si), lc2(si), rc2(si) for i = 1, 2.
    for i in range(2):
        s = configuration.get_stack(i)
        record(configuration.get_left_child(s, 1), True)
        record(configuration.get_right_child(s, 1), True)
        record(configuration.get_left_child(s, 2), True)
        record(configuration.get_right_child(s, 2), True)

    # lc1(lc1(si)), rc1(rc1(si)) for i = 1, 2.
    for i in range(2):
        s = configuration.get_stack(i)
        record(configuration.get_left_child(
            configuration.get_left_child(s, 1), 1), True)
        record(configuration.get_right_child(
            configuration.get_right_child(s, 1), 1), True)

    features = word_feats + pos_feats + label_feats
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    Stack tokens are scanned deepest-first (stack(2), stack(1), stack(0)).
    """
    word_ids = []
    pos_ids = []
    label_ids = []

    # Top three stack tokens, deepest first.
    for j in range(2, -1, -1):
        index = configuration.get_stack(j)
        word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
        pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))

    # First three buffer tokens.
    for j in range(3):
        index = configuration.get_buffer(j)
        word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
        pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))

    # For the top two stack tokens: first/second leftmost and rightmost
    # children, plus leftmost-of-leftmost / rightmost-of-rightmost.
    for j in range(2):
        k = configuration.get_stack(j)
        for index in (
                configuration.get_left_child(k, 1),
                configuration.get_right_child(k, 1),
                configuration.get_left_child(k, 2),
                configuration.get_right_child(k, 2),
                configuration.get_left_child(configuration.get_left_child(k, 1), 1),
                configuration.get_right_child(configuration.get_right_child(k, 1), 1),
        ):
            word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
            pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))
            label_ids.append(vocabulary.get_label_id(configuration.get_label(index)))

    feature = word_ids + pos_ids + label_ids
    # Sanity check (was missing here but present in the assignment template):
    # 18 word + 18 POS + 12 label ids.
    assert len(feature) == 48
    return feature
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    Stack tokens are scanned deepest-first (stack(2), stack(1), stack(0)).
    """
    # NOTE: the previous revision carried a large abandoned draft inside a
    # triple-quoted string (evaluated as a no-op expression every call) and
    # an unused `feature` list; both removed.
    word_ids = []
    pos_ids = []
    label_ids = []

    # Top three stack tokens, deepest first.
    for j in range(2, -1, -1):
        index = configuration.get_stack(j)
        word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
        pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))

    # First three buffer tokens.
    for j in range(3):
        index = configuration.get_buffer(j)
        word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
        pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))

    # For the top two stack tokens: lc1, rc1, lc2, rc2, then the
    # leftmost-of-leftmost and rightmost-of-rightmost grandchildren.
    for j in range(2):
        k = configuration.get_stack(j)
        children = [
            configuration.get_left_child(k, 1),
            configuration.get_right_child(k, 1),
            configuration.get_left_child(k, 2),
            configuration.get_right_child(k, 2),
            configuration.get_left_child(configuration.get_left_child(k, 1), 1),
            configuration.get_right_child(configuration.get_right_child(k, 1), 1),
        ]
        for index in children:
            word_ids.append(vocabulary.get_word_id(configuration.get_word(index)))
            pos_ids.append(vocabulary.get_pos_id(configuration.get_pos(index)))
            label_ids.append(vocabulary.get_label_id(configuration.get_label(index)))

    features = word_ids + pos_ids + label_ids
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Builds the 48 features in place: 18 token indices are collected first,
    then raw POS tags and arc labels are appended, and finally every slot
    is converted to its vocabulary id (words, POS tags, labels).
    """
    features = []

    # Step 1: top 3 tokens from the stack, then top 3 from the buffer.
    for i in range(3):
        features.append(configuration.get_stack(i))
    for i in range(3):
        features.append(configuration.get_buffer(i))

    # Step 2/3: first and second leftmost/rightmost children of the top two
    # stack tokens, plus the lc1(lc1) / rc1(rc1) grandchildren.
    for i in range(2):
        left1 = configuration.get_left_child(features[i], 1)
        right1 = configuration.get_right_child(features[i], 1)
        left2 = configuration.get_left_child(features[i], 2)
        right2 = configuration.get_right_child(features[i], 2)
        lc1_lc1_s_i = configuration.get_left_child(left1, 1)
        rc1_rc1_s_i = configuration.get_right_child(right1, 1)
        features.extend(
            [left1, right1, left2, right2, lc1_lc1_s_i, rc1_rc1_s_i])

    num_of_features = len(features)  # 18 token indices at this point

    # Raw POS tags for all 18 tokens (converted to ids below).
    for i in range(num_of_features):
        features.append(configuration.get_pos(features[i]))

    # Arc labels for the 12 child tokens only (skip the 6 stack/buffer
    # tokens). FIX: use the Configuration accessor `get_label` instead of
    # reaching into `configuration.tree`, matching the API used by every
    # other implementation in this file.
    for i in range(6, 18):
        features.append(configuration.get_label(features[i]))

    # Convert each slot to its id: words, then POS tags, then labels.
    for i in range(18):
        features[i] = vocabulary.get_word_id(
            configuration.get_word(features[i]))
    for i in range(18, 36):
        features[i] = vocabulary.get_pos_id(features[i])
    for i in range(36, 48):
        features[i] = vocabulary.get_label_id(features[i])

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    Child tokens are grouped by kind: all left children, all right
    children, then the lc1(lc1) and rc1(rc1) grandchildren.
    """
    stack_tokens = [configuration.get_stack(i) for i in range(3)]
    buffer_tokens = [configuration.get_buffer(i) for i in range(3)]
    top_two = stack_tokens[:2]

    # lc1(s1), lc2(s1), lc1(s2), lc2(s2)
    child_tokens = [configuration.get_left_child(s, d)
                    for s in top_two for d in (1, 2)]
    # rc1(s1), rc2(s1), rc1(s2), rc2(s2)
    child_tokens += [configuration.get_right_child(s, d)
                     for s in top_two for d in (1, 2)]
    # lc1(lc1(s1)), lc1(lc1(s2))
    child_tokens += [configuration.get_left_child(
        configuration.get_left_child(s, 1), 1) for s in top_two]
    # rc1(rc1(s1)), rc1(rc1(s2))
    child_tokens += [configuration.get_right_child(
        configuration.get_right_child(s, 1), 1) for s in top_two]

    tokens = stack_tokens + buffer_tokens + child_tokens
    features = [vocabulary.get_word_id(configuration.get_word(t)) for t in tokens]
    features += [vocabulary.get_pos_id(configuration.get_pos(t)) for t in tokens]
    features += [vocabulary.get_label_id(configuration.get_label(t))
                 for t in child_tokens]

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 6 word ids (stack/buffer), 12 word ids (children),
    6 POS ids (stack/buffer), 12 POS ids (children), 12 label ids (children).
    """
    #Reference: Understood the features from the github implementation of:
    #akjindal53244/dependency_parsing_tf/utils/feature_extraction.py
    features = []
    direct_tokens = []
    children_token = []

    # Top three stack tokens, then top three buffer tokens.
    direct_tokens.extend([configuration.get_stack(i) for i in range(3)])
    direct_tokens.extend([configuration.get_buffer(i) for i in range(3)])

    for i in range(2):
        stack_token = configuration.get_stack(i)
        lc1 = configuration.get_left_child(stack_token, 1)
        rc1 = configuration.get_right_child(stack_token, 1)
        children_token.append(lc1)
        children_token.append(rc1)
        children_token.append(configuration.get_left_child(stack_token, 2))
        children_token.append(configuration.get_right_child(stack_token, 2))
        # BUG FIX: previously indexed children_token[0] / children_token[1],
        # which always re-used the FIRST stack token's children, so on i=1
        # the grandchildren of stack(1) were silently replaced by duplicates
        # of stack(0)'s. Use this iteration's lc1/rc1 instead.
        children_token.append(configuration.get_left_child(lc1, 1))
        children_token.append(configuration.get_right_child(rc1, 1))

    features.extend([
        vocabulary.get_word_id(configuration.get_word(i))
        for i in direct_tokens
    ])
    features.extend([
        vocabulary.get_word_id(configuration.get_word(i))
        for i in children_token
    ])
    features.extend([
        vocabulary.get_pos_id(configuration.get_pos(i))
        for i in direct_tokens
    ])
    features.extend([
        vocabulary.get_pos_id(configuration.get_pos(i))
        for i in children_token
    ])
    features.extend([
        vocabulary.get_label_id(configuration.get_label(i))
        for i in children_token
    ])

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    """
    # Top three stack and buffer tokens.
    s1, s2, s3 = (configuration.get_stack(i) for i in range(3))
    b1, b2, b3 = (configuration.get_buffer(i) for i in range(3))

    # lc1, rc1, lc2, rc2 for each of the top two stack tokens.
    children = []
    for s in (s1, s2):
        children.extend([
            configuration.get_left_child(s, 1),
            configuration.get_right_child(s, 1),
            configuration.get_left_child(s, 2),
            configuration.get_right_child(s, 2),
        ])
    # lc1(lc1(si)), rc1(rc1(si)) for i = 1, 2.
    for s in (s1, s2):
        children.append(configuration.get_left_child(
            configuration.get_left_child(s, 1), 1))
        children.append(configuration.get_right_child(
            configuration.get_right_child(s, 1), 1))

    tokens = [s1, s2, s3, b1, b2, b3] + children
    features = [vocabulary.get_word_id(configuration.get_word(t)) for t in tokens]
    features += [vocabulary.get_pos_id(configuration.get_pos(t)) for t in tokens]
    features += [vocabulary.get_label_id(configuration.get_label(t))
                 for t in children]

    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    Feature extraction from "A Fast and Accurate Dependency Parser using
    Neural Networks" (2014).

    Layout: 18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    Stack tokens are scanned deepest-first (stack(2), stack(1), stack(0)).
    """
    # Stack deepest-first, then the first three buffer tokens.
    core_tokens = [configuration.get_stack(i) for i in (2, 1, 0)]
    core_tokens += [configuration.get_buffer(i) for i in range(3)]

    # For the top two stack tokens: lc1, rc1, lc2, rc2, then the
    # leftmost-of-leftmost and rightmost-of-rightmost grandchildren.
    children = []
    for i in range(2):
        s = configuration.get_stack(i)
        children.append(configuration.get_left_child(s, 1))
        children.append(configuration.get_right_child(s, 1))
        children.append(configuration.get_left_child(s, 2))
        children.append(configuration.get_right_child(s, 2))
        children.append(configuration.get_left_child(
            configuration.get_left_child(s, 1), 1))
        children.append(configuration.get_right_child(
            configuration.get_right_child(s, 1), 1))

    all_tokens = core_tokens + children
    word_ids = [vocabulary.get_word_id(configuration.get_word(t))
                for t in all_tokens]
    pos_ids = [vocabulary.get_pos_id(configuration.get_pos(t))
               for t in all_tokens]
    label_ids = [vocabulary.get_label_id(configuration.get_label(t))
                 for t in children]

    features = word_ids + pos_ids + label_ids
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration, vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 parser-state features described in Chen & Manning
    (2014), "A Fast and Accurate Dependency Parser using Neural
    Networks": 18 word ids, then 12 arc-label ids, then 18 POS-tag ids.
    """
    # TODO(Students) Start
    # Child tokens of the two topmost stack words, gathered per stack
    # token in the order lc1, lc2, rc1, rc2, lc1(lc1), rc1(rc1).
    child_tokens = []
    for depth in (0, 1):
        top = configuration.get_stack(depth)
        child_tokens += [
            configuration.get_left_child(top, 1),
            configuration.get_left_child(top, 2),
            configuration.get_right_child(top, 1),
            configuration.get_right_child(top, 2),
            configuration.get_left_child(
                configuration.get_left_child(top, 1), 1),
            configuration.get_right_child(
                configuration.get_right_child(top, 1), 1),
        ]

    # Arc labels exist only for the 12 child tokens.
    label_ids = [
        vocabulary.get_label_id(configuration.get_label(token))
        for token in child_tokens
    ]

    # Full token list: the 12 children, then s1..s3, then b1..b3.
    all_tokens = (
        child_tokens
        + [configuration.get_stack(i) for i in range(3)]
        + [configuration.get_buffer(i) for i in range(3)]
    )

    word_ids = [
        vocabulary.get_word_id(configuration.get_word(token))
        for token in all_tokens
    ]
    pos_ids = [
        vocabulary.get_pos_id(configuration.get_pos(token))
        for token in all_tokens
    ]

    features = word_ids + label_ids + pos_ids
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration, vocabulary: Vocabulary) -> List[List[int]]:
    """
    Extract the 48 parser-state features of Chen & Manning (2014),
    "A Fast and Accurate Dependency Parser using Neural Networks":
    18 word ids, then 18 POS-tag ids, then 12 arc-label ids.
    """
    # TODO(Students) Start
    word_ids = []
    pos_ids = []
    label_ids = []

    # Top three stack and buffer tokens, interleaved per rank:
    # s1, b1, s2, b2, s3, b3.
    for rank in range(3):
        stack_token = configuration.get_stack(rank)
        buffer_token = configuration.get_buffer(rank)
        word_ids.append(
            vocabulary.get_word_id(configuration.get_word(stack_token)))
        word_ids.append(
            vocabulary.get_word_id(configuration.get_word(buffer_token)))
        pos_ids.append(
            vocabulary.get_pos_id(configuration.get_pos(stack_token)))
        pos_ids.append(
            vocabulary.get_pos_id(configuration.get_pos(buffer_token)))

    # For each of the top two stack tokens: first/second leftmost and
    # rightmost children, plus leftmost-of-leftmost and
    # rightmost-of-rightmost grandchildren. Only these 12 child tokens
    # contribute arc-label features.
    for rank in range(2):
        top = configuration.get_stack(rank)
        children = (
            configuration.get_left_child(top, 1),
            configuration.get_right_child(top, 1),
            configuration.get_left_child(top, 2),
            configuration.get_right_child(top, 2),
            configuration.get_left_child(
                configuration.get_left_child(top, 1), 1),
            configuration.get_right_child(
                configuration.get_right_child(top, 1), 1),
        )
        for child in children:
            word_ids.append(
                vocabulary.get_word_id(configuration.get_word(child)))
            pos_ids.append(
                vocabulary.get_pos_id(configuration.get_pos(child)))
            label_ids.append(
                vocabulary.get_label_id(configuration.get_label(child)))

    features = word_ids + pos_ids + label_ids
    # TODO(Students) End
    assert len(features) == 48
    return features