# Keras 1.x-era IMDb sentiment classifier that loads pre-trained LSTM weights.
# Imports below are inferred from the calls in this snippet; Vocabulary is a
# project-specific helper.
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dropout, Dense, Activation
from keras.regularizers import l2
from keras.preprocessing.sequence import pad_sequences


class Classifier:
    def __init__(self, max_words=500):
        self.model = None
        self.vocab = Vocabulary()  # project-specific vocabulary helper
        self.max_words = max_words

    def build(self):
        self.vocab.build()
        model = Sequential()
        model.add(Embedding(self.vocab.size(), 128))
        model.add(LSTM(128))
        model.add(Dropout(0.5))
        # W_regularizer and class_mode are Keras 1.x argument names
        # (Keras 2 renamed W_regularizer to kernel_regularizer and dropped class_mode).
        model.add(Dense(1, W_regularizer=l2(0.01)))
        model.add(Activation('sigmoid'))
        model.load_weights("lib/imdb_lstm.w")
        model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")
        self.model = model
        return self

    def pad(self, X):
        return pad_sequences(X, maxlen=self.max_words)

    def classify(self, X):
        inp = [self.vocab.vectorize(X)]
        inp = np.array(self.pad(inp))
        y = self.model.predict(inp)[0][0]
        # Return (hard 0/1 label, raw sigmoid probability).
        return (round(y), y)
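# A minimal usage sketch for the class above; it assumes the original
# project's "lib/imdb_lstm.w" weights and Vocabulary data files are present,
# and the review text is purely illustrative.
clf = Classifier(max_words=500).build()
label, score = clf.classify("A surprisingly touching film with a strong cast.")
print(label, score)  # rounded 0/1 sentiment label plus the raw sigmoid score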
def get_dataloader(args):
    # Build the vocabularies from the training split only, then reuse them
    # for the test split.
    src_sents, tgt_sents = DataReader(args, train=True).load_dataset()
    src_vocab = Vocabulary(src_sents, args, args.src_vocab_size)
    tgt_vocab = Vocabulary(tgt_sents, args, args.tgt_vocab_size)
    train_dataset = Dataset(src_sents, tgt_sents, args, src_vocab, tgt_vocab, train=True)

    src_sents, tgt_sents = DataReader(args, train=False).load_dataset()
    test_dataset = Dataset(src_sents, tgt_sents, args, src_vocab, tgt_vocab, train=False)

    # Shuffle via RandomSampler for training; keep test order deterministic.
    train_dataloader = data.DataLoader(train_dataset, args.batch_size,
                                       sampler=RandomSampler(train_dataset))
    test_dataloader = data.DataLoader(test_dataset, args.batch_size, shuffle=False)
    return train_dataloader, test_dataloader
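# Sketch of the intended call site; `args` is assumed to be an argparse-style
# namespace carrying the attributes used above (src_vocab_size,
# tgt_vocab_size, batch_size, data paths, ...).
train_loader, test_loader = get_dataloader(args)
for batch in train_loader:
    # The batch structure depends on Dataset.__getitem__ (typically padded
    # source/target id tensors); the training step goes here.
    pass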
def print_predictions(datasets: List, filename: str, vocab: Vocabulary) -> None:
    with open(filename, "w", encoding="utf8") as f:
        for instance in datasets:
            seq_len = int((instance['ent_span_labels'] >= 0).sum())
            for idx, true_label, pred_label in zip(instance['tokens'][:seq_len],
                                                   instance['ent_labels'][:seq_len],
                                                   instance['all_ent_pred'][:seq_len]):
                token = vocab.get_token_from_index(idx, "tokens")
                true_label = vocab.get_token_from_index(true_label, "ent_labels")
                pred_label = vocab.get_token_from_index(pred_label, "ent_labels")
                print("{}\t{}\t{}".format(token, true_label, pred_label), file=f)
            for (s, e), r in zip(instance['candi_rels'], instance['rel_labels']):
                r = vocab.get_token_from_index(r, "rel_labels")
                assert r != "None"
                # A "<--" suffix marks a reversed arc: swap endpoints and strip it.
                if r[-3:] == "<--":
                    s, e = e, s
                    r = r[:-3]
                print("Rel-True\t{}\t{}\t{}".format(s, e, r), file=f)
            for (s, e), r in zip(instance['all_candi_rels'], instance['all_rel_pred']):
                r = vocab.get_token_from_index(r, "rel_labels")
                if r == "None":
                    continue
                if r[-3:] == "<--":
                    s, e = e, s
                    r = r[:-3]
                print("Rel-Pred\t{}\t{}\t{}".format(s, e, r), file=f)
            print(file=f)
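# Illustrative shape of the file this writes (all values made up): token rows
# are "token <TAB> gold-tag <TAB> predicted-tag", followed by one row per gold
# and predicted relation, with a blank line between sentences.
#
#   Michael     B-PER   B-PER
#   Jordan      I-PER   I-PER
#   Rel-True    0       1       Live_In
#   Rel-Pred    0       1       Live_In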
def print_ent_span_predictions(datasets: List, filename: str, vocab: Vocabulary) -> None:
    with open(filename, "w", encoding="utf8") as f:
        for instance in datasets:
            seq_len = int((instance['ent_span_labels'] >= 0).sum())
            for idx, true_label, pred_label in zip(instance['tokens'][:seq_len],
                                                   instance['ent_span_labels'][:seq_len],
                                                   instance['ent_span_pred'][:seq_len]):
                token = vocab.get_token_from_index(idx, "tokens")
                true_label = vocab.get_token_from_index(true_label, "ent_span_labels")
                pred_label = vocab.get_token_from_index(pred_label, "ent_span_labels")
                if true_label != "O":
                    true_label = true_label + "-ENT"
                if pred_label != "O":
                    pred_label = pred_label + "-ENT"
                print("{}\t{}\t{}".format(token, true_label, pred_label), file=f)
            for (s, e), r in zip(instance['candi_rels'], instance['rel_labels']):
                r = vocab.get_token_from_index(r, "rel_labels")
                assert r != "None"
                r = "YES"
                print("Rel-True\t{}\t{}\t{}".format(s, e, r), file=f)
            for (s, e), r in zip(instance['all_candi_rels'], instance['all_bin_rel_pred']):
                if r == 0:
                    continue
                print("Rel-Pred\t{}\t{}\t{}".format(s, e, "YES"), file=f)
            print(file=f)
# Abstract classifier that trains on the IMDb train split the first time it
# runs and caches the trained model under `name`. Imports below are inferred
# from the calls in this snippet; Vocabulary and load_dir are project helpers.
import os
import random
from abc import ABC, abstractmethod

import numpy as np
import tensorflow as tf
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences


class Classifier(ABC):
    def __init__(self, name, max_words=500):
        self.model = None
        self.graph = None
        self.name = name
        self.vocab = Vocabulary()
        self.max_words = max_words

    def build(self):
        self.vocab.build()
        if not os.path.isfile(self.name):
            print("No stored configuration for " + self.name + " has been found.")
            model = self.architecture()
            print("Model has been built.")
            model = self.train(model)
            print("Model has been trained.")
            model.save(self.name)
            print("Model has been stored.")
            # Record the graph on this path too, so classify() works after a
            # fresh train as well as after a load.
            model._make_predict_function()
            self.graph = tf.get_default_graph()
        else:
            print("Stored configuration for " + self.name + " has been found.")
            model = load_model(self.name)
            # TF1-era workaround so predict() can be called from other threads.
            model._make_predict_function()
            self.graph = tf.get_default_graph()
            print("Model has been loaded.")
        self.model = model
        return self

    def train(self, model):
        pos_dir = 'dataset/train/pos'
        neg_dir = 'dataset/train/neg'
        data = load_dir(pos_dir, 1, 12500) + load_dir(neg_dir, 0, 12500)
        random.shuffle(data)
        features = list()
        labels = list()
        for X, y in data:
            features.append(self.pad([self.vocab.vectorize(X)])[0])
            labels.append(y)
        model.fit(np.array(features), np.array(labels))
        return model

    def pad(self, X):
        return pad_sequences(X, maxlen=self.max_words)

    def classify(self, X):
        inp = [self.vocab.vectorize(X)]
        inp = np.array(self.pad(inp))
        with self.graph.as_default():
            y = self.model.predict(inp)[0][0]
        return round(y), y

    @abstractmethod
    def architecture(self):
        pass
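# A hypothetical concrete subclass, sketching how architecture() is meant to
# be implemented; the layer sizes are illustrative, not taken from the
# original project.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

class LSTMClassifier(Classifier):
    def architecture(self):
        model = Sequential()
        model.add(Embedding(self.vocab.size(), 128))
        model.add(LSTM(128))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

# clf = LSTMClassifier("lstm_model.h5").build()  # trains on first run, loads thereafter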
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    features = []
    list1 = []
    s1 = configuration.get_stack(0)
    s2 = configuration.get_stack(1)
    s3 = configuration.get_stack(2)
    s1lc1 = configuration.get_left_child(s1, 1)
    s1rc1 = configuration.get_right_child(s1, 1)
    s1lc2 = configuration.get_left_child(s1, 2)
    s1rc2 = configuration.get_right_child(s1, 2)
    s2lc1 = configuration.get_left_child(s2, 1)
    s2rc1 = configuration.get_right_child(s2, 1)
    s2lc2 = configuration.get_left_child(s2, 2)
    s2rc2 = configuration.get_right_child(s2, 2)
    s1lc1lc1 = configuration.get_left_child(s1lc1, 1)
    s1rc1rc1 = configuration.get_right_child(s1rc1, 1)
    s2lc1lc1 = configuration.get_left_child(s2lc1, 1)
    s2rc1rc1 = configuration.get_right_child(s2rc1, 1)
    b1 = configuration.get_buffer(0)
    b2 = configuration.get_buffer(1)
    b3 = configuration.get_buffer(2)
    list1.extend([
        s1, s2, s3, b1, b2, b3, s1lc1, s1rc1, s1lc2, s1rc2, s2lc1, s2rc1,
        s2lc2, s2rc2, s1lc1lc1, s1rc1rc1, s2lc1lc1, s2rc1rc1
    ])
    for word in list1:
        features.extend([vocabulary.get_word_id(configuration.get_word(word))])
        features.extend([vocabulary.get_pos_id(configuration.get_pos(word))])
    for word in range(6, len(list1)):
        features.extend(
            [vocabulary.get_label_id(configuration.get_label(list1[word]))])
    # TODO(Students) End
    assert len(features) == 48
    return features
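# Every extractor variant in this section implements the same 48-feature
# template from Chen & Manning (2014): 18 word ids (top 3 of stack, top 3 of
# buffer, and 12 children of the top two stack words), 18 POS ids for the same
# positions, and 12 arc-label ids for the children only. A hedged sanity
# check, assuming a Configuration and Vocabulary built elsewhere in the repo:
feats = get_configuration_features(configuration, vocabulary)
assert len(feats) == 48  # 18 words + 18 POS tags + 12 arc labels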
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    li = []
    features = []
    li1 = []
    li0 = []
    feat1 = []
    feat2 = []
    feat3 = []
    li0.append(configuration.get_buffer(0))
    li0.append(configuration.get_buffer(1))
    li0.append(configuration.get_buffer(2))
    li0.append(configuration.get_stack(0))
    li0.append(configuration.get_stack(1))
    li0.append(configuration.get_stack(2))
    li.append(configuration.get_left_child(configuration.get_stack(0), 1))
    li.append(configuration.get_right_child(configuration.get_stack(0), 1))
    li.append(configuration.get_left_child(configuration.get_stack(0), 2))
    li.append(configuration.get_right_child(configuration.get_stack(0), 2))
    li.append(configuration.get_left_child(configuration.get_stack(1), 1))
    li.append(configuration.get_right_child(configuration.get_stack(1), 1))
    li.append(configuration.get_left_child(configuration.get_stack(1), 2))
    li.append(configuration.get_right_child(configuration.get_stack(1), 2))
    li.append(configuration.get_left_child(configuration.get_left_child(configuration.get_stack(0), 1), 1))
    li.append(configuration.get_right_child(configuration.get_right_child(configuration.get_stack(0), 1), 1))
    li.append(configuration.get_left_child(configuration.get_left_child(configuration.get_stack(1), 1), 1))
    li.append(configuration.get_right_child(configuration.get_right_child(configuration.get_stack(1), 1), 1))
    li1 = li0 + li
    for p in li1:
        feat1.append(vocabulary.get_pos_id(configuration.get_pos(p)))
    for w in li1:
        feat2.append(vocabulary.get_word_id(configuration.get_word(w)))
    for l in li:
        feat3.append(vocabulary.get_label_id(configuration.get_label(l)))
    features = feat1 + feat2 + feat3
    # TODO(Students) End
    assert len(features) == 48
    return features
def seqchar2number(instance: Dict, vocab: Vocabulary, lower_case: bool) -> List[List]:
    nums = []
    for token in instance['tokens']:
        nums.append([vocab.get_token_index(item.lower() if lower_case else item,
                                           'token_chars')
                     for item in token])
    return nums
def get_minibatch(batch: List[Dict], vocab: Vocabulary, use_cuda: bool) -> Dict[str, Any]:
    # Sort by length so downstream packed-sequence code sees descending lengths.
    batch = sorted(batch, key=lambda x: len(x['tokens']), reverse=True)
    batch_seq_len = [len(instance['tokens']) for instance in batch]
    max_seq_len = max(batch_seq_len)
    # max_char_seq_len = max([len(tok) for instance in batch for tok in instance['token_chars']])

    outputs = defaultdict(list)
    token_padding_idx = vocab.get_token_index(vocab._padding_token, 'tokens')
    char_padding_idx = vocab.get_token_index(vocab._padding_token, 'token_chars')
    label_padding_idx = -1
    for instance in batch:
        cur_seq_len = len(instance['tokens'])
        outputs['tokens'].append(instance['tokens'] + [token_padding_idx] * (max_seq_len - cur_seq_len))
        outputs['ent_labels'].append(instance['ent_labels'] + [label_padding_idx] * (max_seq_len - cur_seq_len))
        outputs['ent_span_labels'].append(
            instance['ent_span_labels'] + [label_padding_idx] * (max_seq_len - cur_seq_len))
        outputs['candi_rels'].append(instance['candi_rels'])
        outputs['ent_ids'].append(instance['ent_ids'])
        outputs['ent_ids_labels'].append(instance['ent_ids_labels'])
        outputs['rel_labels'].append(instance['rel_labels'])
        # char_pad = []
        # for char_seq in instance['token_chars']:
        #     char_pad.append(char_seq + [char_padding_idx] * (max_char_seq_len - len(char_seq)))
        # char_pad = char_pad + [[char_padding_idx] * max_char_seq_len] * (max_seq_len - cur_seq_len)
        # outputs['token_chars'].append(char_pad)
    outputs['tokens'] = torch.LongTensor(outputs['tokens'])
    # outputs['token_chars'] = torch.LongTensor(outputs['token_chars'])
    outputs['ent_labels'] = torch.LongTensor(outputs['ent_labels'])
    outputs['ent_span_labels'] = torch.LongTensor(outputs['ent_span_labels'])
    outputs['seq_lens'] = batch_seq_len
    if use_cuda:
        outputs['tokens'] = outputs['tokens'].cuda(non_blocking=True)
        # outputs['token_chars'] = outputs['token_chars'].cuda(non_blocking=True)
        outputs['ent_labels'] = outputs['ent_labels'].cuda(non_blocking=True)
        outputs['ent_span_labels'] = outputs['ent_span_labels'].cuda(non_blocking=True)
    return outputs
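# Toy call with made-up ids; `vocab` must define the padding token in the
# 'tokens' and 'token_chars' namespaces as assumed above.
toy_batch = [
    {'tokens': [5, 7, 2], 'ent_labels': [1, 0, 0], 'ent_span_labels': [1, 0, 0],
     'candi_rels': [], 'ent_ids': [], 'ent_ids_labels': [], 'rel_labels': []},
    {'tokens': [4, 9], 'ent_labels': [0, 0], 'ent_span_labels': [0, 0],
     'candi_rels': [], 'ent_ids': [], 'ent_ids_labels': [], 'rel_labels': []},
]
out = get_minibatch(toy_batch, vocab, use_cuda=False)
print(out['tokens'].shape)  # torch.Size([2, 3]): padded to the longest sequence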
def load_word_vectors(vector_file: str,
                      ndims: int,
                      vocab: Vocabulary,
                      namespace: str = 'tokens') -> List[List]:
    token_vocab_size = vocab.get_vocab_size(namespace)
    oov_idx = vocab.get_token_index(vocab._oov_token, namespace)
    padding_idx = vocab.get_token_index(vocab._padding_token, namespace)
    W = np.random.uniform(-0.25, 0.25, (token_vocab_size, ndims))
    W[padding_idx, :] = 0.0
    total, found = 0, 0
    with open(vector_file) as fp:
        for i, line in enumerate(fp):
            line = line.rstrip().split()
            if line:
                total += 1
                try:
                    assert len(line) == ndims + 1, (
                        "Line[{}] {} vector dims {} doesn't match ndims={}".
                        format(i, line[0], len(line) - 1, ndims))
                except AssertionError as e:
                    print(e)
                    continue
                word = line[0]
                idx = vocab.get_token_index(word, namespace)
                if idx != oov_idx:
                    found += 1
                    vecs = np.array(list(map(float, line[1:])))
                    W[idx, :] = vecs
    print("Found {} [{:.2f}%] vectors from {} vectors in {} with ndims={}".
          format(found, found * 100 / token_vocab_size, total, vector_file, ndims))
    # norm_W = np.sqrt((W*W).sum(axis=1, keepdims=True))
    # valid_idx = norm_W.squeeze() != 0
    # W[valid_idx, :] /= norm_W[valid_idx]
    return W
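# Typical use: initialize a PyTorch embedding layer from the returned matrix
# (the vector file name and dimensionality here are illustrative).
import torch
import torch.nn as nn

W = load_word_vectors("glove.6B.100d.txt", 100, vocab)
embedding = nn.Embedding.from_pretrained(torch.from_numpy(W).float(), freeze=False)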
def data2number(corpus: List[Dict], vocab: Vocabulary) -> List[Dict]:
    instances = []
    oov_idx = vocab.get_token_index(vocab._oov_token, 'tokens')
    for e in corpus:
        instance = {}
        instance['tokens'] = seq2number(e, vocab, 'tokens', True)
        instance['token_chars'] = seqchar2number(e, vocab, False)
        instance['ent_labels'] = seq2number(e, vocab, 'ent_labels', False)
        instance['ent_span_labels'] = seq2number(e, vocab, 'ent_span_labels', False)
        instance['ent_ids_labels'] = seq2number(e, vocab, 'ent_ids_labels', False)
        instance['rel_labels'] = seq2number(e, vocab, 'rel_labels', False)
        instance['candi_rels'] = e['candi_rels']
        instance['ent_ids'] = e['ent_ids']
        # The corpus is expected to be fully covered by the vocabulary: no OOVs.
        assert all([oov_idx != n for n in instance['tokens']])
        assert all([oov_idx != m for n in instance['token_chars'] for m in n])
        instances.append(instance)
    return instances
# The snippet begins mid-way through the argparse setup; the opening of the
# first add_argument call is reconstructed from the later use of
# args.load_serialization_dir.
parser.add_argument('load_serialization_dir',
                    type=str,
                    help='serialization directory of the trained model. '
                         'Used only for vocab.')
parser.add_argument('gold_data_path', type=str, help='gold data file path.')
parser.add_argument('prediction_data_path',
                    type=str,
                    help='predictions data file path.')
args = parser.parse_args()

print("Reading data")
sentences, label_trees = read_conll_data(args.gold_data_path)
_, predicted_trees = read_conll_data(args.prediction_data_path)

print("Reading vocabulary")
vocabulary_path = os.path.join(args.load_serialization_dir, "vocab.pickle")
vocabulary = Vocabulary.load(vocabulary_path)

sorted_labels = [
    item[0] for item in sorted(vocabulary.label_token_to_id.items(),
                               key=lambda e: e[1])
]
non_null_sorted_labels = sorted_labels[1:]
parsing_system = ParsingSystem(non_null_sorted_labels)

print("Evaluating")
report = evaluate(sentences, parsing_system, predicted_trees, label_trees)
print(report)
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    fWord = []
    fPos = []
    fLabel = []
    feature = []
    # Top 3 stack words: word and POS ids
    for j in range(2, -1, -1):
        index = configuration.get_stack(j)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
    # Top 3 buffer words: word and POS ids
    for j in range(0, 3, 1):
        index = configuration.get_buffer(j)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
    # Children and grandchildren of the top two stack words
    for j in range(0, 2, 1):
        k = configuration.get_stack(j)
        index = configuration.get_left_child(k, 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(k, 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_left_child(k, 2)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(k, 2)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_left_child(configuration.get_left_child(k, 1), 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(configuration.get_right_child(k, 1), 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
    feature.extend(fWord)
    feature.extend(fPos)
    feature.extend(fLabel)
    # TODO(Students) End
    assert len(feature) == 48
    return feature
def __init__(self, name, max_words=500):
    self.model = None
    self.graph = None
    self.name = name
    self.vocab = Vocabulary()
    self.max_words = max_words
def seq2number(instance: Dict, vocab: Vocabulary, namespace: str, lower_case: bool) -> List:
    return [vocab.get_token_index(item.lower() if lower_case else item, namespace)
            for item in instance[namespace]]
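# Illustrative call; the ids in the comment are made up:
instance = {'tokens': ['The', 'cat', 'sat']}
ids = seq2number(instance, vocab, 'tokens', lower_case=True)
# -> e.g. [12, 845, 96], the vocabulary indices of "the", "cat", "sat"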
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    '''
    all_words = list()
    features = list()
    # Top 3 words from stack
    # for idx in range(2, -1, -1):
    for idx in range(3):
        all_words.append(vocabulary.get_word_id(configuration.get_stack(idx)))
    # Top 3 words from buffer
    for idx in range(3):
        all_words.append(vocabulary.get_word_id(configuration.get_buffer(idx)))
    # Left & Right Child of the top words in stack
    for idx in range(2):
        k = configuration.get_stack(idx)
        all_words.append(configuration.get_left_child(k, 1))
        all_words.append(configuration.get_left_child(k, 2))
        all_words.append(configuration.get_right_child(k, 1))
        all_words.append(configuration.get_right_child(k, 2))
        # Left of Top word in stack, Right of Top word in stack
        all_words.append(configuration.get_left_child(configuration.get_left_child(k, 1), 1))
        # all_words.append(configuration.get_left_child(all_words[-4], 1))
        all_words.append(configuration.get_right_child(configuration.get_right_child(k, 1), 1))
        # all_words.append(configuration.get_right_child(all_words[-4], 1))
        # Left of 2nd word in stack, Right of 2nd word in stack
    # Word Features
    for word in all_words:
        features.append(vocabulary.get_word_id(configuration.get_word(word)))
    # POS Features
    for word in all_words:
        features.append(vocabulary.get_pos_id(configuration.get_pos(word)))
    # Labels
    for word in all_words[6:]:
        features.append(vocabulary.get_label_id(configuration.get_label(word)))
    '''
    features = list()
    fWord = []
    fPos = []
    fLabel = []
    for j in range(2, -1, -1):
        index = configuration.get_stack(j)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
    for j in range(0, 3, 1):
        index = configuration.get_buffer(j)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
    for j in range(0, 2, 1):
        k = configuration.get_stack(j)
        index = configuration.get_left_child(k, 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(k, 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_left_child(k, 2)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(k, 2)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_left_child(configuration.get_left_child(k, 1), 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
        index = configuration.get_right_child(configuration.get_right_child(k, 1), 1)
        fWord.append(vocabulary.get_word_id(configuration.get_word(index)))
        fPos.append(vocabulary.get_pos_id(configuration.get_pos(index)))
        fLabel.append(vocabulary.get_label_id(configuration.get_label(index)))
    features.extend(fWord)
    features.extend(fPos)
    features.extend(fLabel)
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    features = []
    postags = []
    arclabels = []
    # Children and grandchildren of the top two words on the stack
    for item in range(0, 2):
        features.append(configuration.get_left_child(configuration.get_stack(item), 1))
        features.append(configuration.get_left_child(configuration.get_stack(item), 2))
        features.append(configuration.get_right_child(configuration.get_stack(item), 1))
        features.append(configuration.get_right_child(configuration.get_stack(item), 2))
        features.append(configuration.get_left_child(
            configuration.get_left_child(configuration.get_stack(item), 1), 1))
        features.append(configuration.get_right_child(
            configuration.get_right_child(configuration.get_stack(item), 1), 1))
    # Arc labels are collected for the 12 children only
    for item in features:
        arclabels.append(configuration.get_label(item))
    for item in range(0, 3):
        features.append(configuration.get_stack(item))
    for item in range(0, 3):
        features.append(configuration.get_buffer(item))
    for item in features:
        postags.append(configuration.get_pos(item))
    # Initialize empty lists
    posids = []
    labelids = []
    wordids = []
    featureid = []
    # Map the collected tags and words to vocabulary ids
    for i in postags:
        posids.append(vocabulary.get_pos_id(i))
    for i in arclabels:
        labelids.append(vocabulary.get_label_id(i))
    for i in features:
        wordids.append(vocabulary.get_word_id(configuration.get_word(i)))
    # Concatenate the final feature vector (word ids, label ids, POS ids)
    featureid.extend(wordids)
    featureid.extend(labelids)
    featureid.extend(posids)
    features = featureid
    # TODO(Students) End
    assert len(features) == 48
    return features
def __init__(self, max_words=500):
    self.model = None
    self.vocab = Vocabulary()
    self.max_words = max_words
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    features = []
    stacks = []
    buffers = []
    val_1 = configuration.get_stack(0)
    stacks.append(val_1)
    val_2 = configuration.get_stack(1)
    stacks.append(val_2)
    val_3 = configuration.get_stack(2)
    stacks.append(val_3)
    bval_1 = configuration.get_buffer(0)
    buffers.append(bval_1)
    bval_2 = configuration.get_buffer(1)
    buffers.append(bval_2)
    bval_3 = configuration.get_buffer(2)
    buffers.append(bval_3)
    # Word ids: stack and buffer
    for i in stacks:
        features.append(vocabulary.get_word_id(configuration.get_word(i)))
    for i in buffers:
        features.append(vocabulary.get_word_id(configuration.get_word(i)))
    # Word ids: first/second left children of the top two stack words
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_word_id(configuration.get_word(
                configuration.get_left_child(stacks[i], j))))
    # Word ids: first/second right children
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_word_id(configuration.get_word(
                configuration.get_right_child(stacks[i], j))))
    # Word ids: leftmost of leftmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_word_id(configuration.get_word(
            configuration.get_left_child(configuration.get_left_child(stacks[i], 1), 1))))
    # Word ids: rightmost of rightmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_word_id(configuration.get_word(
            configuration.get_right_child(configuration.get_right_child(stacks[i], 1), 1))))
    # POS ids: stack and buffer
    for i in stacks:
        features.append(vocabulary.get_pos_id(configuration.get_pos(i)))
    for i in buffers:
        features.append(vocabulary.get_pos_id(configuration.get_pos(i)))
    # POS ids: first/second left children
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_pos_id(configuration.get_pos(
                configuration.get_left_child(stacks[i], j))))
    # POS ids: first/second right children
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_pos_id(configuration.get_pos(
                configuration.get_right_child(stacks[i], j))))
    # POS ids: leftmost of leftmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_pos_id(configuration.get_pos(
            configuration.get_left_child(configuration.get_left_child(stacks[i], 1), 1))))
    # POS ids: rightmost of rightmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_pos_id(configuration.get_pos(
            configuration.get_right_child(configuration.get_right_child(stacks[i], 1), 1))))
    # Label ids: first/second left children
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_label_id(configuration.get_label(
                configuration.get_left_child(stacks[i], j))))
    # Label ids: first/second right children
    for i in range(len(stacks) - 1):
        for j in range(1, 3):
            features.append(vocabulary.get_label_id(configuration.get_label(
                configuration.get_right_child(stacks[i], j))))
    # Label ids: leftmost of leftmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_label_id(configuration.get_label(
            configuration.get_left_child(configuration.get_left_child(stacks[i], 1), 1))))
    # Label ids: rightmost of rightmost children
    for i in range(len(stacks) - 1):
        features.append(vocabulary.get_label_id(configuration.get_label(
            configuration.get_right_child(configuration.get_right_child(stacks[i], 1), 1))))
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # Reference: features understood from the github implementation of:
    # akjindal53244/dependency_parsing_tf/utils/feature_extraction.py
    # TODO(Students) Start
    features = []
    direct_tokens = []
    children_token = []
    direct_tokens.extend([configuration.get_stack(i) for i in range(3)])
    direct_tokens.extend([configuration.get_buffer(i) for i in range(3)])
    for i in range(2):
        children_token.extend([configuration.get_left_child(configuration.get_stack(i), 1)])
        children_token.extend([configuration.get_right_child(configuration.get_stack(i), 1)])
        children_token.extend([configuration.get_left_child(configuration.get_stack(i), 2)])
        children_token.extend([configuration.get_right_child(configuration.get_stack(i), 2)])
        # Grandchildren of stack word i: indices 6*i and 6*i+1 pick this word's
        # own first left and right child (using constant indices [0]/[1] here
        # would wrongly reuse s1's children when i == 1).
        children_token.extend([configuration.get_left_child(children_token[6 * i], 1)])
        children_token.extend([configuration.get_right_child(children_token[6 * i + 1], 1)])
    features.extend([vocabulary.get_word_id(configuration.get_word(i))
                     for i in direct_tokens])
    features.extend([vocabulary.get_word_id(configuration.get_word(i))
                     for i in children_token])
    features.extend([vocabulary.get_pos_id(configuration.get_pos(i))
                     for i in direct_tokens])
    features.extend([vocabulary.get_pos_id(configuration.get_pos(i))
                     for i in children_token])
    features.extend([vocabulary.get_label_id(configuration.get_label(i))
                     for i in children_token])
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    # First, the top three elements of the stack
    st1 = configuration.get_stack(0)
    st2 = configuration.get_stack(1)
    st3 = configuration.get_stack(2)
    # Next, the top three elements of the buffer
    buf1 = configuration.get_buffer(0)
    buf2 = configuration.get_buffer(1)
    buf3 = configuration.get_buffer(2)
    # The left children at levels 1 and 2 of the topmost element of the stack
    left_ch1_st1 = configuration.get_left_child(st1, 1)
    left_ch2_st1 = configuration.get_left_child(st1, 2)
    # The left children at levels 1 and 2 of the second topmost element of the stack
    left_ch1_st2 = configuration.get_left_child(st2, 1)
    left_ch2_st2 = configuration.get_left_child(st2, 2)
    # The right children at levels 1 and 2 of the topmost element of the stack
    right_ch1_st1 = configuration.get_right_child(st1, 1)
    right_ch2_st1 = configuration.get_right_child(st1, 2)
    # The right children at levels 1 and 2 of the second topmost element of the stack
    right_ch1_st2 = configuration.get_right_child(st2, 1)
    right_ch2_st2 = configuration.get_right_child(st2, 2)
    # The leftmost children of the leftmost children of the top two stack elements
    left_ch1_left_ch1_st1 = configuration.get_left_child(left_ch1_st1, 1)
    left_ch1_left_ch1_st2 = configuration.get_left_child(left_ch1_st2, 1)
    # The rightmost children of the rightmost children of the top two stack elements
    right_ch1_right_ch1_st1 = configuration.get_right_child(right_ch1_st1, 1)
    right_ch1_right_ch1_st2 = configuration.get_right_child(right_ch1_st2, 1)
    # Collect all 18 positions in one list
    childs = [
        st1, st2, st3, buf1, buf2, buf3, left_ch1_st1, right_ch1_st1,
        left_ch2_st1, right_ch2_st1, left_ch1_st2, right_ch1_st2, left_ch2_st2,
        right_ch2_st2, left_ch1_left_ch1_st1, right_ch1_right_ch1_st1,
        left_ch1_left_ch1_st2, right_ch1_right_ch1_st2
    ]
    pos = []
    # The POS tags for all 18 positions, then the labels for the 12 children
    for idx in childs:
        pos.append(configuration.get_pos(idx))
    for idx in childs[6:]:
        pos.append(configuration.get_label(idx))
    temp = childs + pos
    features = []
    # Map words, POS tags and labels to their vocabulary ids
    for word in temp[0:18]:
        features.append(vocabulary.get_word_id(configuration.get_word(word)))
    for pos_tag in temp[18:36]:
        features.append(vocabulary.get_pos_id(pos_tag))
    for label in temp[36:48]:
        features.append(vocabulary.get_label_id(label))
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    stack_words = []
    buffer_words = []
    fst_2nd_leftmost_rightmost = []
    for i in range(0, 3):
        stack_words.append(configuration.get_stack(i))
        buffer_words.append(configuration.get_buffer(i))
        if i < 2:
            fst_2nd_leftmost_rightmost.append(configuration.get_left_child(stack_words[i], 1))
            fst_2nd_leftmost_rightmost.append(configuration.get_right_child(stack_words[i], 1))
            fst_2nd_leftmost_rightmost.append(configuration.get_left_child(stack_words[i], 2))
            fst_2nd_leftmost_rightmost.append(configuration.get_right_child(stack_words[i], 2))
            fst_2nd_leftmost_rightmost.append(
                configuration.get_left_child(configuration.get_left_child(stack_words[i], 1), 1))
            fst_2nd_leftmost_rightmost.append(
                configuration.get_right_child(configuration.get_right_child(stack_words[i], 1), 1))
    final = stack_words + buffer_words + fst_2nd_leftmost_rightmost
    pos_tags = []
    for j in range(0, len(final)):
        pos_tags.append(vocabulary.get_pos_id(configuration.get_pos(final[j])))
    # Arc labels only for the 12 children positions
    temp_final = final[6:18]
    arc_labels = []
    for j in range(0, len(temp_final)):
        arc_labels.append(vocabulary.get_label_id(configuration.get_label(temp_final[j])))
    for i, x in enumerate(final):
        final[i] = vocabulary.get_word_id(configuration.get_word(final[i]))
    features = final + pos_tags + arc_labels
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    Sw = []  # word-id features
    St = []  # POS-id features
    Sl = []  # arc-label-id features
    # s1, s2, s3, b1, b2, b3
    for idx in [0, 1, 2]:
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_stack(idx))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_stack(idx))))
    for idx in [0, 1, 2]:
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_buffer(idx))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_buffer(idx))))
    # lc1(si), rc1(si), lc2(si), rc2(si), i = 1, 2
    for idx in [0, 1]:
        wrd = configuration.get_stack(idx)
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_left_child(wrd, 1))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_left_child(wrd, 1))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(configuration.get_left_child(wrd, 1))))
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_right_child(wrd, 1))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_right_child(wrd, 1))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(configuration.get_right_child(wrd, 1))))
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_left_child(wrd, 2))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_left_child(wrd, 2))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(configuration.get_left_child(wrd, 2))))
        Sw.append(vocabulary.get_word_id(configuration.get_word(configuration.get_right_child(wrd, 2))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(configuration.get_right_child(wrd, 2))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(configuration.get_right_child(wrd, 2))))
    # lc1(lc1(si)), rc1(rc1(si)), i = 1, 2
    for idx in [0, 1]:
        wrd = configuration.get_stack(idx)
        Sw.append(vocabulary.get_word_id(configuration.get_word(
            configuration.get_left_child(configuration.get_left_child(wrd, 1), 1))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(
            configuration.get_left_child(configuration.get_left_child(wrd, 1), 1))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(
            configuration.get_left_child(configuration.get_left_child(wrd, 1), 1))))
        Sw.append(vocabulary.get_word_id(configuration.get_word(
            configuration.get_right_child(configuration.get_right_child(wrd, 1), 1))))
        St.append(vocabulary.get_pos_id(configuration.get_pos(
            configuration.get_right_child(configuration.get_right_child(wrd, 1), 1))))
        Sl.append(vocabulary.get_label_id(configuration.get_label(
            configuration.get_right_child(configuration.get_right_child(wrd, 1), 1))))
    features = []
    features.extend(Sw)
    features.extend(St)
    features.extend(Sl)
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    features = []
    # Step 1: take the top 3 words from the stack and the buffer
    for i in range(3):
        features.append(configuration.get_stack(i))
    for i in range(3):
        features.append(configuration.get_buffer(i))
    # Step 2: first and second leftmost/rightmost children of the top two
    # words on the stack
    for i in range(2):
        # first leftmost/rightmost children
        left1 = configuration.get_left_child(features[i], 1)
        right1 = configuration.get_right_child(features[i], 1)
        # second leftmost/rightmost children
        left2 = configuration.get_left_child(features[i], 2)
        right2 = configuration.get_right_child(features[i], 2)
        # Step 3: leftmost-of-leftmost and rightmost-of-rightmost children
        lc1_lc1_s_i = configuration.get_left_child(left1, 1)
        rc1_rc1_s_i = configuration.get_right_child(right1, 1)
        features.extend([left1, right1, left2, right2, lc1_lc1_s_i, rc1_rc1_s_i])
    num_of_features = len(features)
    # Extracting POS tags of the words extracted so far
    for i in range(num_of_features):
        features.append(configuration.get_pos(features[i]))
    # Extracting arc labels, excluding the 6 words on the stack/buffer
    for i in range(6, 18):
        features.append(configuration.tree.get_label(features[i]))
    # Extracting ids of the features in the form S_w, S_t, S_l
    for i in range(18):
        features[i] = vocabulary.get_word_id(configuration.get_word(features[i]))
    for i in range(18, 36):
        features[i] = vocabulary.get_pos_id(features[i])
    for i in range(36, 48):
        features[i] = vocabulary.get_label_id(features[i])
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    features = []        # list of all features (initially only word features)
    pos_features = []    # list of POS tag features
    label_features = []  # list of label features
    top3Stack = [configuration.get_stack(i) for i in range(3)]    # top 3 elements of stack
    top3Buffer = [configuration.get_buffer(i) for i in range(3)]  # top 3 elements of buffer
    # Iterate over the top 3 words in the stack and the top 3 words in the buffer
    for token_index in top3Stack + top3Buffer:
        # Add the word to the features
        features.append(vocabulary.get_word_id(configuration.get_word(token_index)))
        # Add the POS tag of the corresponding word to pos_features
        pos_features.append(vocabulary.get_pos_id(configuration.get_pos(token_index)))
    # Iterate over the top 2 words in the stack
    for token_index in top3Stack[:2]:
        # Iterate over 1 and 2 to get the 1st/2nd leftmost and rightmost
        # children of the corresponding word in the stack
        for i in range(1, 3):
            ith_left_child = configuration.get_left_child(token_index, i)
            # Add the child word, its POS tag and its label to the respective lists
            features.append(vocabulary.get_word_id(configuration.get_word(ith_left_child)))
            pos_features.append(vocabulary.get_pos_id(configuration.get_pos(ith_left_child)))
            label_features.append(vocabulary.get_label_id(configuration.get_label(ith_left_child)))
            # Similarly for the rightmost child
            ith_right_child = configuration.get_right_child(token_index, i)
            features.append(vocabulary.get_word_id(configuration.get_word(ith_right_child)))
            pos_features.append(vocabulary.get_pos_id(configuration.get_pos(ith_right_child)))
            label_features.append(vocabulary.get_label_id(configuration.get_label(ith_right_child)))
    # Iterate over the top 2 words in the stack
    for token_index in top3Stack[:2]:
        # Leftmost child of the leftmost child of the word in the stack
        left_left_child = configuration.get_left_child(
            configuration.get_left_child(token_index, 1), 1)
        features.append(vocabulary.get_word_id(configuration.get_word(left_left_child)))
        pos_features.append(vocabulary.get_pos_id(configuration.get_pos(left_left_child)))
        label_features.append(vocabulary.get_label_id(configuration.get_label(left_left_child)))
        # Rightmost child of the rightmost child of the word in the stack
        right_right_child = configuration.get_right_child(
            configuration.get_right_child(token_index, 1), 1)
        features.append(vocabulary.get_word_id(configuration.get_word(right_right_child)))
        pos_features.append(vocabulary.get_pos_id(configuration.get_pos(right_right_child)))
        label_features.append(vocabulary.get_label_id(configuration.get_label(right_right_child)))
    # Append the POS and label features to the word features
    features += pos_features + label_features
    # TODO(Students) End
    assert len(features) == 48
    return features
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    words = []
    posTags = []
    labels = []
    # Words and POS tags of the top 3 elements of the stack
    for idx in range(2, -1, -1):
        stack = configuration.get_stack(idx)
        words.append(vocabulary.get_word_id(configuration.get_word(stack)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(stack)))
    # Words and POS tags of the top 3 elements of the buffer
    for idx in range(3):
        buffer = configuration.get_buffer(idx)
        words.append(vocabulary.get_word_id(configuration.get_word(buffer)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(buffer)))
    # Words, labels and POS tags of the first and second left/right children
    # of the top two stack elements, plus the leftmost-of-leftmost and
    # rightmost-of-rightmost children of those two elements
    for idx in range(2):
        stack = configuration.get_stack(idx)
        firstLeftChild = configuration.get_left_child(stack, 1)
        words.append(vocabulary.get_word_id(configuration.get_word(firstLeftChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(firstLeftChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(firstLeftChild)))
        firstRightChild = configuration.get_right_child(stack, 1)
        words.append(vocabulary.get_word_id(configuration.get_word(firstRightChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(firstRightChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(firstRightChild)))
        secondLeftChild = configuration.get_left_child(stack, 2)
        words.append(vocabulary.get_word_id(configuration.get_word(secondLeftChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(secondLeftChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(secondLeftChild)))
        secondRightChild = configuration.get_right_child(stack, 2)
        words.append(vocabulary.get_word_id(configuration.get_word(secondRightChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(secondRightChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(secondRightChild)))
        leftLeftChild = configuration.get_left_child(
            configuration.get_left_child(stack, 1), 1)
        words.append(vocabulary.get_word_id(configuration.get_word(leftLeftChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(leftLeftChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(leftLeftChild)))
        rightRightChild = configuration.get_right_child(
            configuration.get_right_child(stack, 1), 1)
        words.append(vocabulary.get_word_id(configuration.get_word(rightRightChild)))
        labels.append(vocabulary.get_label_id(configuration.get_label(rightRightChild)))
        posTags.append(vocabulary.get_pos_id(configuration.get_pos(rightRightChild)))
    features = []
    features += words + posTags + labels
    # TODO(Students) End
    assert len(features) == 48
    return features
# Setup serialization dir
save_serialization_dir = os.path.join("serialization_dirs", args.experiment_name)
if not os.path.exists(save_serialization_dir):
    os.makedirs(save_serialization_dir)

# Setup training / validation data
print("Reading training data")
train_sentences, train_trees = read_conll_data(args.train_data_file_path)
print("Reading validation data")
validation_sentences, validation_trees = read_conll_data(
    args.validation_data_file_path)

vocabulary = Vocabulary(train_sentences, train_trees)
sorted_labels = [
    item[0] for item in sorted(vocabulary.label_token_to_id.items(),
                               key=lambda e: e[1])
]
non_null_sorted_labels = sorted_labels[1:]
parsing_system = ParsingSystem(non_null_sorted_labels)

# Generating training instances takes ~20 minutes every time. So once you
# finalize the feature generation and want to try different configs for
# experiments, you can use caching.
if args.use_cached_data:
    print("Loading cached training instances")
    cache_processed_data_path = args.train_data_file_path.replace("conll", "jsonl")
def get_configuration_features(configuration: Configuration,
                               vocabulary: Vocabulary) -> List[List[int]]:
    """
    =================================================================
    Implement feature extraction described in
    "A Fast and Accurate Dependency Parser using Neural Networks"(2014)
    =================================================================
    """
    # TODO(Students) Start
    words = []
    pos_tags = []
    arc_labels = []
    for index in range(0, 3):
        stackIndex = configuration.get_stack(index)
        bufferIndex = configuration.get_buffer(index)
        # Pushing the top 3 words from stack and buffer: s1, s2, s3, b1, b2, b3
        words.append(vocabulary.get_word_id(configuration.get_word(stackIndex)))
        words.append(vocabulary.get_word_id(configuration.get_word(bufferIndex)))
        pos_tags.append(vocabulary.get_pos_id(configuration.get_pos(stackIndex)))
        pos_tags.append(vocabulary.get_pos_id(configuration.get_pos(bufferIndex)))
    for index in range(0, 2):
        stackIndex = configuration.get_stack(index)
        # Indexes of the first and second leftmost/rightmost children of the
        # top two words on the stack: lc1(si), rc1(si), lc2(si), rc2(si), i = 1, 2
        child1 = [
            configuration.get_left_child(stackIndex, 1),
            configuration.get_right_child(stackIndex, 1),
            configuration.get_left_child(stackIndex, 2),
            configuration.get_right_child(stackIndex, 2)
        ]
        # Indexes of leftmost-of-leftmost / rightmost-of-rightmost children of
        # the top two words on the stack: lc1(lc1(si)), rc1(rc1(si)), i = 1, 2
        child2 = [
            configuration.get_left_child(configuration.get_left_child(stackIndex, 1), 1),
            configuration.get_right_child(configuration.get_right_child(stackIndex, 1), 1)
        ]
        # Merging child1 and child2 into a single children list
        children = child1 + child2
        # Pushing the children's ids to the feature lists
        for childIndex in children:
            words.append(vocabulary.get_word_id(configuration.get_word(childIndex)))
            pos_tags.append(vocabulary.get_pos_id(configuration.get_pos(childIndex)))
            arc_labels.append(vocabulary.get_label_id(configuration.get_label(childIndex)))
    features = words + pos_tags + arc_labels
    # TODO(Students) End
    assert len(features) == 48
    return features