def gigaword_generator(dataset_name, dataset_split):
    article_path = os.path.join(kaiqiang_data_dir, dataset_name, dataset_split + '.Ndocument')
    abstract_path = os.path.join(kaiqiang_data_dir, dataset_name, dataset_split + '.Nsummary')
    giga_article_lines = [line.strip() for line in open(article_path).readlines()]
    giga_abstract_lines = [line.strip() for line in open(abstract_path).readlines()]
    if len(giga_article_lines) != len(giga_abstract_lines):
        util.print_vars(giga_article_lines, giga_abstract_lines)
        raise Exception('len(article_lines) != len(abstract_lines)')
    for article_idx in range(len(giga_abstract_lines)):
        article_line = giga_article_lines[article_idx]
        abstract_line = giga_abstract_lines[article_idx]
        article = ''
        doc_indices = ''
        raw_article_sents = []

        orig_sent = article_line
        tokenized_sent = util.process_sent(orig_sent)
        # if is_quote(tokenized_sent):
        #     continue
        sent = ' '.join(tokenized_sent)
        article += sent + ' '

        doc_indices_for_tokens = [0] * len(tokenized_sent)
        doc_indices_str = ' '.join(str(x) for x in doc_indices_for_tokens)
        doc_indices += doc_indices_str + ' '
        raw_article_sents.append(orig_sent)

        article = article.strip()
        abstracts_unprocessed = [[abstract_line]]
        abstracts = []
        for abstract_lines in abstracts_unprocessed:
            abstract = process_abstract(abstract_lines)
            abstracts.append(abstract)
        # yield article, abstracts, doc_indices, raw_article_sents
        example = make_example(article, abstracts, doc_indices, raw_article_sents, None)
        yield example
def __init__(self, is_testing):
    super().__init__()
    self.is_testing = is_testing

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        regularizer = layers.l2_regularizer(1e-4)
        self.name = "%s %s" % (self.revision, self.message)
        self.train, self.valid, self.test = self.encode_data(sudoku())

        print("Building graph...")
        self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self.global_step = tf.Variable(initial_value=0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=2e-4)
        self.mode = tf.placeholder(tf.string)

        edges = self.sudoku_edges()
        edges = [(i + (b * 81), j + (b * 81)) for b in range(self.batch_size) for i, j in edges]
        ridx = [edges.index((j, i)) for i, j in edges]
        edge_indices = tf.constant(edges, tf.int32)
        n_edges = tf.shape(edge_indices)[0]

        positions = tf.constant([[(i, j) for i in range(9) for j in range(9)] for b in range(self.batch_size)], tf.int32)  # (bs, 81, 2)
        rows = layers.embed_sequence(positions[:, :, 0], 9, self.emb_size, scope='row-embeddings', unique=True)  # bs, 81, emb_size
        cols = layers.embed_sequence(positions[:, :, 1], 9, self.emb_size, scope='cols-embeddings', unique=True)  # bs, 81, emb_size

        def avg_n(x):
            return tf.reduce_mean(tf.stack(x, axis=0), axis=0)

        towers = []
        with tf.variable_scope(tf.get_variable_scope()):
            for device_nr, device in enumerate(self.devices):
                with tf.device('/cpu:0'):
                    if self.is_testing:
                        (quizzes, answers), edge_keep_prob = self.test.get_next(), 1.0
                    else:
                        (quizzes, answers), edge_keep_prob = tf.cond(
                            tf.equal(self.mode, "train"),
                            true_fn=lambda: (self.train.get_next(), self.edge_keep_prob),
                            false_fn=lambda: (self.valid.get_next(), 1.0))

                    x = layers.embed_sequence(quizzes, 10, self.emb_size, scope='nr-embeddings', unique=True)  # bs, 81, emb_size
                    x = tf.concat([x, rows, cols], axis=2)
                    x = tf.reshape(x, (-1, 3 * self.emb_size))

                with tf.device(device), tf.name_scope("device-%s" % device_nr):

                    def mlp(x, scope, n_out):
                        with tf.variable_scope(scope):
                            for i in range(3):
                                x = layers.fully_connected(x, n_out, weights_regularizer=regularizer)
                            return layers.fully_connected(x, n_out, weights_regularizer=regularizer, activation_fn=None)

                    x = mlp(x, 'C1', self.n_hidden)
                    dependents = tf.zeros((n_edges, 10))
                    outputs = []
                    log_losses = []
                    with tf.variable_scope('steps'):
                        for step in range(self.n_steps):
                            # M_F = c2(c1(x, p), c1(x, N_F\p), d_pF)
                            # d_pF = sum_{q \in N_F\p} (M_F)
                            # p(y_p|x) = softmax(sum(M_F))
                            logits, messages = message_passing(x, edge_indices, dependents, lambda x: mlp(x, 'C2', 10))
                            dependents = tf.gather(logits, edge_indices[:, 0]) - tf.gather(messages, ridx)
                            out = tf.reshape(logits, (-1, 81, 10))
                            outputs.append(out)
                            log_losses.append(tf.reduce_mean(
                                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=answers, logits=out)))
                            tf.get_variable_scope().reuse_variables()

                    reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                    loss = log_losses[-1] + reg_loss

                    towers.append({
                        'loss': loss,
                        'grads': [(tf.clip_by_value(g, -10.0, 10.0), v) for g, v in self.optimizer.compute_gradients(loss)],
                        'log_losses': tf.stack(log_losses),  # (n_steps, 1)
                        'quizzes': quizzes,  # (bs, 81, 10)
                        'answers': answers,  # (bs, 81, 10)
                        'outputs': tf.stack(outputs)  # n_steps, bs, 81, 10
                    })

                    tf.get_variable_scope().reuse_variables()

        self.loss = avg_n([t['loss'] for t in towers])
        self.out = tf.concat([t['outputs'] for t in towers], axis=1)  # n_steps, bs, 81, 10
        self.predicted = tf.cast(tf.argmax(self.out, axis=3), tf.int32)
        self.answers = tf.concat([t['answers'] for t in towers], axis=0)
        self.quizzes = tf.concat([t['quizzes'] for t in towers], axis=0)

        tf.summary.scalar('losses/total', self.loss)
        tf.summary.scalar('losses/reg', reg_loss)
        log_losses = avg_n([t['log_losses'] for t in towers])
        for step in range(self.n_steps):
            equal = tf.equal(self.answers, self.predicted[step])
            digit_acc = tf.reduce_mean(tf.to_float(equal))
            tf.summary.scalar('steps/%d/digit-acc' % step, digit_acc)
            puzzle_acc = tf.reduce_mean(tf.to_float(tf.reduce_all(equal, axis=1)))
            tf.summary.scalar('steps/%d/puzzle-acc' % step, puzzle_acc)
            tf.summary.scalar('steps/%d/losses/log' % step, log_losses[step])

        avg_gradients = util.average_gradients([t['grads'] for t in towers])
        self.train_step = self.optimizer.apply_gradients(avg_gradients, global_step=self.global_step)

        self.session.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        util.print_vars(tf.trainable_variables())

        self.train_writer = tf.summary.FileWriter(
            self.tensorboard_dir + '/sudoku/%s/train/%s' % (self.revision, self.name), self.session.graph)
        self.test_writer = tf.summary.FileWriter(
            self.tensorboard_dir + '/sudoku/%s/test/%s' % (self.revision, self.name), self.session.graph)
        self.summaries = tf.summary.merge_all()
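# Hedged sketch (an assumption, not the repo's actual `message_passing`): a
# helper matching the call signature used above could gather the node states
# of each edge's endpoints, append the per-edge features (`dependents`), run
# the message MLP, and sum the incoming messages per node.
import tensorflow as tf

def message_passing_sketch(nodes, edge_indices, edge_features, message_fn):
    # nodes: (n_nodes, d); edge_indices: (n_edges, 2); edge_features: (n_edges, k)
    senders = tf.gather(nodes, edge_indices[:, 0])
    receivers = tf.gather(nodes, edge_indices[:, 1])
    messages = message_fn(tf.concat([senders, receivers, edge_features], axis=1))  # (n_edges, n_out)
    n_nodes = tf.shape(nodes)[0]
    summed = tf.unsorted_segment_sum(messages, edge_indices[:, 1], n_nodes)  # (n_nodes, n_out)
    return summed, messages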
"""Question: https://leetcode.com/problems/delete-node-in-a-linked-list/ """ from datastruct import ListNode from util import print_vars class Solution: def deleteNode(self, node: ListNode) -> None: """ :type node: ListNode :rtype: void Do not return anything, modify node in-place instead. """ node.val = node.next.val node.next = node.next.next if __name__ == '__main__': head = ListNode.from_list([4, 5, 1, 9]) node = head.next print_vars(head, node) print('delete node: ', node) Solution().deleteNode(node) print_vars(head, node)
def main(unused_argv):
    print('Running statistics on %s' % exp_name)
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)
    if FLAGS.singles_and_pairs == 'both':
        in_dataset = FLAGS.dataset_name
        out_dataset = FLAGS.dataset_name + '_both'
    else:
        in_dataset = FLAGS.dataset_name + '_singles'
        out_dataset = FLAGS.dataset_name + '_singles'
    if FLAGS.lr:
        out_dataset = FLAGS.dataset_name + '_lr'

    start_time = time.time()
    np.random.seed(random_seed)
    source_dir = os.path.join(data_dir, in_dataset)
    ex_sents = ['single .', 'sentence .']
    article_text = ' '.join(ex_sents)
    sent_term_matrix = util.get_doc_substituted_tfidf_matrix(tfidf_vectorizer, ex_sents, article_text, pca)
    if FLAGS.singles_and_pairs == 'pairs':
        single_feat_len = 0
    else:
        single_feat_len = len(get_single_sent_features(
            0, sent_term_matrix, [['single', '.'], ['sentence', '.']], [0, 0], 0))
    if FLAGS.singles_and_pairs == 'singles':
        pair_feat_len = 0
    else:
        pair_feat_len = len(get_pair_sent_features(
            [0, 1], sent_term_matrix, [['single', '.'], ['sentence', '.']], [0, 0], [0, 0]))
    util.print_vars(single_feat_len, pair_feat_len)
    util.create_dirs(temp_dir)

    if FLAGS.dataset_split == 'all':
        dataset_splits = ['test', 'val', 'train']
    elif FLAGS.dataset_split == 'train_val':
        dataset_splits = ['val', 'train']
    else:
        dataset_splits = [FLAGS.dataset_split]
    for split in dataset_splits:
        source_files = sorted(glob.glob(source_dir + '/' + split + '*'))
        out_path = os.path.join(out_dir, out_dataset, split)
        if FLAGS.pca:
            out_path += '_pca'
        util.create_dirs(out_path)
        total = len(source_files) * 1000 if (
            'cnn' in in_dataset or 'newsroom' in in_dataset or 'xsum' in in_dataset) else len(source_files)
        example_generator = data.example_generator(source_dir + '/' + split + '*', True, False,
                                                   should_check_valid=False)
        # for example in tqdm(example_generator, total=total):
        ex_gen = example_generator_extended(example_generator, total, single_feat_len, pair_feat_len,
                                            FLAGS.singles_and_pairs, out_path)
        print('Creating list')
        ex_list = [ex for ex in ex_gen]
        if FLAGS.num_instances != -1:
            ex_list = ex_list[:FLAGS.num_instances]
        print('Converting...')
        # all_features = pool.map(convert_article_to_lambdamart_features, ex_list)
        # all_features = ray.get([convert_article_to_lambdamart_features.remote(ex) for ex in ex_list])
        if FLAGS.lr:
            all_instances = list(futures.map(convert_article_to_lambdamart_features, ex_list))
            all_instances = util.flatten_list_of_lists(all_instances)
            x = [inst.features for inst in all_instances]
            x = np.array(x)
            y = [inst.relevance for inst in all_instances]
            y = np.expand_dims(np.array(y), 1)
            x_y = np.concatenate((x, y), 1)
            np.save(writer, x_y)  # NOTE: relies on a `writer` file handle defined elsewhere in the module
        else:
            list(futures.map(convert_article_to_lambdamart_features, ex_list))
            # writer.write(''.join(all_features))
        # all_features = []
        # for example in tqdm(ex_gen, total=total):
        #     all_features.append(convert_article_to_lambdamart_features(example))
        # all_features = util.flatten_list_of_lists(all_features)
        # num1 = sum(x == 1 for x in all_features)
        # num2 = sum(x == 2 for x in all_features)
        # print 'Single sent: %d instances. Pair sent: %d instances.' % (num1, num2)
        # for example in tqdm(ex_gen, total=total):
        #     features = convert_article_to_lambdamart_features(example)
        #     writer.write(features)

        final_out_path = out_path + '.txt'
        file_names = sorted(glob.glob(os.path.join(out_path, '*')))
        writer = open(final_out_path, 'w')  # text mode: the per-example files are read back as str below
        for file_name in tqdm(file_names):
            with open(file_name) as f:
                text = f.read()
            writer.write(text)
        writer.close()
    util.print_execution_time(start_time)
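# Toy round-trip (illustration only; file name is made up) of the x_y array
# layout built in the LR branch above: feature columns first, the relevance
# label appended as the last column.
import numpy as np

_x = np.array([[0.1, 0.2], [0.3, 0.4]])
_y = np.expand_dims(np.array([1, 0]), 1)
_x_y = np.concatenate((_x, _y), 1)  # shape (2, 3); last column is the label
np.save('/tmp/x_y_demo.npy', _x_y)
_loaded = np.load('/tmp/x_y_demo.npy')
_features, _labels = _loaded[:, :-1], _loaded[:, -1]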
def __init__(self, is_testing):
    super().__init__()
    self.is_testing = is_testing

    print("Preparing data...")
    self.train, self.valid, self.test, self.vocab = self.encode_data(bAbI('en-valid-10k'))

    print("Creating graph...")
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        regularizer = layers.l2_regularizer(1e-4)
        self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self.global_step = tf.Variable(initial_value=0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=2e-4)

        self.facts_ph = tf.placeholder(tf.int32, shape=(None, None))  # (bs*#facts, seq)
        self.facts_pos_ph = tf.placeholder(tf.int32, shape=(None,))  # (bs*#facts, )
        self.question_ph = tf.placeholder(tf.int32, shape=(None, None))  # (bs, seq)
        self.answers_ph = tf.placeholder(tf.int32, shape=(None,))  # (bs, )
        self.edge_indices_ph = tf.placeholder(tf.int32, shape=(None, 2))
        self.fact_segments_ph = tf.placeholder(tf.int32, shape=(None,))
        self.edge_segments_ph = tf.placeholder(tf.int32, shape=(None,))
        self.q_seq_length_ph = tf.placeholder(tf.int32, shape=(None,))
        self.f_seq_length_ph = tf.placeholder(tf.int32, shape=(None,))
        self.task_indices_ph = tf.placeholder(tf.int32, shape=(None,))
        self.edge_keep_prob_ph = tf.placeholder(tf.float32, shape=())
        self.is_training_ph = tf.placeholder(tf.bool)

        placeholders = [self.facts_ph, self.facts_pos_ph, self.question_ph, self.answers_ph,
                        self.edge_indices_ph, self.fact_segments_ph, self.edge_segments_ph,
                        self.q_seq_length_ph, self.f_seq_length_ph, self.task_indices_ph,
                        self.edge_keep_prob_ph]

        self.train_queue = tf.FIFOQueue(self.qsize, [ph.dtype for ph in placeholders], name='train-queue')
        self.val_queue = tf.FIFOQueue(self.qsize, [ph.dtype for ph in placeholders], name='val-queue')

        self.train_enqueue_op = self.train_queue.enqueue(placeholders)
        self.train_qsize_op = self.train_queue.size()
        tf.summary.scalar('queues/train', self.train_qsize_op)

        self.val_enqueue_op = self.val_queue.enqueue(placeholders)
        self.val_qsize_op = self.val_queue.size()
        tf.summary.scalar('queues/val', self.val_qsize_op)

        def avg_n(x):
            return tf.reduce_mean(tf.stack(x, axis=0), axis=0)

        towers = []
        with tf.variable_scope(tf.get_variable_scope()):
            for device_nr, device in enumerate(self.devices):
                with tf.device('/cpu:0'):
                    if self.is_testing:
                        facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = placeholders
                    else:
                        facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = tf.cond(
                            self.is_training_ph,
                            true_fn=lambda: self.train_queue.dequeue(),
                            false_fn=lambda: self.val_queue.dequeue(),
                        )

                    vars = (facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph,
                            fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph,
                            task_indices_ph, edge_keep_prob)
                    for v, ph in zip(vars, placeholders):
                        v.set_shape(ph.get_shape())

                    facts_emb = layers.embed_sequence(facts_ph, self.vocab.size(), self.emb_size,
                                                      scope='word-embeddings')
                    questions_emb = layers.embed_sequence(question_ph, self.vocab.size(), self.emb_size,
                                                          scope='word-embeddings', reuse=True)

                with tf.device(device), tf.name_scope("device-%s" % device_nr):

                    def mlp(x, scope, n_hidden):
                        with tf.variable_scope(scope):
                            for i in range(3):
                                x = layers.fully_connected(x, n_hidden, weights_regularizer=regularizer)
                            return layers.fully_connected(x, n_hidden, weights_regularizer=regularizer,
                                                          activation_fn=None)

                    _, (_, f_encoding) = tf.nn.dynamic_rnn(tf.nn.rnn_cell.LSTMCell(32), facts_emb,
                                                           dtype=tf.float32, sequence_length=f_seq_length_ph,
                                                           scope='fact-encoder')

                    random_pos_offsets = tf.random_uniform(tf.shape(answers_ph), minval=0,
                                                           maxval=self.num_facts, dtype=tf.int32)
                    fact_pos = facts_pos_ph + tf.gather(random_pos_offsets, fact_segments_ph)
                    facts_pos_encoding = tf.one_hot(fact_pos, 2 * self.num_facts)
                    f_encoding = tf.concat([f_encoding, facts_pos_encoding], axis=1)

                    _, (_, q_encoding) = tf.nn.dynamic_rnn(tf.nn.rnn_cell.LSTMCell(32), questions_emb,
                                                           dtype=tf.float32, sequence_length=q_seq_length_ph,
                                                           scope='question-encoder')

                    def graph_fn(x):
                        with tf.variable_scope('graph-fn'):
                            x = layers.fully_connected(x, self.n_hidden, weights_regularizer=regularizer)
                            x = layers.fully_connected(x, self.n_hidden, weights_regularizer=regularizer)
                            return layers.fully_connected(x, self.vocab.size(), activation_fn=None,
                                                          weights_regularizer=regularizer)

                    x = tf.concat([f_encoding, tf.gather(q_encoding, fact_segments_ph)], 1)
                    x0 = mlp(x, 'pre', self.n_hidden)
                    edge_features = tf.gather(q_encoding, edge_segments_ph)

                    x = x0
                    outputs = []
                    log_losses = []
                    with tf.variable_scope('steps'):
                        lstm_cell = LSTMCell(self.n_hidden)
                        state = lstm_cell.zero_state(tf.shape(x)[0], tf.float32)
                        for step in range(self.n_steps):
                            x = message_passing(x, edge_indices_ph, edge_features,
                                                lambda x: mlp(x, 'message-fn', self.n_hidden),
                                                edge_keep_prob)
                            x = mlp(tf.concat([x, x0], axis=1), 'post-fn', self.n_hidden)
                            x, state = lstm_cell(x, state)
                            with tf.variable_scope('graph-sum'):
                                graph_sum = tf.segment_sum(x, fact_segments_ph)
                                out = graph_fn(graph_sum)
                                outputs.append(out)
                                log_losses.append(tf.reduce_mean(
                                    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=answers_ph,
                                                                                   logits=out)))
                            tf.get_variable_scope().reuse_variables()

                    reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                    loss = avg_n(log_losses) + reg_loss

                    towers.append({
                        'loss': loss,
                        'grads': self.optimizer.compute_gradients(loss),
                        'log_losses': tf.stack(log_losses),  # (n_steps, 1)
                        'answers': answers_ph,  # (batch_size, n_outputs)
                        'outputs': tf.stack(outputs),  # (n_steps, batch_size, n_outputs)
                        'task_indices': task_indices_ph  # (batch_size,)
                    })

                    tf.get_variable_scope().reuse_variables()

        self.loss = avg_n([t['loss'] for t in towers])
        self.out = tf.concat([t['outputs'] for t in towers], axis=1)
        self.answers = tf.concat([t['answers'] for t in towers], axis=0)
        self.task_indices = tf.concat([t['task_indices'] for t in towers], axis=0)

        tf.summary.scalar('losses/total', self.loss)
        tf.summary.scalar('losses/reg', reg_loss)
        log_losses = avg_n([t['log_losses'] for t in towers])
        for i in range(self.n_steps):
            tf.summary.scalar('steps/%d/losses/log' % i, log_losses[i])

        avg_gradients = util.average_gradients([t['grads'] for t in towers])
        self.train_step = self.optimizer.apply_gradients(avg_gradients, global_step=self.global_step)

        self.session.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        util.print_vars(tf.trainable_variables())

        self.train_writer = tf.summary.FileWriter(
            '/tmp/tensorboard/bAbI/%s/train/%s' % (self.revision, self.name), self.session.graph)
        self.test_writer = tf.summary.FileWriter(
            '/tmp/tensorboard/bAbI/%s/test/%s' % (self.revision, self.name), self.session.graph)
        self.summaries = tf.summary.merge_all()

        print("Starting data loaders...")
        train_mp_queue = mp.Manager().Queue(maxsize=self.qsize)
        val_mp_queue = mp.Manager().Queue(maxsize=self.qsize)

        data_loader_processes = [mp.Process(target=self.data_loader, args=(train_mp_queue, True))
                                 for i in range(4)]
        val_data_loader_processes = [mp.Process(target=self.data_loader, args=(val_mp_queue, False))
                                     for i in range(1)]
        for p in data_loader_processes + val_data_loader_processes:
            p.daemon = True
            p.start()

        queue_putter_threads = [
            threading.Thread(target=self.queue_putter, args=(train_mp_queue, self.train_enqueue_op, 'train', 1000)),
            threading.Thread(target=self.queue_putter, args=(val_mp_queue, self.val_enqueue_op, 'val', 1)),
        ]
        for t in queue_putter_threads:
            t.daemon = True
            t.start()

        train_qsize, val_qsize = 0, 0
        print("Waiting for queue to fill...")
        while train_qsize < self.qsize or val_qsize < self.qsize:
            train_qsize = self.session.run(self.train_qsize_op)
            val_qsize = self.session.run(self.val_qsize_op)
            print('train_qsize: %d, val_qsize: %d' % (train_qsize, val_qsize), flush=True)
            time.sleep(1)
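# Minimal, self-contained sketch of the FIFOQueue hand-off used above (toy
# scalar payload; the real queues carry the eleven tensors listed in
# `placeholders`). A background thread plays the role of queue_putter.
import threading
import tensorflow as tf

value_ph = tf.placeholder(tf.float32, shape=())
toy_queue = tf.FIFOQueue(10, [tf.float32], name='toy-queue')
toy_enqueue_op = toy_queue.enqueue([value_ph])
toy_dequeue = toy_queue.dequeue()

with tf.Session() as sess:
    def putter():
        for i in range(5):
            sess.run(toy_enqueue_op, {value_ph: float(i)})

    t = threading.Thread(target=putter)
    t.daemon = True
    t.start()
    for _ in range(5):
        print(sess.run(toy_dequeue))  # 0.0, 1.0, 2.0, 3.0, 4.0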
def __init__(self, is_testing):
    super().__init__()
    self.is_testing = is_testing

    print("Preparing data...")
    # Load and encode data (disk -> memory); see encode_data() for details.
    # Also see data_loader(), the next processing stage.
    self.train, self.valid, self.test, self.vocab = self.encode_data(bAbI('en-valid-10k'))

    print("Creating graph...")
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        regularizer = layers.l2_regularizer(1e-4)  # regularizer applied to the fully-connected networks
        # allow_soft_placement=True: if the requested device is unavailable, let TF choose one
        self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self.global_step = tf.Variable(initial_value=0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=2e-4)

        self.facts_ph = tf.placeholder(tf.int32, shape=(None, None))  # (bs*#facts, seq)
        self.facts_pos_ph = tf.placeholder(tf.int32, shape=(None,))  # (bs*#facts, )
        self.question_ph = tf.placeholder(tf.int32, shape=(None, None))  # (bs, seq)
        self.answers_ph = tf.placeholder(tf.int32, shape=(None,))  # (bs, )
        self.edge_indices_ph = tf.placeholder(tf.int32, shape=(None, 2))
        self.fact_segments_ph = tf.placeholder(tf.int32, shape=(None,))
        self.edge_segments_ph = tf.placeholder(tf.int32, shape=(None,))
        self.q_seq_length_ph = tf.placeholder(tf.int32, shape=(None,))
        self.f_seq_length_ph = tf.placeholder(tf.int32, shape=(None,))
        self.task_indices_ph = tf.placeholder(tf.int32, shape=(None,))
        self.edge_keep_prob_ph = tf.placeholder(tf.float32, shape=())
        self.is_training_ph = tf.placeholder(tf.bool)
        # device: CPU:0
        placeholders = [self.facts_ph, self.facts_pos_ph, self.question_ph, self.answers_ph,
                        self.edge_indices_ph, self.fact_segments_ph, self.edge_segments_ph,
                        self.q_seq_length_ph, self.f_seq_length_ph, self.task_indices_ph,
                        self.edge_keep_prob_ph]

        # each element of train_queue is a training batch
        self.train_queue = tf.FIFOQueue(self.qsize, [ph.dtype for ph in placeholders], name='train-queue')
        # each element of val_queue is a validation batch
        self.val_queue = tf.FIFOQueue(self.qsize, [ph.dtype for ph in placeholders], name='val-queue')

        self.train_enqueue_op = self.train_queue.enqueue(placeholders)
        self.train_qsize_op = self.train_queue.size()
        # record the size of the train_queue every batch
        tf.summary.scalar('queues/train', self.train_qsize_op)

        self.val_enqueue_op = self.val_queue.enqueue(placeholders)
        self.val_qsize_op = self.val_queue.size()
        # record the size of the val_queue every batch
        tf.summary.scalar('queues/val', self.val_qsize_op)

        def avg_n(x):
            return tf.reduce_mean(tf.stack(x, axis=0), axis=0)

        towers = []
        with tf.variable_scope(tf.get_variable_scope()):
            for device_nr, device in enumerate(self.devices):
                with tf.device('/cpu:0'):
                    if self.is_testing:
                        facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = placeholders
                    else:
                        facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph, fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph, task_indices_ph, edge_keep_prob = tf.cond(
                            self.is_training_ph,
                            true_fn=lambda: self.train_queue.dequeue(),
                            false_fn=lambda: self.val_queue.dequeue(),
                        )

                    # device: CPU:0, CPU:0, CPU:0 (on a 3-GPU machine these tensors exist in triplicate)
                    vars = (facts_ph, facts_pos_ph, question_ph, answers_ph, edge_indices_ph,
                            fact_segments_ph, edge_segments_ph, q_seq_length_ph, f_seq_length_ph,
                            task_indices_ph, edge_keep_prob)
                    for v, ph in zip(vars, placeholders):
                        v.set_shape(ph.get_shape())

                    # device: CPU:0, CPU:0, CPU:0
                    facts_emb = layers.embed_sequence(facts_ph, self.vocab.size(), self.emb_size,
                                                      scope='word-embeddings')
                    # device: CPU:0, CPU:0, CPU:0
                    questions_emb = layers.embed_sequence(question_ph, self.vocab.size(), self.emb_size,
                                                          scope='word-embeddings', reuse=True)

                with tf.device(device), tf.name_scope("device-%s" % device_nr):

                    # MLP of four FC layers: three hidden layers plus a linear output layer
                    def mlp(x, scope, n_hidden):
                        with tf.variable_scope(scope):
                            for i in range(3):
                                x = layers.fully_connected(x, n_hidden, weights_regularizer=regularizer)
                            return layers.fully_connected(x, n_hidden, weights_regularizer=regularizer,
                                                          activation_fn=None)

                    # get the final hidden state for the sentences (facts); f_encoding shape: (bs*#facts, state_size)
                    _, (_, f_encoding) = tf.nn.dynamic_rnn(tf.nn.rnn_cell.LSTMCell(32), facts_emb,
                                                           dtype=tf.float32, sequence_length=f_seq_length_ph,
                                                           scope='fact-encoder')

                    # shape: (bs,) (same as answers_ph); each element is drawn
                    # uniformly at random from [0, num_facts)
                    random_pos_offsets = tf.random_uniform(tf.shape(answers_ph), minval=0,
                                                           maxval=self.num_facts, dtype=tf.int32)
                    # Add a random offset to each fact position. Note that all facts
                    # within one task (graph) share the same offset.
                    fact_pos = facts_pos_ph + tf.gather(random_pos_offsets, fact_segments_ph)
                    # Because of the offset, the depth of the positional one-hot encoding must be 2*num_facts
                    facts_pos_encoding = tf.one_hot(fact_pos, 2 * self.num_facts)
                    # concatenate the content and position encodings; device: GPU:0, GPU:1, GPU:2
                    f_encoding = tf.concat([f_encoding, facts_pos_encoding], axis=1)

                    # Questions need no positional encoding; only their content is encoded.
                    # q_encoding shape: (bs, state_size); device: GPU:0, GPU:1, GPU:2
                    _, (_, q_encoding) = tf.nn.dynamic_rnn(tf.nn.rnn_cell.LSTMCell(32), questions_emb,
                                                           dtype=tf.float32, sequence_length=q_seq_length_ph,
                                                           scope='question-encoder')

                    # MLP of three FC layers that maps the pooled graph output to logits;
                    # the last layer has vocab.size() outputs
                    def graph_fn(x):
                        with tf.variable_scope('graph-fn'):
                            x = layers.fully_connected(x, self.n_hidden, weights_regularizer=regularizer)
                            x = layers.fully_connected(x, self.n_hidden, weights_regularizer=regularizer)
                            return layers.fully_connected(x, self.vocab.size(), activation_fn=None,
                                                          weights_regularizer=regularizer)

                    # concatenate the fact encoding and the question encoding
                    x = tf.concat([f_encoding, tf.gather(q_encoding, fact_segments_ph)], 1)
                    # x0 is the fact embedding conditioned on the question (the question
                    # embedding is concatenated onto each fact); device: GPU:0, GPU:1, GPU:2
                    x0 = mlp(x, 'pre', self.n_hidden)
                    # the question encoding for every edge;
                    # edge_features shape: (bs*(#facts**2), LSTM state_size)
                    edge_features = tf.gather(q_encoding, edge_segments_ph)

                    x = x0
                    outputs = []
                    log_losses = []
                    with tf.variable_scope('steps'):
                        lstm_cell = LSTMCell(self.n_hidden)
                        state = lstm_cell.zero_state(tf.shape(x)[0], tf.float32)
                        for step in range(self.n_steps):
                            x = message_passing(x, edge_indices_ph, edge_features,
                                                lambda x: mlp(x, 'message-fn', self.n_hidden),
                                                edge_keep_prob)
                            x = mlp(tf.concat([x, x0], axis=1), 'post-fn', self.n_hidden)
                            # x = hidden state, state = <cell state, hidden state>
                            # device: (GPU:0)*5, (GPU:1)*5, (GPU:2)*5 (5 is the number of time steps)
                            x, state = lstm_cell(x, state)
                            with tf.variable_scope('graph-sum'):
                                # At every step, sum the node output vectors per task (graph),
                                # i.e. graph_sum shape: (bs, n_hidden)
                                graph_sum = tf.segment_sum(x, fact_segments_ph)
                                out = graph_fn(graph_sum)  # shape: (bs, vocab_size)
                                outputs.append(out)
                                # softmax loss, a scalar tensor
                                log_loss = tf.reduce_mean(
                                    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=answers_ph,
                                                                                   logits=out))
                                # log_losses is a list of scalar tensors, one per time step
                                log_losses.append(log_loss)
                            # reuse the LSTM variables across time steps
                            tf.get_variable_scope().reuse_variables()

                    # scalar tensor: the sum of all regularization losses
                    reg_loss = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
                    # avg_n(log_losses) averages the per-step losses, so `loss` is a scalar tensor
                    # device: GPU:0, GPU:1, GPU:2
                    loss = avg_n(log_losses) + reg_loss

                    # device: GPU:0, GPU:1, GPU:2
                    stat = {
                        'loss': loss,  # scalar tensor
                        'grads': self.optimizer.compute_gradients(loss),
                        'log_losses': tf.stack(log_losses),  # (n_steps, )
                        'answers': answers_ph,  # (batch_size, )
                        'outputs': tf.stack(outputs),  # (n_steps, batch_size, vocab_size)
                        'task_indices': task_indices_ph  # (batch_size, )
                    }
                    towers.append(stat)

                    print('line 159: ')
                    print('"' + tf.get_variable_scope().name + '"')
                    # reuse the variables of the embeddings, encoders, and shared MLPs across devices
                    tf.get_variable_scope().reuse_variables()

        # the following 4 tensors live on CPU:0
        self.loss = avg_n([t['loss'] for t in towers])
        self.out = tf.concat([t['outputs'] for t in towers], axis=1)
        self.answers = tf.concat([t['answers'] for t in towers], axis=0)
        self.task_indices = tf.concat([t['task_indices'] for t in towers], axis=0)

        tf.summary.scalar('losses/total', self.loss)
        tf.summary.scalar('losses/reg', reg_loss)
        log_losses = avg_n([t['log_losses'] for t in towers])
        for i in range(self.n_steps):
            tf.summary.scalar('steps/%d/losses/log' % i, log_losses[i])

        avg_gradients = util.average_gradients([t['grads'] for t in towers])
        # global_step increases by 1 each time the gradients are applied
        self.train_step = self.optimizer.apply_gradients(avg_gradients, global_step=self.global_step)

        self.session.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        util.print_vars(tf.trainable_variables())

        self.train_writer = tf.summary.FileWriter(
            '/tmp/tensorboard/bAbI/%s/train/%s' % (self.revision, self.name), self.session.graph)
        self.test_writer = tf.summary.FileWriter(
            '/tmp/tensorboard/bAbI/%s/test/%s' % (self.revision, self.name), self.session.graph)
        self.summaries = tf.summary.merge_all()

        print("Starting data loaders...")
        train_mp_queue = mp.Manager().Queue(maxsize=self.qsize)
        val_mp_queue = mp.Manager().Queue(maxsize=self.qsize)

        # After the data has been loaded from disk (in self.encode_data(bAbI('en-valid-10k')) above),
        # use 4 + 1 = 5 processes to construct and encode batches, then enqueue them onto the
        # corresponding multiprocessing queue. See random_batch() and encode_batch() for details.
        data_loader_processes = [mp.Process(target=self.data_loader, args=(train_mp_queue, True))
                                 for i in range(4)]
        val_data_loader_processes = [mp.Process(target=self.data_loader, args=(val_mp_queue, False))
                                     for i in range(1)]
        # start the processes
        for p in data_loader_processes + val_data_loader_processes:
            p.daemon = True
            p.start()

        # Two threads transfer data from train_mp_queue/val_mp_queue into the TF queues
        # train_queue/val_queue. Batches in the multiprocessing queues are numpy ndarrays;
        # these threads feed them to the enqueue ops as tensors.
        # See the placeholders defined above for the format of each batch.
        queue_putter_threads = [
            threading.Thread(target=self.queue_putter, args=(train_mp_queue, self.train_enqueue_op, 'train', 1000)),
            threading.Thread(target=self.queue_putter, args=(val_mp_queue, self.val_enqueue_op, 'val', 1)),
        ]
        # start data transferring
        for t in queue_putter_threads:
            t.daemon = True
            t.start()

        train_qsize, val_qsize = 0, 0
        print("Waiting for queue to fill...")
        while train_qsize < self.qsize or val_qsize < self.qsize:
            # poll the sizes of the training and validation queues
            train_qsize = self.session.run(self.train_qsize_op)
            val_qsize = self.session.run(self.val_qsize_op)
            print('train_qsize: %d, val_qsize: %d' % (train_qsize, val_qsize), flush=True)
            time.sleep(1)
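# Toy illustration of the 'graph-sum' pooling above: tf.segment_sum adds up
# fact-node rows per graph, with the segment ids saying which graph each row
# belongs to (values here are made up).
import tensorflow as tf

node_states = tf.constant([[1., 2.], [3., 4.], [5., 6.]])  # 3 fact nodes
segments = tf.constant([0, 0, 1])  # nodes 0,1 -> graph 0; node 2 -> graph 1
pooled = tf.segment_sum(node_states, segments)
with tf.Session() as sess:
    print(sess.run(pooled))  # [[4. 6.] [5. 6.]]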
"""Question: https://leetcode.com/problems/middle-of-the-linked-list/ """ from datastruct import ListNode from util import print_vars class Solution: def middleNode(self, head: ListNode) -> ListNode: slow = fast = head while fast and fast.next: slow = slow.next fast = fast.next.next return slow if __name__ == '__main__': head = ListNode.from_list([1, 2, 3, 4, 5]) output = Solution().middleNode(head) print_vars(head, output) head = ListNode.from_list([1, 2, 3, 4, 5, 6]) output = Solution().middleNode(head) print_vars(head, output)
"""Question: https://leetcode.com/problems/powx-n/
"""
from util import print_vars


class Solution:
    # Reconstructed: the original snippet started mid-class, and the
    # `my_pow_binary` method called in __main__ was missing; it is restored
    # here as the recursive counterpart of `my_pow_binary_bit`.
    def my_pow_binary(self, x: float, n: int) -> float:
        if x == 0.0:
            return 0.0
        if n < 0:
            x, n = 1 / x, -n
        if n == 0:
            return 1.0
        half = self.my_pow_binary(x, n // 2)
        return half * half * x if n & 1 else half * half

    def my_pow_binary_bit(self, x: float, n: int) -> float:
        if x == 0.0:
            return 0.0
        if n < 0:
            x, n = 1 / x, -n
        res = 1
        while n:
            if n & 1:
                res *= x
            x *= x
            n >>= 1
        return res

    def myPow(self, x: float, n: int) -> float:
        return x**n


if __name__ == '__main__':
    x, n = 2.0, 10
    output = Solution().myPow(x, n)
    print_vars(x, n, output)

    x, n = 2.0, 10
    output = Solution().my_pow_binary(x, n)
    print_vars(x, n, output)

    x, n = 2.0, 10
    output = Solution().my_pow_binary_bit(x, n)
    print_vars(x, n, output)
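# Worked trace (appended check; assumes the Solution class above): for
# n = 10 = 0b1010, the loop squares x through x^1, x^2, x^4, x^8 and the
# result multiplies in only the set bits, so res = x^2 * x^8 = x^10.
assert Solution().my_pow_binary_bit(2.0, 10) == 1024.0
assert Solution().my_pow_binary_bit(2.0, -2) == 0.25  # negative n inverts x first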
def __init__(self):
    self.train = train = TabSeparated('tasks/parsing/data/%s/train.tsv' % self.type, self.output_length)
    self.train_iterator = self.iterator(train)

    valid = TabSeparated('tasks/parsing/data/%s/valid.tsv' % self.type, self.output_length)
    self.valid_iterator = self.iterator(valid)

    parser = {'amounts': AmountParser(self.batch_size), 'dates': DateParser(self.batch_size)}[self.type]

    print("Building graph...")
    config = tf.ConfigProto(allow_soft_placement=False)
    self.session = tf.Session(config=config)
    self.is_training_ph = tf.placeholder(tf.bool)

    source, self.targets = tf.cond(
        self.is_training_ph,
        true_fn=lambda: self.train_iterator.get_next(),
        false_fn=lambda: self.valid_iterator.get_next()
    )
    self.sources = source

    oh_inputs = tf.one_hot(source, train.n_output)  # (bs, seq, n_out)
    context = tf.zeros((self.batch_size, self.context_size), dtype=tf.float32, name=None)
    output_logits = parser.parse(oh_inputs, context, self.is_training_ph)

    with tf.variable_scope('loss'):
        # Mask out padding positions; dividing by log(2) reports the
        # cross entropy in bits rather than nats.
        mask = tf.logical_not(tf.equal(self.targets, train.pad_idx))
        label_cross_entropy = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(self.targets, output_logits, reduction=Reduction.NONE)
            * tf.to_float(mask)) / tf.log(2.)
        chars = tf.argmax(output_logits, axis=2, output_type=tf.int32)
        equal = tf.equal(self.targets, chars)
        # A sequence counts as correct only if every non-pad position matches.
        acc = tf.reduce_mean(tf.to_float(tf.reduce_all(tf.logical_or(equal, tf.logical_not(mask)), axis=1)))
        self.actual = chars

    self.loss = label_cross_entropy
    self.global_step = tf.Variable(initial_value=0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    self.train_step = self.optimizer.minimize(self.loss, global_step=self.global_step,
                                              colocate_gradients_with_ops=True)
    self.session.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    util.print_vars(tf.trainable_variables())

    if self.continue_from:
        print("Restoring " + self.continue_from + "...")
        self.saver.restore(self.session, self.continue_from)

    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('label cross entropy', label_cross_entropy)
    tf.summary.scalar('acc', acc)

    tensorboard_dir = os.environ.get('TENSORBOARD_DIR') or '/tmp/tensorboard'
    self.train_writer = tf.summary.FileWriter(
        tensorboard_dir + '/parse/%s/%s/train' % (self.type, self.experiment), self.session.graph)
    self.test_writer = tf.summary.FileWriter(
        tensorboard_dir + '/parse/%s/%s/test' % (self.type, self.experiment), self.session.graph)
    self.summaries = tf.summary.merge_all()
"""Question: https://leetcode.com/problems/implement-strstr/ """ from util import print_vars class Solution: def strStr(self, haystack: str, needle: str) -> int: if haystack == "" and needle == "": return 0 for i in range(len(haystack) - len(needle) + 1): if haystack[i:i + len(needle)] == needle: return i return -1 if __name__ == "__main__": haystack = "hello" needle = "ll" output = Solution().strStr(haystack, needle) print_vars(haystack, needle, output) assert output == 2
from typing import List

from util import print_vars


class Solution:
    def lemonadeChange(self, bills: List[int]) -> bool:
        five, ten = 0, 0
        for i in bills:
            if i == 5:
                five += 1
            elif i == 10:
                five -= 1
                ten += 1
            elif ten > 0:  # i = 20
                five -= 1
                ten -= 1
            else:  # i = 20 and no 10 in hand
                five -= 3
            if five < 0:
                return False
        return True


if __name__ == "__main__":
    bills = [5, 5, 5, 10, 20]
    output = Solution().lemonadeChange(bills)
    print_vars(bills, output)
"""Question: https://leetcode.com/problems/minimum-depth-of-binary-tree/ """ from datastruct import TreeNode from util import print_vars class Solution: def minDepth(self, root: TreeNode) -> int: if root is None: return 0 left = self.minDepth(root.left) right = self.minDepth(root.right) if left and right: return min(left, right) + 1 else: return left + right + 1 if __name__ == "__main__": root = TreeNode.deserialize("[3,9,20,null,null,15,7]") output = Solution().minDepth(root) print_vars(root, output)
"""Question: https://leetcode.com/problems/move-zeroes/
"""
from typing import List

from util import print_vars


class Solution:
    def move_zeros_save_non_zero(self, nums: List[int]) -> None:
        # Reconstructed: the original snippet was truncated and only the
        # trailing `else: nums[i] = 0` of this method survived. Copy the
        # non-zero values forward, then zero-fill the remaining slots.
        non_zero = [x for x in nums if x != 0]
        for i in range(len(nums)):
            if i < len(non_zero):
                nums[i] = non_zero[i]
            else:
                nums[i] = 0

    def move_zeros_two_pointer(self, nums: List[int]) -> None:
        # Reconstructed two-pointer variant referenced in __main__ below:
        # `slow` marks the next slot for a non-zero element.
        slow = 0
        for fast in range(len(nums)):
            if nums[fast] != 0:
                nums[slow], nums[fast] = nums[fast], nums[slow]
                slow += 1

    def moveZeroes(self, nums: List[int]) -> None:
        """
        Do not return anything, modify nums in-place instead.
        """
        for i in range(len(nums)):
            if nums[i] == 0:
                for j in range(i + 1, len(nums)):
                    if nums[j] != 0:
                        nums[i], nums[j] = nums[j], nums[i]
                        break


if __name__ == "__main__":
    nums = [0, 1, 0, 3, 12]
    print_vars(nums)
    Solution().moveZeroes(nums)
    print_vars(nums)

    nums = [0, 1, 0, 3, 12]
    print_vars(nums)
    Solution().move_zeros_save_non_zero(nums)
    print_vars(nums)

    nums = [0, 1, 0, 3, 12]
    print_vars(nums)
    Solution().move_zeros_two_pointer(nums)
    print_vars(nums)
"""Question: https://leetcode.com/problems/intersection-of-two-arrays/ """ from typing import List from util import print_vars class Solution: def intersection(self, nums1: List[int], nums2: List[int]) -> List[int]: return list(set(nums1) & set(nums2)) if __name__ == '__main__': nums1, nums2 = [1, 2, 2, 1], [2, 2] output = Solution().intersection(nums1, nums2) print_vars(nums1, nums2, output) nums1, nums2 = [4, 9, 5], [9, 4, 9, 8, 4] output = Solution().intersection(nums1, nums2) print_vars(nums1, nums2, output)
"""Question: https://leetcode.com/problems/longest-common-prefix/ """ from typing import List from util import print_vars class Solution: def longestCommonPrefix(self, strs: List[str]) -> str: if not strs: return "" prefix = strs[0] for s in strs: while prefix and prefix != s[:len(prefix)]: prefix = prefix[:-1] if not prefix: return "" return prefix if __name__ == '__main__': strs = ["flower", "flow", "flight"] output = Solution().longestCommonPrefix(strs) print_vars(strs, output)
def decode_iteratively(self, example_generator, total, names_to_types, ssi_list, hps):
    attn_vis_idx = 0
    for example_idx, example in enumerate(tqdm(example_generator, total=total)):
        raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, corefs, groundtruth_article_lcs_paths_list = util.unpack_tf_example(
            example, names_to_types)
        article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
        groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
        groundtruth_summ_sent_tokens = [sent.split(' ') for sent in groundtruth_summ_sents[0]]

        if ssi_list is None:  # this is if we are doing the upper bound evaluation (ssi_list comes straight from the groundtruth)
            sys_ssi = groundtruth_similar_source_indices_list
            sys_alp_list = groundtruth_article_lcs_paths_list
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 2)
            sys_ssi, sys_alp_list = util.replace_empty_ssis(sys_ssi, raw_article_sents, sys_alp_list=sys_alp_list)
        else:
            gt_ssi, sys_ssi, ext_len, sys_token_probs_list = ssi_list[example_idx]
            sys_alp_list = ssi_functions.list_labels_from_probs(sys_token_probs_list, FLAGS.tag_threshold)
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 1)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 1)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 2)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 2)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 2)
            # if gt_ssi != groundtruth_similar_source_indices_list:
            #     raise Exception('Example %d has different groundtruth source indices: ' + str(groundtruth_similar_source_indices_list) + ' || ' + str(gt_ssi))

        if FLAGS.dataset_name == 'xsum':
            sys_ssi = [sys_ssi[0]]

        final_decoded_words = []
        final_decoded_outputs = ''
        best_hyps = []
        highlight_html_total = '<u>System Summary</u><br><br>'
        for ssi_idx, ssi in enumerate(sys_ssi):
            # selected_article_lcs_paths = None
            selected_article_lcs_paths = sys_alp_list[ssi_idx]
            ssi, selected_article_lcs_paths = util.make_ssi_chronological(ssi, selected_article_lcs_paths)
            selected_article_lcs_paths = [selected_article_lcs_paths]
            selected_raw_article_sents = util.reorder(raw_article_sents, ssi)
            selected_article_text = ' '.join([' '.join(sent) for sent in util.reorder(article_sent_tokens, ssi)])
            selected_doc_indices_str = '0 ' * len(selected_article_text.split())
            if FLAGS.upper_bound:
                selected_groundtruth_summ_sent = [[groundtruth_summ_sents[0][ssi_idx]]]
            else:
                selected_groundtruth_summ_sent = groundtruth_summ_sents

            batch = create_batch(selected_article_text, selected_groundtruth_summ_sent,
                                 selected_doc_indices_str, selected_raw_article_sents,
                                 selected_article_lcs_paths, FLAGS.batch_size, hps, self._vocab)

            original_article = batch.original_articles[0]  # string
            original_abstract = batch.original_abstracts[0]  # string
            article_withunks = data.show_art_oovs(original_article, self._vocab)  # string
            abstract_withunks = data.show_abs_oovs(original_abstract, self._vocab,
                                                   (batch.art_oovs[0] if FLAGS.pointer_gen else None))  # string

            if FLAGS.first_intact and ssi_idx == 0:
                decoded_words = selected_article_text.strip().split()
                decoded_output = selected_article_text
            else:
                decoded_words, decoded_output, best_hyp = decode_example(self._sess, self._model, self._vocab,
                                                                         batch, example_idx, hps)
                best_hyps.append(best_hyp)
            final_decoded_words.extend(decoded_words)
            final_decoded_outputs += decoded_output

            if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
                min_matched_tokens = 2
                selected_article_sent_tokens = [util.process_sent(sent) for sent in selected_raw_article_sents]
                highlight_summary_sent_tokens = [decoded_words]
                highlight_ssi_list, lcs_paths_list, highlight_article_lcs_paths_list, highlight_smooth_article_lcs_paths_list = ssi_functions.get_simple_source_indices_list(
                    highlight_summary_sent_tokens, selected_article_sent_tokens, None, 2, min_matched_tokens)
                highlighted_html = ssi_functions.html_highlight_sents_in_article(
                    highlight_summary_sent_tokens, highlight_ssi_list, selected_article_sent_tokens,
                    lcs_paths_list=lcs_paths_list,
                    article_lcs_paths_list=highlight_smooth_article_lcs_paths_list)
                highlight_html_total += highlighted_html + '<br>'

            if FLAGS.attn_vis and example_idx < 200:
                self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words,
                                       best_hyp.attn_dists, best_hyp.p_gens,
                                       attn_vis_idx)  # write info to .json file for visualization tool
                attn_vis_idx += 1

            if len(final_decoded_words) >= 100:
                break

        gt_ssi_list, gt_alp_list = util.replace_empty_ssis(groundtruth_similar_source_indices_list,
                                                           raw_article_sents,
                                                           sys_alp_list=groundtruth_article_lcs_paths_list)
        highlight_html_gt = '<u>Reference Summary</u><br><br>'
        for ssi_idx, ssi in enumerate(gt_ssi_list):
            selected_article_lcs_paths = gt_alp_list[ssi_idx]
            try:
                ssi, selected_article_lcs_paths = util.make_ssi_chronological(ssi, selected_article_lcs_paths)
            except:
                util.print_vars(ssi, example_idx, selected_article_lcs_paths)
                raise
            selected_raw_article_sents = util.reorder(raw_article_sents, ssi)

            if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
                min_matched_tokens = 2
                selected_article_sent_tokens = [util.process_sent(sent) for sent in selected_raw_article_sents]
                highlight_summary_sent_tokens = [groundtruth_summ_sent_tokens[ssi_idx]]
                highlight_ssi_list, lcs_paths_list, highlight_article_lcs_paths_list, highlight_smooth_article_lcs_paths_list = ssi_functions.get_simple_source_indices_list(
                    highlight_summary_sent_tokens, selected_article_sent_tokens, None, 2, min_matched_tokens)
                highlighted_html = ssi_functions.html_highlight_sents_in_article(
                    highlight_summary_sent_tokens, highlight_ssi_list, selected_article_sent_tokens,
                    lcs_paths_list=lcs_paths_list,
                    article_lcs_paths_list=highlight_smooth_article_lcs_paths_list)
                highlight_html_gt += highlighted_html + '<br>'

        if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
            self.write_for_human(raw_article_sents, groundtruth_summ_sents, final_decoded_words, example_idx)
            highlight_html_total = ssi_functions.put_html_in_two_columns(highlight_html_total, highlight_html_gt)
            ssi_functions.write_highlighted_html(highlight_html_total, self._highlight_dir, example_idx)

        # if example_idx % 100 == 0:
        #     attn_dir = os.path.join(self._decode_dir, 'attn_vis_data')
        #     attn_selections.process_attn_selections(attn_dir, self._decode_dir, self._vocab)

        rouge_functions.write_for_rouge(groundtruth_summ_sents, None, example_idx, self._rouge_ref_dir,
                                        self._rouge_dec_dir, decoded_words=final_decoded_words,
                                        log=False)  # write ref summary and decoded summary to file, to eval with pyrouge later
        # if FLAGS.attn_vis:
        #     self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words, best_hyp.attn_dists, best_hyp.p_gens, example_idx)  # write info to .json file for visualization tool
        example_idx += 1  # this is how many examples we've decoded

    logging.info("Decoder has finished reading dataset for single_pass.")
    logging.info("Output has been saved in %s and %s.", self._rouge_ref_dir, self._rouge_dec_dir)
    if len(os.listdir(self._rouge_ref_dir)) != 0:
        if FLAGS.dataset_name == 'xsum':
            l_param = 100
        else:
            l_param = 100
        logging.info("Now starting ROUGE eval...")
        results_dict = rouge_functions.rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir, l_param=l_param)
        rouge_functions.rouge_log(results_dict, self._decode_dir)
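# Hedged sketch (an assumption, not the real util.enforce_sentence_limit):
# the calls above read as capping each source-indices tuple at a maximum
# number of source sentences, e.g.:
def enforce_sentence_limit_sketch(ssi_list, sentence_limit):
    return [ssi[:sentence_limit] for ssi in ssi_list]

print(enforce_sentence_limit_sketch([(0, 4), (2,), (1, 3)], 1))  # [(0,), (2,), (1,)]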
"""Question: https://leetcode.com/problems/find-common-characters/ """ from typing import List from util import print_vars class Solution: def commonChars(self, A: List[str]) -> List[str]: from collections import Counter cnt = Counter(A[0]) for i in A: cnt &= Counter(i) return list(cnt.elements()) if __name__ == '__main__': a = ["bella", "label", "roller"] output = Solution().commonChars(a) print_vars(a, output)
"""Question: https://leetcode.com/problems/sum-of-even-numbers-after-queries/ """ from typing import List from util import print_vars class Solution: def sumEvenAfterQueries(self, A: List[int], queries: List[List[int]]) -> List[int]: res = [] sum_even = sum(i for i in A if i % 2 == 0) for val, index in queries: if A[index] % 2 == 0: sum_even -= A[index] A[index] += val if A[index] % 2 == 0: sum_even += A[index] res.append(sum_even) return res if __name__ == "__main__": A, queries = [1, 2, 3, 4], [[1, 0], [-3, 1], [-4, 0], [2, 3]] output = Solution().sumEvenAfterQueries(A, queries) print_vars(A, queries, output)