def get_root_elements():
    """Return the top-level topics as a list of dicts, ordered by SortID.

    A topic is a root when its parent id is NULL or 0.  Each entry carries
    TopicID, TopicName (shortened to 15 chars), IsLeaf and SerialPageID.
    """
    con, cur = get_con()
    sql = 'select TopicID, TopicName, IsLeaf, SerialPageID from Topics where parentID is NULL ' \
          'or ParentID = 0 order by SortID'
    cur.execute(sql)
    rows = cur.fetchall()
    root = []
    if rows is not None:
        root = [
            {'TopicID': row[0],
             'TopicName': shorten(row[1], 15),
             'IsLeaf': row[2],
             'SerialPageID': row[3]}
            for row in rows
        ]
    con.close()
    return root
def get_tree_head(topicID):
    """Return the breadcrumb trail (ancestor chain) for *topicID*.

    Reads the topic's slash-separated ``Path`` column, then re-queries each
    ancestor id in that path so names/flags come from the canonical Topics
    rows.  Returns a list of dicts with TopicID, TopicName (shortened to
    25 chars) and IsLeaf; returns [] when the topic does not exist.
    """
    con, cur = get_con()
    query = 'select TopicID, TopicName, Path, IsLeaf from Topics where TopicID = %s'
    head = []
    try:
        cur.execute(query, (topicID,))
        row = cur.fetchone()
        if row is not None:
            # Path is assumed to look like 'id1/id2/.../idN' — matches how
            # it is split below; TODO confirm against the writer of Path.
            path = row[2]
            for tid in path.split('/'):
                cur.execute(query, (tid,))
                row = cur.fetchone()
                if row is not None:
                    head.append({'TopicID': row[0],
                                 'TopicName': shorten(row[1], 25),
                                 'IsLeaf': row[3]})
    finally:
        # Bug fix: the original never closed the connection, leaking it on
        # every call (the sibling helpers all call con.close()).
        con.close()
    return head
def get_tree_kids(topicID):
    """Return the direct children of *topicID*, ordered by SortID.

    Each child is a dict with TopicID, TopicName (shortened to 15 chars),
    IsLeaf and SerialPageID.  Returns [] when the topic has no children.
    """
    con, cur = get_con()
    # Local import kept from the original (presumably avoids a circular
    # import with the search module — verify before hoisting to file top).
    from search import shorten
    query = 'select TopicID, TopicName, IsLeaf, SerialPageID from Topics where parentID = %s order by SortID'
    cur.execute(query, (topicID, ))
    result = cur.fetchall()
    body = []
    if result is not None:
        # Fix: dropped the original's `body_count`, which was incremented
        # but never read; the append loop becomes a comprehension.
        body = [{'TopicID': row[0],
                 'TopicName': shorten(row[1], 15),
                 'IsLeaf': row[2],
                 'SerialPageID': row[3]}
                for row in result]
    con.close()
    return body
def decode(self):
    """Decode examples until data is exhausted (if FLAGS.single_pass) and
    return, or decode indefinitely, loading latest checkpoint at regular
    intervals.

    Each iteration pulls one batch (one example repeated across the batch),
    runs beam search, post-processes the best hypothesis into a string, and
    either logs it for ROUGE eval (single_pass) or writes attention-vis
    JSON (continuous mode).
    """
    t0 = time.time()  # wall-clock anchor for the checkpoint-reload timer
    counter = 0       # number of examples decoded so far (single_pass only)
    while True:
        batch = self._batcher.next_batch()  # 1 example repeated across batch
        if batch is None:
            # Batcher returning None signals the dataset is exhausted,
            # which is only valid in single_pass mode.
            assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            tf.logging.info(
                "Decoder has finished reading dataset for single_pass.")
            tf.logging.info(
                "Output has been saved in %s and %s. Now starting ROUGE eval...",
                self._rouge_ref_dir, self._rouge_dec_dir)
            # Run ROUGE over the written reference/decoded dirs, then stop.
            results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
            rouge_log(results_dict, self._decode_dir)
            return

        original_article = batch.original_articles[0]  # string
        # NOTE(review): singular `original_title` vs plural siblings —
        # presumably the batch attribute really is named this; confirm.
        original_title = batch.original_title[0]
        original_abstract = batch.original_abstracts[0]  # string
        # list of strings
        original_abstract_sents = batch.original_abstracts_sents[0]

        # Render article/abstract with OOV tokens made visible (string each).
        article_withunks = data.show_art_oovs(original_article, self._vocab)
        abstract_withunks = data.show_abs_oovs(
            original_abstract, self._vocab,
            (batch.art_oovs[0] if FLAGS.pointer_gen else None))

        # Run beam search to get best Hypothesis
        best_hyp = beam_search.run_beam_search(self._sess, self._model,
                                               self._vocab, batch)

        # Extract the output ids from the hypothesis (skipping the start
        # token) and convert back to words.
        output_ids = [int(t) for t in best_hyp.tokens[1:]]
        decoded_words = data.outputids2words(
            output_ids, self._vocab,
            (batch.art_oovs[0] if FLAGS.pointer_gen else None))

        # Remove the [STOP] token from decoded_words, if necessary.
        try:
            # index of the (first) [STOP] symbol
            fst_stop_idx = decoded_words.index(data.STOP_DECODING)
            decoded_words = decoded_words[:fst_stop_idx]
        except ValueError:
            # No [STOP] emitted — keep the full sequence unchanged.
            decoded_words = decoded_words
        decoded_output = ' '.join(decoded_words)  # single string
        # Truncate long outputs to 260 chars plus an ellipsis marker.
        if len(decoded_output) >= 265:
            decoded_output = decoded_output[:260] + " ..."

        # Take the title and then process it to get a shortened URL of the
        # paper, and append that URL to the end of decoded_output.
        url = shorten(original_title)
        decoded_output += ' ' + url

        if FLAGS.single_pass:
            # Write ref summary and decoded summary to file, to eval with
            # pyrouge later.
            print_results(article_withunks, abstract_withunks,
                          decoded_output)  # log output to screen
            self.write_for_rouge(original_abstract_sents, decoded_words,
                                 counter)
            counter += 1  # this is how many examples we've decoded
        else:
            print_results(article_withunks, abstract_withunks,
                          decoded_output)  # log output to screen
            # Write info to .json file for the attention visualization tool.
            self.write_for_attnvis(article_withunks, abstract_withunks,
                                   decoded_words, best_hyp.attn_dists,
                                   best_hyp.p_gens)

            # Check if SECS_UNTIL_NEW_CKPT has elapsed; if so return so we
            # can load a new checkpoint.
            t1 = time.time()
            if t1 - t0 > SECS_UNTIL_NEW_CKPT:
                tf.logging.info(
                    'We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint',
                    t1 - t0)
                _ = util.load_ckpt(self._saver, self._sess)
                t0 = time.time()