Example #1
def get_root_elements():
	con, cur = get_con()
	from search import shorten
	# Root topics have no parent: ParentID is NULL or the sentinel value 0.
	query = 'select TopicID, TopicName, IsLeaf, SerialPageID from Topics where ParentID is NULL ' \
	        'or ParentID = 0 order by SortID'
	cur.execute(query)
	result = cur.fetchall()
	root = []
	if result is not None:
		for row in result:
			short_title = shorten(row[1], 15)
			root.append({'TopicID': row[0], 'TopicName': short_title, 'IsLeaf': row[2], 'SerialPageID': row[3]})

	con.close()
	return root
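These examples lean on two helpers defined elsewhere in the project: get_con(), which returns an open database connection and cursor, and shorten(), which truncates a topic title for display. A minimal sketch of what they might look like; the %s placeholders suggest a MySQL-style driver, and the connection settings below are illustrative assumptions, not the project's actual configuration:

import pymysql  # assumption: a MySQL driver, suggested by the %s placeholders

def get_con():
	# Hypothetical connection settings; the real ones live elsewhere in the project.
	con = pymysql.connect(host='localhost', user='user', password='secret', database='topics')
	return con, con.cursor()

def shorten(text, max_len):
	# Truncate text to max_len characters, appending an ellipsis when cut.
	return text if len(text) <= max_len else text[:max_len].rstrip() + '...'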
Example #2
def get_tree_head(topicID):
	con, cur = get_con()
	from search import shorten
	query = 'select TopicID, TopicName, Path, IsLeaf from Topics where TopicID = %s'
	cur.execute(query, (topicID,))
	row = cur.fetchone()
	head = []
	if row is not None:
		# Path holds the ancestor chain as '/'-separated TopicIDs;
		# look each one up to build the breadcrumb trail.
		path = row[2]
		for tid in path.split('/'):
			cur.execute(query, (tid,))
			row = cur.fetchone()
			if row is not None:
				head.append({'TopicID': row[0], 'TopicName': shorten(row[1], 25), 'IsLeaf': row[3]})
	con.close()
	return head
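For illustration, if the Path column stores the ancestor chain as '/'-separated TopicIDs (the data below is hypothetical), the result reads as a breadcrumb from the root down to the requested topic:

# Hypothetical data: topic 12 has Path '1/4/12', so get_tree_head(12)
# returns something like:
# [{'TopicID': 1, 'TopicName': 'Science', 'IsLeaf': 0},
#  {'TopicID': 4, 'TopicName': 'Physics', 'IsLeaf': 0},
#  {'TopicID': 12, 'TopicName': 'Optics', 'IsLeaf': 1}]
head = get_tree_head(12)
print(' > '.join(t['TopicName'] for t in head))  # Science > Physics > Optics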
Example #3
def get_tree_kids(topicID):
	con, cur = get_con()
	from search import shorten
	query = 'select TopicID, TopicName, IsLeaf, SerialPageID from Topics where parentID = %s order by SortID'
	cur.execute(query, (topicID, ))
	result = cur.fetchall()
	body = []
	if result is not None:
		for row in result:
			short_title = shorten(row[1], 15)
			body.append({'TopicID': row[0], 'TopicName': short_title, 'IsLeaf': row[2], 'SerialPageID': row[3]})

	con.close()
	return body
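The three helpers compose naturally: get_root_elements() seeds the top level, get_tree_kids() expands a node on demand, and get_tree_head() supplies a breadcrumb for the current topic. A small illustrative driver (not part of the original code) that prints the whole tree depth-first:

def print_tree(topic_id=None, depth=0):
	# Hypothetical driver built on the helpers above.
	rows = get_root_elements() if topic_id is None else get_tree_kids(topic_id)
	for row in rows:
		print('  ' * depth + row['TopicName'])
		if not row['IsLeaf']:
			print_tree(row['TopicID'], depth + 1)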
Example #4
    def decode(self):
        """Decode examples until data is exhausted (if FLAGS.single_pass) and return, or decode indefinitely, loading latest checkpoint at regular intervals"""
        t0 = time.time()
        counter = 0
        while True:
            batch = self._batcher.next_batch()  # 1 example repeated across batch
            if batch is None:  # finished decoding dataset in single_pass mode
                assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
                tf.logging.info(
                    "Decoder has finished reading dataset for single_pass.")
                tf.logging.info(
                    "Output has been saved in %s and %s. Now starting ROUGE eval...",
                    self._rouge_ref_dir, self._rouge_dec_dir)
                results_dict = rouge_eval(self._rouge_ref_dir,
                                          self._rouge_dec_dir)
                rouge_log(results_dict, self._decode_dir)
                return

            original_article = batch.original_articles[0]  # string
            original_title = batch.original_title[0]
            original_abstract = batch.original_abstracts[0]  # string
            # list of strings
            original_abstract_sents = batch.original_abstracts_sents[0]

            article_withunks = data.show_art_oovs(original_article,
                                                  self._vocab)  # string
            abstract_withunks = data.show_abs_oovs(
                original_abstract, self._vocab,
                (batch.art_oovs[0] if FLAGS.pointer_gen else None))  # string

            # Run beam search to get best Hypothesis
            best_hyp = beam_search.run_beam_search(self._sess, self._model,
                                                   self._vocab, batch)

            # Extract the output ids from the hypothesis and convert back to words
            output_ids = [int(t) for t in best_hyp.tokens[1:]]
            decoded_words = data.outputids2words(
                output_ids, self._vocab,
                (batch.art_oovs[0] if FLAGS.pointer_gen else None))

            # Remove the [STOP] token from decoded_words, if necessary
            try:
                # index of the (first) [STOP] symbol
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                pass  # no [STOP] token produced; keep the full output
            decoded_output = ' '.join(decoded_words)  # single string

            # Truncate overlong summaries, leaving room for the ellipsis
            # and the URL appended below.
            if len(decoded_output) >= 265:
                decoded_output = decoded_output[:260] + " ..."

            # Take the title and then process it to get a shortened URL of the paper.
            # Add this URL to the end of decoded_output
            url = shorten(original_title)
            decoded_output += ' ' + url

            if FLAGS.single_pass:
                # write ref summary and decoded summary to file, to eval with pyrouge later
                print_results(article_withunks, abstract_withunks,
                              decoded_output)  # log output to screen
                self.write_for_rouge(original_abstract_sents, decoded_words,
                                     counter)
                counter += 1  # this is how many examples we've decoded
            else:
                print_results(article_withunks, abstract_withunks,
                              decoded_output)  # log output to screen
                # write info to .json file for visualization tool
                self.write_for_attnvis(article_withunks, abstract_withunks,
                                       decoded_words, best_hyp.attn_dists,
                                       best_hyp.p_gens)

                # Check if SECS_UNTIL_NEW_CKPT has elapsed; if so return so we can load a new checkpoint
                t1 = time.time()
                if t1 - t0 > SECS_UNTIL_NEW_CKPT:
                    tf.logging.info(
                        'We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint',
                        t1 - t0)
                    _ = util.load_ckpt(self._saver, self._sess)
                    t0 = time.time()
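The loop above relies on a print_results() helper that is not shown here. A plausible minimal version, assuming it simply logs the three strings via tf.logging (a sketch, not the project's actual implementation):

import tensorflow as tf

def print_results(article, abstract, decoded_output):
    # Sketch of the assumed logging helper; the exact labels are guesses.
    print('---------------------------------------------------------------')
    tf.logging.info('ARTICLE: %s', article)
    tf.logging.info('REFERENCE SUMMARY: %s', abstract)
    tf.logging.info('GENERATED SUMMARY: %s', decoded_output)
    print('---------------------------------------------------------------')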