コード例 #1
0
def get_stats(dataset) :
	"""Print LaTeX table rows with the node and edge counts of one dataset.

	Opens the database named `dataset`, runs a series of `count` queries and
	prints the results to stdout as rows of a four-column LaTeX table whose
	header cell is `TEX_NAMES[dataset]`.

	The keyword table queried is `doc_ngrams` when `dataset` is 'aminer' and
	`doc_kws` otherwise. Only keyword rows whose `value` is at least
	`config.MIN_NGRAM_TFIDF` are counted as pubs-keywords edges.
	"""
	db = MyMySQL(db=dataset)

	def scalar(sql) :
		# Each query below is a single aggregate: take column 0 of row 0.
		return db.select_query(sql)[0][0]

	if dataset == 'aminer' :
		kw_table = 'doc_ngrams'
	else :
		kw_table = 'doc_kws'

	# Node counts.
	npubs = scalar("select count(*) from papers")
	nauthors = scalar("select count(distinct author_id) from authorships")
	nkws = scalar("select count(distinct ngram) from %s" % kw_table)
	nvenues = scalar("select count(distinct venue_id) from papers")

	# Edge counts.
	pubs_pubs = scalar("select count(*) from graph")
	auths_auths = scalar("select count(*) from coauthorships")
	pubs_authors = scalar("select count(*) from authorships")
	pubs_kws = scalar("select count(*) from %s where value>=%f" % (kw_table, config.MIN_NGRAM_TFIDF))

	# Single-argument print(...) produces the same output as the print statement.
	print("\\hline")
	print("\\multicolumn{4}{|c|}{%s} \\\\" % TEX_NAMES[dataset])
	print("\\hline")
	print("pubs ($N_p$) & %d & pubs-pubs & %d \\\\" % (npubs, pubs_pubs))
	print("authors   & %d & authors-authors & %d  \\\\" % (nauthors, auths_auths))
	print("keywords ($N_k$)  & %d  & pubs-keywords   & %d \\\\" % (nkws, pubs_kws))
	print("venues ($N_v$)    & %d     & pubs-authors  & %d \\\\" % (nvenues, pubs_authors))
コード例 #2
0
def write_surveys_queries_file(prefix, npubs=110) :
	"""Write a tab-separated file of survey papers and their derived queries.

	Selects papers whose title contains 'survey' and whose year lies between
	1950 and 2014, keeps those for which `utils.get_cited` returns at least
	20 entries, and writes one line per kept paper to `<prefix>.txt`:

		pub_id <TAB> year <TAB> title (stripped) <TAB> to_query(title)

	Args:
		prefix: Output path without extension; ".txt" is appended.
		npubs: Writing stops once this many lines have been written. The check
			runs after each write, so a value below 1 still allows one line.

	Side effects:
		Adds "survey" to the module-level `_stop_words_` set; the entry is not
		removed afterwards. Prints the number of candidate rows to stdout.
	"""
	db = MyMySQL(db=config.DB_NAME)
	candidates = db.select_query('''SELECT id, substring(title,1,140), year
																	FROM papers
																	WHERE title LIKE '%survey%' AND (year IS NOT NULL)
																	AND (year BETWEEN 1950 AND 2014)''')

	print("Candidates: %s" % len(candidates))

	# Every candidate title contains 'survey'; presumably adding it to the
	# stop words makes to_query() drop it -- verify against to_query().
	_stop_words_.add("survey")

	n = 0
	# The context manager closes the file even if a lookup or the formatting
	# below raises, so the handle is not leaked and buffered lines are flushed.
	with open(prefix + ".txt", "w") as out :
		for pub_id, title, year in candidates :

			# Skip papers with fewer than 20 entries from utils.get_cited.
			if len(utils.get_cited(db, pub_id)) < 20 :
				continue

			query = to_query(title)
			out.write("%s\t%d\t%s\t%s\n" % (pub_id, year, title.strip(), query))

			n += 1
			if n >= npubs :
				break
コード例 #3
0
def get_cited_papers(doc_id) :
	"""Return the resolved citations made by the paper `doc_id`.

	Joins `citations`, `citation_groups` and `refs`, and returns whatever
	`select_query` yields for the columns (cited_paper_id, start, end), where
	`start` and `end` come from the citation group. References whose
	`cited_paper_id` is NULL are excluded.
	"""
	connection = MyMySQL(db=DB_NAME, user=DB_USER, passwd=DB_PASSWD)

	# NOTE(review): doc_id is interpolated straight into the SQL text; if it can
	# come from untrusted input this should become a parameterized query.
	sql = """SELECT r.cited_paper_id, g.start, g.end 
														FROM citations c 
														JOIN citation_groups g ON c.group_id = g.id 
														JOIN refs r ON c.ref_id=r.id 
														WHERE c.paper_id='%s' AND r.cited_paper_id IS NOT NULL""" % doc_id

	return connection.select_query(sql)
コード例 #4
0
    def __init__(self):
        """Open the index and preload citation counts.

        Sets `self.index` to an `Index` over `config.INDEX_PATH`, and
        `self.ncitations` to a dict mapping each `cited` value of the
        `graph` table to the number of rows carrying that value.
        """
        self.index = Index(config.INDEX_PATH)

        # One grouped query up front, so later lookups are plain dict
        # accesses instead of a database round trip each.
        connection = MyMySQL(
            db=config.DB_NAME, user=config.DB_USER, passwd=config.DB_PASSWD
        )
        count_sql = "SELECT cited, COUNT(*) from graph GROUP BY cited"
        self.ncitations = dict(connection.select_query(count_sql))
コード例 #5
0
def get_citing_papers(doc_id) :
	"""Return the citation spans of `doc_id`, grouped by citing paper.

	Joins `refs`, `citations` and `citation_groups` for references whose
	`cited_paper_id` equals `doc_id`.

	Returns:
		A `defaultdict(list)` mapping each citing paper id to a list of
		(start, end) tuples taken from the matching citation groups.
	"""
	connection = MyMySQL(db=DB_NAME, user=DB_USER, passwd=DB_PASSWD)

	# NOTE(review): doc_id is interpolated straight into the SQL text; if it can
	# come from untrusted input this should become a parameterized query.
	sql = """SELECT r.paper_id, 
										cg.start, cg.end 
										FROM refs r 
										JOIN citations c ON r.id=c.ref_id 
										JOIN citation_groups cg ON c.group_id=cg.id 
										WHERE cited_paper_id='%s' """ % doc_id

	# One row per citation; collect the spans under their citing paper.
	spans_by_paper = defaultdict(list)
	for paper_id, span_start, span_end in connection.select_query(sql) :
		spans_by_paper[paper_id].append((span_start, span_end))

	return spans_by_paper