def res_kmeans(query):
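	"""Render the k-means visualization for a results query of '+'-joined PMIDs; POST runs vis_kmeans for the chosen k."""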
	form = visOptions()

	base_url = app.config['BASE_URL']

	if request.method == 'POST':
		kmr_conn = connection() #k-means results database connection
		pmid_list = query.split('+') #list of string pmids
		pmid = pmid_list[0]
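		#cache files are sharded into dirs by the first six digits of the first pmid (e.g. 189/528/)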
		prefix = pmid[0:3]
		suffix = pmid[3:6]

		filename2 = prefix + '/' + suffix + '/' + "lemma_samples_" + str(query) + ".pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		k_clusters = form.k_val.data #2, 3, 4, or 5
		logging.info("the k value is " + str(k_clusters))
		(x0_coordinates, y0_coordinates, z0_coordinates,
		 x1_coordinates, y1_coordinates, z1_coordinates,
		 x2_coordinates, y2_coordinates, z2_coordinates,
		 x3_coordinates, y3_coordinates, z3_coordinates,
		 x4_coordinates, y4_coordinates, z4_coordinates,
		 titles0, titles1, titles2, titles3, titles4) = vis_kmeans(lemma_samples, k_clusters, kmr_conn)
		kmr_conn.close()
		return render_template('res_kmeans1.html', base_url=base_url, query=query,
							   x0_coordinates=x0_coordinates, y0_coordinates=y0_coordinates, z0_coordinates=z0_coordinates,
							   x1_coordinates=x1_coordinates, y1_coordinates=y1_coordinates, z1_coordinates=z1_coordinates,
							   x2_coordinates=x2_coordinates, y2_coordinates=y2_coordinates, z2_coordinates=z2_coordinates,
							   x3_coordinates=x3_coordinates, y3_coordinates=y3_coordinates, z3_coordinates=z3_coordinates,
							   x4_coordinates=x4_coordinates, y4_coordinates=y4_coordinates, z4_coordinates=z4_coordinates,
							   titles0=titles0, titles1=titles1, titles2=titles2, titles3=titles3, titles4=titles4)
	else:
		#Not running any default vis because it's slow
		return render_template('res_kmeans1.html', query=query, base_url=base_url)
def cogeclustermap():
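	"""Render the clustermap visualization for the hard-coded example query."""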
	query = '18952863+18269575'
	form = nesOptions()
	if request.method == 'POST':
		cm_conn = connection()
		nes_categories = request.form.getlist('n_categories')
		logging.info(nes_categories)
		w_number = form.w_words.data
		logging.info("the w value is " + str(w_number))

		filename1 = "189/528/nes_18952863+18269575.pickle"
		nes_file = os.path.join(app.config['PATH_TO_CACHE'], filename1)
		with open(nes_file, "rb") as f:
			nes_samples = pickle.load(f)

		filename2 = "189/528/lemma_samples_18952863+18269575.pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		saveName = vis_clustermap(lemma_samples, nes_samples, nes_categories, w_number, query, cm_conn)
		#FIXME: why return the full file path if the html only looks in the "static" dir?
		image = "clustermaps/cm_"+str(query)+".png"
		cm_conn.close()
		return render_template('coge_clustermap.html', image=image)
	else:
		#show the default data :D
		image = "clustermaps/cm_18952863+18269575.png"
		return render_template('coge_clustermap.html', image=image)
def cogeheatmap():
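	"""Render the heatmap visualization for the hard-coded example query."""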
	form = nesOptions()
	query = '18952863+18269575'
	if request.method == 'POST':
		hm_conn = connection()
		#TODO: need to get the complete list of NES categories
		nes_categories = request.form.getlist('n_categories')
		logging.info(nes_categories)
		w_number = form.w_words.data
		logging.info("the w value is "+str(w_number))

		filename1 = "189/528/nes_18952863+18269575.pickle"
		nes_file = os.path.join(app.config['PATH_TO_CACHE'], filename1)
		with open(nes_file, "rb") as f:
			nes_samples = pickle.load(f)

		filename2 = "189/528/lemma_samples_18952863+18269575.pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		x_docs, y_words, z_counts, titles = vis_heatmap(lemma_samples, nes_samples, nes_categories, w_number, hm_conn)
		len_x_docs = list(range(len(x_docs)))
		popup = '<div class="alert alert-warning alert-dismissible" role="alert"><button type="button" class="close" data-dismiss="alert" aria-label="Close"><span aria-hidden="true">&times;</span></button><strong>[ ! ]</strong> Displaying results for N = '+str(w_number)+' from categories: '+str(nes_categories)+'</div>'
		hm_conn.close()
		return render_template('coge_heatmap2.html', z_counts=z_counts, x_docs=x_docs, y_words=y_words, titles=titles, len_x_docs=len_x_docs, popup=popup)
	else:
		#display the default data :D
		return render_template('coge_heatmap1.html')
def cogecrawl():
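	"""Render the dashboard with the unique citations for the hard-coded example query."""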
	query = '18952863+18269575'
	coge_conn = connection()
	citations_with_links = db_unique_citations_retrieval(query, coge_conn) #unique
	unique_publications = db_unique_citations_number(query, coge_conn)
	coge_conn.close()
	base_url = app.config['BASE_URL']
	return render_template("dashboard.html", citations_with_links=citations_with_links, unique_publications=unique_publications,
						   base_url=base_url)
def res_heatmap(query):
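	"""Render the heatmap visualization for a results query of '+'-joined PMIDs."""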
	form = nesOptions()

	base_url = app.config['BASE_URL']

	hmr_conn = connection() #heatmap results database connection
	if request.method == 'POST':
		nes_categories = request.form.getlist('n_categories')
		logging.info(nes_categories)
		w_number = form.w_words.data
		logging.info("the w value is "+str(w_number))

		pmid_list = query.split('+')
		pmid = pmid_list[0]
		prefix = pmid[0:3]
		suffix = pmid[3:6]

		filename1 = prefix + '/' + suffix + '/' + "nes_" + str(query) + ".pickle"
		nes_file = os.path.join(app.config['PATH_TO_CACHE'], filename1)
		with open(nes_file, "rb") as f:
			nes_samples = pickle.load(f)

		filename2 = prefix + '/' + suffix + '/' + "lemma_samples_" + str(query) + ".pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		x_docs, y_words, z_counts, titles = vis_heatmap(lemma_samples, nes_samples, nes_categories, w_number, hmr_conn)
		popup = '<div class="alert alert-warning alert-dismissible" role="alert"><button type="button" class="close" data-dismiss="alert" aria-label="Close"><span aria-hidden="true">&times;</span></button><strong>[ ! ]</strong> Displaying results for N = '+str(w_number)+' from categories: '+str(nes_categories)+'</div>'
		hmr_conn.close()
		return render_template('results_heatmap.html', query=query, z_counts=z_counts, x_docs=x_docs, y_words=y_words, popup=popup, titles=titles, base_url=base_url)
	else:
		nes_categories = ['BioProcess', 'CellLine', 'Cellular_component', 'Family', 'Gene_or_gene_product', 'Organ', 'Simple_chemical', 'Site', 'Species', 'TissueType']
		w_number = 10
		logging.info("the w value is "+str(w_number))

		pmid_list = query.split('+')
		pmid = pmid_list[0]
		prefix = pmid[0:3]
		suffix = pmid[3:6]

		filename1 = prefix + '/' + suffix + '/' + "nes_" + str(query) + ".pickle"
		nes_file = os.path.join(app.config['PATH_TO_CACHE'], filename1)
		with open(nes_file, "rb") as f:
			nes_samples = pickle.load(f)

		filename2 = prefix + '/' + suffix + '/' + "lemma_samples_" + str(query) + ".pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		x_docs, y_words, z_counts, titles = vis_heatmap(lemma_samples, nes_samples, nes_categories, w_number, hmr_conn)
		popup = '<div class="alert alert-warning alert-dismissible" role="alert"><button type="button" class="close" data-dismiss="alert" aria-label="Close"><span aria-hidden="true">&times;</span></button><strong>[ ! ]</strong> Default: N=10, from all categories.</div>'
		hmr_conn.close()
		return render_template('results_heatmap.html', query=query, z_counts=z_counts, x_docs=x_docs, y_words=y_words, popup=popup, titles=titles, base_url=base_url)
def cogejournals():
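	"""Render the journals visualization for the hard-coded example query."""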
	j_conn = connection()
	filename = "189/528/journals_18952863+18269575.json"
	filepath = os.path.join((app.config['PATH_TO_JOURNALS']), filename)
	with open(filepath) as load_data:
		journals = json.load(load_data)
	query = '18952863+18269575'
	range_years, unique_pubs, unique_journals = getJournalsVis(query, j_conn)
	years_list = range_years.split('+')
	s_year = years_list[0]
	e_year = years_list[1]
	j_conn.close()
	return render_template('coge_journals.html', journals=journals, unique_pubs=unique_pubs,
						   unique_journals=unique_journals, s_year=s_year, e_year=e_year)
def coge_stats():
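	"""Render summary statistics for the hard-coded example query."""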
	s_conn = connection()
	query = "18952863+18269575"
	input_click_citations = statsSelfInfo(query, s_conn)
	statistics = get_statistics(query, s_conn) #NOTE: a lot of these are "None" right now; the db will need to be populated
	sum_total, unique, sum_abstracts, sum_whole, sum_sents, sum_tokens = statistics[:6]
	s_conn.close()
	return render_template('coge_stats.html', input_click_citations=input_click_citations,
						   sum_total=sum_total, unique=unique, sum_abstracts=sum_abstracts, sum_whole=sum_whole,
						   sum_sents=sum_sents, sum_tokens=sum_tokens)
def coge_scifi():
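	"""Render the TextCompare (scifi) visualization for the hard-coded example query."""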
	form = corpusOptions()
	path_to_eligible_paper = os.path.join(app.config['PATH_TO_CACHE'], '259/367/2593677.txt')
	eligible_papers = [('paper1', '18952863', path_to_eligible_paper, '2008, Lyons')]
	if request.method == 'POST':
		csf_conn = connection()
		logging.info("posted a thing in scifi!")
		corpus = form.corpus.data
		logging.info(corpus)
		query = "18952863+18269575"
		corpus_titles = {
			'darwin': 'On The Origin of Species',
			'yeast': 'Yeast by Thomas Henry Huxley',
			'mouse': 'The Dancing Mouse, a Study in Animal Behavior',
			'brain_speech': 'The Brain & The Voice in Speech & Song',
			'grecoroman': 'Outlines of Greek and Roman Medicine',
			'startrek': 'Star Trek: The Next Generation',
			'mars': 'Gulliver of Mars',
			'last_evolution': 'The Last Evolution by John W Campbell',
			'youth': 'Youth by Isaac Asimov',
			'frankenstein': 'Frankenstein; or, The Modern Prometheus',
			'sherlock': 'Sherlock Holmes by Sir Arthur Conan Doyle',
			'austen': 'Pride and Prejudice',
			'bible': 'The Bible',
			'paper1': 'Lyons et al., 2008',
		}
		title = corpus_titles[corpus]
		x, y, names, color = vis_scifi(corpus, query, eligible_papers, csf_conn)
		csf_conn.close()
		return render_template('coge_scifi2.html', x=x, y=y, title=title, color=color, names=names, eligible_papers=eligible_papers)
	else:
		flash('Some input paper(s) are not available for TextCompare')
		#Default data!
		return render_template('coge_scifi.html', eligible_papers=eligible_papers)
def cogekmeans():
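	"""Render the k-means visualization for the hard-coded example query."""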
	form = visOptions()
	if request.method == 'POST':
		k_conn = connection()
		filename = "189/528/lemma_samples_18952863+18269575.pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		k_clusters = form.k_val.data #2, 3, 4, or 5
		logging.info("the k value is " + str(k_clusters))
		(x0_coordinates, y0_coordinates, z0_coordinates,
		 x1_coordinates, y1_coordinates, z1_coordinates,
		 x2_coordinates, y2_coordinates, z2_coordinates,
		 x3_coordinates, y3_coordinates, z3_coordinates,
		 x4_coordinates, y4_coordinates, z4_coordinates,
		 titles0, titles1, titles2, titles3, titles4) = vis_kmeans(lemma_samples, k_clusters, k_conn)
		k_conn.close()
		return render_template('coge_kmeans2.html', x0_coordinates=x0_coordinates, y0_coordinates=y0_coordinates, z0_coordinates=z0_coordinates,
							   x1_coordinates=x1_coordinates, y1_coordinates=y1_coordinates, z1_coordinates=z1_coordinates,
							   x2_coordinates=x2_coordinates, y2_coordinates=y2_coordinates, z2_coordinates=z2_coordinates,
							   x3_coordinates=x3_coordinates, y3_coordinates=y3_coordinates, z3_coordinates=z3_coordinates,
							   x4_coordinates=x4_coordinates, y4_coordinates=y4_coordinates, z4_coordinates=z4_coordinates,
							   titles0=titles0, titles1=titles1, titles2=titles2, titles3=titles3, titles4=titles4)
	else:
		#show the default data!!
		return render_template('coge_kmeans.html')
def res_stats(query):
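	"""Render the statistics page (venn data, counts, pubs-per-year bar chart) for a results query."""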
	sr_conn = connection() #statistics results db connection
	pmid_list = query.split('+') #list of string pmids
	venn_data = make_venn(pmid_list, sr_conn)

	input_click_citations = statsSelfInfo(query, sr_conn)
	statistics = get_statistics(query, sr_conn)
	if None in statistics:
		logging.info("There were some 'None's in our statistics! Let's get this right and make sure the db is populated...")
		db_query_update_statistics(query, sr_conn) #do it again
		logging.info("Done updating the db with the statistics")
		statistics = get_statistics(query, sr_conn)

	sum_total, unique, sum_abstracts, sum_whole, sum_sents, sum_tokens = statistics[:6]

	#get x, y coordinates for pubs x year bar chart.
	#max 5 papers
	x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, n0, n1, n2, n3, n4 = stats_barchart(query, sr_conn)
	sr_conn.close()
	return render_template('results_stats.html', input_click_citations=input_click_citations,
						   venn_data=venn_data, sum_total=sum_total,
						   unique=unique, sum_abstracts=sum_abstracts, sum_whole=sum_whole,
						   sum_sents=sum_sents, sum_tokens=sum_tokens,
						   x0=x0, x1=x1, x2=x2, x3=x3, x4=x4,
						   y0=y0, y1=y1, y2=y2, y3=y3, y4=y4,
						   n0=n0, n1=n1, n2=n2, n3=n3, n4=n4)
def res_clustermap(query):
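	"""Render the clustermap visualization for a results query of '+'-joined PMIDs."""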
	form = nesOptions()

	base_url = app.config['BASE_URL']

	if request.method == 'POST':
		cmr_conn = connection() #clustermap results database connection
		nes_categories = request.form.getlist('n_categories')
		logging.info(nes_categories)
		w_number = form.w_words.data
		logging.info("the w value is "+str(w_number))

		pmid_list = query.split('+')
		pmid = pmid_list[0]
		prefix = pmid[0:3]
		suffix = pmid[3:6]

		filename1 = prefix + '/' + suffix + '/' + "nes_" + str(query) + ".pickle"
		nes_file = os.path.join(app.config['PATH_TO_CACHE'], filename1)
		with open(nes_file, "rb") as f:
			nes_samples = pickle.load(f)

		filename2 = prefix + '/' + suffix + '/' + "lemma_samples_" + str(query) + ".pickle"
		lemma_file = os.path.join(app.config['PATH_TO_CACHE'], filename2)
		with open(lemma_file, "rb") as f:
			lemma_samples = pickle.load(f)

		saveName = vis_clustermap(lemma_samples, nes_samples, nes_categories, w_number, query, cmr_conn)
		image = '/clustermaps/' + saveName

		cmr_conn.close()
		return render_template('results_clustermap.html', image=image, query=query, base_url=base_url)
	else:
		popup = '<div class="alert alert-warning alert-dismissible" role="alert"><button type="button" class="close" data-dismiss="alert" aria-label="Close"><span aria-hidden="true">&times;</span></button><strong>[ ! ]</strong> Choose N and categories to run clustermap.</div>'
		#No default visualization
		return render_template('results_clustermapH.html', query=query, popup=popup, base_url=base_url)
def resjournals(query, update_check):
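	"""Render the journals visualization for a results query; update_check is forwarded to print_journalvis."""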
	logging.info("in routine res-journals")

	jr_conn = connection() #journal results connection

	needed_to_annotate_check = [update_check]
	range_years, unique_publications, unique_journals = print_journalvis(query, needed_to_annotate_check, jr_conn)

	logging.info("YEARS RANGE: " +str(range_years))
	#Need years for range
	### AHHHH if range is not in db yet, it will get it without the "+" as a tuple!
	try:
		years_list = range_years.split('+')
		s_year = years_list[0]
		e_year = years_list[1]
	except Exception as e:
		s_year = range_years[0]
		e_year = range_years[1]

	#Load the journals json for the full query (the cache path is sharded by the FIRST pmid)
	pmid_list = query.split('+') #list of string pmids
	pmid = pmid_list[0]
	prefix = pmid[0:3]
	suffix = pmid[3:6]

	filename = prefix + '/' + suffix + '/' + "journals_" + str(query) + ".json"
	logging.info("query's JOURNAL file is named: " + str(filename))
	savePath = app.config['PATH_TO_JOURNALS']
	completeName = os.path.join(savePath, filename)
	logging.info("complete file: " + str(completeName))

	with open(completeName) as load_data:
		journals = json.load(load_data)
	jr_conn.close()
	return render_template('results_journals.html', journals=journals, s_year=s_year, e_year=e_year,
						   unique_journals=unique_journals, unique_publications=unique_publications)
def results():
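	"""Main results view: retrieve, annotate, and cache up to 5 input PMIDs, then render their unique citations."""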
	logging.info("In app route RESULTS")
	time_start = time.time()
	form = pmidForm()
	r_conn = connection() #results_connection to db

	base_url = app.config['BASE_URL']

	try:
		if request.method == 'GET':
			logging.info("Request method is GET")

			entry = request.args.getlist('pmid')[0] #str
			if len(entry) == 0:
				logging.info("someone pushed enter without entering anything....default entry go!")
				query = '18952863+18269575'
				citations_with_links = db_unique_citations_retrieval(query, r_conn)  # unique
				unique_publications = db_unique_citations_number(query, r_conn)
				r_conn.close()
				return render_template("dashboard.html", citations_with_links=citations_with_links,
									   unique_publications=unique_publications,
									   base_url=base_url)

			pmid_list = multiple_pmid_input(entry) #list for handling multiple pmids
			logging.info(pmid_list)

			#URL to re-visit this analysis
			url_values = urllib.parse.urlencode({'pmid': entry})
			full_url = app.config['BASE_URL'] + '?' + url_values
			logging.info(full_url)


			# If the user inputs more than 5 PMIDs, flash a warning and return the home page
			if len(pmid_list) > 5:
				flash('You have entered more than 5 PMIDs. Please reduce your query to 5 PMIDs or less to continue.')
				r_conn.close()
				return render_template("home.html", base_url=base_url)


			query = '+'.join(pmid_list)
			logging.info("query: " + str(query))

			needed_to_annotate_check = []

			for user_input in pmid_list:
				logging.info(str(user_input))
				user_input = str(user_input)

				############################################
				#Check database for pmid: does the entry exist in the db already?
				s = inputPapers.select().\
					where(inputPapers.c.pmid == user_input)
				c = r_conn.execute(s)
				check1 = c.fetchone()
				c.close()


				#if the entry does NOT exist in the db already, will need to retrieve text, annotate it, and populate cache
				if check1 is None:
					update_check = "yes"

					#Information Retrieval of citing pmcids and info about them
					number_of_citations = run_IR_not_db(user_input, r_conn)
					logging.info(number_of_citations)

					#Throws an exception if there are no citations for something
					if number_of_citations == 0:
						flash('PubMed has no citations for PMID: ' + str(user_input) + '. Please try again with a different PMID or without this PMID in your query. If you had other PMIDs in your query before this, those are fine :)')
						citations_with_links = db_unique_citations_retrieval('18952863+18269575', r_conn)  # unique
						unique_publications = db_unique_citations_number('18952863+18269575', r_conn)
						r_conn.close()
						return render_template("dashboard.html", citations_with_links=citations_with_links,
											   unique_publications=unique_publications)

					else:
						#Annotate
						logging.info("beginning multi-preprocessing")
						biodoc_data = do_multi_preprocessing(user_input, r_conn)
						needed_to_annotate_check.append("yes")
						logging.info("done with new document multi_preprocessing")

						#Before moving on, let's double check that we have EVERYTHING.
						#If some texts were copied because they are in the database but are actually missing, that's a problem!
						logging.info("Just checking that everything (texts and jsons) is there.... ")
						needed_to_rescrape = check_for_texts(user_input, r_conn)  # returns yes or no
						logging.info("needed_to_rescrape/re_annotate: " + str(needed_to_rescrape))


						logging.info("writing the BIODOC LEMMAS")
						#Populate cache (lemmas and nes)
						need_to_annotate = "yes" #of course we need to annotate, its a new pmid!
						print_lemma_nes_samples(user_input, biodoc_data, need_to_annotate)
						logging.info("* wrote lemme and nes samples to cache!!!")


					#After all citations have been processed, now we can do the analyses:
					if user_input == pmid_list[-1]: #if its the last pmid
						logging.info("last pmid in the query")

						#Lemma_samples and nes_samples for entire query here:
						logging.info("concatting lemma nes samples for query")
						#Populate cache
						need_to_update = "yes" #of course we do, something is new!
						concat_lemma_nes_samples(query, need_to_update)

						#Update "queries" table of db here!!
						logging.info("STARTING db_query_update_statistics")
						db_query_update_statistics(query, r_conn)
						logging.info("finishe db_query_update_statistics")


				#if the entry IS in the db, no need to retrieve text from Entrez, just grab from db
				#MAYBE need to annotate some new documents, maybe not
				#If new citations do need to be retrieved, annotated, etc. then DO NEED to re-populate cache
				if check1 is not None:
					update_check = "no" #no by default


					#flash("alreay exists in database :) ")
					#Using user_input for Information Retrieval - checks if any new papers have been added that we need to scrape
					need_to_annotate = run_IR_in_db(user_input, r_conn)


					# if there's nothing to annotate, still double check that all things are there
					# if papers or annotations are missing, then fix that!
					needed_to_rescrape = check_for_texts(user_input, r_conn) #returns yes or no
					logging.info("needed_to_rescrape: " + str(needed_to_rescrape))
					needed_to_annotate_check.append(needed_to_rescrape) #if we needed to re-scrape anything, let's say we need to update
					if needed_to_rescrape == 'yes':
						logging.info("updated needed_to_annotate_check ")

					if need_to_annotate == 'yes':
						needed_to_annotate_check.append('yes')
						logging.info("need to annotate new documents")
						#Annotate
						biodoc_data = do_multi_preprocessing(user_input, r_conn)
						logging.info("done with new document multi_preprocessing")
						#If need_to_annotate is "yes", will re-populate :)
						#Make cache
						print_lemma_nes_samples(user_input, biodoc_data, need_to_annotate)
						logging.info("repopulated lemmas and nes cache")


					#Make sure that the lemma and nes cache exists before moving on!!!
					if need_to_annotate == 'no':
						logging.info("dont need to annotate any new documents")
						if exists_lemma(user_input) and exists_nes(user_input):
							logging.info("lemmas and nes cache exist so pass :)")
							needed_to_annotate_check.append('no')
						else:
							logging.info("lemmas and nes cache didn't exist so gotta make them!!!")
							biodoc_data = do_multi_preprocessing(user_input, r_conn)
							logging.info("done with new document multi_preprocessing")
							nonexistant = "yes"
							print_lemma_nes_samples(user_input, biodoc_data, nonexistant)
							needed_to_annotate_check.append('yes')


					## Now that we have all the data, finish the query-level cache and statistics
					## (only want to save the final version, not intermediate runs)
					if user_input == pmid_list[-1]:
						logging.info("last pmid in the query")

						#If ANY user_inputs in the query needed to update, we must update the query's comprehensive cache.
						if 'yes' in needed_to_annotate_check:
							need_to_update = 'yes'
							concat_lemma_nes_samples(query, need_to_update) #will over-ride the existing file
							update_check = "yes"
						else:
							need_to_update = 'no'
							concat_lemma_nes_samples(query, need_to_update) #this should check for the file
							update_check = "no"

						# Update "queries" table of db here!!
						logging.info("STARTING db_query_update_statistics")
						db_query_update_statistics(query, r_conn)
						logging.info("finished db_query_update_statistics")


				#Housekeeping
				gc.collect() #garbage collector for cleaning up unneeded stuff
				session['entered_id'] = True
				session['engaged'] = 'engaged'


		citations_with_links = db_unique_citations_retrieval(query, r_conn) #unique
		r_conn.close()
		logging.info("BENCHMARK TIME: done in %0.3fs." % (time.time() - time_start))
		return render_template('results.html', form=form, citations_with_links=citations_with_links,
							   query=query, update_check=update_check, base_url=base_url)

	except Exception as e:
		r_conn.close()
		return str(e)
def results_scifi(query):
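	"""Render the TextCompare (scifi) visualization for a results query of '+'-joined PMIDs."""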
	form = corpusOptions()

	base_url = app.config['BASE_URL']

	pmid_list = query.split('+')  # list of string pmids
	#decide eligible papers:
	sfr_conn = connection() #scifi results db connection
	eligible_papers = inputEligible(query, sfr_conn)

	logging.info("eligible papers: " +str(eligible_papers))
	if request.method == 'POST':
		logging.info("posted a thing in scifi!")
		corpus = form.corpus.data
		logging.info(corpus)
		corpus_titles = {
			'darwin': 'On The Origin of Species',
			'yeast': 'Yeast by Thomas Henry Huxley',
			'mouse': 'The Dancing Mouse, a Study in Animal Behavior',
			'brain_speech': 'The Brain and The Voice in Speech and Song',
			'grecoroman': 'Outlines of Greek and Roman Medicine',
			'startrek': 'Star Trek: The Next Generation',
			'mars': 'Gulliver of Mars',
			'last_evolution': 'The Last Evolution by John W Campbell',
			'youth': 'Youth by Isaac Asimov',
			'frankenstein': 'Frankenstein; or, The Modern Prometheus',
			'sherlock': 'Sherlock Holmes by Sir Arthur Conan Doyle',
			'austen': 'Pride and Prejudice',
			'bible': 'The Bible',
		}
		if corpus in corpus_titles:
			title = corpus_titles[corpus]
		else:
			#'paper1' through 'paper5' map to the user's eligible input papers
			paper_index = int(corpus[-1]) - 1
			title = 'PMID: ' + str(eligible_papers[paper_index][1])
		x, y, names, color = vis_scifi(corpus, query, eligible_papers, sfr_conn)
		sfr_conn.close()
		return render_template('results_scifi.html', base_url=base_url, x=x, y=y, title=title, color=color, query=query, names=names, eligible_papers=eligible_papers)
	else:
		logging.info("scifi analysis")
		corpus = 'darwin'
		title = 'On The Origin of Species'
		x, y, names, color = vis_scifi(corpus, query, eligible_papers, sfr_conn)
		logging.info("done with x and y")
		if len(eligible_papers) < len(pmid_list):
			flash('Some input paper(s) are not available for TextCompare')
		sfr_conn.close()
		return render_template('results_scifi.html', base_url=base_url, x=x, y=y, title=title, color=color, query=query, names=names, eligible_papers=eligible_papers)