Ejemplo n.º 1
0
		augment_ids = []
		ids = sorted(ids)
		for nid in range(ids[0] - 3, ids[len(ids)-1] + 3):
			augment_ids.append(nid)
		docsents = []
		#for d in docs:
		#	idsql = 'SELECT sentence from esl_sentences where doc=%s;'
		#	cur2.execute(idsql, (d,))
		#	print d
		#	docsents += [row[0] for row in cur2.fetchall()]	
		for i in augment_ids:
			idsql = 'SELECT sentence from esl_sentences where id=%s;'
			cur2.execute(idsql, (i,))
		#	print i
			docsents += [row[0] for row in cur2.fetchall()]	
	
#		control = controls.get_raw_control(sents, candidates, dfs)
		print len(docsents)
        	b = controls.best_control(docsents, candidates, dfs)
		print b
		for s in sents:
			outfile.write(s+'\n')
        	outfile.write("Control: "+b+'\n')
        	outfile.write('\n')

# Close the database connection.
conn.close()
	

# Log that the script reached its end.
logging.info("esl hit creation pipeline - FINISH")

Ejemplo n.º 2
0
				for item in batchiter:
					sents.append(item)
					doc = item[6]
					sentnums.append(item[4])
				#Insert 1 control sentence
				for s in sents:
					sentfile.write(str(s[0]) + ' ')
				sentfile.write('\n')
				justsents = [s[1] for s in sents]
				topwords = controls.get_topn(justsents, dfs, 5)
				querywords = [w for w, freq in topwords]
				cachefile = 'caches/controls.cache.'+str(datetime.datetime.now()).replace('\s', "-")
				#candidatecache = controls.pull_all_candidates_goog_cacheresults(querywords, cachefile)
				candidates = controls.pull_all_candidates_from_cache(querywords)
				if(len(candidates) > 0):
					b = controls.best_control(justsents, candidates, dfs)
					for j in justsents:
						print j
						sentfile.write(j+'\n')
					sentfile.write(str(b[0][0])+'\n\n')
					print b[0]
				
# Commit outstanding database work, then close the connection.
conn.commit()
conn.close()

# The output file is closed only when the reload option is set.
if args.reload:
	outfile.close()

logging.info("esl hit creation pipeline - FINISH")

Ejemplo n.º 3
0
				sentcounts.append(hit_id)

			# Log the current batch counter, then advance it.
			logging.info("Batch "+str(check)+" added")
			check+=1
			# Start fresh accumulators for this batch's sentences and their ids.
			sents = []
			sentids = []
			for item in batchiter:
				doc_id = item[4]
				# Candidates are looked up by the part of doc_id before the first '_'.
				# NOTE(review): `candidates` is rebound on every row, so only the last
				# row's candidates reach the check after this loop; if batchiter
				# yields nothing it is never bound here -- confirm this is intended.
				candidates = controls.pull_candidates(doc_id.split('_')[0])			
				idsql = 'SELECT sentence from esl_sentences where doc_id=%s;'
				cur2.execute(idsql, (doc_id,))
				# NOTE(review): assuming cur2 is a DB-API cursor, fetchone() returns
				# None when no row matches and the [0] would then fail -- confirm a
				# row always exists.
				sents.append(cur2.fetchone()[0])	
				sentids.append(doc_id)
			if(len(candidates) > 0):
				# NOTE(review): `i` is never incremented in the loop below, so every
				# inserted row gets sentence_num 0 -- confirm whether a counter was
				# intended.
				i = 0
				# Request the five best control candidates for the fetched sentences.
        			b = controls.best_control(sents, candidates, dfs, nbest=5)
				for bb in b:
					# bbuni (touni of the candidate text) is what gets written to
					# outfile; newb (randerr of the same text) is what gets inserted.
					bbuni = controls.touni(bb[0])
					newb = generrors.randerr(bb[0])
					cid = controls.insert_into_db(hit_id, newb, cur2)
					# Rows are skipped when insert_into_db returns -1 (presumably its
					# failure marker -- verify against the helper).
					if(not(cid == -1)):
						outfile.write(bbuni+'\t')
						sql="INSERT INTO esl_hits_data(hit_id,esl_sentence_id,language_id,sentence_num)VALUES(%s,%s,%s,%s);"
						cur2.execute(sql,(hit_id, cid, lang_id, i))
				# End the tab-separated line of controls written for this batch.
				outfile.write('\n')
			# Commit once per batch.
			conn.commit()
	else:
		check = 0

		cachedsents = codecs.open('controls.log.bk', encoding='utf-8', mode='r')
		for hit in cachedsents.readlines():