def fill_site_table(sitefile):
    """Load sites from a tab-separated file into the ``fedtask_site`` table.

    Every non-blank line of *sitefile* is stripped and split on tabs; the
    first four fields are used as ``site_id``, ``site_name``, ``site_url``
    and ``category``. One insert statement per line is sent through
    ``db.run_qry`` using the module-level ``conn``.

    Args:
        sitefile: path of the tab-separated site file.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
    """
    # The context manager closes the file even when a query raises.
    with open(sitefile) as f:
        print('Fill the Site table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            fields = stripped.split('\t')
            # NOTE(review): values are interpolated into the SQL text as-is,
            # so a field containing a double quote breaks the statement.
            # NOTE(review): the "on duplicate key update" clause assigns each
            # column to itself, so an existing row is presumably left
            # unchanged -- confirm this is intended.
            qry = ('insert into fedtask_site '
                   '(site_id, site_name, site_url, category) '
                   'values("%s", "%s", "%s", "%s") '
                   'on duplicate key update site_name=site_name, '
                   'site_url=site_url, category = category'
                   ) % (fields[0], fields[1], fields[2], fields[3])
            db.run_qry(qry, conn)
def fill_site_table(sitefile):
    """Load sites from a tab-separated file into the ``fedtask_site`` table.

    Every non-blank line of *sitefile* is stripped and split on tabs; the
    first four fields are used as ``site_id``, ``site_name``, ``site_url``
    and ``category``. One insert statement per line is sent through
    ``db.run_qry`` using the module-level ``conn``.

    Args:
        sitefile: path of the tab-separated site file.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
    """
    # The context manager closes the file even when a query raises.
    with open(sitefile) as f:
        print('Fill the Site table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            fields = stripped.split('\t')
            # NOTE(review): values are interpolated into the SQL text as-is,
            # so a field containing a double quote breaks the statement.
            # NOTE(review): the "on duplicate key update" clause assigns each
            # column to itself, so an existing row is presumably left
            # unchanged -- confirm this is intended.
            qry = ('insert into fedtask_site '
                   '(site_id, site_name, site_url, category) '
                   'values("%s", "%s", "%s", "%s") '
                   'on duplicate key update site_name=site_name, '
                   'site_url=site_url, category = category'
                   ) % (fields[0], fields[1], fields[2], fields[3])
            db.run_qry(qry, conn)
def fill_qrels_table(qrels):
    """Load relevant judgements from a qrels file into ``fedtask_qrels``.

    Every non-blank line of *qrels* is stripped and split on single spaces.
    Lines whose last field parses to integer 0 are skipped; for the others
    fields 0, 2 and 3 are inserted as ``topic_id``, ``doc_id`` and
    ``relevance`` through ``db.run_qry`` on the module-level ``conn``.

    Args:
        qrels: path of the space-separated qrels file.

    Raises:
        ValueError: if the last field of a line is not an integer.
        IndexError: if a stored line has fewer than four fields.
    """
    # The context manager closes the file even when a query raises.
    with open(qrels) as f:
        print('Fill the Qrels table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines would otherwise fail in int('').
                continue
            fields = stripped.split(' ')
            # Only store relevant docs
            if int(fields[-1]) == 0:
                continue
            qry = ('insert into fedtask_qrels (topic_id, doc_id, relevance) '
                   'values (%s, "%s", %s)'
                   ) % (fields[0], fields[2], fields[3])
            db.run_qry(qry, conn)
def fill_topic_table(topicfile):
    """Load topics from a colon-separated file into ``fedtask_topic``.

    Every non-blank line of *topicfile* is stripped and split on ``:``.
    Field 0 is inserted as ``topic_id`` and field 1 (with double quotes
    backslash-escaped) as ``topic_text`` through ``db.run_qry`` on the
    module-level ``conn``.

    Args:
        topicfile: path of the topic file.

    Returns:
        A list with one entry per stored line; each entry is the list of
        colon-separated fields of that line.

    Raises:
        IndexError: if a non-blank line contains no ``:`` separator.
    """
    topics = []
    # The context manager closes the file even when a query raises.
    with open(topicfile) as f:
        print('Fill the Topic table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no topic.
                continue
            # NOTE(review): splitting on every ':' means a topic text that
            # itself contains ':' is stored only up to the first colon --
            # confirm against the topic file format.
            fields = stripped.split(':')
            topic_text = fields[1].replace('"', '\\"')
            qry = ('insert into fedtask_topic (topic_id, topic_text) '
                   'values(%s, "%s") '
                   'on duplicate key update topic_text=topic_text;'
                   ) % (fields[0], topic_text)
            db.run_qry(qry, conn)
            topics.append(fields)
    return topics
def fill_qrels_table(qrels):
    """Load relevant judgements from a qrels file into ``fedtask_qrels``.

    Every non-blank line of *qrels* is stripped and split on single spaces.
    Lines whose last field parses to integer 0 are skipped; for the others
    fields 0, 2 and 3 are inserted as ``topic_id``, ``doc_id`` and
    ``relevance`` through ``db.run_qry`` on the module-level ``conn``.

    Args:
        qrels: path of the space-separated qrels file.

    Raises:
        ValueError: if the last field of a line is not an integer.
        IndexError: if a stored line has fewer than four fields.
    """
    # The context manager closes the file even when a query raises.
    with open(qrels) as f:
        print('Fill the Qrels table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines would otherwise fail in int('').
                continue
            fields = stripped.split(' ')
            # Only store relevant docs
            if int(fields[-1]) == 0:
                continue
            qry = ('insert into fedtask_qrels (topic_id, doc_id, relevance) '
                   'values (%s, "%s", %s)'
                   ) % (fields[0], fields[2], fields[3])
            db.run_qry(qry, conn)
def fill_topic_table(topicfile):
    """Load topics from a colon-separated file into ``fedtask_topic``.

    Every non-blank line of *topicfile* is stripped and split on ``:``.
    Field 0 is inserted as ``topic_id`` and field 1 (with double quotes
    backslash-escaped) as ``topic_text`` through ``db.run_qry`` on the
    module-level ``conn``.

    Args:
        topicfile: path of the topic file.

    Returns:
        A list with one entry per stored line; each entry is the list of
        colon-separated fields of that line.

    Raises:
        IndexError: if a non-blank line contains no ``:`` separator.
    """
    topics = []
    # The context manager closes the file even when a query raises.
    with open(topicfile) as f:
        print('Fill the Topic table')
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no topic.
                continue
            # NOTE(review): splitting on every ':' means a topic text that
            # itself contains ':' is stored only up to the first colon --
            # confirm against the topic file format.
            fields = stripped.split(':')
            topic_text = fields[1].replace('"', '\\"')
            qry = ('insert into fedtask_topic (topic_id, topic_text) '
                   'values(%s, "%s") '
                   'on duplicate key update topic_text=topic_text;'
                   ) % (fields[0], topic_text)
            db.run_qry(qry, conn)
            topics.append(fields)
    return topics
def fill_doc_table(snippets_loc, docs_loc):
    """Populate ``fedtask_document`` from per-site, per-topic snippet files.

    For every (site_id, topic_id) pair read from ``fedtask_site`` and
    ``fedtask_topic`` the file ``<snippets_loc>/<site_id>/<topic_id>.xml``
    is parsed, and one row is inserted for each child of the third child
    of its ``search_results`` element.

    Args:
        snippets_loc: directory holding the snippet XML files.
        docs_loc: not used by this function; kept for interface
            compatibility.
    """
    def escape_quotes(text):
        # Backslash-escape double quotes so the value can sit inside a
        # double-quoted SQL string literal.
        return text.replace('"', '\\"')

    print('Process snippets and documents')

    # Get all sites
    qry = 'select distinct site_id from fedtask_site'
    sites = db.run_qry_with_results(qry, conn)

    # Get all topics
    qry = 'select distinct topic_id from fedtask_topic'
    topics = db.run_qry_with_results(qry, conn)

    # Get snippets of (site, topics)
    for site_row in sites:
        site_id = site_row[0]
        for topic_row in topics:
            snippet_file = '%s/%s/%s.xml' % (
                snippets_loc, site_id, topic_row[0])
            root = et.parse(snippet_file).getroot()
            # Snippet element: third child of <search_results>
            snippets = root.find('search_results')[2]

            # Get info of each doc: doc_id, title, summary, url
            for sn in snippets:
                docid = sn.get('id')
                link = sn.find('link')
                title = sn.find('title')
                summary = sn.find('description')

                # Page link and HTML_location; both default to '' so a
                # snippet without a <link> child no longer raises.
                sn_url = ''
                doc_loc = ''
                if link is not None:
                    if link.text is not None:
                        sn_url = escape_quotes(link.text)
                    doc_loc = link.get('cache', '')

                # title
                sn_title = ''
                if title is not None and title.text is not None:
                    sn_title = escape_quotes(title.text).encode('utf-8')

                # summary: backslashes are dropped before quotes are escaped
                sn_summary = ''
                if summary is not None and summary.text is not None:
                    without_backslashes = summary.text.replace('\\', '')
                    sn_summary = escape_quotes(
                        without_backslashes).encode('utf-8')

                qry = ('insert into fedtask_document '
                       '(doc_id, site_id, title, url, html_location, summary) '
                       'values ("%s", "%s", "%s", "%s", "%s", "%s")'
                       ) % (docid, site_id, sn_title, sn_url, doc_loc,
                            sn_summary)
                db.run_qry(qry, conn)
def fill_doc_table(snippets_loc, docs_loc):
    """Populate ``fedtask_document`` from per-site, per-topic snippet files.

    For every (site_id, topic_id) pair read from ``fedtask_site`` and
    ``fedtask_topic`` the file ``<snippets_loc>/<site_id>/<topic_id>.xml``
    is parsed, and one row is inserted for each child of the third child
    of its ``search_results`` element.

    Args:
        snippets_loc: directory holding the snippet XML files.
        docs_loc: not used by this function; kept for interface
            compatibility.
    """
    def escape_quotes(text):
        # Backslash-escape double quotes so the value can sit inside a
        # double-quoted SQL string literal.
        return text.replace('"', '\\"')

    print('Process snippets and documents')

    # Get all sites
    qry = 'select distinct site_id from fedtask_site'
    sites = db.run_qry_with_results(qry, conn)

    # Get all topics
    qry = 'select distinct topic_id from fedtask_topic'
    topics = db.run_qry_with_results(qry, conn)

    # Get snippets of (site, topics)
    for site_row in sites:
        site_id = site_row[0]
        for topic_row in topics:
            snippet_file = '%s/%s/%s.xml' % (
                snippets_loc, site_id, topic_row[0])
            root = et.parse(snippet_file).getroot()
            # Snippet element: third child of <search_results>
            snippets = root.find('search_results')[2]

            # Get info of each doc: doc_id, title, summary, url
            for sn in snippets:
                docid = sn.get('id')
                link = sn.find('link')
                title = sn.find('title')
                summary = sn.find('description')

                # Page link and HTML_location; both default to '' so a
                # snippet without a <link> child no longer raises.
                sn_url = ''
                doc_loc = ''
                if link is not None:
                    if link.text is not None:
                        sn_url = escape_quotes(link.text)
                    doc_loc = link.get('cache', '')

                # title
                sn_title = ''
                if title is not None and title.text is not None:
                    sn_title = escape_quotes(title.text).encode('utf-8')

                # summary: backslashes are dropped before quotes are escaped
                sn_summary = ''
                if summary is not None and summary.text is not None:
                    without_backslashes = summary.text.replace('\\', '')
                    sn_summary = escape_quotes(
                        without_backslashes).encode('utf-8')

                qry = ('insert into fedtask_document '
                       '(doc_id, site_id, title, url, html_location, summary) '
                       'values ("%s", "%s", "%s", "%s", "%s", "%s")'
                       ) % (docid, site_id, sn_title, sn_url, doc_loc,
                            sn_summary)
                db.run_qry(qry, conn)
def fill_run_table(rundir):
    """Register every file in *rundir* as a run and store its rank lists.

    For each file a new ``fedtask_run`` row is inserted whose ``run_id`` is
    one more than the current maximum (1 when the table is empty) and whose
    description is the file name. Lines are split on single spaces; field 0
    is the topic id and field 2 the document id. Consecutive lines with the
    same topic id are collected into one JSON-encoded list that is inserted
    into ``fedtask_ranklist``.

    Args:
        rundir: directory containing the run files.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    def store_ranklist(run_id, topic_id, doc_ids):
        # Persist the ranked document ids of one topic as a JSON array.
        ranklist = simplejson.dumps(doc_ids)
        qry = ("insert into fedtask_ranklist (run_id, topic_id, ranklist) "
               "values(%s, %s, '%s')") % (run_id, topic_id, ranklist)
        db.run_qry(qry, conn)

    for runfile in os.listdir(rundir):
        # The context manager closes the file even when a query raises.
        with open('%s/%s' % (rundir, runfile)) as f:
            # Insert to run table
            qry = 'select max(run_id) from fedtask_run'
            res = db.run_qry_with_results(qry, conn)
            latest = res[0][0]
            run_id = 1 if latest is None else latest + 1
            run_desc = runfile
            qry = ('insert into fedtask_run (run_id, description) '
                   'values (%s, "%s")') % (run_id, run_desc)
            db.run_qry(qry, conn)

            print('Precessing run %s: %s' % (run_id, run_desc))
            current_q = ''
            docs = []
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Blank lines carry no ranking entry.
                    continue
                fields = stripped.split(' ')
                qid = fields[0]
                docid = fields[2]
                if qid != current_q:
                    # A new topic starts: flush the previous one, if any.
                    if current_q != '':
                        store_ranklist(run_id, current_q, docs)
                    docs = []
                    current_q = qid
                docs.append(docid)

            # Flush the last topic. An empty file has no topic at all, and
            # inserting with an empty topic id would be malformed SQL.
            if current_q != '':
                store_ranklist(run_id, current_q, docs)
def fill_run_table(rundir):
    """Register every file in *rundir* as a run and store its rank lists.

    For each file a new ``fedtask_run`` row is inserted whose ``run_id`` is
    one more than the current maximum (1 when the table is empty) and whose
    description is the file name. Lines are split on single spaces; field 0
    is the topic id and field 2 the document id. Consecutive lines with the
    same topic id are collected into one JSON-encoded list that is inserted
    into ``fedtask_ranklist``.

    Args:
        rundir: directory containing the run files.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    def store_ranklist(run_id, topic_id, doc_ids):
        # Persist the ranked document ids of one topic as a JSON array.
        ranklist = simplejson.dumps(doc_ids)
        qry = ("insert into fedtask_ranklist (run_id, topic_id, ranklist) "
               "values(%s, %s, '%s')") % (run_id, topic_id, ranklist)
        db.run_qry(qry, conn)

    for runfile in os.listdir(rundir):
        # The context manager closes the file even when a query raises.
        with open('%s/%s' % (rundir, runfile)) as f:
            # Insert to run table
            qry = 'select max(run_id) from fedtask_run'
            res = db.run_qry_with_results(qry, conn)
            latest = res[0][0]
            run_id = 1 if latest is None else latest + 1
            run_desc = runfile
            qry = ('insert into fedtask_run (run_id, description) '
                   'values (%s, "%s")') % (run_id, run_desc)
            db.run_qry(qry, conn)

            print('Precessing run %s: %s' % (run_id, run_desc))
            current_q = ''
            docs = []
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Blank lines carry no ranking entry.
                    continue
                fields = stripped.split(' ')
                qid = fields[0]
                docid = fields[2]
                if qid != current_q:
                    # A new topic starts: flush the previous one, if any.
                    if current_q != '':
                        store_ranklist(run_id, current_q, docs)
                    docs = []
                    current_q = qid
                docs.append(docid)

            # Flush the last topic. An empty file has no topic at all, and
            # inserting with an empty topic id would be malformed SQL.
            if current_q != '':
                store_ranklist(run_id, current_q, docs)
import sys
import simplejson
import os

# Put the parent directory of ../fw_userstudy on the import path so that the
# ``fw_userstudy`` package (and its settings module) can be imported.
sys.path.append(os.path.abspath('../fw_userstudy/').rsplit('/', 1)[0])
from fw_userstudy import settings
import db_util as db

# Connection parameters come from the 'default' entry of settings.DATABASES.
DB = settings.DATABASES['default']
user = DB['USER']
passwd = DB['PASSWORD']
database = DB['NAME']
host = DB['HOST']

conn = db.db_connect(host, user, passwd, database)

# NOTE(review): UI and RUNS are not defined in this part of the file; they
# are indexed by the keys they iterate over, so presumably dict-like
# {id: description} mappings -- confirm where they are declared.

# Replace the contents of fedtask_ui with the entries of UI.
print('Storing UIs')
qry = 'delete from fedtask_ui'
db.run_qry(qry, conn)
for u_id in UI:
    qry = 'insert into fedtask_ui (ui_id, ui_description) values(%s, "%s")' % (
        u_id, UI[u_id])
    db.run_qry(qry, conn)

# Replace the contents of fedtask_run with the entries of RUNS.
print('Storing RUNS')
qry = 'delete from fedtask_run'
db.run_qry(qry, conn)
for run_id in RUNS:
    # Adjacent literals are concatenated by the parser; this avoids a
    # backslash continuation inside the SQL string.
    qry = ('insert into fedtask_run (run_id, description) '
           'values(%s,"%s")') % (run_id, RUNS[run_id])
    db.run_qry(qry, conn)

# Make tasks
print("Fill task table")
import sys
import simplejson
import os

# Put the parent directory of ../fw_userstudy on the import path so that the
# ``fw_userstudy`` package (and its settings module) can be imported.
sys.path.append(os.path.abspath('../fw_userstudy/').rsplit('/', 1)[0])
from fw_userstudy import settings
import db_util as db

# Connection parameters come from the 'default' entry of settings.DATABASES.
DB = settings.DATABASES['default']
user = DB['USER']
passwd = DB['PASSWORD']
database = DB['NAME']
host = DB['HOST']

conn = db.db_connect(host, user, passwd, database)

# NOTE(review): UI and RUNS are not defined in this part of the file; they
# are indexed by the keys they iterate over, so presumably dict-like
# {id: description} mappings -- confirm where they are declared.

# Replace the contents of fedtask_ui with the entries of UI.
print('Storing UIs')
qry = 'delete from fedtask_ui'
db.run_qry(qry, conn)
for u_id in UI:
    qry = 'insert into fedtask_ui (ui_id, ui_description) values(%s, "%s")' % (
        u_id, UI[u_id])
    db.run_qry(qry, conn)

# Replace the contents of fedtask_run with the entries of RUNS.
print('Storing RUNS')
qry = 'delete from fedtask_run'
db.run_qry(qry, conn)
for run_id in RUNS:
    # Adjacent literals are concatenated by the parser; this avoids a
    # backslash continuation inside the SQL string.
    qry = ('insert into fedtask_run (run_id, description) '
           'values(%s,"%s")') % (run_id, RUNS[run_id])
    db.run_qry(qry, conn)

# Make tasks
print("Fill task table")
def clear_tables():
    """Delete all rows from the fedtask tables, announcing each step.

    The tables are emptied in a fixed order: qrels, document, ranklist,
    topic, site, run. Each delete statement is sent through ``db.run_qry``
    on the module-level ``conn``.
    """
    # (progress message, delete statement) pairs, in execution order.
    steps = (
        ('Clear Qrels table', 'delete from fedtask_qrels'),
        ('Clear Document table', 'delete from fedtask_document'),
        ('Clear ranklist table', 'delete from fedtask_ranklist'),
        ('Clear Topic table', 'delete from fedtask_topic'),
        ('Clear Site table', 'delete from fedtask_site'),
        ('Clear Run table', 'delete from fedtask_run'),
    )
    for message, statement in steps:
        print(message)
        db.run_qry(statement, conn)