Beispiel #1
0
def fill_site_table(sitefile):
    """Load site definitions from a tab-separated file into fedtask_site.

    Every non-blank line is split on tabs and its first four fields are
    inserted, in order, as site_id, site_name, site_url and category.  One
    insert statement per line is sent through ``db.run_qry`` using the
    module-level ``conn``.

    Args:
        sitefile: path of the tab-separated site file.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
    """
    # NOTE(review): field values are interpolated into the SQL text without
    # escaping, so a value containing a double quote breaks the statement.
    # NOTE(review): the "on duplicate key update" clause assigns each column
    # to itself, so an existing row is presumably left untouched - confirm
    # that this is the intended behaviour.
    insert_tpl = (
        'insert into fedtask_site (site_id, site_name, site_url, category) '
        'values("%s", "%s", "%s", "%s") '
        'on duplicate key update site_name=site_name, site_url=site_url, category = category'
    )
    # The context manager closes the file even when a query raises.
    with open(sitefile) as f:
        print('Fill the Site table')
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. at the end of the file) holds no record.
                continue
            fields = line.split('\t')
            qry = insert_tpl % (fields[0], fields[1], fields[2], fields[3])
            db.run_qry(qry, conn)
Beispiel #2
0
def fill_site_table(sitefile):
    """Insert one fedtask_site row per line of a tab-separated site file.

    The first four tab-separated fields of each non-blank line are used as
    site_id, site_name, site_url and category, in that order.  Statements
    are executed with ``db.run_qry`` on the module-level ``conn``.

    Args:
        sitefile: path of the file to read.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
    """
    with open(sitefile) as f:  # closed automatically, also on errors
        print('Fill the Site table')
        for raw_line in f:
            strs = raw_line.strip().split('\t')
            if strs == ['']:
                # Splitting an empty (blank) line yields a single empty
                # field; there is nothing to insert for it.
                continue
            # NOTE(review): values are placed in the SQL text unescaped; a
            # double quote inside any field produces an invalid statement.
            qry = 'insert into fedtask_site (site_id, site_name, site_url, category) values("%s", "%s", "%s", "%s") on duplicate key update site_name=site_name, site_url=site_url, category = category' % (
                strs[0], strs[1], strs[2], strs[3])
            db.run_qry(qry, conn)
Beispiel #3
0
def fill_qrels_table(qrels):
    """Load relevance judgements from a space-separated file into fedtask_qrels.

    Each non-blank line is split on single spaces.  Field 0 is stored as
    topic_id, field 2 as doc_id and field 3 as relevance; field 1 is not
    used.  Lines whose last field is the integer 0 are skipped, so only
    documents judged relevant are stored.

    Args:
        qrels: path of the qrels file.

    Raises:
        ValueError: if the last field of a non-blank line is not an integer.
        IndexError: if a stored line has fewer than four fields.
    """
    # The context manager closes the file even when a query raises.
    with open(qrels) as f:
        print('Fill the Qrels table')
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no judgement; int('') would raise.
                continue
            fields = line.split(' ')
            # Only store relevant docs
            if int(fields[-1]) == 0:
                continue
            qry = 'insert into fedtask_qrels (topic_id, doc_id, relevance) values (%s, "%s", %s)' % (
                fields[0], fields[2], fields[3])
            db.run_qry(qry, conn)
Beispiel #4
0
def fill_topic_table(topicfile):
    """Load topics from a colon-separated file into fedtask_topic.

    Each non-blank line is split on ':'.  The first field is inserted as
    topic_id and the second as topic_text, with double quotes in the text
    backslash-escaped for the SQL literal.

    Args:
        topicfile: path of the topic file.

    Returns:
        A list with one entry per non-blank line; each entry is the complete
        list of ':'-separated fields of that line.

    Raises:
        IndexError: if a non-blank line contains no ':' separator.
    """
    topics = []
    # The context manager closes the file even when a query raises.
    with open(topicfile) as f:
        print('Fill the Topic table')
        for line in f:
            line = line.strip()
            if not line:
                # A blank line has no topic id or text to store.
                continue
            fields = line.split(':')
            # Only the second field is stored as text; anything after a
            # further ':' is kept in the returned fields but not inserted.
            topic_text = fields[1].replace('"', '\\"')
            qry = 'insert into fedtask_topic (topic_id, topic_text) values(%s, "%s") on duplicate key update topic_text=topic_text;' % (
                fields[0], topic_text)
            db.run_qry(qry, conn)
            topics.append(fields)
    return topics
Beispiel #5
0
def fill_qrels_table(qrels):
    """Insert the relevant judgements of a qrels file into fedtask_qrels.

    Lines are split on single spaces.  A line is stored only when its last
    field, read as an integer, is non-zero; the stored columns are
    topic_id (field 0), doc_id (field 2) and relevance (field 3).

    Args:
        qrels: path of the space-separated qrels file.

    Raises:
        ValueError: if the last field of a non-blank line is not an integer.
        IndexError: if a stored line has fewer than four fields.
    """
    insert_tpl = 'insert into fedtask_qrels (topic_id, doc_id, relevance) values (%s, "%s", %s)'
    with open(qrels) as f:  # closed automatically, also on errors
        print('Fill the Qrels table')
        for raw_line in f:
            strs = raw_line.strip().split(' ')
            if strs == ['']:
                # Blank line: nothing to judge, and int('') would raise.
                continue
            # Only store relevant docs
            if int(strs[-1]) != 0:
                db.run_qry(insert_tpl % (strs[0], strs[2], strs[3]), conn)
Beispiel #6
0
def fill_topic_table(topicfile):
    """Insert one fedtask_topic row per line of a ':'-separated topic file.

    The text before the first ':' is used as topic_id and the text between
    the first and second ':' (or the end of the line) as topic_text.  Double
    quotes in the text are escaped with a backslash before being embedded
    in the SQL statement.

    Args:
        topicfile: path of the file to read.

    Returns:
        The parsed lines, as a list of field lists (one per non-blank line).

    Raises:
        IndexError: if a non-blank line contains no ':' separator.
    """
    insert_tpl = 'insert into fedtask_topic (topic_id, topic_text) values(%s, "%s") on duplicate key update topic_text=topic_text;'
    topics = []
    with open(topicfile) as f:  # closed automatically, also on errors
        print('Fill the Topic table')
        for raw_line in f:
            strs = raw_line.strip().split(':')
            if strs == ['']:
                # Splitting a blank line yields one empty field; skip it
                # instead of failing on the missing topic text.
                continue
            db.run_qry(insert_tpl % (strs[0], strs[1].replace('"', '\\"')), conn)
            topics.append(strs)
    return topics
Beispiel #7
0
def fill_doc_table(snippets_loc, docs_loc):
    """Populate fedtask_document from the per-site, per-topic snippet files.

    For every combination of a site_id from fedtask_site and a topic_id
    from fedtask_topic, the file ``<snippets_loc>/<site_id>/<topic_id>.xml``
    is parsed and one row is inserted for each child of the third element
    below ``search_results``.

    Args:
        snippets_loc: root directory of the snippet XML files.
        docs_loc: accepted for interface compatibility; not used here.
    """
    def sql_text(element, drop_backslashes=False):
        """Return the element's text, quote-escaped and UTF-8 encoded ('' if absent)."""
        # Compare with None explicitly: an Element without children is falsy.
        text = None if element is None else element.text
        if text is None:
            return ''
        if drop_backslashes:
            text = text.replace('\\', '')
        return text.replace('"', '\\"').encode('utf-8')

    print('Process snippets and documents')
    # Get all sites
    sites = db.run_qry_with_results(
        'select distinct site_id from fedtask_site', conn)
    # Get all topics
    topics = db.run_qry_with_results(
        'select distinct topic_id from fedtask_topic', conn)
    insert_tpl = 'insert into fedtask_document (doc_id, site_id, title, url, html_location, summary) values ("%s", "%s", "%s", "%s", "%s", "%s")'

    # Get snippets of (site, topics)
    for site_row in sites:
        site_id = site_row[0]
        for topic_row in topics:
            snippet_file = '%s/%s/%s.xml' % (
                snippets_loc, site_id, topic_row[0])
            root = et.parse(snippet_file).getroot()
            # Snippet element: third child of <search_results>
            snippets = root.find('search_results')[2]
            # Get info of each doc: doc_id, title, summary, url
            for sn in snippets:
                docid = sn.get('id')
                link = sn.find('link')

                # Page link and HTML location both come from <link>; a
                # missing <link> is treated like a missing title/summary
                # and yields empty values instead of an AttributeError.
                sn_url = ''
                doc_loc = ''
                if link is not None:
                    if link.text is not None:
                        # The URL is quote-escaped but, unlike title and
                        # summary, not re-encoded.
                        sn_url = link.text.replace('"', '\\"')
                    doc_loc = link.get('cache', '')

                sn_title = sql_text(sn.find('title'))
                # Backslashes are removed from the summary before escaping.
                sn_summary = sql_text(sn.find('description'),
                                      drop_backslashes=True)

                qry = insert_tpl % (
                    docid, site_id, sn_title, sn_url, doc_loc, sn_summary)
                db.run_qry(qry, conn)
Beispiel #8
0
def fill_doc_table(snippets_loc, docs_loc):
    """Insert the documents listed in the snippet XML files into fedtask_document.

    The site ids are read from fedtask_site and the topic ids from
    fedtask_topic.  For each (site, topic) pair the file
    ``<snippets_loc>/<site_id>/<topic_id>.xml`` is parsed; every child of
    the third element under ``search_results`` becomes one inserted row
    with its id, title, link text, link ``cache`` attribute and description.

    Args:
        snippets_loc: directory that contains one sub-directory per site.
        docs_loc: not used by this function.
    """
    print('Process snippets and documents')
    # Get all sites
    qry = 'select distinct site_id from fedtask_site'
    sites = db.run_qry_with_results(qry, conn)
    # Get all topics
    qry = 'select distinct topic_id from fedtask_topic'
    topics = db.run_qry_with_results(qry, conn)
    # Get snippets of (site, topics)
    for s in sites:
        site_id = s[0]
        for t in topics:
            snippet_file = '%s/%s/%s.xml' % (snippets_loc, s[0], t[0])
            tree = et.parse(snippet_file)
            root = tree.getroot()
            # Snippet element
            snippets = root.find('search_results')[2]
            # Get info of each doc: doc_id, title, summary, url
            for sn in snippets:
                docid = sn.get('id')
                url = sn.find('link')
                title = sn.find('title')
                summary = sn.find('description')

                # Elements are tested with "is None" throughout: an element
                # without children evaluates as false, so a plain truth
                # test would wrongly treat it as missing.

                # Page link and HTML location; a missing <link> element is
                # tolerated the same way a missing title or summary is.
                sn_url = ''
                doc_loc = ''
                if url is not None:
                    s_url = url.text
                    if s_url is not None:
                        sn_url = s_url.replace('"', '\\"')
                    cache = url.get('cache')
                    if cache is not None:
                        doc_loc = cache

                # title
                sn_title = ''
                if title is not None and title.text is not None:
                    sn_title = title.text.replace('"', '\\"').encode('utf-8')

                # summary: backslashes are dropped before quotes are escaped
                sn_summary = ''
                if summary is not None and summary.text is not None:
                    s_summary = summary.text.replace('\\', '')
                    sn_summary = s_summary.replace('"', '\\"').encode('utf-8')

                qry = 'insert into fedtask_document (doc_id, site_id, title, url, html_location, summary) values ("%s", "%s", "%s", "%s", "%s", "%s")' % (
                    docid, site_id, sn_title, sn_url, doc_loc, sn_summary)
                db.run_qry(qry, conn)
Beispiel #9
0
def fill_run_table(rundir):
    """Register every file in *rundir* as a run and store its ranked lists.

    For each directory entry a new run_id is taken as ``max(run_id) + 1``
    from fedtask_run (1 when that query yields None) and a fedtask_run row
    is inserted with the file name as description.  The file is then read
    line by line: lines are split on single spaces, field 0 is the topic id
    and field 2 the document id.  Consecutive lines with the same topic id
    form one ranked list, stored as a JSON array in fedtask_ranklist.

    Args:
        rundir: directory containing the run files.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    def store_ranklist(run_id, topic_id, doc_ids):
        """Insert one (run, topic) ranked list as a JSON-encoded array."""
        # NOTE(review): the JSON text is embedded in a single-quoted SQL
        # literal unescaped; a document id containing "'" would break it.
        qry = "insert into fedtask_ranklist (run_id, topic_id, ranklist) values(%s, %s, '%s')" % (
            run_id, topic_id, simplejson.dumps(doc_ids))
        db.run_qry(qry, conn)

    for runfile in os.listdir(rundir):
        # The context manager closes the run file even if a query raises.
        with open('%s/%s' % (rundir, runfile)) as f:
            # Insert to run table
            res = db.run_qry_with_results(
                'select max(run_id) from fedtask_run', conn)
            if res[0][0] is None:
                run_id = 1
            else:
                run_id = res[0][0] + 1

            run_desc = runfile
            qry = 'insert into fedtask_run (run_id, description) values (%s, "%s")' % (
                run_id, run_desc)
            db.run_qry(qry, conn)

            print('Precessing run %s: %s' % (run_id, run_desc))
            current_q = ''
            docs = []
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines hold no ranking entry.
                    continue
                strs = line.split(' ')
                qid = strs[0]
                docid = strs[2]
                if qid != current_q:
                    # A new topic starts: flush the list collected so far.
                    if current_q != '':
                        store_ranklist(run_id, current_q, docs)
                        docs = []
                    current_q = qid
                docs.append(docid)
            # Flush the last topic.  Skipped for a file without entries,
            # which would otherwise yield an insert with an empty topic id.
            if current_q != '':
                store_ranklist(run_id, current_q, docs)
Beispiel #10
0
def fill_run_table(rundir):
    """Store each run file found in *rundir* as a run plus its ranked lists.

    Per file: the next run_id is ``max(run_id) + 1`` over fedtask_run (or 1
    when the query returns None), a fedtask_run row is inserted with the
    file name as description, and the file's lines - split on single
    spaces, topic id in field 0 and document id in field 2 - are grouped by
    consecutive topic id.  Each group is stored in fedtask_ranklist as a
    JSON array of document ids.

    Args:
        rundir: directory whose entries are read as run files.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    # Imported locally so the block does not depend on file-level imports.
    from itertools import groupby

    ranklist_tpl = "insert into fedtask_ranklist (run_id, topic_id, ranklist) values(%s, %s, '%s')"

    for runfile in os.listdir(rundir):
        with open('%s/%s' % (rundir, runfile)) as f:  # always closed
            # Insert to run table
            res = db.run_qry_with_results(
                'select max(run_id) from fedtask_run', conn)
            last_id = res[0][0]
            run_id = 1 if last_id is None else last_id + 1

            run_desc = runfile
            db.run_qry(
                'insert into fedtask_run (run_id, description) values (%s, "%s")' % (
                    run_id, run_desc),
                conn)

            print('Precessing run %s: %s' % (run_id, run_desc))

            # Blank lines are dropped; every remaining line becomes its
            # list of space-separated fields.
            rows = (line.strip().split(' ') for line in f if line.strip())
            # groupby only merges *consecutive* rows with an equal key,
            # which is how a change of topic id delimits a ranked list.
            # An empty file yields no groups, hence no ranklist insert.
            for topic_id, topic_rows in groupby(rows, key=lambda r: r[0]):
                docs = [r[2] for r in topic_rows]
                # NOTE(review): the JSON text goes into a single-quoted SQL
                # literal unescaped; a "'" in a document id would break it.
                db.run_qry(
                    ranklist_tpl % (run_id, topic_id, simplejson.dumps(docs)),
                    conn)
Beispiel #11
0
import sys, simplejson
import os
sys.path.append(os.path.abspath('../fw_userstudy/').rsplit('/', 1)[0])
from fw_userstudy import settings
import db_util as db

# Connection parameters are taken from the 'default' entry of the project
# settings' DATABASES mapping.
DB = settings.DATABASES['default']
user = DB['USER']
passwd = DB['PASSWORD']
database = DB['NAME']
host = DB['HOST']
# Module-level connection handle passed to every db.run_qry call below.
conn = db.db_connect(host, user, passwd, database)

# Replace the contents of fedtask_ui: delete all rows, then insert one row
# per key of UI with UI[key] as its description.
# NOTE(review): UI is not defined in this excerpt - presumably a mapping of
# ui_id -> description defined elsewhere; verify.
print 'Storing UIs'
qry = 'delete from fedtask_ui'
db.run_qry(qry, conn)
for u_id in UI:
    qry = 'insert into fedtask_ui (ui_id, ui_description) values(%s, "%s")' % (
        u_id, UI[u_id])
    db.run_qry(qry, conn)

# Replace the contents of fedtask_run in the same way, from RUNS.
# NOTE(review): RUNS is not defined in this excerpt - presumably a mapping of
# run_id -> description; verify.
print 'Storing RUNS'
qry = 'delete from fedtask_run'
db.run_qry(qry, conn)
for run_id in RUNS:
    # The backslash continues the string literal on the next line, so that
    # line's leading whitespace becomes part of the SQL text.
    qry = 'insert into fedtask_run (run_id, description)\
			values(%s,"%s")' % (run_id, RUNS[run_id])
    db.run_qry(qry, conn)

# Make tasks
print "Fill task table"
Beispiel #12
0
import sys,simplejson
import os
sys.path.append(os.path.abspath('../fw_userstudy/').rsplit('/', 1)[0])
from fw_userstudy import settings
import db_util as db

# Read the connection parameters from the 'default' database entry of the
# project settings.
DB = settings.DATABASES['default']
user = DB['USER']
passwd = DB['PASSWORD']
database = DB['NAME']
host = DB['HOST']
# Connection handle shared by all db.run_qry calls in this script.
conn = db.db_connect(host, user, passwd, database)

# Empty fedtask_ui and refill it with one row per key of UI, using UI[key]
# as the description.
# NOTE(review): UI is not defined in this excerpt - presumably a mapping of
# ui_id -> description defined elsewhere; verify.
print 'Storing UIs'
qry = 'delete from fedtask_ui'
db.run_qry(qry, conn)
for u_id in UI:
	qry = 'insert into fedtask_ui (ui_id, ui_description) values(%s, "%s")'%(u_id, UI[u_id])
	db.run_qry(qry, conn)

# Empty fedtask_run and refill it from RUNS in the same way.
# NOTE(review): RUNS is not defined in this excerpt - presumably a mapping of
# run_id -> description; verify.
print 'Storing RUNS'
qry = 'delete from fedtask_run'
db.run_qry(qry, conn)
for run_id in RUNS:
	# The trailing backslash continues the string literal, so the leading
	# whitespace of the following line ends up inside the SQL text.
	qry = 'insert into fedtask_run (run_id, description)\
			values(%s,"%s")'%(run_id, RUNS[run_id])
	db.run_qry(qry, conn)


# Make tasks
print "Fill task table"
Beispiel #13
0
def clear_tables():
    """Delete all rows from the six fedtask tables, announcing each one.

    The tables are emptied in a fixed order - qrels, document, ranklist,
    topic, site, run - each with a ``delete from <table>`` statement sent
    through ``db.run_qry`` on the module-level ``conn``.
    """
    # (label used in the progress message, table name), in deletion order.
    targets = (
        ('Qrels', 'fedtask_qrels'),
        ('Document', 'fedtask_document'),
        ('ranklist', 'fedtask_ranklist'),
        ('Topic', 'fedtask_topic'),
        ('Site', 'fedtask_site'),
        ('Run', 'fedtask_run'),
    )
    for label, table in targets:
        print('Clear %s table' % label)
        db.run_qry('delete from %s' % table, conn)
Beispiel #14
0
def clear_tables():
    """Empty the fedtask tables one after another.

    For each table a progress message is printed and a ``delete from``
    statement is executed via ``db.run_qry`` with the module-level ``conn``.
    Order: qrels, document, ranklist, topic, site, run.
    """
    # Each step pairs the message to print with the statement to execute.
    steps = [
        ('Clear Qrels table', 'delete from fedtask_qrels'),
        ('Clear Document table', 'delete from fedtask_document'),
        ('Clear ranklist table', 'delete from fedtask_ranklist'),
        ('Clear Topic table', 'delete from fedtask_topic'),
        ('Clear Site table', 'delete from fedtask_site'),
        ('Clear Run table', 'delete from fedtask_run'),
    ]
    for message, statement in steps:
        print(message)
        db.run_qry(statement, conn)