Code example #1
def recoverSubtopic():
    # Re-activate a subtopic (state=0) and rebuild the topic's filter_list to match.
    subtopic_id = int(sys.argv[1])
    atn_db = DBHandler('./database/test.db')
    atn_db.cur.execute('UPDATE subtopic SET state=0 WHERE subtopic_id=?', [subtopic_id])
    atn_db.cur.execute(
        '''
        UPDATE filter_list SET state=1 
        WHERE topic_id = (SELECT topic_id FROM subtopic WHERE subtopic_id=?)
        AND docno IN (
        SELECT DISTINCT passage.docno FROM passage
        WHERE passage.subtopic_id=?
        AND passage.state=0) AND state!=1
        ''',[subtopic_id, subtopic_id])
    atn_db.cur.execute(
        '''
        INSERT INTO filter_list (topic_id, docno, state)
        SELECT DISTINCT subtopic.topic_id, passage.docno, 1 FROM subtopic, passage
        WHERE subtopic.subtopic_id = passage.subtopic_id
        AND subtopic.subtopic_id=?
        AND passage.state = 0
        AND passage.docno NOT IN (SELECT docno FROM filter_list WHERE topic_id = subtopic.topic_id);
        ''', [subtopic_id])
    atn_db.commit()
    atn_db.close()
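Every example on this page exercises a project-local `DBHandler` wrapper that is not itself shown. As a rough guide to the interface the sqlite-backed snippets assume (a shared `cur` cursor plus `insert`, `commit`, and `close`), here is a minimal sketch; it is an assumption, not any project's actual code, and the covid-19-api example below wraps MongoDB/Elasticsearch with a different constructor entirely:

import sqlite3

class DBHandler:
    # Minimal sketch only, inferred from the call sites in these examples.
    def __init__(self, path):
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()

    def insert(self, table, values):
        # Positional INSERT; a leading None lets an AUTOINCREMENT key assign itself.
        placeholders = ', '.join('?' * len(values))
        self.cur.execute('INSERT INTO %s VALUES (%s)' % (table, placeholders), values)

    def commit(self):
        self.conn.commit()

    def close(self):
        self.conn.close()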
Code example #2
File: findmore2.py  Project: cslovell/QA_interface
def getDocList1():
    topic_id, subtopic_id = int(sys.argv[1]), int(sys.argv[2])
    atn_db = DBHandler('../../../database/test.db')

    atn_db.cur.execute(
        'SELECT userid, domain_id, topic_name FROM topic WHERE topic_id=?',
        [topic_id])
    userid, domain_id, topic_name = atn_db.cur.fetchone()

    atn_db.cur.execute('SELECT username FROM user WHERE userid=?', [userid])
    username, = atn_db.cur.fetchone()

    atn_db.cur.execute(
        'SELECT subtopic_name FROM subtopic WHERE subtopic_id=?',
        [subtopic_id])
    subtopic_name, = atn_db.cur.fetchone()

    corpus = ['EBOLA', 'POLAR', 'WEAPON'][domain_id - 1]
    r = requests.get(
        nistURL +
        "CMD=UID=%d TID=%d STID=%d.%d CO=%s CMD=MORE_LIKE_THIS DATA=-" %
        (userid, topic_id, topic_id, subtopic_id, corpus),
        verify=False)

    #mylog.log_nist_findmore(username, sys.argv[1], topic_name, sys.argv[2], subtopic_name+"::"+r.url+"::")

    # One record per line in the response body; r.text keeps this working on Python 3.
    docs = r.text.split('\n')
    for doc in docs:
        if doc:
            print(doc.split()[0])
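The loop assumes the NIST service returns one record per line, with the docno as the first whitespace-separated field; blank lines are skipped.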
Code example #3
def userAuthentication(username, password):
    # Look up a user by exact username/password match; returns a row or None.
    user_db = DBHandler(db_path.user)
    user_db.cur.execute(
        'SELECT userid, username, usercookie FROM user WHERE username = ? AND password = ?',
        [username, password])
    result = user_db.cur.fetchone()
    user_db.close()
    return result
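A hypothetical call site (the credentials are made up for illustration), showing how the returned row unpacks:

row = userAuthentication('alice', 's3cret')  # hypothetical credentials
if row is None:
    raise ValueError('authentication failed')
userid, username, usercookie = row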
Code example #4
def dupsummary():
    # Write (topic_id, docno) pairs for duplicate-filtered documents to a CSV file.
    atn_db = DBHandler("./database/test.db")
    atn_db.cur.execute('''
        SELECT filter_list.topic_id, filter_list.docno FROM filter_list, topic
        WHERE filter_list.topic_id=topic.topic_id
        AND topic.state!=2
        AND topic.userid<=6
        AND filter_list.state=2
        ORDER BY filter_list.topic_id
        ''')
    dups = atn_db.cur.fetchall()
    with open('./view/nonrelevant.csv', 'w') as fh:
        for dup in dups:
            fh.write(str(dup[0]) + ',' + dup[1] + '\n')
Code example #5
def cookieAuthentication(env):
    user_db = DBHandler(db_path.user)
    result = None
    if 'HTTP_COOKIE' in env:
        for pair in env['HTTP_COOKIE'].split(';'):
            cookie = pair.strip()
            if cookie.startswith('usercookie'):
                # Split on the first '=' only, since cookie values may themselves contain '='.
                key, value = cookie.split('=', 1)
                user_db.cur.execute(
                    'SELECT userid, username, usercookie FROM user WHERE usercookie = ?',
                    [value])
                result = user_db.cur.fetchone()
                break
    user_db.close()
    return result
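For reference, a hypothetical WSGI environ that would satisfy this lookup (the cookie value is made up):

env = {'HTTP_COOKIE': 'theme=dark; usercookie=3f9a1c'}
result = cookieAuthentication(env)  # (userid, username, usercookie) or None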
Code example #6
File: dupTopic.py  Project: cslovell/QA_interface
def dupTopic():
    # Copy topic 391, with its subtopics, passages, and filter_list, to userid 30.
    userid = 30
    topic_id = 391

    atn_db = DBHandler('./database/test.db')
    atn_db.insert('topic', [
        None, "slums and orphans _ debug", None, userid, 1, 'L', 'L', '', '', 0
    ])
    new_tid = atn_db.cur.lastrowid

    atn_db.cur.execute('SELECT * FROM subtopic WHERE topic_id=? AND state=0',
                       [topic_id])
    subtopics = atn_db.cur.fetchall()
    for subtopic in subtopics:
        atn_db.insert('subtopic',
                      [None, subtopic[1] + ' _ debug', new_tid, 0, 0])
        new_sid = atn_db.cur.lastrowid
        atn_db.cur.execute(
            'SELECT * FROM passage WHERE subtopic_id=? AND state=0',
            [subtopic[0]])
        passages = atn_db.cur.fetchall()
        for passage in passages:
            atn_db.insert(
                'passage',
                [None, passage[1], passage[2], 0, 0, passage[5], new_sid, 0])

    atn_db.cur.execute('SELECT docno, state FROM filter_list WHERE topic_id=?',
                       [topic_id])
    fdocs = atn_db.cur.fetchall()
    for fdoc in fdocs:
        docno, state = fdoc
        atn_db.insert('filter_list', [new_tid, docno, state])

    atn_db.commit()
    atn_db.close()
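The copy works because sqlite assigns row ids at insert time: `cur.lastrowid` is read immediately after each insert, so every copied passage attaches to the freshly created subtopic rather than the original one.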
Code example #7
File: app.py  Project: omukazu/covid-19-api
import os

from flask import Flask
from flask_cors import CORS

from util import load_config
from database import DBHandler
from constants import TOPICS, COUNTRIES

here = os.path.dirname(os.path.abspath(__file__))
cfg = load_config()

app = Flask(__name__)
CORS(app, origins=cfg['access_control_allow_origin'])

mongo = DBHandler(
    host=cfg['database']['host'],
    port=cfg['database']['port'],
    db_name=cfg['database']['db_name'],
    collection_name=cfg['database']['collection_name'],
    es_host=cfg['es']['host'],
    es_port=cfg['es']['port'],
)


class InvalidUsage(Exception):

    status_code = 400

    def __init__(self, message, status_code=None, payload=None):
        Exception.__init__(self)
        self.message = message
        if status_code is not None:
            self.status_code = status_code
        self.payload = payload
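`InvalidUsage` follows the custom-error pattern from the Flask documentation, which pairs the exception class with a registered handler that serializes it to JSON. A minimal sketch of that companion handler, assuming `jsonify` is imported from flask (the project's actual handler may differ):

from flask import jsonify  # assumed import

@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
    body = dict(error.payload or ())
    body['message'] = error.message
    response = jsonify(body)
    response.status_code = error.status_code
    return response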
Code example #8
File: crawler.py  Project: mdrafiur/WebSearchEngine
    def __init__(self, db_conn, url_file):
        """Initialize the crawler with a connection to the database to populate
        and with the file containing the list of seed URLs to begin indexing."""
        self._url_queue = []
        self._doc_id_cache = {}
        self._word_id_cache = {}

        self._url_list = {}
        self._word_list = {}
        self._inverted_index = {}
        self._resolved_inverted_index = {}
        self._link_list = []

        self._db = DBHandler()

        # functions to call when entering and exiting specific tags;
        # unknown tags fall back to the no-op visitor
        self._enter = defaultdict(lambda: self._visit_ignore)
        self._exit = defaultdict(lambda: self._visit_ignore)

        # add a link to our graph, and indexing info to the related page
        self._enter['a'] = self._visit_a

        # record the currently indexed document's title and increase
        # the font size
        def visit_title(*args, **kargs):
            self._visit_title(*args, **kargs)
            self._increase_font_factor(7)(*args, **kargs)

        # increase the font size when we enter these tags
        self._enter['b'] = self._increase_font_factor(2)
        self._enter['strong'] = self._increase_font_factor(2)
        self._enter['i'] = self._increase_font_factor(1)
        self._enter['em'] = self._increase_font_factor(1)
        self._enter['h1'] = self._increase_font_factor(7)
        self._enter['h2'] = self._increase_font_factor(6)
        self._enter['h3'] = self._increase_font_factor(5)
        self._enter['h4'] = self._increase_font_factor(4)
        self._enter['h5'] = self._increase_font_factor(3)
        self._enter['title'] = visit_title

        # decrease the font size when we exit these tags
        self._exit['b'] = self._increase_font_factor(-2)
        self._exit['strong'] = self._increase_font_factor(-2)
        self._exit['i'] = self._increase_font_factor(-1)
        self._exit['em'] = self._increase_font_factor(-1)
        self._exit['h1'] = self._increase_font_factor(-7)
        self._exit['h2'] = self._increase_font_factor(-6)
        self._exit['h3'] = self._increase_font_factor(-5)
        self._exit['h4'] = self._increase_font_factor(-4)
        self._exit['h5'] = self._increase_font_factor(-3)
        self._exit['title'] = self._increase_font_factor(-7)

        # never go in and parse these tags
        self._ignored_tags = set([
            'meta',
            'script',
            'link',
            'embed',
            'iframe',
            'frame',
            'noscript',
            'object',
            'svg',
            'canvas',
            'applet',
            'frameset',
            'textarea',
            'style',
            'area',
            'map',
            'base',
            'basefont',
            'param',
        ])

        # set of words to ignore
        self._ignored_words = set([
            '',
            'the',
            'of',
            'at',
            'on',
            'in',
            'is',
            'it',
            'a',
            'b',
            'c',
            'd',
            'e',
            'f',
            'g',
            'h',
            'i',
            'j',
            'k',
            'l',
            'm',
            'n',
            'o',
            'p',
            'q',
            'r',
            's',
            't',
            'u',
            'v',
            'w',
            'x',
            'y',
            'z',
            'and',
            'or',
        ])

        # TODO remove me in real version
        self._mock_next_doc_id = 1
        self._mock_next_word_id = 1

        # keep track of some info about the page we are currently parsing
        self._curr_depth = 0
        self._curr_url = ""
        self._curr_doc_id = 0
        self._font_size = 0
        self._curr_words = None

        # get all urls into the queue
        try:
            with open(url_file, 'r') as f:
                for line in f:
                    self._url_queue.append((self._fix_url(line.strip(),
                                                          ""), 0))
        except IOError:
            # seed file missing or unreadable: start with an empty queue
            pass
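`_increase_font_factor` is not shown in this excerpt; from the call sites above it must return a visitor closure that the parser later invokes. A plausible reconstruction of the method, hypothetical and matching only the usage here:

def _increase_font_factor(self, factor):
    # Return a visitor that shifts the running font weight by `factor`;
    # enter handlers pass positive factors, exit handlers the negation.
    def visit(*args, **kwargs):
        self._font_size += factor
    return visit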
Code example #9
import json

from flask import Flask
from flask_restful import Api, Resource, reqparse
from flask_restful.utils import cors
from flask_cors import CORS

from modules.ssh import QoSHandler

app = Flask(__name__)
CORS(app)
api = Api(app)

with open('./config.json', 'r') as f:
    config = json.load(f)

db_handler = DBHandler(config)


class UserAPI(Resource):
    def __init__(self):
        self.reqparse = reqparse.RequestParser()

        super(UserAPI, self).__init__()

    @cors.crossdomain(origin='*')
    def get(self, uid):
        users = []
        if uid != 'all':
            users = uid.split(',')

        resultset = db_handler.get_users(users)
Code example #10
File: frontend.py  Project: mdrafiur/WebSearchEngine
def do_search(keywords):

    global user_top_20_database

    # Fetch the current session
    request_session = request.environ['beaker.session']
    # Fetch the users email for their session
    user_email = request_session.get('user_email', 'Anonymous')

    # If every character is a math character, treat the query as a calculator expression.
    if keywords and all(c in math_chars for c in keywords):
        result = None
        try:
            result = eval(
                keywords.replace('^', '**').replace('[',
                                                    '(').replace(']', ')'))
            return result_template(
                user_email, keywords,
                template('''
				<p> {{keywords}} = {{result}} </p>
				''',
                         keywords=keywords,
                         result=result))
        except Exception:
            # not a valid expression after all; fall through to a normal search
            pass

    # A list of all keywords from the search query, lower-cased.
    keyword_list = [word.lower() for word in keywords.split()]
    keywords = keyword_list
    #-----------------------------------------------------------------------
    counted_keyword_list = [(keyword_list.count(x), x)
                            for x in set(keyword_list)]
    # Sort the list in descending order of frequency.
    counted_keyword_list.sort(key=wordCount, reverse=True)

    page = request.query.get('page')
    if user_email != 'anonymous' and page is None:
        # Fetch the top 20 list for that users email
        user_top_20 = user_top_20_database.get(user_email)

        if user_top_20 is not None:
            # Add to the top 20 list and update totals.
            # Iterate through the counted keyword list.
            for keywords1 in counted_keyword_list:
                # If any keywords are already in the top 20 list, merge them into the top 20 list.
                if any(keywords1[1] in element for element in user_top_20):
                    # Iterator to keep track of which keyword in the top 20 list we are at.
                    i = 0
                    # Iterate through the keyword pairs and add the values from the counted_keyword_list into the top20 list.
                    for keywords2 in user_top_20:
                        # If the keywords match.
                        if keywords2[1] == keywords1[1]:
                            # Save the count value of the user_top_20 version.
                            keyword_count = keywords2[0]
                            # Delete the old user_top_20 keyword and count.
                            del user_top_20[i]
                            # Add the keyword with updated count to the front of the top_20 list.
                            user_top_20.insert(
                                0,
                                ((keywords1[0] + keyword_count), keywords1[1]))
                        # Iterate
                        i = i + 1

                # If the word isn't already in the top 20 list add it.
                else:
                    user_top_20.append(keywords1)

            # Organize the top 20 list in descending order by the frequency of a keyword.
            user_top_20.sort(key=wordCount, reverse=True)

            # Update the database of user search history
            user_top_20_database[user_email] = user_top_20

            # If the user_top_20 list is longer than 20 keywords, trim it.
            # while len(user_top_20) > 20:
            #     del user_top_20[-1]

    #------------------------------------------------------------------------

    # If the user entered no keywords, return an empty results page.
    if not keyword_list:
        results_list = []
        return generate_page_results(1, results_list, [], user_email)

    if page is None:
        page = 1
    else:
        page = int(page)

    db = DBHandler()

    # Get the word_ids through a getter in the database
    word_ids = []
    ignored_words = set([
        '',
        'the',
        'of',
        'at',
        'on',
        'in',
        'is',
        'it',
        'a',
        'b',
        'c',
        'd',
        'e',
        'f',
        'g',
        'h',
        'i',
        'j',
        'k',
        'l',
        'm',
        'n',
        'o',
        'p',
        'q',
        'r',
        's',
        't',
        'u',
        'v',
        'w',
        'x',
        'y',
        'z',
        'and',
        'or',
    ])

    for keyword in keyword_list:
        if keyword in ignored_words:
            continue
        word_ids.append(db.get_word_id(keyword))

    # Get the doc_ids from the word_ids in the database
    list_of_doc_id_lists = []
    for word_id in word_ids:
        if word_id is None:
            list_of_doc_id_lists.append([])
        else:
            list_of_doc_id_lists.append(db.get_doc_ids(word_id))

    # Find lists of doc_ids that intersect with each other, this will give us doc ids that contain both keywords
    intersecting_doc_ids = find_intersections(list_of_doc_id_lists)

    # Get the url_ranks from pagerank in the database
    ranks = db.get_pageranks(intersecting_doc_ids)

    # Zip the doc_ids with the corresponding url_ranks to make ranked_doc_ids
    ranked_doc_ids = zip(ranks, intersecting_doc_ids)

    # Sort the ranked_doc_ids to make sorted_doc_ids and get the sorted_urls from the database
    ranked_sorted_doc_ids = sorted(ranked_doc_ids, key=itemgetter(0))
    results_list = [row[0] for row in
                    db.get_urls([pair[1] for pair in ranked_sorted_doc_ids])]
    return generate_page_results(page, results_list, keyword_list, user_email)
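`find_intersections` is defined elsewhere in the project; from its use above it must reduce the per-keyword doc-id lists to the ids common to all of them. A hypothetical equivalent:

def find_intersections(list_of_doc_id_lists):
    # Intersect every keyword's posting list; no keywords means no results.
    if not list_of_doc_id_lists:
        return []
    common = set(list_of_doc_id_lists[0])
    for doc_ids in list_of_doc_id_lists[1:]:
        common &= set(doc_ids)
    return list(common)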
Code example #11
File: test_database.py  Project: skriverthefirst/BOS
    def setUp(self):
        self.db = DBHandler()