Exemple #1
0
def get_all_docs():
    """Return every row of ``doc_tbl`` together with the text of its file.

    Each row becomes a dict with the keys ``docid``, ``docname``, ``author``,
    ``path``, ``year``, ``intro`` and ``hidden`` (row columns 0-6, in that
    order) plus ``content``, the text read from the file at ``path``.

    Returns:
        ``utils.create_res_obj(data)`` with the list of dicts on success, or
        ``utils.create_res_obj({...}, success=False)`` carrying the formatted
        traceback and the exception args when anything fails.
    """
    cursor = None
    try:
        data = []
        cursor = db.cnx.cursor()
        cursor.execute("SELECT * FROM doc_tbl")
        for row in cursor:
            # Read through a context manager so the file handle is released
            # immediately instead of lingering until garbage collection.
            with open(row[3], 'r') as doc_file:
                content = doc_file.read()
            data.append({
                'docid': row[0],
                'docname': row[1],
                'author': row[2],
                'path': row[3],
                'year': row[4],
                'intro': row[5],
                'hidden': row[6],
                'content': content
            })
        cursor.close()
        cursor = None
        return utils.create_res_obj(data)
    except Exception as e:
        # Build the error response first so the traceback describes the
        # original failure, not anything raised while cleaning up.
        response = utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            },
            success=False)
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                # Best-effort cleanup: closing may itself fail (for example
                # when rows are still unread); the error response above must
                # still reach the caller.
                pass
        return response
Exemple #2
0
def restore_doc(docname):
    """Un-hide the document called *docname*.

    Looks up the document's ``docid``; if ``hidden_files`` holds at least one
    row for it, those rows are deleted, ``doc_tbl.hidden`` is reset to 0 and
    the change is committed.

    Args:
        docname: value matched against ``doc_tbl.docname``.

    Returns:
        ``utils.create_res_obj([{'file_restored': 'True'}])`` when the
        document was hidden and has been restored,
        ``utils.create_res_obj([{'file_restored': 'False'}])`` when there was
        nothing to restore, or an error object (``success=False``) carrying
        the traceback when anything fails (including an unknown *docname*).
    """
    cursor = None
    try:
        cursor = db.cnx.cursor()

        cursor.execute("SELECT docid FROM doc_tbl WHERE docname=%s",
                       (docname, ))
        docid = cursor.fetchone()[0]

        cursor.execute("SELECT * FROM hidden_files WHERE docid = %s",
                       (docid, ))
        # Decide on the rows actually fetched.  ``cursor.rowcount`` is not a
        # reliable test here: on unbuffered cursors it is -1 before fetching
        # regardless of how many rows matched.  Fetching everything also
        # leaves the cursor free for the statements below.
        hidden_rows = cursor.fetchall()
        if hidden_rows:
            cursor.execute("DELETE FROM hidden_files WHERE docid = %s",
                           (docid, ))
            cursor.execute("UPDATE doc_tbl SET hidden = 0 WHERE docid = %s ",
                           (docid, ))
            db.cnx.commit()
            data = [{'file_restored': 'True'}]
        else:
            data = [{'file_restored': 'False'}]
        cursor.close()
        cursor = None
        return utils.create_res_obj(data)
    except Exception as e:
        # Build the error response first so the traceback describes the
        # original failure, not anything raised while cleaning up.
        response = utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            },
            success=False)
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                # Best-effort cleanup; the error response must still be
                # returned to the caller.
                pass
        return response
Exemple #3
0
def init():
    """Initialise the database via ``api_handler.db.init_db`` and report it.

    Returns:
        A success object with ``{'status': 'init success'}``, or an error
        object (``success=False``) holding the traceback and exception args.
    """
    try:
        api_handler.db.init_db()
        outcome = {'status': 'init success'}
        return utils.create_res_obj(outcome)
    except Exception as exc:
        failure = {
            'traceback': traceback.format_exc(),
            'msg': "{}".format(exc.args),
        }
        return utils.create_res_obj(failure, success=False)
Exemple #4
0
def delete_doc(docname):
    """Delete the document called *docname* and its index bookkeeping.

    Steps, in order:
      1. resolve the document's ``path`` and ``docid`` from ``doc_tbl``;
      2. for every ``postid`` linked to the document in ``postfiletable``,
         delete the ``indextable`` row when its ``hit`` is 1, otherwise
         decrement ``hit`` by one;
      3. delete the document's rows from ``postfiletable``, ``doc_tbl`` and
         ``hidden_files``;
      4. remove the file at ``path`` if it exists.

    Args:
        docname: value matched against ``doc_tbl.docname``.

    Returns:
        ``utils.create_res_obj((docid, ))`` on success, or an error object
        (``success=False``) with the traceback when anything fails.
    """
    try:
        doc_path = db.run_query(
            "SELECT path FROM doc_tbl WHERE docname=%s", (docname, ),
            one=True)[0]
        docid = db.run_query(
            "SELECT docid FROM doc_tbl WHERE path=%s", (doc_path, ),
            one=True)[0]

        # Materialise the post ids before issuing any further queries.
        postid_list = [
            row[0] for row in db.run_query(
                "SELECT postid FROM postfiletable WHERE docid=%s", (docid, ))
        ]

        for postid in postid_list:
            hit = db.run_query(
                "SELECT hit FROM indextable WHERE postid=%s", (postid, ),
                one=True)[0]
            if hit == 1:
                db.run_query("DELETE FROM indextable WHERE postid=%s",
                             (postid, ),
                             commit=True)
            else:
                # Bind the new counter as a parameter rather than formatting
                # it into the SQL text.
                db.run_query(
                    "UPDATE `indextable` SET `hit` = %s "
                    "WHERE `indextable`.`postid` = %s", (hit - 1, postid),
                    commit=True)

        doc_key = (docid, )
        db.run_query("DELETE FROM postfiletable WHERE docid=%s",
                     doc_key,
                     commit=True)
        db.run_query("DELETE FROM doc_tbl WHERE docid=%s",
                     doc_key,
                     commit=True)
        db.run_query("DELETE FROM hidden_files WHERE docid=%s",
                     doc_key,
                     commit=True)

        if os.path.exists(doc_path):
            os.remove(doc_path)
        # The payload has always been the last query's parameter tuple,
        # i.e. ``(docid, )``; kept so callers see the same response.
        return utils.create_res_obj(doc_key)
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            },
            success=False)
Exemple #5
0
def getfile(filename):
    """Return ``api_handler.getfile(filename)`` passed through ``jsonify``.

    On any exception an error object (``success=False``) holding the
    traceback and the exception args is returned instead.
    """
    try:
        return jsonify(api_handler.getfile(filename))
    except Exception as exc:
        failure = {
            'traceback': traceback.format_exc(),
            'msg': "{}".format(exc.args),
        }
        return utils.create_res_obj(failure, success=False)
Exemple #6
0
def getfile(docname):
    """Return the ``doc_tbl`` row for *docname* plus the text of its file.

    Args:
        docname: value matched against ``doc_tbl.docname``.

    Returns:
        ``utils.create_res_obj`` of a dict with ``docid``, ``docname``,
        ``author``, ``path``, ``year``, ``intro`` and ``hidden`` (row columns
        0-6) plus ``content`` read from ``path``; or an error object
        (``success=False``) with the traceback when anything fails.
    """
    query = ("SELECT * FROM doc_tbl WHERE docname=%s")
    data = (docname, )
    try:
        row = db.run_query(query, data, one=True)
        db.disconnect()
        # Use a context manager so the file handle is closed right away
        # rather than left for the garbage collector.
        with open(row[3], 'r') as doc_file:
            content = doc_file.read()
        return utils.create_res_obj({
            'docid': row[0],
            'docname': row[1],
            'author': row[2],
            'path': row[3],
            'year': row[4],
            'intro': row[5],
            'hidden': row[6],
            'content': content
        })

    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            },
            success=False)
Exemple #7
0
 def drop_all_tables(self):
     """Issue ``drop table`` for every table listed by ``get_all_tables``.

     Stops at the first failure.

     Returns:
         ``None`` when every statement succeeded, otherwise an error object
         (``success=False``) with the traceback, the exception args and the
         name of the table whose drop failed.
     """
     cursor = self.cnx.cursor()
     try:
         for table_row in self.get_all_tables():
             # Each entry is joined into a single string; named so that it
             # does not shadow the builtin ``str``.
             table = ''.join(table_row)
             try:
                 cursor.execute("drop table " + table)
             except Exception as e:
                 return utils.create_res_obj(
                     {
                         'traceback': traceback.format_exc(),
                         'msg': "{}".format(e.args),
                         'text': "DROP TABLE failed WITH TABLE {} ".format(table)
                     },
                     success=False)
     finally:
         # Release the cursor on every exit path.
         cursor.close()
Exemple #8
0
def res_query(query):
    """Evaluate a boolean search query and return the matching documents.

    The query may contain the upper-case operators ``AND`` / ``OR`` / ``NOT``,
    parentheses and double-quoted phrases.  It is rewritten step by step into
    a Python set expression (``&``, ``|``, ``-`` over one set per term), which
    is compiled and evaluated; the resulting doc ids are ranked with ``_rank``
    and returned best-ranked first.

    Args:
        query: the raw search string.

    Returns:
        ``utils.create_res_obj(data)`` where ``data`` holds one
        ``get_data_by_docid`` result per matching doc id, or an error object
        (``success=False``) with the traceback when anything fails.
    """
    class MyTransformer(ast.NodeTransformer):
        """Swap every string node for a set display of that term's entries."""

        # NOTE(review): ``visit_Str`` / ``node.s`` are the pre-3.8 spelling
        # for string constants and are deprecated in newer Python versions --
        # confirm against the interpreter this runs on.
        def visit_Str(self, node):
            # ``words_dict`` is read from the enclosing function at call time;
            # by then its values are the ``create_ast_list`` results.
            return ast.Set(words_dict[node.s])

    try:

        data = []
        hidden_files = list_hidden_files()

        operator = ['OR', 'AND', 'NOT']
        data = []

        # Both literals spell the same single-quote character, so this
        # replace leaves the query unchanged.
        query = query.replace("\'", "'")

        # check for more than one operator in a row
        # All 3 x 3 ordered operator pairs are tested; the counter reaches 9
        # only when ``is_in_order`` is falsy for every pair.
        # (presumably ``is_in_order`` reports `first` directly followed by
        # `second` -- TODO confirm against its definition)
        splited_query = query.split()
        duplicate_op_counter = 0
        for first in operator:
            for second in operator:
                if not is_in_order(first, second, splited_query):
                    duplicate_op_counter += 1
        if duplicate_op_counter < 9:
            # bad query detect
            # Every whole-word operator is blanked out of the query.
            for op in operator:
                query = re.sub(r'\b' + op + r'\b', ' ', query)

        # Split each multi-word "quoted phrase" into individually quoted
        # words ("a b" -> "a" "b"); only the first occurrence of each phrase
        # is replaced.
        tmp_quote = ''
        quotes_string = re.findall(r'"([^"]*)"', query)
        if quotes_string:
            for text in quotes_string:
                if len(text.split()) > 1:
                    words_in_quotes = text.split()
                    for item in words_in_quotes:
                        tmp_quote += ' \"' + item + '\" '
                    query = query.replace('\"{}\"'.format(text), tmp_quote, 1)
                tmp_quote = ''

        # check for terms only without operators
        # NOTE(review): ``tmp_query`` is reassigned by the ``re.split`` below
        # before it is read again, so the value built here looks unused.
        tmp_query = re.sub(' +', ' ', query)
        if tmp_query.endswith(' AND') or tmp_query.endswith(
                ' OR') or tmp_query.endswith(' NOT'):
            tmp_query = query.replace('AND',
                                      '').replace('OR', '').replace('NOT', '')

        # Nothing left once parentheses, operators and quotes are removed:
        # substitute the literal word 'error', which then goes through the
        # rest of the pipeline like any other term.
        if not query.replace(')', '').replace('(', '').replace(
                'AND', '').replace('OR', '').replace('NOT', '').replace(
                    '"', '').strip():
            query = 'error'

        # No operator found in the query: OR all whitespace-separated terms.
        # (presumably ``findWholeWord(op)`` returns a whole-word matcher --
        # TODO confirm)
        if not ((findWholeWord(operator[0])(query)) or
                (findWholeWord(operator[1])(query)) or
                (findWholeWord(operator[2])(query))):
            query = query.strip()
            query = ' OR '.join(query.split())

        # The pattern has no word boundaries, so it also splits on operator
        # letters that occur inside longer upper-case words.
        tmp_query = re.split(r'(OR|AND|NOT)',
                             query)  # split to text OP text OP text

        # Rebuild the query: operators are copied through, and a segment
        # holding several words becomes a parenthesised OR-group.
        new_query = ''
        for text in tmp_query:
            if text in operator:
                new_query += text + ' '
                continue
            if len(text.split()) > 1:
                # if len(text.replace('(', '').replace(')', '').split()) > 1:
                new_query += '('
                for word in text.split():
                    new_query += word + ' OR '
                # Drop the trailing 'OR ' left by the last word.
                new_query = new_query[:-3]
                new_query += ') '
            else:
                new_query += text

        # remove stop list terms
        # Words are taken from ``new_query`` while the edits are applied to
        # ``query``.
        query = new_query
        quotes_words_indexs = []
        for word in new_query.split():
            for term in conf.STOP_LIST:
                tmp_word = word.replace(')', '').replace('(',
                                                         '').replace('"', '')
                if term == tmp_word:
                    if word[0] == '\"' and word[-1] == '\"':
                        # Quoted stop word: protect the first occurrence that
                        # is wrapped in quotes by turning both quotes into
                        # '$' markers (string length is unchanged).
                        quotes_words_indexs = [
                            (m.start(0), m.end(0))
                            for m in re.finditer(r'\b{}\b'.format(term), query)
                        ]
                        if quotes_words_indexs:
                            for tup in quotes_words_indexs:
                                if query[tup[0] -
                                         1] == '\"' and query[tup[1]] == '\"':
                                    query = query[:tup[0] -
                                                  1] + '$' + query[tup[0]:]
                                    query = query[:tup[1]] + '$' + query[
                                        tup[1] + 1:]
                                    break
                                else:
                                    continue
                    else:
                        # Unquoted stop word: replace the first occurrence
                        # that is not '$'-protected with the placeholder
                        # ' stoppedword '.
                        quotes_words_indexs = [
                            (m.start(0), m.end(0)) for m in re.finditer(
                                r'\b{}\b'.format(tmp_word), query)
                        ]
                        # query = new_query.replace(tmp_word, 'STOPPED')
                        for tup in quotes_words_indexs:
                            if query[tup[0] - 1] == '$':
                                continue
                            else:
                                start = query[:tup[0]]
                                mid = ' stoppedword '
                                # NOTE(review): ``tup[1] + 1`` also drops the
                                # one character right after the matched word
                                # (which could be a ')') -- confirm intended.
                                end = query[tup[1] + 1:]
                                query_helper = start + mid + end
                                query = query_helper
                                break

            quotes_words_indexs = []
        # Collapse runs of spaces, turn the '$' markers into spaces and drop
        # every remaining double quote.
        query = re.sub(' +', ' ', query)
        query = query.replace('$', ' ')
        # careful
        query = query.replace('\"', '')
        # Lower-cased list of the bare terms (parentheses and operator
        # substrings removed).
        tmp_query = query.replace(')', '').replace('(', '').replace(
            'AND', '').replace('OR', '').replace('NOT', '')
        tmp_query = tmp_query.lower()
        words_list = tmp_query.split()

        # Map each term to a single-quoted literal of itself, with embedded
        # single quotes backslash-escaped.
        words_list_in_quotes = [
            '\'' + re.sub("'", "\\'", w) + '\'' for w in words_list
        ]
        words_dict = {}
        for i in range(len(words_list)):
            words_dict[words_list[i]] = words_list_in_quotes[i]

        # Rebuild the query token by token, swapping each known term for its
        # quoted literal while keeping trailing ')' or leading '('.  A token
        # that matches none of the three lookups is copied through unchanged.
        processed_query = ''
        for item in query.split():
            if item.lower() in words_dict:
                processed_query += words_dict[item.lower()]
            elif item.replace(')', '').lower() in words_dict:
                b = item.count(')')
                processed_query += words_dict[item.replace(')', '').lower()]
                processed_query += b * ')'
            elif item.replace('(', '').lower() in words_dict:
                b = item.count('(')
                processed_query += b * '('
                processed_query += words_dict[item.replace('(', '').lower()]
            else:
                processed_query += item
            processed_query += ' '

        # Replace each term's value with the ``create_ast_list`` result for
        # its documents; the stop-word placeholder gets an empty doc list.
        # (presumably a list of AST nodes usable as ``ast.Set`` elements --
        # TODO confirm)
        for k, v in words_dict.items():
            if k == 'stoppedword':
                ast_list = create_ast_list([])
            else:
                doc_list = get_doc_list_by_term(k, hidden_files, words_list)
                ast_list = create_ast_list(doc_list)
            words_dict[k] = ast_list

        # Strip '*' from the dict keys, the word list and the query, then
        # translate the boolean operators into Python set operators.
        words_dict = dict(
            {k.replace('*', ''): v
             for k, v in words_dict.items()})
        words_list = list([word.replace('*', '') for word in words_list])
        processed_query = processed_query.replace('*', '')
        processed_query = processed_query.replace('AND', '&')
        processed_query = processed_query.replace('OR', '|')
        processed_query = processed_query.replace('NOT', '-')

        # Parse the rewritten query as a Python expression, replace string
        # nodes through MyTransformer (the tree is modified in place; the
        # return value of ``visit`` is not used), then compile and evaluate.
        # NOTE(review): this ``eval``s an expression derived from the caller's
        # query; tokens that missed the term lookup above reach the parser
        # verbatim -- confirm the input is trusted or restrict the AST.
        input_code = ast.parse(processed_query, mode='eval')
        MyTransformer().visit(input_code)
        fixed = ast.fix_missing_locations(input_code)
        code = compile(fixed, '<string>', 'eval')
        result = eval(code)
        result = list(result)
        # ``_rank`` results are read as (doc_id, rank) pairs: sort ascending
        # by rank, then reverse so the highest rank comes first.
        ranked_doc = _rank(result, words_list)
        sorted_by_rank = sorted(ranked_doc, key=lambda tup: tup[1])
        sorted_docit = [tup[0] for tup in sorted_by_rank]
        sorted_docit = sorted_docit[::-1]
        result = sorted_docit
        for doc_id in result:
            data.append(get_data_by_docid(doc_id, words_list))

        return utils.create_res_obj(data)
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            },
            success=False)