コード例 #1
0
 def _hit_context(self, hit):
     """Build the context text for this result from the file's contents.

     Each distinct query term is mapped back to document words through the
     searcher's parser (``unstemlist``). Every occurrence of those words in
     the file produces one context fragment, handed to ``append_line``.
     Collection stops as soon as ``self.max_sub_results`` lines are held.

     :param hit: the search hit; not used by this implementation.
     :return: the collected context lines joined into one unicode string.
     """
     qparser = self._searcher.parser
     query = self._searcher.query
     contents = read_file(self.path)
     lines = []
     # For each distinct query word,
     for queryWord in set(query):
         # Reverse map query words to document words
         documentWords = list(qparser.unstemlist(queryWord))
         # If the query word is not in the document, skip it
         if not documentWords:
             continue
         # Document words are literal text: escape them so regex
         # metacharacters (e.g. "c++", "a.b") can neither break compilation
         # nor change what is matched.
         searchExpression = r'|'.join(re.escape(word) for word in documentWords)
         pattern = re.compile(searchExpression, re.IGNORECASE)
         for match in pattern.finditer(contents):
             token = self.Token()
             token.startchar = match.start()
             token.endchar = match.end()
             # get the context line
             context = fragment_text(token, contents)
             self.append_line(lines, context)
             if len(lines) >= self.max_sub_results:
                 # Stop scanning altogether; a plain `break` would only end
                 # this word's matches and later words would keep adding.
                 return u''.join(lines)
     return u''.join(lines)
コード例 #2
0
 def process_hit(self, hit):
     """Populate ``self.context`` with the hit's highlighted content.

     The file at ``self.path`` is read and passed to ``hit.highlights`` for
     the ``content`` field. When that yields nothing, ``self.path`` is used
     as the context instead.
     """
     file_text = read_file(self.path)
     highlighted = hit.highlights('content', text=file_text)
     # the file path could have matched, leaving no content highlights
     self.context = highlighted if highlighted else self.path
コード例 #3
0
ファイル: xapian_backend.py プロジェクト: cbess/text-sherlock
 def _hit_context(self, hit):
     """Build the context text for this result from the file's contents.

     Each distinct query term is mapped back to document words through the
     searcher's parser (``unstemlist``). Every occurrence of those words in
     the file produces one context fragment, handed to ``append_line``.
     Collection stops as soon as ``self.max_sub_results`` lines are held.

     :param hit: the search hit; not used by this implementation.
     :return: the collected context lines joined into one unicode string.
     """
     qparser = self._searcher.parser
     query = self._searcher.query
     contents = read_file(self.path)
     lines = []
     # For each distinct query word,
     for queryWord in set(query):
         # Reverse map query words to document words
         documentWords = list(qparser.unstemlist(queryWord))
         # If the query word is not in the document, skip it
         if not documentWords:
             continue
         # Document words are literal text: normalise each one with
         # _ensure_str, then escape it so regex metacharacters can neither
         # break compilation nor change what is matched.
         searchExpression = r'|'.join(
             re.escape(_ensure_str(word)) for word in documentWords)
         pattern = re.compile(searchExpression, re.IGNORECASE)
         for match in pattern.finditer(contents):
             token = self.Token()
             token.startchar = match.start()
             token.endchar = match.end()
             # get the context line
             context = fragment_text(token, contents)
             self.append_line(lines, context)
             if len(lines) >= self.max_sub_results:
                 # Stop scanning altogether; a plain `break` would only end
                 # this word's matches and later words would keep adding.
                 return u''.join(lines)
     return u''.join(lines)
コード例 #4
0
 def process_hit(self, hit):
     """Set ``self.context`` for this hit.

     Uses the highlights that ``hit.highlights`` produces for the ``content``
     field over the text read from ``self.path``; if there are none, the
     path itself becomes the context.
     """
     text = read_file(self.path)
     # an empty highlight result means the file path could have matched
     self.context = hit.highlights('content', text=text) or self.path
コード例 #5
0
def document():
    """Handles document display requests

    Query-string arguments read from the request:

    * ``path`` -- document path; joined onto ``FULL_INDEX_PATH`` when that
      root does not already appear in it.
    * ``raw`` -- ``'true'`` returns the document text as ``text/plain``.
    * ``lines`` / ``hl`` -- lines to highlight in the rendered document.
    * ``q`` / ``p`` -- search text and page number, passed to the template.

    Returns the rendered ``document.html`` template together with the HTTP
    status, or a plain-text ``Response`` for raw requests. Aborts with 400
    when ``path`` is missing and with 404 when the search finds no document.
    """
    http_status = 200
    root_dir = FULL_INDEX_PATH
    full_path = request.args.get('path')
    if full_path is None:
        # Without this guard the containment test below raises TypeError and
        # the client gets an opaque 500 instead of a client-error status.
        app.logger.error('Document request is missing the `path` argument')
        abort(400)
    is_raw = (request.args.get('raw') == 'true')
    # allow `lines` or `hl` to highlight the target lines
    hl_str = request.args.get('lines') or request.args.get('hl', '')
    # if the full path wasn't appended, then append it (assumes path exist in default index path)
    # NOTE(review): this is a substring test, not a containment check; it does
    # not by itself keep `full_path` inside `root_dir` -- confirm that the
    # index lookup below is the intended access gate.
    if root_dir not in full_path:
        full_path = os.path.join(root_dir, full_path)
    search_text = request.args.get('q')
    pagenum = request.args.get('p')

    # perform the text search, get wrapped results
    results = results_from_search_text(full_path, isPath=True)
    if not results:
        app.logger.error('Unable to find document: %s' % full_path)
        abort(404)
    doc = results.items[0]

    # grab contents, if file gone, then send 404 error message
    try:
        doc_contents = read_file(full_path)
    except IOError:
        app.logger.error('Document no longer exists: %s' % full_path)
        doc_contents = "Document does not exist"
        http_status = 404

    if is_raw:
        # dump the document text; carry the status so a vanished file is
        # reported as 404 rather than 200 with an error message as the body
        return Response(doc_contents, mimetype='text/plain', status=http_status)
    db_record = db.get_raw_file_record(full_path)

    if http_status == 200:
        # get syntax highlighted html
        trn = transformer.Transformer()
        doc_html = trn.to_html(doc_contents,
                               doc.result.filename,
                               highlight_lines=hl_str)
    else:
        # nothing to highlight: show the error message as-is
        doc_html = doc_contents

    # build response
    response = {
        "title": doc.result.filename,
        'html_css_class': 'document',
        'doc': doc,
        'contents': doc_html,
        'search_text': search_text,
        'page_number': pagenum,
        'last_modified': db_record.get('mod_date'),
        'http_status': http_status
    }
    add_default_response(response)
    return render_template('document.html', **response), http_status
コード例 #6
0
ファイル: views.py プロジェクト: ahnan4arch/text-sherlock
def document():
    """Handles document display requests

    Query-string arguments read from the request:

    * ``path`` -- document path; joined onto ``FULL_INDEX_PATH`` when that
      root does not already appear in it.
    * ``raw`` -- ``'true'`` returns the document text as ``text/plain``.
    * ``lines`` / ``hl`` -- lines to highlight in the rendered document.
    * ``q`` / ``p`` -- search text and page number, passed to the template.

    Returns the rendered ``document.html`` template together with the HTTP
    status, or a plain-text ``Response`` for raw requests. Aborts with 400
    when ``path`` is missing and with 404 when the search finds no document.
    """
    http_status = 200
    root_dir = FULL_INDEX_PATH
    full_path = request.args.get('path')
    if full_path is None:
        # Without this guard the containment test below raises TypeError and
        # the client gets an opaque 500 instead of a client-error status.
        app.logger.error('Document request is missing the `path` argument')
        abort(400)
    is_raw = (request.args.get('raw') == 'true')
    # allow `lines` or `hl` to highlight the target lines
    hl_str = request.args.get('lines') or request.args.get('hl', '')
    # if the full path wasn't appended, then append it (assumes path exist in default index path)
    # NOTE(review): this is a substring test, not a containment check; it does
    # not by itself keep `full_path` inside `root_dir` -- confirm that the
    # index lookup below is the intended access gate.
    if root_dir not in full_path:
        full_path = os.path.join(root_dir, full_path)
    search_text = request.args.get('q')
    pagenum = request.args.get('p')

    # perform the text search, get wrapped results
    results = results_from_search_text(full_path, isPath=True)
    if not results:
        app.logger.error('Unable to find document: %s' % full_path)
        abort(404)
    doc = results.items[0]

    # grab contents, if file gone, then send 404 error message
    try:
        doc_contents = read_file(full_path)
    except IOError:
        app.logger.error('Document no longer exists: %s' % full_path)
        doc_contents = "Document does not exist"
        http_status = 404

    if is_raw:
        # dump the document text; carry the status so a vanished file is
        # reported as 404 rather than 200 with an error message as the body
        return Response(doc_contents, mimetype='text/plain', status=http_status)
    db_record = db.get_raw_file_record(full_path)

    if http_status == 200:
        # get syntax highlighted html
        trn = transformer.Transformer()
        doc_html = trn.to_html(doc_contents, doc.result.filename, highlight_lines=hl_str)
    else:
        # nothing to highlight: show the error message as-is
        doc_html = doc_contents

    # build response
    response = {
        "title": doc.result.filename,
        'html_css_class': 'document',
        'doc': doc,
        'contents': doc_html,
        'search_text': search_text,
        'page_number': pagenum,
        'last_modified': db_record.get('mod_date'),
        'http_status': http_status
    }
    add_default_response(response)
    return render_template('document.html', **response), http_status
コード例 #7
0
def is_valid_template_file(file_path):
    """Return the file's contents if it is a scannable template file.

    A file qualifies when its lower-cased extension is listed in
    ``settings.OTHER_SCAN_FILE_EXTENSIONS``, no entry of
    ``settings.IGNORE_DIRS`` occurs in its path, and its lower-cased name is
    not in ``settings.IGNORE_FILES``. Otherwise ``None`` is returned.
    """
    base_name = ntpath.basename(file_path)
    in_ignored_dir = any(
        marker in file_path for marker in settings.IGNORE_DIRS)
    is_ignored_file = base_name.lower() in settings.IGNORE_FILES
    suffix = os.path.splitext(base_name)[1].lower()

    # Guard clauses: bail out on the first disqualifying property.
    if suffix not in settings.OTHER_SCAN_FILE_EXTENSIONS:
        return None
    if in_ignored_dir or is_ignored_file:
        return None
    return utils.read_file(file_path)
コード例 #8
0
def is_valid_node(file_path):
    """Return the file's contents when it looks like Node.js source.

    Candidates are files whose lower-cased extension is in
    ``settings.JS_SCAN_FILE_EXTENSIONS`` or whose lower-cased path ends with
    ``bin/www``, excluding ignored files and directories. A candidate's
    contents are returned only if ``NODE_RGX`` matches them; in every other
    case the result is ``None``.
    """
    base_name = ntpath.basename(file_path)
    suffix = os.path.splitext(base_name)[1].lower()
    has_js_extension = suffix in settings.JS_SCAN_FILE_EXTENSIONS
    in_ignored_dir = any(
        marker in file_path for marker in settings.IGNORE_DIRS)
    is_ignored_file = base_name.lower() in settings.IGNORE_FILES
    is_www_script = file_path.lower().endswith("bin/www")

    # Files that do not need to be scanned
    if not (has_js_extension or is_www_script):
        return None
    if is_ignored_file or in_ignored_dir:
        return None

    source = utils.read_file(file_path)
    # A regex match marks the file as possible Node.js source code
    return source if re.search(NODE_RGX, source) else None
コード例 #9
0
ファイル: app.py プロジェクト: Best-of-NodeJS/NodeJS-Scan
def view_file():
    """View File

    Reads ``path`` and ``scan_hash`` from the posted form and returns a JSON
    object with a single ``contents`` key. Its value is the file's text,
    ``"not_found"`` when the hash is rejected, no result matches or the path
    is not a file, or ``"Path Traversal Detected!"`` when the requested path
    resolves outside ``settings.UPLOAD_FOLDER``.
    """
    context = {"contents": "not_found"}
    path = request.form["path"]
    scan_hash = request.form["scan_hash"]
    if utils.sha2_match_regex(scan_hash):
        res = Results.query.filter(Results.scan_hash == scan_hash).first()
        if res:
            # Canonicalise both sides so symlinks and ".." segments are
            # resolved before the comparison.
            safe_dir = os.path.realpath(settings.UPLOAD_FOLDER)
            req_path = os.path.realpath(os.path.join(safe_dir, path))
            # Compare against the directory *with* a trailing separator:
            # os.path.commonprefix works character by character, so a
            # sibling such as "<upload>_evil/x" would share the bare prefix
            # and slip through.
            safe_prefix = os.path.join(safe_dir, "")
            if req_path != safe_dir and not req_path.startswith(safe_prefix):
                context = {"contents": "Path Traversal Detected!"}
            elif os.path.isfile(req_path):
                # read the resolved path that was actually validated
                contents = utils.read_file(req_path)
                context = {"contents": contents}
    return jsonify(**context)
コード例 #10
0
ファイル: mappingtest.py プロジェクト: gexiaowei/pybatis
import time

from core.mapping import mapping_xml2sql
from core.utils import read_file

__author__ = 'sunshine'

if __name__ == '__main__':
    # Manual check: map one statement from the XML file to SQL, print the
    # result, then print the elapsed wall-clock time in seconds.
    prev_time = time.time()
    # Raw string: the Windows path's backslashes must stay literal. The
    # non-raw form relied on invalid escape sequences (\w, \p, \d), which
    # newer Python versions warn about.
    content = read_file(r'E:\work\projects\pybatis\database.xml')
    res = mapping_xml2sql(content, 'select', 'select_user_by_id', {'age': 20})
    print(res)
    after_time = time.time()
    print(after_time - prev_time)