Example #1
0
def __get_tree_and_content(repo_obj, commit, path):
    """ Return the tree and the content of the specified file.

    :arg repo_obj: repository object; it is indexed with the entry's
        ``oid`` to load the object behind that entry
    :arg commit: commit whose ``tree`` attribute is searched
    :arg path: path passed unchanged to ``__get_tree``
    :return: a ``(tree, content, filename)`` tuple. When ``__get_tree``
        finds no entry, the tuple is ``(tree_obj, None, None)`` with the
        tree left unsorted. Otherwise ``tree`` is sorted by ``filemode``;
        ``content`` and ``filename`` are ``None`` unless the entry is a
        ``pygit2.TreeEntry`` (a file).

    Aborts with a 404 if the object behind the entry is falsy.
    """

    (blob_or_tree, tree_obj, _extended) = __get_tree(repo_obj, commit.tree,
                                                     path)

    if blob_or_tree is None:
        return (tree_obj, None, None)

    # Look the object up once and reuse it below.
    blob_obj = repo_obj[blob_or_tree.oid]
    if not blob_obj:
        # Not tested and no idea how to test it, but better safe than sorry
        flask.abort(404, "File not found")

    # Defaults for entries that are not files: without them the final
    # return raised UnboundLocalError whenever the isinstance check failed.
    content = None
    filename = None

    if isinstance(blob_or_tree, pygit2.TreeEntry):  # Returned a file
        filename = blob_or_tree.name
        name, ext = os.path.splitext(filename)
        if is_binary_string(blob_obj.data):
            # Binary data is handed back untouched.
            content = blob_obj.data
        else:
            try:
                content, safe = pagure.doc_utils.convert_readme(
                    blob_obj.data, ext)
                if safe:
                    # The converted content is advertised under an
                    # .html file name.
                    filename = name + ".html"
            except pagure.exceptions.PagureEncodingException:
                # Conversion failed: fall back to the raw data.
                content = blob_obj.data

    tree = sorted(tree_obj, key=lambda x: x.filemode)
    return (tree, content, filename)
Example #2
0
 def is_binary(self, file_path):
     """Tell whether the remote file at ``file_path`` looks binary.

     SFTP analogue of ``from binaryornot.check import is_binary``: the
     file is opened through ``self.sftp`` and the result of a single
     ``read(1024)`` is handed to ``is_binary_string``.
     """
     # NOTE(review): paths ending in "pyc" are reported as NOT binary
     # without being opened -- confirm this is the intended answer.
     if file_path[-3:] == "pyc":
         return False
     with self.sftp.file(file_path) as remote_file:
         head = remote_file.read(1024)
     return is_binary_string(head)
Example #3
0
File: mime.py Project: Darshnik/dxr
def decode_data(data, encoding_guess, can_be_binary=True):
    """Try to turn ``data`` into text and report whether that worked.

    Return an ``(is_text, data)`` tuple. ``data`` comes back decoded when
    it could be decoded with ``encoding_guess`` or, failing that, with the
    encoding reported by the detector; otherwise it is returned unchanged
    together with ``False``.

    If can_be_binary is False, the initial ``is_binary_string`` check on
    the first 1024 items of ``data`` is skipped.
    """
    if can_be_binary and is_binary_string(data[:1024]):
        return False, data

    try:
        # Try our default encoding.
        return True, data.decode(encoding_guess)
    except UnicodeDecodeError:
        pass

    # Fall back to chardet - chardet is really slow, which is why we
    # don't just do chardet from the start.
    detector = UniversalDetector()
    for chunk in ichunks(80, data):
        detector.feed(chunk)
        if detector.done:
            break
    detector.close()

    detected = detector.result['encoding']
    if not detected:
        return False, data

    try:
        return True, data.decode(detected)
    except (UnicodeDecodeError, LookupError):
        # Either we couldn't decode or chardet gave us an encoding
        # that python doesn't recognize (yes, it can do that).
        return False, data  # Leave data undecoded.
Example #4
0
def __get_tree_and_content(repo_obj, commit, path):
    """ Return the tree and the content of the specified file.

    :arg repo_obj: repository object; it is indexed with the entry's
        ``oid`` to load the object behind that entry
    :arg commit: commit whose ``tree`` attribute is searched
    :arg path: path passed unchanged to ``__get_tree``
    :return: a ``(tree, content, filename)`` tuple. When ``__get_tree``
        finds no entry, the tuple is ``(tree_obj, None, None)`` with the
        tree left unsorted. Otherwise ``tree`` is sorted by ``filemode``;
        ``content`` and ``filename`` are ``None`` unless the entry is a
        ``pygit2.TreeEntry`` (a file).

    Aborts with a 404 if the object behind the entry is falsy.
    """

    (blob_or_tree, tree_obj, _extended) = __get_tree(
        repo_obj, commit.tree, path
    )

    if blob_or_tree is None:
        return (tree_obj, None, None)

    # Look the object up once and reuse it below.
    blob_obj = repo_obj[blob_or_tree.oid]
    if not blob_obj:
        # Not tested and no idea how to test it, but better safe than sorry
        flask.abort(404, description="File not found")

    # Defaults for entries that are not files: without them the final
    # return raised UnboundLocalError whenever the isinstance check failed.
    content = None
    filename = None

    if isinstance(blob_or_tree, pygit2.TreeEntry):  # Returned a file
        filename = blob_or_tree.name
        name, ext = os.path.splitext(filename)
        if is_binary_string(blob_obj.data):
            # Binary data is handed back untouched.
            content = blob_obj.data
        else:
            try:
                content, safe = pagure.doc_utils.convert_readme(
                    blob_obj.data, ext
                )
                if safe:
                    # The converted content is advertised under an
                    # .html file name.
                    filename = name + ".html"
            except pagure.exceptions.PagureEncodingException:
                # Conversion failed: fall back to the raw data.
                content = blob_obj.data

    tree = sorted(tree_obj, key=lambda x: x.filemode)
    return (tree, content, filename)
Example #5
0
File: mime.py Project: vck/dxr
def decode_data(data, encoding_guess, can_be_binary=True):
    """Given raw data, return an ``(is_text, data)`` tuple.

    ``data`` is returned decoded if we think it's text and an encoding
    that decodes it could be determined; otherwise the input is returned
    unchanged with ``is_text`` set to False.

    If can_be_binary is False, then skip the initial is_binary check.
    """
    looks_binary = can_be_binary and is_binary_string(data[:1024])
    if not looks_binary:
        try:
            # Try our default encoding.
            text = data.decode(encoding_guess)
        except UnicodeDecodeError:
            # Fall back to chardet - chardet is really slow, which is why we
            # don't just do chardet from the start.
            sniffer = UniversalDetector()
            for piece in ichunks(80, data):
                sniffer.feed(piece)
                if sniffer.done:
                    break
            sniffer.close()
            charset = sniffer.result['encoding']
            if charset:
                try:
                    text = data.decode(charset)
                except (UnicodeDecodeError, LookupError):
                    # Either we couldn't decode or chardet gave us an
                    # encoding that python doesn't recognize (yes, it can
                    # do that). Leave data undecoded.
                    pass
                else:
                    return True, text
        else:
            return True, text
    return False, data
Example #6
0
def is_binary(location):
    """
    Return True if the file at `location` is a binary file.

    A `location` ending with one of the known binary extensions is
    reported as binary straight away; otherwise the decision is made by
    `is_binary_string` on the file's starting chunk.
    """
    binary_suffixes = (
        '.pyc', '.pgm', '.mp3', '.mp4', '.mpeg', '.mpg',
        '.emf', '.pbm', '.ppm',
    )
    return (location.endswith(binary_suffixes)
            or is_binary_string(get_starting_chunk(location)))
Example #7
0
def is_binary(filename):
    """
    Decide whether a file is binary from its starting chunk.

    :param filename: File to check.
    :returns: True if it's a binary file, otherwise False.
    """
    logger.debug('is_binary: %(filename)r', {'filename': filename})

    # No extension-based shortcut is applied here; the answer rests only
    # on what is_binary_string makes of the file's starting chunk.
    return is_binary_string(get_starting_chunk(filename))
Example #8
0
File: build.py Project: bitslab/dxr
def unicode_contents(path, encoding_guess):  # TODO: Make accessible to TreeToIndex.post_build.
    """Return the decoded contents of a file if we can figure out a
    decoding, or else None.

    The first 4096 bytes are checked with ``is_binary_string``; only when
    they do not look binary is the whole file read and passed to
    ``decode_data``.

    :arg path: A sufficient path to the file
    :arg encoding_guess: A guess at the encoding of the file, to be applied if
        it seems to be text

    """
    with open(path, 'rb') as source_file:
        head = source_file.read(4096)
        if is_binary_string(head):
            return None
        # Rewind so the full contents, head included, get decoded.
        source_file.seek(0)
        is_text, text = decode_data(source_file.read(),
                                    encoding_guess,
                                    can_be_binary=False)
    return text if is_text else None
Example #9
0
def unicode_contents(path, encoding_guess):  # TODO: Make accessible to TreeToIndex.post_build.
    """Return the decoded contents of a file, or None when the file looks
    binary or no decoding could be found.

    :arg path: A sufficient path to the file
    :arg encoding_guess: A guess at the encoding of the file, to be applied if
        it seems to be text

    """
    contents = None
    # The file is opened in binary mode; decoding is left to decode_data.
    with open(path, 'rb') as source_file:
        if not is_binary_string(source_file.read(4096)):
            # Move the cursor back to the start of the file so the whole
            # file is decoded, not just what follows the sniffed portion.
            source_file.seek(0)
            raw = source_file.read()
            decoded, text = decode_data(raw,
                                        encoding_guess,
                                        can_be_binary=False)
            if decoded:
                contents = text
    return contents
Example #10
0
 def contains_binary(self):
     """Return True if ``self.contents`` contains binary characters.

     The check is delegated to ``binaryornot.helpers.is_binary_string``,
     which is imported when the method is called.
     """
     from binaryornot import helpers

     return helpers.is_binary_string(self.contents)
 def _is_binary(d: DiffIndex):
     """Return whether the start of ``d.b_blob``'s data stream looks binary.

     A single ``read(1024)`` on the stream is passed to ``is_binary_string``.
     """
     head = d.b_blob.data_stream.read(1024)
     return is_binary_string(head)
Example #12
0
def view_file(repo, identifier, filename, username=None):
    """ Displays the content of a file or a tree for the specified repo.

    :arg repo: project name, resolved through ``pagure.lib.get_project``
    :arg identifier: name of a branch, or an id looked up with
        ``repo_obj.get``; when that lookup raises ``ValueError`` the
        object pointed to by the repository HEAD is used instead
    :arg filename: ``/``-separated path of the file or folder to display
    :kwarg username: forwarded as ``user`` to ``pagure.lib.get_project``
    :return: the rendered ``file.html`` template, whose ``output_type`` is
        one of ``image``, ``binary``, ``markup``, ``file`` or ``tree``

    Aborts with a 404 when the project, the fallback ``master`` branch or
    the file cannot be found, or when the repository is empty.
    """
    repo = pagure.lib.get_project(SESSION, repo, user=username)

    if not repo:
        flask.abort(404, 'Project not found')

    reponame = pagure.get_repo_path(repo)

    repo_obj = pygit2.Repository(reponame)

    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    # Resolve `identifier` to an object: branch name first, then object id.
    if identifier in repo_obj.listall_branches():
        branchname = identifier
        branch = repo_obj.lookup_branch(identifier)
        commit = branch.get_object()
    else:
        try:
            # NOTE(review): if this lookup returns None instead of raising,
            # `commit` is falsy and the request ends in the 404 further down.
            commit = repo_obj.get(identifier)
            branchname = identifier
        except ValueError:
            if 'master' not in repo_obj.listall_branches():
                flask.abort(404, 'Branch no found')
            # If it's not a commit id then it's part of the filename
            commit = repo_obj[repo_obj.head.target]
            branchname = 'master'

    # A tag is replaced by the object it refers to.
    if isinstance(commit, pygit2.Tag):
        commit = commit.get_object()

    # Anything that is not already a blob has the path looked up in its tree.
    if commit and not isinstance(commit, pygit2.Blob):
        content = __get_file_in_tree(
            repo_obj, commit.tree, filename.split('/'), bail_on_tree=True)
        if not content:
            flask.abort(404, 'File not found')
        content = repo_obj[content.oid]
    else:
        content = commit

    if not content:
        flask.abort(404, 'File not found')

    if isinstance(content, pygit2.Blob):
        # `?text=1` / `?text=true` asks for markup files to be shown unrendered.
        rawtext = str(flask.request.args.get('text')).lower() in ['1', 'true']
        # Everything from the last '.' onwards. Without any '.', rfind()
        # returns -1 and `ext` is just the last character, which matches
        # none of the dotted extensions tested below.
        ext = filename[filename.rfind('.'):]
        if ext in (
                '.gif', '.png', '.bmp', '.tif', '.tiff', '.jpg',
                '.jpeg', '.ppm', '.pnm', '.pbm', '.pgm', '.webp', '.ico'):
            try:
                # Only checks that the data can be opened as an image; the
                # opened image itself is discarded.
                Image.open(StringIO(content.data))
                output_type = 'image'
            except IOError as err:
                LOG.debug(
                    'Failed to load image %s, error: %s', filename, err
                )
                output_type = 'binary'
        elif ext in ('.rst', '.mk', '.md') and not rawtext:
            # `safe` is not used in this function.
            content, safe = pagure.doc_utils.convert_readme(content.data, ext)
            output_type = 'markup'
        elif not is_binary_string(content.data):
            file_content = content.data
            # NOTE(review): `basestring` (and `StringIO(content.data)` above)
            # suggest this code targets Python 2 -- confirm before porting.
            if not isinstance(file_content, basestring):
                file_content = content.data.decode('utf-8')
            try:
                lexer = guess_lexer_for_filename(
                    filename,
                    file_content
                )
            except (ClassNotFound, TypeError):
                # No lexer could be guessed: highlight as plain text.
                lexer = TextLexer()

            content = highlight(
                file_content,
                lexer,
                HtmlFormatter(
                    noclasses=True,
                    style="tango",)
            )
            output_type = 'file'
        else:
            output_type = 'binary'
    else:
        # Not a blob: its entries are listed, ordered by file mode.
        content = sorted(content, key=lambda x: x.filemode)
        output_type = 'tree'

    return flask.render_template(
        'file.html',
        select='tree',
        repo=repo,
        origin='view_file',
        username=username,
        branches=sorted(repo_obj.listall_branches()),
        branchname=branchname,
        filename=filename,
        content=content,
        output_type=output_type,
        repo_admin=is_repo_admin(repo),
    )
Example #13
0
def edit_file(repo, branchname, filename, username=None):
    """ Edit a file online.

    :arg repo: project name, resolved through ``pagure.lib.get_project``
    :arg branchname: branch holding the file; a 400 is returned when it is
        not one of the repository's branches
    :arg filename: ``/``-separated path of the file to edit
    :kwarg username: forwarded as ``user`` to ``pagure.lib.get_project``
    :return: a redirect to ``.view_commits`` once the change has been
        committed, otherwise the rendered ``edit_file.html`` template

    Aborts with a 404 for an unknown project, an empty repository or a
    missing file, a 403 when the user is not a repo admin, and a 400 for
    binary files.
    """
    repo = pagure.lib.get_project(SESSION, repo, user=username)
    if not repo:
        flask.abort(404, 'Project not found')

    if not is_repo_admin(repo):
        flask.abort(
            403,
            'You are not allowed to change the settings for this project')

    user = pagure.lib.search_user(
        SESSION, username=flask.g.fas_user.username)

    repo_obj = pygit2.Repository(pagure.get_repo_path(repo))
    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    form = pagure.forms.EditFileForm(emails=user.emails)

    # Refuse unknown branches before touching the branch object.
    if branchname not in repo_obj.listall_branches():
        flask.abort(400, 'Invalid branch specified')
    commit = repo_obj.lookup_branch(branchname).get_object()

    if form.validate_on_submit():
        try:
            pagure.lib.git.update_file_in_git(
                repo,
                branch=branchname,
                branchto=form.branch.data,
                filename=filename,
                content=form.content.data,
                message='%s\n\n%s' % (
                    form.commit_title.data.strip(),
                    form.commit_message.data.strip()
                ),
                user=flask.g.fas_user,
                email=form.email.data,
            )
        except pagure.exceptions.PagureException as err:  # pragma: no cover
            APP.logger.exception(err)
            flask.flash('Commit could not be done', 'error')
            # Show the form again with what the user submitted.
            file_text = form.content.data
        else:
            flask.flash('Changes committed')
            target = flask.url_for(
                '.view_commits', repo=repo.name, username=username,
                branchname=form.branch.data)
            return flask.redirect(target)
    elif flask.request.method == 'GET':
        entry = __get_file_in_tree(
            repo_obj, commit.tree, filename.split('/'))
        if not entry or isinstance(entry, pygit2.Tree):
            flask.abort(404, 'File not found')

        if is_binary_string(entry.data):
            flask.abort(400, 'Cannot edit binary files')

        file_text = repo_obj[entry.oid].data.decode('utf-8')
    else:
        # Submitted form that did not validate.
        file_text = form.content.data.decode('utf-8')

    context = dict(
        select='tree',
        repo=repo,
        username=username,
        branchname=branchname,
        data=file_text,
        filename=filename,
        form=form,
        user=user,
        branches=repo_obj.listall_branches(),
    )
    return flask.render_template('edit_file.html', **context)
Example #14
0
 def contains_binary(self):
     """Return True if the file contains binary characters.

     ``self.contents`` is checked with
     ``binaryornot.helpers.is_binary_string``.
     """
     from binaryornot.helpers import is_binary_string

     payload = self.contents
     return is_binary_string(payload)
Example #15
0
def view_issue_raw_file(
        repo, filename=None, username=None, namespace=None):
    """ Displays the raw content of a file of a commit for the specified
    ticket repo.

    :arg repo: replaced straight away by ``flask.g.repo``
    :kwarg filename: ``/``-separated path of the file inside the ticket
        repository
    :kwarg username: passed to the ``patchfile.html`` template
    :kwarg namespace: not used in the body
    :return: the rendered ``patchfile.html`` template for non-binary
        ``.patch``/``.diff`` files, otherwise a ``(data, 200, headers)``
        tuple

    Aborts with a 404 when the issue tracker is disabled, the repository
    is empty, the file is missing or it has no content.
    """
    repo = flask.g.repo

    if not repo.settings.get('issue_tracker', True):
        flask.abort(404, 'No issue tracker found for this project')

    tickets_path = os.path.join(APP.config['TICKETS_FOLDER'], repo.path)
    repo_obj = pygit2.Repository(tickets_path)

    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    commit = repo_obj.lookup_branch('master').get_object()

    entry = __get_file_in_tree(
        repo_obj, commit.tree, filename.split('/'), bail_on_tree=True)
    if not entry or isinstance(entry, pygit2.Tree):
        flask.abort(404, 'File not found')

    mimetype, encoding = mimetypes.guess_type(filename)
    data = repo_obj[entry.oid].data

    if not data:
        flask.abort(404, 'No content found')

    is_patch_name = filename.endswith(('.patch', '.diff'))
    if is_patch_name and not is_binary_string(entry.data):
        # We have a patch file attached to this issue, render the diff in html
        return flask.render_template(
            'patchfile.html',
            select='issues',
            repo=repo,
            username=username,
            diff=data,
            # Displayed name: what follows the first '-' of the file name.
            patchfile=filename.partition('-')[2],
            form=pagure.forms.ConfirmationForm(),
        )

    # A shebang marks a script: serve it as plain text.
    if not mimetype and data[:2] == '#!':
        mimetype = 'text/plain'

    headers = {}
    if not mimetype:
        # Unknown type: a NUL character means binary, anything else text.
        mimetype = 'application/octet-stream' if '\0' in data \
            else 'text/plain'
    elif 'html' in mimetype:
        # HTML is sent as a download rather than with its own mimetype.
        mimetype = 'application/octet-stream'
        headers['Content-Disposition'] = 'attachment'

    if mimetype.startswith('text/') and not encoding:
        try:
            encoding = pagure.lib.encoding_utils.guess_encoding(
                ktc.to_bytes(data))
        except pagure.exceptions.PagureException:
            # We cannot decode the file, so bail but warn the admins
            LOG.exception('File could not be decoded')

    if encoding:
        mimetype += '; charset={encoding}'.format(encoding=encoding)
    headers['Content-Type'] = mimetype

    return (data, 200, headers)