def __get_tree_and_content(repo_obj, commit, path):
    """ Return the tree and the content of the specified file.

    :arg repo_obj: repository object; it is indexed by object id to load
        the entry found for ``path``.
    :arg commit: commit whose ``tree`` attribute is handed to ``__get_tree``.
    :arg path: path specification, passed to ``__get_tree`` unchanged.
    :return: a ``(tree, content, filename)`` tuple.

        * When ``__get_tree`` finds no entry, the tree it returned is given
          back as-is and ``content``/``filename`` are ``None``.
        * When the entry is a ``pygit2.TreeEntry``, ``content`` is the
          output of ``pagure.doc_utils.convert_readme`` for non-binary
          data (and ``filename`` gets a ``.html`` extension if that output
          is flagged safe), or the raw data otherwise.
        * For any other kind of entry ``content`` and ``filename`` are
          ``None``.

        In the last two cases the tree is sorted by ``filemode``.

    Aborts the request with a 404 if the object looked up in the
    repository is falsy.
    """
    (blob_or_tree, tree_obj, extended) = __get_tree(
        repo_obj, commit.tree, path)

    if blob_or_tree is None:
        return (tree_obj, None, None)

    if not repo_obj[blob_or_tree.oid]:
        # Not tested and no idea how to test it, but better safe than sorry
        flask.abort(404, "File not found")

    # Bind the results up front: they used to be assigned only inside the
    # TreeEntry branch below, so any other entry type made the final
    # ``return`` raise UnboundLocalError.
    content = None
    filename = None

    if isinstance(blob_or_tree, pygit2.TreeEntry):  # Returned a file
        filename = blob_or_tree.name
        name, ext = os.path.splitext(filename)
        blob_obj = repo_obj[blob_or_tree.oid]
        # Read the blob data once and reuse it in every branch.
        raw = blob_obj.data
        if not is_binary_string(raw):
            try:
                content, safe = pagure.doc_utils.convert_readme(raw, ext)
                if safe:
                    filename = name + ".html"
            except pagure.exceptions.PagureEncodingException:
                # The converter could not handle the encoding: fall back to
                # the raw data.
                content = raw
        else:
            content = raw

    tree = sorted(tree_obj, key=lambda x: x.filemode)
    return (tree, content, filename)
def is_binary(self, file_path):
    """SFTP analogue of `from binaryornot.check import is_binary`.

    Paths whose last three characters are "pyc" are reported as not binary
    without opening the file. For any other path the first 1024 bytes are
    read through ``self.sftp.file`` and classified with
    ``is_binary_string``.
    """
    suffix = file_path[-3:]
    if suffix == "pyc":
        return False

    with self.sftp.file(file_path) as remote_file:
        head = remote_file.read(1024)
    return is_binary_string(head)
def decode_data(data, encoding_guess, can_be_binary=True):
    """Try to decode ``data`` and return an ``(is_text, data)`` tuple.

    ``is_text`` is True and ``data`` is the decoded text when decoding
    worked, first with ``encoding_guess`` and then with whatever encoding
    chardet detects. Otherwise ``is_text`` is False and ``data`` is
    returned untouched.

    Unless ``can_be_binary`` is False, the first 1024 items of ``data`` are
    checked with ``is_binary_string`` first, and data that looks binary is
    returned undecoded.
    """
    if can_be_binary and is_binary_string(data[:1024]):
        # Looks binary: leave data as it is.
        return False, data

    try:
        # Try our default encoding.
        return True, data.decode(encoding_guess)
    except UnicodeDecodeError:
        pass

    # Fall back to chardet - chardet is really slow, which is why it is not
    # used from the start.
    sniffer = UniversalDetector()
    for piece in ichunks(80, data):
        sniffer.feed(piece)
        if sniffer.done:
            break
    sniffer.close()

    detected = sniffer.result['encoding']
    if detected:
        try:
            return True, data.decode(detected)
        except (UnicodeDecodeError, LookupError):
            # Either we couldn't decode or chardet gave us an encoding
            # that python doesn't recognize (yes, it can do that).
            pass

    # Leave data undecoded.
    return False, data
def __get_tree_and_content(repo_obj, commit, path):
    """ Return the tree and the content of the specified file.

    :arg repo_obj: repository object; it is indexed by object id to load
        the entry found for ``path``.
    :arg commit: commit whose ``tree`` attribute is handed to ``__get_tree``.
    :arg path: path specification, passed to ``__get_tree`` unchanged.
    :return: a ``(tree, content, filename)`` tuple.

        * When ``__get_tree`` finds no entry, the tree it returned is given
          back as-is and ``content``/``filename`` are ``None``.
        * When the entry is a ``pygit2.TreeEntry``, ``content`` is the
          output of ``pagure.doc_utils.convert_readme`` for non-binary
          data (and ``filename`` gets a ``.html`` extension if that output
          is flagged safe), or the raw data otherwise.
        * For any other kind of entry ``content`` and ``filename`` are
          ``None``.

        In the last two cases the tree is sorted by ``filemode``.

    Aborts the request with a 404 if the object looked up in the
    repository is falsy.
    """
    (blob_or_tree, tree_obj, extended) = __get_tree(
        repo_obj, commit.tree, path
    )

    if blob_or_tree is None:
        return (tree_obj, None, None)

    if not repo_obj[blob_or_tree.oid]:
        # Not tested and no idea how to test it, but better safe than sorry
        flask.abort(404, description="File not found")

    # Bind the results up front: they used to be assigned only inside the
    # TreeEntry branch below, so any other entry type made the final
    # ``return`` raise UnboundLocalError.
    content = None
    filename = None

    if isinstance(blob_or_tree, pygit2.TreeEntry):  # Returned a file
        filename = blob_or_tree.name
        name, ext = os.path.splitext(filename)
        blob_obj = repo_obj[blob_or_tree.oid]
        # Read the blob data once and reuse it in every branch.
        raw = blob_obj.data
        if not is_binary_string(raw):
            try:
                content, safe = pagure.doc_utils.convert_readme(raw, ext)
                if safe:
                    filename = name + ".html"
            except pagure.exceptions.PagureEncodingException:
                # The converter could not handle the encoding: fall back to
                # the raw data.
                content = raw
        else:
            content = raw

    tree = sorted(tree_obj, key=lambda x: x.filemode)
    return (tree, content, filename)
def is_binary(location):
    """
    Return True if the file at `location` is a binary file.

    A path ending with one of the extensions listed below is reported as
    binary right away, without calling `get_starting_chunk`. Any other
    path is classified by running `is_binary_string` on its starting chunk.
    """
    binary_suffixes = (
        '.pyc', '.pgm', '.mp3', '.mp4', '.mpeg', '.mpg', '.emf',
        '.pbm', '.ppm',
    )
    if any(location.endswith(suffix) for suffix in binary_suffixes):
        return True

    head = get_starting_chunk(location)
    return is_binary_string(head)
def is_binary(filename):
    """
    Tell whether a file is binary, judging by its starting chunk.

    No extension-based shortcut is applied: the answer depends only on what
    ``is_binary_string`` reports for the chunk returned by
    ``get_starting_chunk``.

    :param filename: File to check.
    :returns: True if it's a binary file, otherwise False.
    """
    logger.debug('is_binary: %(filename)r', {'filename': filename})

    # Classify the file from its starting chunk.
    head = get_starting_chunk(filename)
    return is_binary_string(head)
def unicode_contents(path, encoding_guess):  # TODO: Make accessible to TreeToIndex.post_build.
    """Return the decoded contents of a file, or None if it cannot be decoded.

    None is returned when the first 4096 bytes look binary to
    ``is_binary_string`` or when ``decode_data`` reports that decoding
    failed.

    :arg path: A sufficient path to the file
    :arg encoding_guess: A guess at the encoding of the file, to be applied
        if it seems to be text

    """
    with open(path, 'rb') as handle:
        # Sniff only the beginning of the file before reading all of it.
        head = handle.read(4096)
        if is_binary_string(head):
            return None

        # Rewind so that the whole file gets decoded, not just the rest.
        handle.seek(0)
        is_text, text = decode_data(handle.read(),
                                    encoding_guess,
                                    can_be_binary=False)

    if is_text:
        return text
    return None
def contains_binary(self):
    """Report whether ``self.contents`` is classified as binary.

    The decision is delegated to ``binaryornot.helpers.is_binary_string``,
    which is imported when this method is called.
    """
    from binaryornot.helpers import is_binary_string as looks_binary

    payload = self.contents
    return looks_binary(payload)
def _is_binary(d: DiffIndex):
    """Return True if the blob referenced by diff entry ``d`` looks binary.

    The first 1024 bytes of the blob's data stream are classified with
    ``is_binary_string``. The "b" side blob is preferred. When it is
    missing, the "a" side blob is used instead, and when neither side has
    a blob the entry is reported as not binary.

    NOTE(review): ``d`` is annotated as ``DiffIndex`` but is accessed like
    a single diff entry (``a_blob``/``b_blob``) -- confirm the intended
    type against the callers.
    """
    # ``b_blob`` may be None (GitPython documents this for deleted files);
    # dereferencing it unconditionally raised AttributeError.
    blob = d.b_blob if d.b_blob is not None else d.a_blob
    if blob is None:
        return False
    return is_binary_string(blob.data_stream.read(1024))
def view_file(repo, identifier, filename, username=None):
    """ Displays the content of a file or a tree for the specified repo.

    :arg repo: name of the project, resolved through
        ``pagure.lib.get_project``.
    :arg identifier: a branch name or anything ``repo_obj.get`` accepts;
        when the lookup raises ValueError the HEAD of the repository is
        used and the branch is reported as ``master``.
    :arg filename: ``/``-separated path of the file or tree to display.
    :kwarg username: forwarded to ``pagure.lib.get_project`` as ``user``.
    :return: the rendered ``file.html`` template; ``output_type`` is one of
        ``image``, ``markup``, ``file``, ``binary`` or ``tree``.

    Aborts with a 404 when the project, the branch or the file cannot be
    found, or when the repository is empty.
    """
    repo = pagure.lib.get_project(SESSION, repo, user=username)

    if not repo:
        flask.abort(404, 'Project not found')

    reponame = pagure.get_repo_path(repo)

    repo_obj = pygit2.Repository(reponame)

    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    # Resolve ``identifier`` to a git object: a branch name is tried first,
    # then a direct object lookup.
    if identifier in repo_obj.listall_branches():
        branchname = identifier
        branch = repo_obj.lookup_branch(identifier)
        commit = branch.get_object()
    else:
        try:
            commit = repo_obj.get(identifier)
            branchname = identifier
        except ValueError:
            if 'master' not in repo_obj.listall_branches():
                # NOTE(review): the message probably means
                # 'Branch not found'.
                flask.abort(404, 'Branch no found')
            # If it's not a commit id then it's part of the filename
            commit = repo_obj[repo_obj.head.target]
            branchname = 'master'

    # A tag is replaced by the object it points to.
    if isinstance(commit, pygit2.Tag):
        commit = commit.get_object()

    if commit and not isinstance(commit, pygit2.Blob):
        # Look the path up in the tree of the resolved object, then load
        # the matching object from the repository.
        content = __get_file_in_tree(
            repo_obj, commit.tree, filename.split('/'), bail_on_tree=True)
        if not content:
            flask.abort(404, 'File not found')
        content = repo_obj[content.oid]
    else:
        # ``identifier`` resolved to a blob (or to nothing): use it as-is.
        content = commit

    if not content:
        flask.abort(404, 'File not found')

    if isinstance(content, pygit2.Blob):
        # ``?text=1`` / ``?text=true`` disables the markup rendering below.
        rawtext = str(flask.request.args.get('text')).lower() in ['1', 'true']
        # Everything from the last '.' on. When there is no '.', rfind()
        # returns -1 and ``ext`` is just the last character of the name.
        ext = filename[filename.rfind('.'):]
        if ext in (
                '.gif', '.png', '.bmp', '.tif', '.tiff', '.jpg',
                '.jpeg', '.ppm', '.pnm', '.pbm', '.pgm', '.webp', '.ico'):
            try:
                # Only checks that the data can be opened as an image; the
                # opened image itself is not used.
                Image.open(StringIO(content.data))
                output_type = 'image'
            except IOError as err:
                LOG.debug(
                    'Failed to load image %s, error: %s', filename, err
                )
                output_type = 'binary'
        elif ext in ('.rst', '.mk', '.md') and not rawtext:
            content, safe = pagure.doc_utils.convert_readme(content.data, ext)
            output_type = 'markup'
        elif not is_binary_string(content.data):
            file_content = content.data
            if not isinstance(file_content, basestring):
                file_content = content.data.decode('utf-8')
            try:
                lexer = guess_lexer_for_filename(
                    filename,
                    file_content
                )
            except (ClassNotFound, TypeError):
                # No lexer could be guessed: highlight as plain text.
                lexer = TextLexer()
            content = highlight(
                file_content,
                lexer,
                HtmlFormatter(
                    noclasses=True,
                    style="tango",)
            )
            output_type = 'file'
        else:
            output_type = 'binary'
    else:
        # Not a blob: its entries are listed, sorted by filemode.
        content = sorted(content, key=lambda x: x.filemode)
        output_type = 'tree'

    return flask.render_template(
        'file.html',
        select='tree',
        repo=repo,
        origin='view_file',
        username=username,
        branches=sorted(repo_obj.listall_branches()),
        branchname=branchname,
        filename=filename,
        content=content,
        output_type=output_type,
        repo_admin=is_repo_admin(repo),
    )
def edit_file(repo, branchname, filename, username=None):
    """ Edit a file online.

    On a valid form submission the new content is committed through
    ``pagure.lib.git.update_file_in_git`` and the user is redirected to the
    commit list of the target branch. Otherwise the ``edit_file.html``
    template is rendered, filled with the current content of the file (on
    GET) or with the submitted content.

    Aborts with 404 (unknown project, empty repo, file not found), 403
    (user is not an admin of the repo) or 400 (invalid branch, binary
    file).
    """
    repo = pagure.lib.get_project(SESSION, repo, user=username)
    if not repo:
        flask.abort(404, 'Project not found')

    if not is_repo_admin(repo):
        flask.abort(
            403,
            'You are not allowed to change the settings for this project')

    user = pagure.lib.search_user(
        SESSION, username=flask.g.fas_user.username)

    repo_obj = pygit2.Repository(pagure.get_repo_path(repo))
    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    form = pagure.forms.EditFileForm(emails=user.emails)

    # flask.abort() raises, so past this guard the branch is known to exist.
    if branchname not in repo_obj.listall_branches():
        flask.abort(400, 'Invalid branch specified')
    commit = repo_obj.lookup_branch(branchname).get_object()

    if form.validate_on_submit():
        try:
            pagure.lib.git.update_file_in_git(
                repo,
                branch=branchname,
                branchto=form.branch.data,
                filename=filename,
                content=form.content.data,
                message='%s\n\n%s' % (
                    form.commit_title.data.strip(),
                    form.commit_message.data.strip()
                ),
                user=flask.g.fas_user,
                email=form.email.data,
            )
            flask.flash('Changes committed')
            target = flask.url_for(
                '.view_commits', repo=repo.name, username=username,
                branchname=form.branch.data)
            return flask.redirect(target)
        except pagure.exceptions.PagureException as err:  # pragma: no cover
            APP.logger.exception(err)
            flask.flash('Commit could not be done', 'error')
            # Show the submitted content again so it can be re-sent.
            data = form.content.data
    elif flask.request.method == 'GET':
        content = __get_file_in_tree(
            repo_obj, commit.tree, filename.split('/'))
        if not content or isinstance(content, pygit2.Tree):
            flask.abort(404, 'File not found')

        if is_binary_string(content.data):
            flask.abort(400, 'Cannot edit binary files')

        data = repo_obj[content.oid].data.decode('utf-8')
    else:
        # Submission that did not validate: show what was submitted.
        data = form.content.data.decode('utf-8')

    return flask.render_template(
        'edit_file.html',
        select='tree',
        repo=repo,
        username=username,
        branchname=branchname,
        data=data,
        filename=filename,
        form=form,
        user=user,
        branches=repo_obj.listall_branches(),
    )
def view_issue_raw_file(
        repo, filename=None, username=None, namespace=None):
    """ Displays the raw content of a file of a commit for the specified
    ticket repo.

    The file is looked up on the ``master`` branch of the project's ticket
    repository. ``.patch``/``.diff`` files that are not binary are rendered
    through the ``patchfile.html`` template; anything else is returned as a
    ``(data, 200, headers)`` tuple with a ``Content-Type`` header built
    from the guessed mimetype and encoding.

    Aborts with a 404 when the issue tracker is disabled, the repository is
    empty, the file is missing or is a tree, or the file has no content.
    """
    # The project comes from the request context, not from the argument.
    repo = flask.g.repo

    if not repo.settings.get('issue_tracker', True):
        flask.abort(404, 'No issue tracker found for this project')

    tickets_path = os.path.join(APP.config['TICKETS_FOLDER'], repo.path)
    repo_obj = pygit2.Repository(tickets_path)

    if repo_obj.is_empty:
        flask.abort(404, 'Empty repo cannot have a file')

    commit = repo_obj.lookup_branch('master').get_object()

    content = __get_file_in_tree(
        repo_obj, commit.tree, filename.split('/'), bail_on_tree=True)
    if not content or isinstance(content, pygit2.Tree):
        flask.abort(404, 'File not found')

    mimetype, encoding = mimetypes.guess_type(filename)
    data = repo_obj[content.oid].data

    if not data:
        flask.abort(404, 'No content found')

    looks_like_patch = filename.endswith(('.patch', '.diff'))
    if looks_like_patch and not is_binary_string(content.data):
        # We have a patch file attached to this issue, render the diff in
        # html
        orig_filename = filename.partition('-')[2]
        return flask.render_template(
            'patchfile.html',
            select='issues',
            repo=repo,
            username=username,
            diff=data,
            patchfile=orig_filename,
            form=pagure.forms.ConfirmationForm(),
        )

    # Content starting with a shebang is served as plain text when the
    # mimetype could not be guessed.
    if not mimetype and data[:2] == '#!':
        mimetype = 'text/plain'

    headers = {}
    if not mimetype:
        # Still unknown: a NUL character in the data decides.
        mimetype = (
            'application/octet-stream' if '\0' in data else 'text/plain')
    elif 'html' in mimetype:
        # HTML is sent as a download instead of with its own mimetype.
        mimetype = 'application/octet-stream'
        headers['Content-Disposition'] = 'attachment'

    if mimetype.startswith('text/') and not encoding:
        try:
            encoding = pagure.lib.encoding_utils.guess_encoding(
                ktc.to_bytes(data))
        except pagure.exceptions.PagureException:
            # We cannot decode the file, so bail but warn the admins
            LOG.exception('File could not be decoded')

    if encoding:
        mimetype = mimetype + '; charset={encoding}'.format(
            encoding=encoding)
    headers['Content-Type'] = mimetype

    return (data, 200, headers)