def normalize_blob(blob, conversion, binary_detection):
    """Return ``blob`` converted by ``conversion``, or untouched if binary.

    The blob's ``data`` is read once.  When ``binary_detection`` is exactly
    ``True`` and ``is_binary`` reports the data as binary, the very same
    ``blob`` object is handed back without calling ``conversion``.  In every
    other case a fresh ``Blob`` is created, its ``data`` is set to
    ``conversion(data)``, and that new object is returned; the input blob is
    not modified.
    """
    original_data = blob.data

    # Binary content is only skipped when detection was explicitly enabled
    # (identity check against True, not mere truthiness).
    if binary_detection is True and is_binary(original_data):
        return blob

    result = Blob()
    result.data = conversion(original_data)
    return result
def fn(info):
    """Predicate over a ``(path, mode, sha)`` triple.

    Returns True when ``path`` is None.  Otherwise returns True only if the
    object found under ``sha`` in ``store`` is exactly a ``Blob`` (subclasses
    do not count) and ``is_binary`` rejects its data.  ``store`` is not a
    parameter; it is resolved from the surrounding scope.
    """
    path, mode, sha = info

    # Entries without a path are accepted without consulting the store.
    if path is None:
        return True

    # Anything that is not precisely a Blob is rejected.
    if type(store[sha]) is not Blob:
        return False

    return not is_binary(store[sha].data)
def difflist(self, old_rev, new_rev):
    """Build a per-file diff summary between two revisions.

    Both revisions are looked up in ``self.repo`` and the repository's
    object store is asked for the tree changes between their trees.  One
    entry is produced for every change reported.

    ``old_rev`` is the key of the base revision.  If looking it up raises
    ``KeyError``, the value of ``self.head()`` is used as the base tree
    instead.  ``new_rev`` is the key of the other revision; its lookup is
    not guarded, so any exception it raises propagates to the caller.

    Returns a dict with two keys:

    * ``'stats'``: aggregate counters.  Only ``lines_added`` and
      ``lines_deleted`` are updated by this method.
    * ``'changes'``: a list of entry dicts with the keys ``old_path``,
      ``new_path``, ``old_mode``, ``new_mode``, ``file_added``,
      ``file_deleted``, ``lines_added``, ``lines_deleted``,
      ``lines_added_ratio``, ``lines_deleted_ratio`` and ``diff``.  The two
      ratio keys are always present and are percentages of the entry's
      total changed lines (0 when nothing changed or the file is binary).
    """
    repo = self.repo
    store = repo.object_store
    try:
        old_tree = repo[old_rev].tree
    except KeyError:
        old_tree = self.head()
    new_tree = repo[new_rev].tree
    changes = store.tree_changes(old_tree, new_tree)

    def content(mode, hexsha):
        # A missing side of the change has no content at all.
        if hexsha is None:
            return ''
        # Submodule links are rendered as a one-line description.
        elif S_ISGITLINK(mode):
            return 'Submodule commit {}\n'.format(hexsha)
        else:
            return store[hexsha].data

    def lines(content):
        if not content:
            return []
        # keepends=True so line terminators are preserved.
        return content.splitlines(True)

    # NOTE(review): 'files_added', 'files_deleted' and the two aggregate
    # ratio counters are initialised here but never updated in this
    # method -- confirm whether a caller fills them in.
    tree = {
        'stats': {
            'files_added': 0,
            'files_deleted': 0,
            'lines_added': 0,
            'lines_deleted': 0,
            'lines_added_ratio': 0,
            'lines_deleted_ratio': 0,
        },
        'changes': [],
    }

    for path, mode, sha in changes:
        # NOTE(review): if ``store`` is a dulwich object store, its
        # tree_changes() is documented to yield (old, new) pairs, whereas
        # index 1 is read as "old" here.  Left as-is because callers may
        # depend on the current orientation -- TODO confirm.
        old_path, old_mode, old_sha = path[1], mode[1], sha[1]
        new_path, new_mode, new_sha = path[0], mode[0], sha[0]

        file_added = False
        file_deleted = False

        # When one side has no path, reuse the other side's path so both
        # path fields are always populated.
        if not old_path:
            old_path = new_path
            file_added = True

        if not new_path:
            new_path = old_path
            file_deleted = True

        # Construct our object entry.  The ratio keys default to 0 so that
        # every entry has the same shape, including text diffs with no
        # changed lines (previously those entries lacked the ratio keys).
        entry = {
            'old_path': old_path,
            'new_path': new_path,
            'old_mode': old_mode,
            'new_mode': new_mode,
            'file_added': file_added,
            'file_deleted': file_deleted,
            'lines_added': 0,
            'lines_deleted': 0,
            'lines_added_ratio': 0,
            'lines_deleted_ratio': 0,
            'diff': [],
        }

        # Fetch all of the content
        old_content = content(old_mode, old_sha)
        new_content = content(new_mode, new_sha)

        if is_binary(old_content) or is_binary(new_content):
            # Binary files get a message instead of a line diff; the line
            # counters and ratios keep their zero defaults.
            msg = 'Binary files {} and {} are different'
            entry['diff'] = msg.format(old_path, new_path)
        else:
            # Diff the file for changes
            new_lines = lines(new_content)
            old_lines = lines(old_content)

            diff, added, deleted = unified_diff(old_lines, new_lines)
            entry['diff'] = diff
            entry['lines_added'] += added
            entry['lines_deleted'] += deleted

            total = float(entry['lines_added'] + entry['lines_deleted'])
            # Guard against division by zero when no lines changed.
            if total > 0:
                lines_added = entry['lines_added'] * 100.0
                lines_deleted = entry['lines_deleted'] * 100.0
                entry['lines_added_ratio'] = lines_added / total
                entry['lines_deleted_ratio'] = lines_deleted / total

        # Aggregate statistics
        tree['stats']['lines_added'] += entry['lines_added']
        tree['stats']['lines_deleted'] += entry['lines_deleted']
        tree['changes'].append(entry)

    return tree