def check_many_edenfs_are_running(
    tracker: ProblemTracker, process_finder: process_finder.ProcessFinder
) -> None:
    """Report a ManyEdenFsRunning problem when rogue edenfs PIDs are found.

    The PIDs come from ``process_finder.find_rogue_pids()``; nothing is
    reported when that call returns an empty collection.
    """
    rogue_pids = process_finder.find_rogue_pids()
    if not rogue_pids:
        return
    tracker.add_problem(ManyEdenFsRunning(rogue_pids))
def run_operating_system_checks(
    tracker: ProblemTracker, instance: EdenInstance, out: ui.Output
) -> None:
    """Report an OSProblem if the running Linux kernel is too old or known bad.

    Does nothing on non-Linux platforms. At most one problem is reported:
    the known-bad-release check is skipped when the kernel is already too
    old. ``out`` is accepted but not used by this function.
    """
    if platform.system() != "Linux":
        return

    # Kernel release string; same as "uname -r".
    kernel_release = platform.release()
    reboot_advice = "Reboot to upgrade kernel version."

    # TODO: Reword these messages prior to public release
    if _os_is_kernel_version_too_old(instance, kernel_release):
        tracker.add_problem(
            OSProblem(
                description=f"Kernel version {kernel_release} too low.",
                remediation=reboot_advice,
            )
        )
        # A too-old kernel makes the remaining checks pointless.
        return

    if _os_is_bad_release(instance, kernel_release):
        tracker.add_problem(
            OSProblem(
                description=f"Kernel {kernel_release} is a known bad kernel.",
                remediation=reboot_advice,
            )
        )
def check_many_edenfs_are_running(
    tracker: ProblemTracker, process_finder: process_finder.ProcessFinder
) -> None:
    """Add a ManyEdenFsRunning problem if the process finder sees rogue PIDs."""
    pids = process_finder.find_rogue_pids()
    if pids:
        # Hand the full PID list to the problem object.
        tracker.add_problem(ManyEdenFsRunning(pids))
def read_shared_path(tracker: ProblemTracker, shared_path: Path) -> str:
    """Return the text content of ``shared_path``.

    Every exception raised while reading is re-raised to the caller.
    Errors other than FileNotFoundError / IsADirectoryError are also
    recorded on ``tracker`` before being re-raised.
    """
    try:
        contents = shared_path.read_text()
    except Exception as err:
        # A missing file or a directory is propagated without a report.
        if not isinstance(err, (FileNotFoundError, IsADirectoryError)):
            tracker.add_problem(Problem(f"Failed to read .hg/sharedpath: {err}"))
        raise
    return contents
def check_many_edenfs_are_running(
    tracker: ProblemTracker,
    process_finder: ProcessFinder,
    uid: Optional[int] = None,
) -> None:
    """Report a ManyEdenFsRunning problem listing the PIDs of rogue processes.

    Rogue processes are looked up with ``find_rogue_processes`` for the
    given ``uid``; nothing is reported when none are found.
    """
    rogue = find_rogue_processes(process_finder, uid=uid)
    if not rogue:
        return
    tracker.add_problem(ManyEdenFsRunning([proc.pid for proc in rogue]))
def check_watchman_subscriptions(
    tracker: ProblemTracker, path: str, info: WatchmanCheckInfo
) -> None:
    """Report a problem if Watchman watches ``path`` with a non-"eden" watcher.

    Paths that are not among ``info.watchman_roots`` are ignored.
    """
    if path in info.watchman_roots:
        project_info = _call_watchman(["watch-project", path])
        active_watcher = project_info.get("watcher")
        if active_watcher != "eden":
            tracker.add_problem(IncorrectWatchmanWatch(path, active_watcher))
def check_watchman_subscriptions(
    tracker: ProblemTracker, path: str, info: WatchmanCheckInfo
) -> None:
    """Check that a Watchman-watched ``path`` uses the "eden" watcher.

    An IncorrectWatchmanWatch problem is added when ``watch-project``
    reports any other watcher (including none at all).
    """
    is_watched = path in info.watchman_roots
    if not is_watched:
        return

    watcher = _call_watchman(["watch-project", path]).get("watcher")
    if watcher == "eden":
        return

    problem = IncorrectWatchmanWatch(path, watcher)
    tracker.add_problem(problem)
def check_disk_usage(
    tracker: ProblemTracker,
    mount_paths: List[str],
    instance: EdenInstance,
    fs_util: FsUtil,
) -> None:
    """Report low-disk-space problems for the Eden mount points.

    For each existing mount point from ``get_mount_pts_set``:
    an ERROR is reported when at most 1GB is available, otherwise an
    ADVICE is reported when the filesystem is at least 90% full.
    Filesystems reporting a total size of zero are skipped.
    """
    advice_used_ratio = 0.90
    error_avail_bytes = 1024 * 1024 * 1024  # 1GB

    for mount_pt in get_mount_pts_set(tracker, mount_paths, instance):
        if not mount_pt or not os.path.exists(mount_pt):
            continue

        vfs = fs_util.statvfs(mount_pt)
        avail = vfs.f_frsize * vfs.f_bavail
        size = vfs.f_frsize * vfs.f_blocks
        if size == 0:
            continue

        used_ratio = float(size - avail) / size

        explanation = (
            "Eden lazily loads your files and needs enough disk space to "
            "store these files when loaded."
        )
        # Optional site-specific text appended from the configuration.
        configured_suffix = instance.get_config_value(
            "doctor.low-disk-space-message", ""
        )
        if configured_suffix:
            explanation = f"{explanation} {configured_suffix}"

        if avail <= error_avail_bytes:
            headline = f"has only {avail} bytes available."
            severity = ProblemSeverity.ERROR
        elif used_ratio >= advice_used_ratio:
            headline = f"is {used_ratio:.2%} full."
            severity = ProblemSeverity.ADVICE
        else:
            continue

        tracker.add_problem(
            Problem(f"{mount_pt} {headline} {explanation}", severity=severity)
        )
def check_shared_path(tracker: ProblemTracker, mount_path: Path) -> None:
    """Advise the user when the checkout's shared hg data lives on NFS.

    The destination is read from the checkout's shared-path file. If it
    cannot be read for any reason this check silently does nothing.
    """
    shared_path = get_shared_path(mount_path)
    try:
        target = read_shared_path(tracker, shared_path)
    except Exception:
        # Best effort: nothing to check without a readable destination.
        return

    if not is_nfs_mounted(target):
        return

    text = (
        f"The Mercurial data directory for {shared_path} is at {target}"
        " which is on a NFS filesystem."
        " Accessing files and directories in this repository will be slow."
    )
    tracker.add_problem(Problem(text, severity=ProblemSeverity.ADVICE))
def check_disk_usage(
    tracker: ProblemTracker, mount_paths: List[str], instance: EdenInstance
) -> None:
    """Warn when the filesystems backing Eden mounts are running out of space.

    Per existing mount point: ERROR if no more than 1GB is available,
    else ADVICE if usage is 90% or higher. The message may be extended by
    the ``doctor.low-disk-space-message`` config value.
    """
    full_ratio_for_advice = 0.90
    min_free_bytes = 1024 * 1024 * 1024  # 1GB

    mount_points = get_mount_pts_set(tracker, mount_paths, instance)
    for mount_point in mount_points:
        if not (mount_point and os.path.exists(mount_point)):
            continue

        fs_stats = os.statvfs(mount_point)
        avail = fs_stats.f_frsize * fs_stats.f_bavail
        size = fs_stats.f_frsize * fs_stats.f_blocks
        if size == 0:
            # Nothing meaningful to compute for a zero-sized filesystem.
            continue

        used = size - avail
        fraction_used = float(used) / size

        reason = (
            "Eden lazily loads your files and needs enough disk space to "
            "store these files when loaded."
        )
        extra = instance.get_config_value("doctor.low-disk-space-message", "")
        if extra:
            reason = f"{reason} {extra}"

        if avail <= min_free_bytes:
            tracker.add_problem(
                Problem(
                    f"{mount_point} has only {avail} bytes available. {reason}",
                    severity=ProblemSeverity.ERROR,
                )
            )
        elif fraction_used >= full_ratio_for_advice:
            tracker.add_problem(
                Problem(
                    f"{mount_point} is {fraction_used:.2%} full. {reason}",
                    severity=ProblemSeverity.ADVICE,
                )
            )
def check_eden_directory(tracker: ProblemTracker, instance: EdenInstance) -> None:
    """Report a problem when Eden's state directory is on an NFS filesystem."""
    state_dir = instance.state_dir
    if is_nfs_mounted(str(state_dir)):
        description = (
            f"Eden's state directory is on an NFS file system: {state_dir}\n"
            " This will likely cause performance problems and/or other errors."
        )

        # On FB devservers the default Eden state directory is ~/local/.eden,
        # and ~/local is normally a symlink to local disk (for users who still
        # have NFS home directories). The usual way to end up with the state
        # directory on NFS is having a regular directory at ~/local instead of
        # that symlink, so point the user at it.
        fix = (
            "The most common cause for this is if your ~/local symlink does "
            "not point to local disk. Make sure that ~/local is a symlink "
            "pointing to local disk and then restart Eden."
        )
        tracker.add_problem(Problem(description, fix))
def check_eden_directory(tracker: ProblemTracker, instance: EdenInstance) -> None:
    """Flag an Eden state directory that lives on NFS.

    Adds a Problem (with a remediation hint about the ~/local symlink)
    when ``is_nfs_mounted`` is true for ``instance.state_dir``.
    """
    eden_dir = instance.state_dir
    on_nfs = is_nfs_mounted(str(eden_dir))
    if not on_nfs:
        return

    lines = [
        f"Eden's state directory is on an NFS file system: {eden_dir}",
        " This will likely cause performance problems and/or other errors.",
    ]
    msg = "\n".join(lines)

    # The default state directory on FB devservers is ~/local/.eden, where
    # ~/local should be a symlink onto local disk. When ~/local is a plain
    # directory inside an NFS home directory the state ends up on NFS, which
    # is the most common cause of this problem.
    remediation = (
        "The most common cause for this is if your ~/local symlink does not "
        "point to local disk. Make sure that ~/local is a symlink pointing to "
        "local disk and then restart Eden."
    )
    tracker.add_problem(Problem(msg, remediation))
def check_hg(tracker: ProblemTracker, checkout: EdenCheckout) -> None:
    """Run every HgChecker against the checkout's ``.hg`` directory.

    A single HgDirectoryError is reported for all checkers whose
    ``check()`` is falsy. A checker that raises is reported as an
    UnexpectedCheckError instead. A missing ``.hg`` directory is reported
    immediately, without running any checker.
    """
    checker_classes: List[Type[HgChecker]] = [
        DirstateChecker,
        HgrcChecker,
        RequiresChecker,
        SharedPathChecker,
        SharedChecker,
        BookmarksChecker,
        BranchChecker,
    ]
    checkers = [make_checker(checkout) for make_checker in checker_classes]

    if not os.path.exists(checkout.path / ".hg"):
        tracker.add_problem(
            HgDirectoryError(
                checkout, checkers, f"Missing hg directory: {checkout.path}/.hg"
            )
        )
        return

    failed: List[HgChecker] = []
    for checker in checkers:
        try:
            passed = bool(checker.check())
        except Exception:
            tracker.add_problem(UnexpectedCheckError())
            continue
        if not passed:
            failed.append(checker)

    if not failed:
        return

    # Every checker failing is treated as an empty .hg directory.
    msg = None
    if len(failed) == len(checkers):
        msg = f"No contents present in hg directory: {checkout.path}/.hg"
    tracker.add_problem(HgDirectoryError(checkout, failed, msg))
def check_hg(tracker: ProblemTracker, checkout: EdenCheckout) -> None:
    """Validate the contents of the checkout's ``.hg`` directory.

    Reports HgDirectoryError for a missing directory or for the set of
    checkers that did not pass, and UnexpectedCheckError for each checker
    whose ``check()`` raised.
    """
    checker_classes: List[Type[HgChecker]] = [
        DirstateChecker,
        HgrcChecker,
        RequiresChecker,
        SharedPathChecker,
        SharedChecker,
        BookmarksChecker,
        BranchChecker,
    ]
    checkers: List[HgChecker] = []
    for checker_class in checker_classes:
        checkers.append(checker_class(checkout))

    hg_dir = checkout.path / ".hg"
    hg_dir_exists = os.path.exists(hg_dir)
    if not hg_dir_exists:
        missing_msg = f"Missing hg directory: {checkout.path}/.hg"
        tracker.add_problem(HgDirectoryError(checkout, checkers, missing_msg))
        return

    bad_checkers: List[HgChecker] = []
    for checker in checkers:
        try:
            if not checker.check():
                bad_checkers.append(checker)
        except Exception:
            tracker.add_problem(UnexpectedCheckError())

    if bad_checkers:
        if len(bad_checkers) == len(checkers):
            # All checks failing means there is nothing inside .hg.
            msg = f"No contents present in hg directory: {checkout.path}/.hg"
        else:
            msg = None
        tracker.add_problem(HgDirectoryError(checkout, bad_checkers, msg))
def check_bind_mounts(
    tracker: ProblemTracker,
    checkout: EdenCheckout,
    mount_table: mtab.MountTable,
    fs_util: filesystem.FsUtil,
) -> None:
    """Check that bind mounts exist and have different device IDs than the
    top-level checkout mount path, to confirm that they are mounted.

    If the checkout path itself cannot be stat'ed, that failure is reported
    and no bind mount is examined.
    """
    checkout_root = str(checkout.path)
    try:
        checkout_path_stat = mount_table.lstat(checkout_root)
    except OSError as ex:
        tracker.add_problem(
            Problem(f"Failed to stat eden mount: {checkout_root}: {ex}")
        )
        return

    client_bind_mount_dir = str(checkout.state_dir / "bind-mounts")

    # Map each source directory in the client state dir to its mount point:
    #   source, e.g. /data/users/bob/.eden/clients/fbsource-eden/bind-mounts/...
    #   mount point, e.g. /data/users/bob/fbsource/...
    source_to_mount_point = {
        os.path.join(client_bind_mount_dir, client_suffix): os.path.join(
            checkout_root, mount_suffix
        )
        for client_suffix, mount_suffix in checkout.get_config().bind_mounts.items()
    }

    for source_dir, mount_point in source_to_mount_point.items():
        _check_bind_mount_client_path(tracker, source_dir, mount_table, fs_util)
        _check_bind_mount_path(
            tracker,
            source_dir,
            mount_point,
            checkout_path_stat,
            mount_table,
            fs_util,
        )
def check_disk_usage(
    tracker: ProblemTracker, mount_paths: List[str], instance: EdenInstance
) -> None:
    """Report mounts whose backing filesystem is low on free space.

    ERROR severity when no more than 1GB is available; otherwise ADVICE
    severity when at least 90% of the filesystem is used.
    """
    advice_threshold = 0.90
    error_threshold = 1024 * 1024 * 1024  # 1GB
    why_it_matters = (
        "Eden lazily loads your files and needs enough disk "
        "space to store these files when loaded."
    )

    for mount_pt in get_mount_pts_set(tracker, mount_paths, instance):
        if not mount_pt or not os.path.exists(mount_pt):
            continue

        st = os.statvfs(mount_pt)
        avail = st.f_frsize * st.f_bavail
        size = st.f_frsize * st.f_blocks
        if size == 0:
            continue

        used_percent = float(size - avail) / size

        if avail <= error_threshold:
            summary = f"{mount_pt} has only {avail} bytes available. "
            severity = ProblemSeverity.ERROR
        elif used_percent >= advice_threshold:
            summary = f"{mount_pt} is {used_percent:.2%} full. "
            severity = ProblemSeverity.ADVICE
        else:
            # Plenty of space: nothing to report for this mount.
            continue

        tracker.add_problem(Problem(summary + why_it_matters, severity=severity))
def _check_bind_mount_path(
    tracker: ProblemTracker,
    mount_source: str,
    mount_point: str,
    checkout_path_stat: mtab.MTStat,
    mount_table: mtab.MountTable,
    fs_util: filesystem.FsUtil,
) -> None:
    """Identify a bind mount point that is missing, not a directory, or not mounted.

    Problems reported on ``tracker``:
      * BindMountNotMounted(mkdir=True) when ``mount_point`` does not exist;
      * a generic Problem when ``lstat`` fails for any other reason;
      * NonDirectoryFile when ``mount_point`` is not a directory;
      * BindMountNotMounted(mkdir=False) when ``mount_point`` has the same
        device ID as the checkout itself.
    """
    # Only the lstat call is guarded, so an OSError raised while building or
    # recording a problem is never mistaken for a failed lstat.
    try:
        bind_mount_stat = mount_table.lstat(mount_point)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            tracker.add_problem(
                BindMountNotMounted(
                    mount_source,
                    mount_point,
                    mkdir=True,
                    fs_util=fs_util,
                    mount_table=mount_table,
                )
            )
        else:
            # Include the underlying error so the failure can be diagnosed.
            tracker.add_problem(
                Problem(f"Failed to lstat mount path: {mount_point}: {ex}")
            )
        return

    if not stat.S_ISDIR(bind_mount_stat.st_mode):
        tracker.add_problem(NonDirectoryFile(mount_point))
        return

    # Same device ID as the checkout: reported as not mounted.
    if bind_mount_stat.st_dev == checkout_path_stat.st_dev:
        tracker.add_problem(
            BindMountNotMounted(
                mount_source,
                mount_point,
                mkdir=False,
                fs_util=fs_util,
                mount_table=mount_table,
            )
        )
def _check_bind_mount_client_path(
    tracker: ProblemTracker,
    path: str,
    mount_table: mtab.MountTable,
    fs_util: filesystem.FsUtil,
) -> None:
    """Identify a missing or non-directory bind mount source path.

    Reports MissingBindMountClientDir when ``path`` does not exist,
    NonDirectoryFile when it exists but is not a directory, and a generic
    Problem for any other ``lstat`` failure.
    """
    try:
        mode = mount_table.lstat(path).st_mode
        if not stat.S_ISDIR(mode):
            tracker.add_problem(NonDirectoryFile(path))
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            problem = Problem(
                f"Failed to lstat bind mount source directory: {path}: {ex}"
            )
        else:
            problem = MissingBindMountClientDir(path, fs_util)
        tracker.add_problem(problem)
def check_hg(tracker: ProblemTracker, checkout: EdenCheckout) -> None:
    """Run the hg checkers against the checkout's ``.hg`` directory.

    A missing ``.hg`` directory is reported immediately as an
    HgDirectoryError covering every checker. Otherwise each checker's
    ``check()`` is called: a falsy result marks the checker as bad, and an
    exception is reported as an UnexpectedCheckError. All bad checkers are
    reported together in one HgDirectoryError, which carries a
    "No contents present" message only when the failures are exactly the
    file checkers.
    """
    file_checker_classes: List[Type[HgChecker]] = [
        DirstateChecker,
        HgrcChecker,
        RequiresChecker,
        SharedPathChecker,
        SharedChecker,
        BookmarksChecker,
        BranchChecker,
    ]
    # `AbandonedTransactionChecker` looks for the existence of the journal
    # file as an indicator of a potential problem. The other checkers verify
    # that expected files are not missing.
    other_checker_classes: List[Type[HgChecker]] = [AbandonedTransactionChecker]

    file_checkers = [checker_class(checkout) for checker_class in file_checker_classes]
    checkers = file_checkers + [
        checker_class(checkout) for checker_class in other_checker_classes
    ]

    hg_path = checkout.path / ".hg"
    if not os.path.exists(hg_path):
        description = f"Missing hg directory: {checkout.path}/.hg"
        tracker.add_problem(HgDirectoryError(checkout, checkers, description))
        return

    bad_checkers: List[HgChecker] = []
    for checker in checkers:
        try:
            if checker.check():
                continue
            bad_checkers.append(checker)
        except Exception:
            tracker.add_problem(UnexpectedCheckError())

    if bad_checkers:
        # An empty `.hg` directory shows up as every file checker failing and
        # nothing else. Comparing counts alone is not enough: a failing
        # non-file checker plus all-but-one failing file checkers would give
        # the same count. Compare by identity so the test does not depend on
        # how checkers implement equality.
        file_checker_ids = {id(checker) for checker in file_checkers}
        hg_dir_is_empty = len(bad_checkers) == len(file_checkers) and all(
            id(checker) in file_checker_ids for checker in bad_checkers
        )
        msg = (
            f"No contents present in hg directory: {checkout.path}/.hg"
            if hg_dir_is_empty
            else None
        )
        tracker.add_problem(HgDirectoryError(checkout, bad_checkers, msg))
def check_nuclide_subscriptions(
    tracker: ProblemTracker, path: str, info: WatchmanCheckInfo
) -> None:
    """Report missing or duplicated Watchman subscriptions used by Nuclide.

    Only Nuclide roots equal to ``path`` or located underneath it are
    considered. Nothing is checked when ``info.nuclide_roots`` is None or
    when no root is connected to ``path``.
    """
    if info.nuclide_roots is None:
        return

    # nuclide_roots is a set, but a given root may be open in several Atom
    # windows at the same time.
    path_prefix = path + "/"
    connected_roots = [
        root
        for root in info.nuclide_roots
        if root == path or root.startswith(path_prefix)
    ]
    if not connected_roots:
        # There do not appear to be any Nuclide connections for path.
        return

    # Count how many times each named subscription is registered.
    response = _call_watchman(["debug-get-subscriptions", path])
    counts: Dict[str, int] = {}
    for subscriber in response.get("subscribers", []):
        name = subscriber.get("info", {}).get("name")
        if name is not None:
            counts[name] = counts.get(name, 0) + 1

    # Even when a root is open in multiple Nuclide windows, the Nuclide server
    # should create its "filewatcher-" subscription exactly once.
    filewatcher_names = (f"filewatcher-{root}" for root in connected_roots)
    flagged = [name for name in filewatcher_names if counts.get(name) != 1]

    # Today, Nuclide creates a number of Watchman subscriptions per root
    # folder that is under an Hg working copy. (It should probably consolidate
    # them, though that needs some refactoring.) Each connected root is a root
    # folder in at least one Atom window, so each of these subscriptions must
    # exist at least once per connected root.
    #
    # TODO(mbolin): Come up with a more stable contract than including a
    # hardcoded list of Nuclide subscription names in here because Eden and
    # Nuclide releases are not synced. This is admittedly a stopgap measure:
    # the primary objective is to figure out how Eden/Nuclide gets into
    # this state to begin with and prevent it.
    #
    # Further, Nuclide should probably rename these subscriptions so that:
    # (1) It is clear that Nuclide is the one who created the subscription.
    # (2) The subscription can be ascribed to an individual Nuclide client
    #     if we are going to continue to create the same subscription
    #     multiple times.
    root_count = len(connected_roots)
    flagged.extend(
        hg_name
        for hg_name in NUCLIDE_HG_SUBSCRIPTIONS
        if counts.get(hg_name, 0) < root_count
    )
    if not flagged:
        return

    def format_paths(paths: List[str]) -> str:
        return "\n ".join(paths)

    missing = [name for name in flagged if counts.get(name, 0) == 0]
    duplicated = [name for name in flagged if counts.get(name, 0) > 1]

    report = [
        "Nuclide appears to be used to edit the following directories\n"
        f"under {path}:\n\n"
        f" {format_paths(connected_roots)}\n\n"
    ]
    if missing:
        report.append(
            "but the following Watchman subscriptions appear to be missing:\n\n"
            f" {format_paths(missing)}\n\n"
        )
    if duplicated:
        conjunction = "and" if missing else "but"
        report.append(
            f"{conjunction} the following Watchman subscriptions have duplicates:\n\n"
            f" {format_paths(duplicated)}\n\n"
        )
    report.append(
        "This can cause file changes to fail to show up in Nuclide.\n"
        "Currently, the only workaround for this is to run\n"
        '"Nuclide Remote Projects: Kill And Restart" from the\n'
        "command palette in Atom.\n"
    )
    tracker.add_problem(Problem("".join(report)))
def check_nuclide_subscriptions(
    tracker: ProblemTracker, path: str, info: WatchmanCheckInfo
) -> None:
    """Check the Watchman subscriptions Nuclide needs for roots under ``path``.

    Adds one Problem describing the subscriptions that are missing and/or
    duplicated. Returns without checking when ``info.nuclide_roots`` is
    None or no Nuclide root is equal to or below ``path``.
    """
    nuclide_roots = info.nuclide_roots
    if nuclide_roots is None:
        return

    # Each entry of the nuclide_roots set could be a root folder in more than
    # one Atom window.
    path_prefix = path + "/"
    connected_nuclide_roots = []
    for candidate in nuclide_roots:
        if candidate == path or candidate.startswith(path_prefix):
            connected_nuclide_roots.append(candidate)
    if len(connected_nuclide_roots) == 0:
        # There do not appear to be any Nuclide connections for path.
        return

    subscriptions = _call_watchman(["debug-get-subscriptions", path])
    subscription_counts: Dict[str, int] = {}
    for subscriber in subscriptions.get("subscribers", []):
        sub_name = subscriber.get("info", {}).get("name")
        if sub_name is None:
            continue
        subscription_counts.setdefault(sub_name, 0)
        subscription_counts[sub_name] += 1

    suspicious: List[str] = []

    # The Nuclide server should create the "filewatcher-" subscription only
    # once per root, even if the root is open in multiple Nuclide windows.
    for nuclide_root in connected_nuclide_roots:
        expected = f"filewatcher-{nuclide_root}"
        if subscription_counts.get(expected) != 1:
            suspicious.append(expected)

    # Nuclide currently creates several Watchman subscriptions per root folder
    # under an Hg working copy (consolidating them would take refactoring).
    # As each connected root is open in at least one Atom window, every such
    # subscription needs at least as many instances as connected roots.
    #
    # TODO(mbolin): Come up with a more stable contract than including a
    # hardcoded list of Nuclide subscription names in here because Eden and
    # Nuclide releases are not synced. This is admittedly a stopgap measure:
    # the primary objective is to figure out how Eden/Nuclide gets into
    # this state to begin with and prevent it.
    #
    # Further, Nuclide should probably rename these subscriptions so that:
    # (1) It is clear that Nuclide is the one who created the subscription.
    # (2) The subscription can be ascribed to an individual Nuclide client
    #     if we are going to continue to create the same subscription
    #     multiple times.
    required_instances = len(connected_nuclide_roots)
    for hg_subscription in NUCLIDE_HG_SUBSCRIPTIONS:
        if subscription_counts.get(hg_subscription, 0) < required_instances:
            suspicious.append(hg_subscription)

    if not suspicious:
        return

    def format_paths(paths: List[str]) -> str:
        return "\n ".join(paths)

    # Split the suspicious names into absent ones and over-registered ones.
    missing_subscriptions: List[str] = []
    duplicate_subscriptions: List[str] = []
    for sub in suspicious:
        seen = subscription_counts.get(sub, 0)
        if seen == 0:
            missing_subscriptions.append(sub)
        elif seen > 1:
            duplicate_subscriptions.append(sub)

    output = io.StringIO()
    output.write("Nuclide appears to be used to edit the following directories\n")
    output.write(f"under {path}:\n\n")
    output.write(f" {format_paths(connected_nuclide_roots)}\n\n")
    if missing_subscriptions:
        output.write(
            "but the following Watchman subscriptions appear to be missing:\n\n"
        )
        output.write(f" {format_paths(missing_subscriptions)}\n\n")
    if duplicate_subscriptions:
        if missing_subscriptions:
            conj = "and"
        else:
            conj = "but"
        output.write(
            f"{conj} the following Watchman subscriptions have duplicates:\n\n"
        )
        output.write(f" {format_paths(duplicate_subscriptions)}\n\n")
    output.write(
        "This can cause file changes to fail to show up in Nuclide.\n"
        "Currently, the only workaround for this is to run\n"
        '"Nuclide Remote Projects: Kill And Restart" from the\n'
        "command palette in Atom.\n"
    )
    tracker.add_problem(Problem(output.getvalue()))
def check_for_stale_mounts(
    tracker: ProblemTracker, mount_table: mtab.MountTable
) -> None:
    """Report a StaleMountsFound problem when stale Eden mount points exist."""
    stale = get_all_stale_eden_mount_points(mount_table)
    if not stale:
        return
    tracker.add_problem(StaleMountsFound(stale, mount_table))