Beispiel #1
0
def main(argv) -> None:
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  tiers = pbutil.FromFile(pathlib.Path(FLAGS.data_tiers),
                          data_tiers_pb2.DataTiers())
  for tier in tiers.directory:
    logging.info('Processing %s', tier.path)
    _SetDirectorySize(tier)

  if FLAGS.summary:
    # Print the size per directory.
    df = pd.DataFrame([
      {
        'Path': d.path,
        'Tier': d.tier,
        'Size': humanize.naturalsize(d.size_bytes),
        'Size (bytes)': d.size_bytes
      } for d in tiers.directory if d.size_bytes
    ])
    df = df.sort_values(['Tier', 'Size (bytes)'], ascending=[True, False])
    print(df[['Path', 'Tier', 'Size']].to_string(index=False))

    # Print the total size per tier.
    df2 = df.groupby('Tier').sum()
    df2['Size'] = [humanize.naturalsize(d['Size (bytes)'])
                   for _, d in df2.iterrows()]
    df2 = df2.reset_index()
    df2 = df2.sort_values('Tier')
    print()
    print("Totals:")
    print(df2[['Tier', 'Size']].to_string(index=False))
  else:
    print(tiers)
Beispiel #2
0
def main(argv) -> None:
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tiers = pbutil.FromFile(pathlib.Path(FLAGS.data_tiers),
                            data_tiers_pb2.DataTiers())
    for tier in tiers.directory:
        app.Log(1, "Processing %s", tier.path)
        _SetDirectorySize(tier)

    if FLAGS.summary:
        # Print the size per directory.
        df = pd.DataFrame([{
            "Path": d.path,
            "Tier": d.tier,
            "Size": humanize.BinaryPrefix(d.size_bytes, "B"),
            "Size (bytes)": d.size_bytes,
        } for d in tiers.directory if d.size_bytes])
        df = df.sort_values(["Tier", "Size (bytes)"], ascending=[True, False])
        print(df[["Path", "Tier", "Size"]].to_string(index=False))

        # Print the total size per tier.
        df2 = df.groupby("Tier").sum()
        df2["Size"] = [
            humanize.BinaryPrefix(d["Size (bytes)"], "B")
            for _, d in df2.iterrows()
        ]
        df2 = df2.reset_index()
        df2 = df2.sort_values("Tier")
        print()
        print("Totals:")
        print(df2[["Tier", "Size"]].to_string(index=False))
    else:
        print(tiers)
Beispiel #3
0
from absl import flags
from absl import logging

from labm8 import pbutil
from system.machines.proto import data_tiers_pb2


FLAGS = flags.FLAGS

flags.DEFINE_string('data_tiers', None,
                    'The path of the directory to package.')
flags.DEFINE_bool('summary', False, 'TODO')

flags.register_validator(
    'data_tiers',
    lambda path: pbutil.ProtoIsReadable(path, data_tiers_pb2.DataTiers()),
    message='--data_tiers must be a DataTiers message.')


def _SetDirectorySize(tier: data_tiers_pb2.Directory):
  path = pathlib.Path(tier.path).expanduser()
  if not path.is_dir():
    logging.warning("Directory '%s' not found", path)
    return

  os.chdir(path)
  excludes = ['--exclude={}'.format(pathlib.Path(e).expanduser())
              for e in tier.exclude]
  cmd = ['du', '-b', '-s', '.'] + excludes
  logging.info('$ cd %s && %s', path, ' '.join(cmd))
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)