Esempio n. 1
0
 def get_status_for_source(self, context, source):
     """Log the status of ``source`` and of every replica of ``context``.

     The source state file is loaded once and handed to
     ``inspect_source``; its result is logged at INFO level. Each replica
     returned by the config for ``context`` then gets its own state file
     loaded and one indented INFO line built by ``inspect_replica``.

     Args:
         context: Context name passed to ``state_filename`` and the config.
         source: Source identifier passed to ``state_filename``.
     """
     source_states = persistent_dict.PersistentDict(
         state_filename(context, source, source))
     summary = self.inspect_source(context, source, source_states)
     self.logger.info(summary)

     for replica in self.config.get_replicas_for_context(context):
         replica_states = persistent_dict.PersistentDict(
             state_filename(context, source, replica))
         report = self.inspect_replica(replica, source_states,
                                       replica_states)
         self.logger.info(f"  {report}")
Esempio n. 2
0
 def test_lazyio(self):
     """Check that lazily written entries reach a second reader late.

     The writer is created with a second positional argument of 5
     (presumably the lazy-write delay in seconds -- TODO confirm). A second
     dict opened on the same file must raise ``KeyError`` at first; after a
     6 second sleep, one more write and an explicit ``read()``, it must
     return the stored value.
     """
     writer = persistent_dict.PersistentDict("testfile.txt", 5)
     for key, value in (("thing", 1), ("thing two", "two")):
         writer.set(key, value)

     reader = persistent_dict.PersistentDict("testfile.txt")
     with self.assertRaises(KeyError):
         reader.get("thing")

     time.sleep(6)
     writer.set("thing", 2)
     reader.read()
     self.assertEqual(reader.get("thing two"), "two")
Esempio n. 3
0
 def test_lazyio(self):
     """Check that lazily written entries reach a second reader late.

     The writer is created with ``lazy_write=5``. A second dict opened on
     the same file must raise ``KeyError`` for ``"thing two"`` at first;
     after a 6 second sleep, one more write and an explicit ``read()``, it
     must return the stored value.
     """
     pd = persistent_dict.PersistentDict("testfile.txt", lazy_write=5)
     pd["thing"] = 1
     pd["thing two"] = "two"
     pd2 = persistent_dict.PersistentDict("testfile.txt")
     with self.assertRaises(KeyError):
         # Only the lookup matters here; the value is intentionally unused.
         _ = pd2["thing two"]
     time.sleep(6)
     pd["thing"] = 2
     pd2.read()
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(pd2["thing two"], "two")
Esempio n. 4
0
 def get_status_for_replica(self, context, replica, brief=False):
     """Log the status of one replica of ``context``.

     Loads the source and replica state files (both located via
     ``state_filename`` with ``replica`` as the second argument), logs the
     ``inspect_replica`` report at INFO level and, unless ``brief`` is
     true, logs an extra line with the source's file count and size in GB.

     Args:
         context: Context name used to look up the source and state files.
         replica: Replica identifier.
         brief: Forwarded to ``inspect_replica``; when true the source
             size line is skipped.
     """
     source = self.config.get_source_for_context(context)
     source_states = persistent_dict.PersistentDict(
         state_filename(context, replica, source))

     replica_file = state_filename(context, replica, replica)
     self.logger.debug(f"replica file: {replica_file}")
     replica_states = persistent_dict.PersistentDict(replica_file)

     report = self.inspect_replica(replica, source_states, replica_states,
                                   brief)
     self.logger.info(report)
     if brief:
         return

     file_count, byte_count = sizeof(source_states)
     self.logger.info(
         f"  Source: {source}: {file_count} files, "
         f"{byte_count/2**30:.2f}GB")
Esempio n. 5
0
    def __init__(self, context):
        """Initialise the per-context state of this object.

        Reads the ``copies``, ``source``, ``LAZY WRITE`` and ``rescan``
        settings of ``context`` from the shared config, builds a scanner
        for the source path and opens the persistent client table.

        Args:
            context: Name of the config context; it is appended to the
                logger name and embedded in the client-table file path.
        """
        super().__init__()
        self.context = context

        logger_name = utils.logger_str(__class__) + " " + context
        self.logger = logging.getLogger(logger_name)

        cfg = config.Config.instance()
        self.config = cfg
        self.copies = int(cfg.get(self.context, "copies", 2))
        source_setting = cfg.get(self.context, "source")
        self.path = config.path_for(source_setting)
        self.scanner = scanner.Scanner(self.context, self.path)

        lazy_write_setting = cfg.get(context, "LAZY WRITE", 5)
        lazy_write = utils.str_to_duration(lazy_write_setting)
        # TODO: support expiration
        rescan_setting = cfg.get(self.context, "rescan")
        self.rescan = utils.str_to_duration(rescan_setting)

        clients_file = f"/tmp/cb.s{context}.json.bz2"
        self.clients = persistent_dict.PersistentDict(
            clients_file,
            lazy_write=lazy_write,
            cls=lock.Lock,
            expiry=self.rescan,
        )
        self.drains = elapsed.ExpiringDict(300)  # NOT persistent!
        self.locks = locker.Locker(5)
        # TODO: timers should relate to a configurable cycle time
        self.bailout = False
        self.stats = dict(claims=0, drops=0)
        self.handling = False
Esempio n. 6
0
 def __init__(self, path, ignorals, state_filename):
     """Store the scan parameters and open the persistent state table.

     Args:
         path: Stored unchanged on ``self.path``.
         ignorals: Stored unchanged on ``self.ignorals``.
         state_filename: File handed to ``PersistentDict`` together with
             the ``LAZY_WRITE`` option (default 5).
     """
     self.logger = logging.getLogger("gc.scanner")
     self.config = config.Config.instance()
     self.path = path
     self.ignorals = ignorals
     lazy_write = self.config.getOption("LAZY_WRITE", 5)
     self.states = persistent_dict.PersistentDict(state_filename,
                                                  lazy_write)
 def __init__(self, context):
     """Resolve the source of ``context`` and open its state table.

     The state file lives under ``<path>/.ghetto_cluster/`` and is named
     ``source.<context>.json``; it is opened with the ``LAZY_WRITE`` option
     (default 5).

     Args:
         context: Context name, forwarded to the parent initialiser.
     """
     super().__init__(context)
     self.source = self.config.get_source_for_context(context)
     self.path = config.path_for(self.source)
     persistent_dict_file = (
         f"{self.path}/.ghetto_cluster/source.{context}.json")
     lazy_write = self.config.getOption("LAZY_WRITE", 5)
     self.states = persistent_dict.PersistentDict(persistent_dict_file,
                                                  lazy_write)
     self.logger = logging.getLogger(logger_str(__class__))
Esempio n. 8
0
 def test_dirty(self):
     """After clearing dirty bits, only rewritten keys stop being clean.

     Three keys are stored, the dirty bits are cleared, then two keys are
     rewritten; the first clean key must be the untouched ``"one"``.
     """
     store = persistent_dict.PersistentDict("testfile.txt")
     for key in ("one", "two", "three"):
         store.set(key, 1)
     store.clear_dirtybits()

     store.set("three", 3)
     store.set("two", 2)
     first_clean = store.clean_keys()[0]
     self.assertEqual(first_clean, "one")
Esempio n. 9
0
 def test_dirty(self):
     """After clearing dirty bits, only rewritten keys stop being clean.

     Three keys are stored, the dirty bits are cleared, then two keys are
     rewritten; the first clean key must be the untouched ``"one"``.
     """
     pd = persistent_dict.PersistentDict("testfile.txt")
     pd["one"] = 1
     pd["two"] = 1
     pd["three"] = 1
     pd.clear_dirtybits()
     pd["three"] = 3
     pd["two"] = 2
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(pd.clean_keys()[0], "one")
Esempio n. 10
0
 def __init__(self, context, source, testing=False):
     """Record the source settings and open its state table.

     The state file is ``<path>/.gc/<hostname>.<context>.json``, where path
     and hostname are derived from ``source`` via the ``config`` helpers.

     Args:
         context: Context name, stored and embedded in the state file name.
         source: Source identifier passed to ``config.path_for`` and
             ``config.host_for``.
         testing: Stored unchanged on ``self.testing``.
     """
     self.logger = logging.getLogger("gc.GhettoClusterSource")
     cfg = config.Config.instance()
     self.config = cfg
     self.context = context
     self.source = source
     self.testing = testing
     verbose_option = cfg.getOption("verbose", "False")
     self.verbose = verbose_option == "True"
     self.path = config.path_for(source)
     hostname = config.host_for(source)
     self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
     lazy_write = cfg.getOption("LAZY_WRITE", 5)
     self.states = persistent_dict.PersistentDict(self.states_filename,
                                                  lazy_write)
Esempio n. 11
0
    def test_metadata(self):
        """The metadata key is stored but not counted among the items.

        With ``metadata_key="__metadata__"``, setting that key must leave
        ``items()`` at two entries while ``get`` still returns the stored
        metadata mapping.
        """
        metadata = {1: "one", 3: "three"}
        store = persistent_dict.PersistentDict(
            "testfile.txt", metadata_key="__metadata__")
        store.set("one", 1)
        store.set("red", 2)
        self.assertEqual(len(store.items()), 2)

        store.set("__metadata__", metadata)
        item_count = len(store.items())
        self.assertEqual(item_count, 2)

        self.assertEqual(store.get("__metadata__"), metadata)
 def __init__(self, context, dest, source):
     """Record the replica settings and open its state table.

     ``source`` is normalised to end with ``"/"``. The state file is
     ``<path>/.ghetto_cluster/<hostname>.<context>.json``, with path and
     hostname derived from ``dest`` via the ``config`` helpers.

     Args:
         context: Context name, forwarded to the parent initialiser.
         dest: Destination identifier for this replica.
         source: Source location; a trailing slash is appended if missing.
     """
     super().__init__(context)
     self.dest = dest
     self.source = source if source.endswith("/") else source + "/"
     hostname = config.host_for(dest)
     self.path = config.path_for(dest)
     self.states_filename = (
         f"{self.path}/.ghetto_cluster/{hostname}.{context}.json")
     lazy_write = self.config.getOption("LAZY_WRITE", 5)
     self.states = persistent_dict.PersistentDict(self.states_filename,
                                                  lazy_write)
     self.testing = False
     verbose_option = self.config.getOption("verbose", "False")
     self.verbose = verbose_option == "True"
     self.logger = logging.getLogger(logger_str(__class__))
 def get_status(self, context, source, to_console=True):
     """Report the size of a source and the status of each replica.

     The source summary line is always printed to stdout. Replica reports
     from ``inspect_replica`` go to stdout when ``to_console`` is true
     (followed by one blank line) and to the logger at INFO level
     otherwise.

     Args:
         context: Context name used for the state file and config lookups.
         source: Source identifier used to locate the state file; the
             replica reports use the source the config returns for
             ``context`` instead.
         to_console: Selects ``print`` over ``self.logger.info``.
     """
     state_dir = f"{config.path_for(source)}/.ghetto_cluster/"
     source_states = persistent_dict.PersistentDict(
         f"{state_dir}/source.{context}.json")
     file_count, byte_count = self.sizeof(source_states)
     # TODO: pretty this
     print(f"Source: {source}: {file_count} files, "
           f"{byte_count/2**30:.2f}GB")

     source = self.config.get_source_for_context(context)
     for replica in self.config.get_replicas_for_context(context):
         report = self.inspect_replica(source, source_states, context,
                                       replica)
         emit = print if to_console else self.logger.info
         emit(report)
     if to_console:
         print()
Esempio n. 14
0
 def test_item(self):
     """Item assignment, item lookup and ``in`` work on a PersistentDict."""
     pd = persistent_dict.PersistentDict("testfile.txt")
     pd["one"] = "1"
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(pd["one"], "1")
     pd["two"] = "2"
     # assertIn performs the same ``in`` check with a clearer failure message.
     self.assertIn("two", pd)
Esempio n. 15
0
 def test_io(self):
     """A value written by one dict is readable from a second one.

     Both dicts are opened on the same file; the second is created after
     the writes.
     """
     writer = persistent_dict.PersistentDict("testfile.txt")
     for key, value in (("thing", 1), ("thing two", "two")):
         writer.set(key, value)

     reader = persistent_dict.PersistentDict("testfile.txt")
     stored = reader.get("thing two")
     self.assertEqual(stored, "two")
 def inspect_replica(self, source, source_states, context, replica):
     """Return a human-readable status report for one replica.

     The replica's state file is loaded from the source's
     ``.ghetto_cluster`` directory. With the ``verbose`` option set to
     ``"True"`` the report lists up to ten missing or size-mismatched
     entries followed by totals; otherwise it is a one-line summary
     (Complete / Stale / Not started / Active / WARNING).

     Args:
         source: Source identifier used to locate the state directory.
         source_states: Loaded state table of the source; iterated via
             ``items()``, with each state read by ``"size"`` and ``"ctime"``.
         context: Context name embedded in the replica state file name.
         replica: Replica identifier; its host name selects the state file.

     Returns:
         The report string.
     """
     prefix = f"{config.path_for(source)}/.ghetto_cluster/"
     hostname = config.host_for(replica)
     replica_file = f"{prefix}/{hostname}.{context}.json"
     replica_states = persistent_dict.PersistentDict(replica_file)

     if self.config.getOption("verbose", "False") == "True":
         max_detail_lines = 10
         parts = [f"{replica} ::\n"]
         missing = mismatch = 0
         lines = 0
         for fqde, source_state in source_states.items():
             if replica_states.contains_p(fqde):
                 # Touched entries are excluded from clean_keys() below,
                 # which is how "extra" entries are counted.
                 replica_states.touch(fqde)
                 replica_state = replica_states.get(fqde)
                 if source_state["size"] != replica_state["size"]:
                     mismatch += 1
                     if lines < max_detail_lines:
                         if replica_state["ctime"] > source_state["ctime"]:
                             note = "replica is newer"
                         else:
                             age = (source_state["ctime"]
                                    - replica_state["ctime"])
                             note = f"{duration_to_str(age)} stale"
                         parts.append(f"\tmismatch: {fqde} {note}\n")
                         lines += 1
             else:
                 missing += 1
                 if lines < max_detail_lines:
                     parts.append(f"\tmissing: {fqde}\n")
                     lines += 1
             if lines == max_detail_lines:
                 # Emit the ellipsis once, then bump the counter past the
                 # limit so it is never emitted again.
                 parts.append("\t...\n")
                 lines = max_detail_lines + 1

         extra = len(replica_states.clean_keys())
         if missing + mismatch + extra == 0:
             # TODO: staleness report
             return f"Complete: {replica}"

         source_count = len(source_states.items())
         replica_count = len(replica_states.items())
         if source_count:
             # Cap at 99: a replica with discrepancies must never be shown
             # as 100% complete, even after rounding.
             pct = min(99.0, 100 * replica_count / source_count)
         else:
             # An empty source would otherwise divide by zero; treat it
             # like the "replica has at least as many entries" case.
             pct = 99.0
         parts.append(f"\tmissing: {missing} ({pct:.0f}% complete); "
                      f"mismatched: {mismatch}; "
                      f"extra: {extra}")
         return "".join(parts)

     (target_files, _target_bytes) = self.sizeof(source_states)
     (nlines, _nbytes) = self.sizeof(replica_states)
     if nlines == target_files:
         if self.replica_is_current(source, source_states, replica_file):
             return f"  Complete : {replica}"
         return f"  Stale: {replica}"
     if nlines == 0:
         return f"  Not started: {replica}"
     if nlines < target_files:
         if self.replica_is_current(source, source_states, replica_file):
             state = "Active"
         else:
             state = "Stale"
         # Computed only here, where target_files is known to be non-zero.
         pct_complete = int(100 * nlines / target_files)
         return (f"  {state}: {pct_complete:3d}% "
                 f"{nlines}/{target_files}: {replica}")
     # More entries in the replica than in the source.
     return (f"WARNING: too many files in replica "
             f"{config.host_for(replica)}\n"
             f"\t{nlines}/{target_files}: {replica}")
Esempio n. 17
0
 def test_io(self):
     """A value written by one dict is readable from a second one.

     Both dicts are opened on the same file; the second is created after
     the writes.
     """
     pd = persistent_dict.PersistentDict("testfile.txt")
     pd["thing"] = 1
     pd["thing two"] = "two"
     pd2 = persistent_dict.PersistentDict("testfile.txt")
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(pd2["thing two"], "two")