Example #1
    def load_state(self, allow_cached=True):
        if not allow_cached or self._cached_state is None:
            # First, get a list of all files
            all_datafile_keys = self._get_datafile_object_keys()

            items = []

            # Then for each datafile, append to items
            for datafile in all_datafile_keys:
                txt_content = read_s3_text(self.s3_client, self.s3_bucket_name,
                                           datafile)
                signal_type = self.get_signal_type_from_object_key(datafile)
                indicator_type = self.indicator_type_str_from_signal_type(
                    signal_type)

                if txt_content is None:
                    logger.warning("No TE state for %d. First run?",
                                   self.privacy_group)
                elif indicator_type is None:
                    logger.warning(
                        "Could not identify indicator type for signal with type: %s. Will not process.",
                        signal_type.get_name(),
                    )
                else:
                    csv.field_size_limit(65535)  # dodge field size problems
                    for row in csv.reader(txt_content):
                        items.append(
                            HMASerialization(
                                row[0],
                                indicator_type,
                                row[1],
                                SimpleDescriptorRollup.from_row(row[2:]),
                            ))
                    # len(items) is cumulative across all datafiles so far
                    logger.info("%d rows loaded so far for %d", len(items),
                                self.privacy_group)

            # Do all in one assignment just in case of threads
            self._cached_state = {item.key: item for item in items}
        return self._cached_state
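
Both loaders depend on a `read_s3_text` helper that is not shown in these excerpts. Below is a minimal sketch of what it presumably does, assuming a boto3 S3 client and the three-argument call in Example #1 (Example #3 uses a two-argument variant that evidently binds the client elsewhere): it returns the object body as text lines, or `None` when no state object exists yet.

    import typing as t

    def read_s3_text(s3_client, bucket_name: str, key: str) -> t.Optional[t.List[str]]:
        """Fetch an S3 object and return its body as text lines, or None if absent."""
        try:
            response = s3_client.get_object(Bucket=bucket_name, Key=key)
        except s3_client.exceptions.NoSuchKey:
            return None  # No state written yet: callers treat this as "first run"
        # csv.reader accepts any iterable of strings, so a list of lines suffices
        return response["Body"].read().decode("utf-8").splitlines()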
Example #2
    def _apply_updates_impl(
        self,
        delta: tu.ThreatUpdatesDelta,
        post_apply_fn=lambda x: None,
    ) -> None:
        state: t.Dict = {}
        updated: t.Dict = {}
        # delta.start == 0 signals a full refetch, so begin from an empty
        # state instead of merging into what was previously stored.
        if delta.start > 0:
            state = self.load_state()
        for update in delta:
            item = HMASerialization.from_threat_updates_json(
                self.app_id, update.raw_json)
            if update.should_delete:
                state.pop(item.key, None)
            else:
                state[item.key] = item
                updated[item.key] = item

        self._store_state(state.values())
        self._cached_state = state

        # Hand back only the items added or changed by this delta;
        # deletions are not reported.
        post_apply_fn(updated)
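
The loop above is a simple tombstone merge: updates flagged `should_delete` remove the key (silently ignoring keys that were never present), everything else upserts and is also recorded in `updated`, which is what `post_apply_fn` eventually receives. A self-contained sketch of that merge, with plain strings standing in for `HMASerialization` items:

    # Stand-ins for the real types: (key, item, should_delete) per update.
    state = {"a": "old-a", "b": "old-b"}
    updates = [("a", None, True), ("c", "new-c", False)]

    updated = {}
    for key, item, should_delete in updates:
        if should_delete:
            state.pop(key, None)  # deleting an absent key is a no-op
        else:
            state[key] = item
            updated[key] = item

    print(state)    # {'b': 'old-b', 'c': 'new-c'}
    print(updated)  # {'c': 'new-c'} -- deletions never reach post_apply_fn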
Example #3
    def load_state(self, allow_cached=True):
        if not allow_cached or self._cached_state is None:
            txt_content = read_s3_text(self.s3_bucket, self.data_s3_key)
            items = []
            if txt_content is None:
                logger.warning("No TE state for %d. First run?",
                               self.privacy_group)
            else:
                # Violate your warranty with module state!
                csv.field_size_limit(65535)  # dodge field size problems
                for row in csv.reader(txt_content):
                    items.append(
                        HMASerialization(
                            row[0],
                            "HASH_PDQ",
                            row[1],
                            SimpleDescriptorRollup.from_row(row[2:]),
                        ))
                logger.info("%d rows loaded for %d", len(items),
                            self.privacy_group)
            # Do all in one assignment just in case of threads
            self._cached_state = {item.key: item for item in items}
        return self._cached_state
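
Example #2 also calls a `_store_state` method not shown here. A plausible counterpart to these loaders would write one CSV row per item in the same column order the readers consume (two leading value columns, then the `SimpleDescriptorRollup` columns). The sketch below assumes a boto3 client and a hypothetical `as_csv_row` serializer on `HMASerialization`; neither is confirmed by the excerpts.

    import csv
    import io

    def store_state(s3_client, bucket_name: str, key: str, items) -> None:
        """Hypothetical inverse of load_state: one CSV row per item."""
        buf = io.StringIO()
        writer = csv.writer(buf)
        for item in items:
            # as_csv_row is an assumed serializer producing the same column
            # order the loaders read back: value, signal, rollup columns.
            writer.writerow(item.as_csv_row())
        s3_client.put_object(
            Bucket=bucket_name, Key=key, Body=buf.getvalue().encode("utf-8"))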