예제 #1
0
 def test_real_world_250(self):
     """Check the frequency computed for a candidate that pairs a target with itself."""
     # given: one target used for both sides of the candidate
     same_target = given_target()
     candidate = SiblingCandidate(same_target, same_target)
     sibling = EvaluatedSibling(candidate)
     # when
     frequency_prop = sibling.contribute_property_type(FrequencyProperty)
     # then: the entry stored under key 4 must report a frequency of 250
     expected_frequency = 250
     self.assertEqual(frequency_prop[4].frequency, expected_frequency)
예제 #2
0
 def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[PpdOutlierRemovalProperty]':
     """
     Build this property from the PpdProperty and MeanOutlierRemovalProperty
     of the given sibling.

     Returns None when either prerequisite is unavailable, or when the
     resulting instance lacks data under key 4 or key 6.
     """
     ppd = evaluated_sibling.contribute_property_type(PpdProperty)
     mean_cleaned = evaluated_sibling.contribute_property_type(MeanOutlierRemovalProperty)
     if not (ppd and mean_cleaned):
         return None
     candidate = cls(mean_cleaned[4], mean_cleaned[6], ppd)
     # Only hand out the instance if both of its series still contain data.
     if candidate[4].has_data() and candidate[6].has_data():
         return candidate
     return None
 def provide_for(
     cls, evaluated_sibling: EvaluatedSibling
 ) -> 'Optional[BitcoinAddrNeighborsProperty]':
     """
     Derive the neighbor information from the sibling's BitcoinProperty.

     Returns None when the sibling has no BitcoinProperty or when that
     property reports that it does not have a response for both.
     """
     # Not cacheable: the calculation uses both series.
     if evaluated_sibling.has_property(BitcoinProperty):
         bitcoin = evaluated_sibling.get_property(BitcoinProperty)
         if bitcoin.has_response_for_both():
             neighbors = cls._calc_neighbors(bitcoin)
             return cls(neighbors)
     return None
예제 #4
0
 def provide_for(
         cls, evaluated_sibling: EvaluatedSibling
 ) -> 'Optional[SplineDiffProperty]':
     """
     Build this property from the sibling's SplineProperty and
     DynamicRangeProperty.

     Returns None when either prerequisite is unavailable.
     """
     splines = evaluated_sibling.contribute_property_type(SplineProperty)
     dyn_range = evaluated_sibling.contribute_property_type(
         DynamicRangeProperty)
     # No caching here: SplineProperty depends on PpdOutlierRemovalProperty,
     # which uses both series, and DynamicRangeProperty is involved as well.
     if splines and dyn_range:
         return cls(splines[4], splines[6], dyn_range.diff_absolute)
     return None
예제 #5
0
 def test_clean_series(self):
     """Check the first five normalised reception times and timestamp values."""
     # given: one target used for both sides of the candidate
     same_target = given_target()
     sibling = EvaluatedSibling(SiblingCandidate(same_target, same_target))
     expected_reception_times = [
         11.99898886680603,
         27.997750520706177,
         64.46249151229858,
         66.46238923072815,
         70.46224164962769,
     ]
     expected_ts_vals = [3000, 7000, 16117, 16617, 17617]
     # when
     norm_prop = sibling.contribute_property_type(NormSeriesProperty)
     # then: compare the leading five entries of the series stored under key 4
     first_reception_times = list(norm_prop[4].reception_times[:5])
     self.assertListEqual(first_reception_times, expected_reception_times)
     first_ts_vals = list(norm_prop[4].ts_vals[:5])
     self.assertListEqual(first_ts_vals, expected_ts_vals)
예제 #6
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling based on its BitcoinProperty.

     Returns ERROR when the property is missing or cannot conclude,
     NEGATIVE when all signs point to "no", and INDECISIVE otherwise.

     Raises AssertionError if the evaluator has not been initialised.
     """
     if not self.__init_done:
         raise AssertionError(
             'You probably meant to call init_data_for() first')
     if evaluated_sibling.has_property(BitcoinProperty):
         bitcoin = evaluated_sibling.get_property(BitcoinProperty)
         if bitcoin.can_conclude():
             if bitcoin.all_signs_point_to_no():
                 return SiblingStatus.NEGATIVE
             return SiblingStatus.INDECISIVE
     # Either no property at all, or one that cannot reach a conclusion.
     return SiblingStatus.ERROR
예제 #7
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling based on its SshProperty.

     Returns ERROR when the property is missing, INDECISIVE when data is
     not available for both, POSITIVE when agents and keys both match,
     and NEGATIVE otherwise.

     Raises AssertionError if the evaluator has not been initialised.
     """
     if not self.__init_done:
         raise AssertionError(
             'You probably meant to call init_data_for() first')
     if not evaluated_sibling.has_property(SshProperty):
         return SiblingStatus.ERROR
     ssh = evaluated_sibling.get_property(SshProperty)
     if not ssh.has_data_for_both():
         # NEGATIVE would be tempting, but the two sides might just sit
         # behind different firewall setups.
         return SiblingStatus.INDECISIVE
     everything_matches = ssh.do_agents_match() and ssh.do_keys_match()
     return SiblingStatus.POSITIVE if everything_matches else SiblingStatus.NEGATIVE
예제 #8
0
 def provide_for(
         cls,
         evaluated_sibling: EvaluatedSibling) -> 'Optional[PpdProperty]':
     """
     Build this property from the sibling's MeanOutlierRemovalProperty.

     Returns None when that prerequisite is unavailable.
     """
     mean_cleaned = evaluated_sibling.contribute_property_type(
         MeanOutlierRemovalProperty)
     return cls(mean_cleaned[4], mean_cleaned[6]) if mean_cleaned else None
예제 #9
0
 def run(self, batch_id: int, candidate_iter: Iterator[SiblingCandidate]):
     """
     Evaluate one batch of candidates and export its statistics.

     Every candidate is wrapped in an EvaluatedSibling; the batch is handled
     and its stats are exported into a per-batch directory below
     ``self.conf.base_dir``. Nothing happens for an empty batch.
     """
     siblings = list(map(EvaluatedSibling, candidate_iter))
     if not siblings:
         # happens if (#targets % batch_size) == 0
         return
     target_dir = pathlib.Path(self.conf.base_dir).joinpath(f'batch_{batch_id:06}')
     target_dir.mkdir(parents=True, exist_ok=True)
     stats = Stats(self.total_stats)
     self._handle_batch(siblings, stats, target_dir)
     exporter = StatsExporter(target_dir)
     exporter.export_all(stats)
예제 #10
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling using its BitcoinAddrNeighborsProperty.

     Returns ERROR when the property is unavailable, NEGATIVE as soon as
     one neighbor has a difference that is too small, and INDECISIVE when
     no neighbor triggers that check.
     """
     neighbors_prop = evaluated_sibling.contribute_property_type(
         BitcoinAddrNeighborsProperty)
     if not neighbors_prop:
         return SiblingStatus.ERROR
     for candidate in neighbors_prop.neighbors:
         # NOTE(review): the "next" addresses are checked against next_v6
         # while the "prev" addresses are checked against v4 - confirm that
         # this pairing is intended.
         if self._any_diff_too_small(candidate.shared_addrs_next, candidate.next_v6):
             return SiblingStatus.NEGATIVE
         if self._any_diff_too_small(candidate.shared_addrs_prev, candidate.v4):
             return SiblingStatus.NEGATIVE
     return SiblingStatus.INDECISIVE
예제 #11
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling using its BitcoinAddrNeighborsProperty.

     Returns ERROR when the property is unavailable, NEGATIVE when any
     neighbor has a shared address (previous or next) that lost its service
     flag, and INDECISIVE when no such contradiction is found.
     """
     neighbors_prop = evaluated_sibling.contribute_property_type(
         BitcoinAddrNeighborsProperty)
     if not neighbors_prop:
         return SiblingStatus.ERROR
     contradiction_found = any(
         self._any_addr_lost_svc_flag(candidate.shared_addrs_prev)
         or self._any_addr_lost_svc_flag(candidate.shared_addrs_next)
         for candidate in neighbors_prop.neighbors
     )
     if contradiction_found:
         return SiblingStatus.NEGATIVE
     return SiblingStatus.INDECISIVE
예제 #12
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling by its raw timestamp difference.

     Returns ERROR when FirstTimestampDiffProperty is unavailable, POSITIVE
     when the raw timestamp difference does not exceed
     ``self.threshold_tcpraw``, and INDECISIVE otherwise.
     """
     first_ts_diff = evaluated_sibling.contribute_property_type(
         FirstTimestampDiffProperty)
     if not first_ts_diff:
         return SiblingStatus.ERROR
     within_threshold = first_ts_diff.raw_timestamp_diff <= self.threshold_tcpraw
     if within_threshold:
         return SiblingStatus.POSITIVE
     # Above the threshold: could still be randomised, so no verdict.
     return SiblingStatus.INDECISIVE
예제 #13
0
    def provide_for(
            cls, evaluated_sibling: EvaluatedSibling
    ) -> 'Optional[OffsetsProperty]':
        """
        Build the offsets property from the frequency and normalised series.

        For each IP version (4 and 6) an OffsetSeries is derived from the
        normalised series and the corresponding frequency, going through
        ``cls._cache_get_or`` for the per-version series.

        Returns None when FrequencyProperty or NormSeriesProperty is
        unavailable, or when the frequency of either IP version is zero.
        """
        freq_prop = evaluated_sibling.contribute_property_type(
            FrequencyProperty)
        clean_prop = evaluated_sibling.contribute_property_type(
            NormSeriesProperty)
        # Check the ``frequency`` attribute for BOTH versions. The previous
        # code compared the key-6 entry itself to 0, so a zero frequency for
        # that version was not detected and reached OffsetSeries.from_norm.
        if freq_prop and any(
                freq_prop[ip_version].frequency == 0 for ip_version in (4, 6)):
            log.debug(f'Frequency is zero for {evaluated_sibling}')
            return None
        if not freq_prop or not clean_prop:
            return None

        def provider(ip_version: int):
            return OffsetSeries.from_norm(clean_prop[ip_version],
                                          freq_prop[ip_version].frequency)

        return cls(
            cls._cache_get_or(evaluated_sibling[4], provider),
            cls._cache_get_or(evaluated_sibling[6], provider),
        )
예제 #14
0
 def provide_for(
     cls, evaluated_sibling: EvaluatedSibling
 ) -> 'Optional[FirstTimestampDiffProperty]':
     """
     Compute the differences between the first observations of both series.

     The first timestamp values (keys 4 and 6) are subtracted and divided by
     the mean frequency; the first reception times are subtracted as well.

     Returns None when FrequencyProperty is unavailable.
     """
     freq_prop = evaluated_sibling.contribute_property_type(
         FrequencyProperty)
     if not freq_prop:
         return None
     # Convert to plain Python ints before subtracting so the difference
     # cannot overflow.
     first_tsval4 = int(evaluated_sibling[4].first_ts_val)
     first_tsval6 = int(evaluated_sibling[6].first_ts_val)
     tcp_diff_secs = (first_tsval4 - first_tsval6) / freq_prop.mean_freq
     first_recv4 = evaluated_sibling[4].first_reception_time
     first_recv6 = evaluated_sibling[6].first_reception_time
     return cls(tcp_diff_secs, first_recv4 - first_recv6)
예제 #15
0
def _plot_evaluated(evaluated_sibling: EvaluatedSibling, plot_function,
                    **plotkwargs) -> bool:
    """
    Plot data to a matplotlib.pyplot figure.

    A fresh figure is created, axes are drawn and configured for the given
    sibling, and ``plot_function(fig, **plotkwargs)`` is invoked. The figure
    is always closed afterwards, even if one of these steps raises.

    :param evaluated_sibling: sibling whose data should be plotted
    :param plot_function: callable receiving the figure and ``plotkwargs``
    :param plotkwargs: keyword arguments forwarded to ``plot_function``
    :return: False if outlier removal failed (earlier or now) and nothing
        was plotted, True otherwise
    """
    if evaluated_sibling.property_failed(MeanOutlierRemovalProperty):
        log.debug(
            f'Unable to plot {evaluated_sibling}, outlier removal failed before.'
        )
        return False
    prop = evaluated_sibling.contribute_property_type(
        MeanOutlierRemovalProperty)
    if not prop:
        log.debug(
            f'Unable to plot {evaluated_sibling}, outlier removal failed now.')
        return False
    # Result intentionally unused; the call is made before plotting
    # (presumably so the spline data is available to the plot - TODO confirm).
    evaluated_sibling.contribute_property_type(SplineProperty)
    fig = pyplot.figure()
    try:
        axes = _plot_axes(evaluated_sibling, fig)
        _configure_plot_appearance(axes, evaluated_sibling)
        plot_function(fig, **plotkwargs)
    finally:
        # pyplot keeps figures alive until they are closed explicitly, so
        # release this one even when plotting fails.
        pyplot.close(fig)
    return True
예제 #16
0
    def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[FrequencyProperty]':
        """
        Build the frequency property from the normalised series.

        A FrequencyInfo is obtained per IP version (4 and 6) via
        ``cls._cache_get_or``.

        Returns None when NormSeriesProperty is unavailable or when
        FrequencyFailedException is raised while building the property
        (the failure is logged at debug level).
        """
        norm_series = evaluated_sibling.contribute_property_type(NormSeriesProperty)
        if not norm_series:
            return None

        def frequency_for(ip_version: int):
            return FrequencyInfo(norm_series[ip_version])

        try:
            info4 = cls._cache_get_or(evaluated_sibling[4], frequency_for)
            info6 = cls._cache_get_or(evaluated_sibling[6], frequency_for)
            return cls(info4, info6)
        except FrequencyFailedException as err:
            log.debug(f'Failed to compute frequency for {evaluated_sibling}', exc_info=err)
            return None
예제 #17
0
 def provide_for(
         cls,
         evaluated_sibling: EvaluatedSibling) -> 'Optional[SplineProperty]':
     """
     Build splines from the offsets left after PPD outlier removal.

     Returns None when PpdOutlierRemovalProperty is unavailable, or when
     either spline is missing or has no data.
     """
     cleaned = evaluated_sibling.contribute_property_type(
         PpdOutlierRemovalProperty)
     if not cleaned:
         return None
     # No caching here: PpdOutlierRemovalProperty uses both series.
     spline_v4 = OffsetSpline.from_offsets(cleaned[4])
     spline_v6 = OffsetSpline.from_offsets(cleaned[6])
     both_usable = (spline_v4 and spline_v6
                    and spline_v4.has_data() and spline_v6.has_data())
     if both_usable:
         return cls(spline_v4, spline_v6)
     return None
예제 #18
0
    def provide_for(
            cls, evaluated_sibling: EvaluatedSibling
    ) -> 'Optional[DenoiseProperty]':
        """
        Build the denoised property from the sibling's OffsetsProperty.

        The offsets of each IP version (4 and 6) are passed to
        ``cls._denoise``, going through ``cls._cache_get_or``.

        Returns None when OffsetsProperty is unavailable.
        """
        offsets = evaluated_sibling.contribute_property_type(
            OffsetsProperty)
        if not offsets:
            return None

        def denoised_for(ip_version: int):
            return cls._denoise(offsets[ip_version])

        return cls(
            cls._cache_get_or(evaluated_sibling[4], denoised_for),
            cls._cache_get_or(evaluated_sibling[6], denoised_for),
        )
예제 #19
0
 def _features_from_evaluated(self, evaluated_sibling: EvaluatedSibling) -> Dict[str, Any]:
     """
     Collect the classifier features for one sibling.

     Returns an empty dict when FrequencyProperty, SkewProperty or
     DynamicRangeProperty is unavailable. Otherwise returns the features
     keyed by name, with every falsy value replaced by NaN.
     """
     # Keys taken from old evaluation.py, feature_keys['no_raw']. Order is crucial!
     # The features are defined in Starke p. 69, Table 4.4.
     # We use the no-rawts model (the only one provided as raw data); note that
     # it is not supposed to apply to constant-offset candidates that already
     # match the Delta-tcpraw criterion.
     freq = evaluated_sibling.contribute_property_type(FrequencyProperty)
     skew = evaluated_sibling.contribute_property_type(SkewProperty)
     dyn_range = evaluated_sibling.contribute_property_type(DynamicRangeProperty)
     if not (freq and skew and dyn_range):
         return {}
     ordered_features = (
         ('hz_diff', freq.diff),
         ('hz_rsqrdiff', freq.r_squared_diff),
         ('alphadiff', skew.skew_diff),
         ('rsqrdiff', skew.r_square_diff),
         ('dynrange_diff', dyn_range.diff_absolute),
         ('dynrange_avg', dyn_range.average),
         ('dynrange_diff_rel', dyn_range.diff_relative),
     )
     # NOTE(review): any falsy value becomes NaN, which includes an exact
     # zero as well as None - confirm that zeros are meant to be dropped.
     return {
         name: (value if value else numpy.nan)
         for name, value in ordered_features
     }
예제 #20
0
 def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
     """
     Rate the sibling with the classifier.

     Returns INDECISIVE when the raw timestamp difference is within
     StarkeTcprawEvaluator.THRESHOLD, ERROR when no features could be
     collected, and otherwise POSITIVE or NEGATIVE according to the first
     prediction returned by ``self.classifier``.
     """
     first_ts_diff = evaluated_sibling.contribute_property_type(FirstTimestampDiffProperty)
     if first_ts_diff and first_ts_diff.raw_timestamp_diff <= StarkeTcprawEvaluator.THRESHOLD:
         # The no-rawts ML model in use must not be applied to sibling
         # candidates that already match the Delta-tcpraw criterion.
         return SiblingStatus.INDECISIVE
     feature_row = self._features_from_evaluated(evaluated_sibling)
     if not feature_row:
         return SiblingStatus.ERROR
     frame = pandas.DataFrame([feature_row], columns=self._FEATURE_KEYS)
     predicted_sibling = self.classifier.predict(frame)[0]
     return SiblingStatus.POSITIVE if predicted_sibling else SiblingStatus.NEGATIVE
예제 #21
0
    def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
    ) -> 'Optional[DynamicRangeProperty]':
        """
        Build the dynamic range property from the PPD-cleaned data.

        ``cls._calc_dynamic_range`` is applied to the entries for IP
        versions 4 and 6.

        Returns None when PpdOutlierRemovalProperty is unavailable or when
        an IndexError is raised while building the property.
        """
        cleaned = evaluated_sibling.contribute_property_type(
            PpdOutlierRemovalProperty)
        if not cleaned:
            return None
        # No caching here: PpdOutlierRemovalProperty uses both series.
        try:
            range_v4 = cls._calc_dynamic_range(cleaned[4])
            range_v6 = cls._calc_dynamic_range(cleaned[6])
            return cls(range_v4, range_v6)
        except IndexError:
            return None
예제 #22
0
    def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
    ) -> 'Optional[MeanOutlierRemovalProperty]':
        """
        Build this property by removing outliers from the denoised data.

        ``cls._remove_outliers_97`` is applied per IP version (4 and 6),
        going through ``cls._cache_get_or``; a result without data counts
        as missing.

        Returns None when DenoiseProperty is unavailable or when the result
        for either IP version is missing.
        """
        denoised = evaluated_sibling.contribute_property_type(
            DenoiseProperty)
        if not denoised:
            return None

        def without_outliers(ip_version: int):
            series = cls._remove_outliers_97(denoised[ip_version])
            if series.has_data():
                return series
            return None

        # Both versions are always computed before the results are checked.
        results = [
            cls._cache_get_or(evaluated_sibling[ip_version], without_outliers)
            for ip_version in (4, 6)
        ]
        if all(results):
            return cls(*results)
        return None
예제 #23
0
 def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[SkewProperty]':
     """
     Build the skew property from the PPD-cleaned data.

     ``cls._calc_skew_angle`` is applied to the entries under keys 4 and 6.

     Returns None when PpdOutlierRemovalProperty is unavailable, or when a
     ValueError about converting NaN to an integer is raised (logged at
     debug level). Any other ValueError is re-raised.
     """
     cleaned = evaluated_sibling.contribute_property_type(PpdOutlierRemovalProperty)
     if not cleaned:
         return None
     # No caching here: PpdOutlierRemovalProperty uses both series.
     try:
         skew_v4 = cls._calc_skew_angle(cleaned[4])
         skew_v6 = cls._calc_skew_angle(cleaned[6])
         return cls(skew_v4, skew_v6)
     except ValueError as err:
         if "cannot convert float NaN to integer" not in str(err):
             raise
         log.debug(f'NaN in skew calculation, {evaluated_sibling.domains}')
         return None