def test_real_world_250(self):
    """The given target must yield a frequency of 250 for entry 4."""
    # given: a candidate that pairs the target with itself
    subject = given_target()
    sibling = EvaluatedSibling(SiblingCandidate(subject, subject))

    # when: the frequency property is computed on demand
    freq_prop = sibling.contribute_property_type(FrequencyProperty)

    # then
    self.assertEqual(freq_prop[4].frequency, 250)
def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[PpdOutlierRemovalProperty]':
    """
    Build this property from PpdProperty and MeanOutlierRemovalProperty.

    Returns None if either prerequisite property is unavailable, or if the
    resulting instance has no data in its 4 or 6 entry.
    """
    ppd = evaluated_sibling.contribute_property_type(PpdProperty)
    mean_cleaned = evaluated_sibling.contribute_property_type(MeanOutlierRemovalProperty)
    if not (ppd and mean_cleaned):
        return None

    candidate = cls(mean_cleaned[4], mean_cleaned[6], ppd)
    # Only hand out the instance when both of its entries carry data.
    if candidate[4].has_data() and candidate[6].has_data():
        return candidate
    return None
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[BitcoinAddrNeighborsProperty]':
    """
    Build this property from an already present BitcoinProperty.

    Returns None if the sibling has no BitcoinProperty or if that property
    reports that it does not have a response for both.
    """
    if evaluated_sibling.has_property(BitcoinProperty):
        # Cannot cache because we use both series
        bitcoin = evaluated_sibling.get_property(BitcoinProperty)
        if bitcoin.has_response_for_both():
            return cls(cls._calc_neighbors(bitcoin))
    return None
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[SplineDiffProperty]':
    """
    Build this property from SplineProperty and DynamicRangeProperty.

    Returns None if either prerequisite property is unavailable.
    """
    splines = evaluated_sibling.contribute_property_type(SplineProperty)
    dyn_range = evaluated_sibling.contribute_property_type(DynamicRangeProperty)
    if not (splines and dyn_range):
        return None

    # Cannot cache: SplineProperty depends on PpdOutlierRemovalProperty,
    # which uses both series. The same goes for DynamicRangeProperty.
    spline4 = splines[4]
    spline6 = splines[6]
    return cls(spline4, spline6, dyn_range.diff_absolute)
def test_clean_series(self):
    """The first five normalised samples of entry 4 must match known values."""
    expected_reception_times = [
        11.99898886680603,
        27.997750520706177,
        64.46249151229858,
        66.46238923072815,
        70.46224164962769,
    ]
    expected_ts_vals = [3000, 7000, 16117, 16617, 17617]

    # given: a candidate that pairs the target with itself
    subject = given_target()
    sibling = EvaluatedSibling(SiblingCandidate(subject, subject))

    # when
    norm_prop = sibling.contribute_property_type(NormSeriesProperty)

    # then
    series4 = norm_prop[4]
    self.assertListEqual(list(series4.reception_times[:5]), expected_reception_times)
    self.assertListEqual(list(series4.ts_vals[:5]), expected_ts_vals)
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate based on its BitcoinProperty.

    Returns ERROR if the property is missing or cannot conclude, NEGATIVE
    if all signs point to "no", and INDECISIVE otherwise.

    Raises AssertionError if this evaluator has not been initialised.
    """
    if not self.__init_done:
        raise AssertionError(
            'You probably meant to call init_data_for() first')

    if evaluated_sibling.has_property(BitcoinProperty):
        bitcoin = evaluated_sibling.get_property(BitcoinProperty)
        if bitcoin.can_conclude():
            # This evaluator never returns POSITIVE.
            return (SiblingStatus.NEGATIVE
                    if bitcoin.all_signs_point_to_no()
                    else SiblingStatus.INDECISIVE)
    return SiblingStatus.ERROR
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate based on its SshProperty.

    Returns ERROR if the property is missing, INDECISIVE if it lacks data
    for both, POSITIVE if agents and keys both match, and NEGATIVE otherwise.

    Raises AssertionError if this evaluator has not been initialised.
    """
    if not self.__init_done:
        raise AssertionError(
            'You probably meant to call init_data_for() first')

    if not evaluated_sibling.has_property(SshProperty):
        return SiblingStatus.ERROR

    ssh = evaluated_sibling.get_property(SshProperty)
    if not ssh.has_data_for_both():
        # Tempting to return NEGATIVE here, but might be different firewall setups
        return SiblingStatus.INDECISIVE

    everything_matches = ssh.do_agents_match() and ssh.do_keys_match()
    return SiblingStatus.POSITIVE if everything_matches else SiblingStatus.NEGATIVE
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[PpdProperty]':
    """
    Build this property from the 4 and 6 entries of MeanOutlierRemovalProperty.

    Returns None if that prerequisite property is unavailable.
    """
    mean_cleaned = evaluated_sibling.contribute_property_type(
        MeanOutlierRemovalProperty)
    if mean_cleaned:
        return cls(mean_cleaned[4], mean_cleaned[6])
    return None
def run(self, batch_id: int, candidate_iter: Iterator[SiblingCandidate]):
    """
    Evaluate one batch of candidates and export its statistics.

    Wraps every candidate in an EvaluatedSibling, handles the batch, and
    exports the batch statistics into a per-batch directory below
    ``self.conf.base_dir``. Does nothing if the iterator yields no candidates.
    """
    evaluated = list(map(EvaluatedSibling, candidate_iter))
    if not evaluated:
        # happens if (#targets % batch_size) == 0
        return

    batch_dir = pathlib.Path(self.conf.base_dir) / f'batch_{batch_id:06}'
    batch_dir.mkdir(parents=True, exist_ok=True)

    batch_stats = Stats(self.total_stats)
    self._handle_batch(evaluated, batch_stats, batch_dir)

    exporter = StatsExporter(batch_dir)
    exporter.export_all(batch_stats)
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate via BitcoinAddrNeighborsProperty.

    Returns ERROR if the property is unavailable, NEGATIVE as soon as one
    neighbor has a diff that ``_any_diff_too_small`` flags, and INDECISIVE
    otherwise.
    """
    neighbors_prop = evaluated_sibling.contribute_property_type(
        BitcoinAddrNeighborsProperty)
    if not neighbors_prop:
        return SiblingStatus.ERROR

    # NOTE(review): the "next" check is paired with next_v6 while the "prev"
    # check is paired with v4 -- confirm that this asymmetry is intended.
    contradiction_found = any(
        self._any_diff_too_small(entry.shared_addrs_next, entry.next_v6)
        or self._any_diff_too_small(entry.shared_addrs_prev, entry.v4)
        for entry in neighbors_prop.neighbors
    )
    if contradiction_found:
        return SiblingStatus.NEGATIVE
    return SiblingStatus.INDECISIVE
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate via BitcoinAddrNeighborsProperty.

    Returns ERROR if the property is unavailable, NEGATIVE as soon as
    ``_any_addr_lost_svc_flag`` flags the previous or next shared addresses
    of any neighbor, and INDECISIVE when no such contradiction is found.
    """
    neighbors_prop = evaluated_sibling.contribute_property_type(
        BitcoinAddrNeighborsProperty)
    if not neighbors_prop:
        return SiblingStatus.ERROR

    flag_lost = any(
        self._any_addr_lost_svc_flag(entry.shared_addrs_prev)
        or self._any_addr_lost_svc_flag(entry.shared_addrs_next)
        for entry in neighbors_prop.neighbors
    )
    if flag_lost:
        return SiblingStatus.NEGATIVE
    # no contradiction
    return SiblingStatus.INDECISIVE
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate by its raw timestamp difference.

    Returns ERROR if FirstTimestampDiffProperty is unavailable, POSITIVE if
    the raw timestamp difference is at most ``self.threshold_tcpraw``, and
    INDECISIVE otherwise.
    """
    # Per the original author's note, the property computation throws if the
    # calculation fails, so the diff value itself is not checked here.
    ts_diff_prop = evaluated_sibling.contribute_property_type(
        FirstTimestampDiffProperty)
    if not ts_diff_prop:
        return SiblingStatus.ERROR

    if ts_diff_prop.raw_timestamp_diff <= self.threshold_tcpraw:
        return SiblingStatus.POSITIVE
    # Could still be randomised
    return SiblingStatus.INDECISIVE
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[OffsetsProperty]':
    """
    Build this property from FrequencyProperty and NormSeriesProperty.

    For each IP version, an OffsetSeries is derived from the normalised
    series and the frequency of that version, going through
    ``cls._cache_get_or``.

    Returns None if either prerequisite property is unavailable, or if the
    frequency of either IP version is zero.
    """
    freq_prop = evaluated_sibling.contribute_property_type(
        FrequencyProperty)
    clean_prop = evaluated_sibling.contribute_property_type(
        NormSeriesProperty)

    # Compare the ``frequency`` attribute on BOTH entries. Comparing the
    # entry object itself to 0 (as was done for entry 6 before) does not
    # inspect its frequency, so a zero frequency there went undetected.
    if freq_prop and (freq_prop[4].frequency == 0
                      or freq_prop[6].frequency == 0):
        log.debug(f'Frequency is zero for {evaluated_sibling}')
        return None

    if not freq_prop or not clean_prop:
        return None

    def provider(ip_version: int):
        return OffsetSeries.from_norm(clean_prop[ip_version],
                                      freq_prop[ip_version].frequency)

    return cls(
        cls._cache_get_or(evaluated_sibling[4], provider),
        cls._cache_get_or(evaluated_sibling[6], provider),
    )
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[FirstTimestampDiffProperty]':
    """
    Build this property from the first samples of the 4 and 6 entries.

    The first argument is the difference of the first timestamp values
    divided by the mean frequency; the second is the difference of the first
    reception times. Returns None if FrequencyProperty is unavailable.
    """
    freq_prop = evaluated_sibling.contribute_property_type(
        FrequencyProperty)
    if not freq_prop:
        return None

    series4 = evaluated_sibling[4]
    series6 = evaluated_sibling[6]

    # Convert to Python int before subtracting to prevent overflow.
    first_ts4 = int(series4.first_ts_val)
    first_ts6 = int(series6.first_ts_val)
    tcp_diff_secs = (first_ts4 - first_ts6) / freq_prop.mean_freq

    recv_time_diff_secs = (series4.first_reception_time
                           - series6.first_reception_time)
    return cls(tcp_diff_secs, recv_time_diff_secs)
def _plot_evaluated(evaluated_sibling: EvaluatedSibling, plot_function,
                    **plotkwargs) -> bool:
    """
    Plot data to a matplotlib.pyplot figure.

    A new figure is created, set up via ``_plot_axes`` and
    ``_configure_plot_appearance``, handed to ``plot_function`` together with
    ``plotkwargs``, and closed afterwards.

    Returns False without plotting if MeanOutlierRemovalProperty failed
    before or cannot be provided now; returns True after plotting.

    Exceptions raised while setting up or drawing the plot propagate to the
    caller, but the figure is closed first.
    """
    if evaluated_sibling.property_failed(MeanOutlierRemovalProperty):
        log.debug(
            f'Unable to plot {evaluated_sibling}, outlier removal failed before.'
        )
        return False

    prop = evaluated_sibling.contribute_property_type(
        MeanOutlierRemovalProperty)
    if not prop:
        log.debug(
            f'Unable to plot {evaluated_sibling}, outlier removal failed now.')
        return False

    # The result is not used here; presumably this makes the spline
    # available to the plotting helpers -- TODO confirm.
    evaluated_sibling.contribute_property_type(SplineProperty)

    fig = pyplot.figure()
    try:
        axes = _plot_axes(evaluated_sibling, fig)
        _configure_plot_appearance(axes, evaluated_sibling)
        plot_function(fig, **plotkwargs)
    finally:
        # Always release the figure, even if plotting raised; pyplot keeps
        # figures alive until they are closed explicitly.
        pyplot.close(fig)
    return True
def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[FrequencyProperty]':
    """
    Build this property from NormSeriesProperty.

    For each IP version, a FrequencyInfo is created from the normalised
    series, going through ``cls._cache_get_or``.

    Returns None if NormSeriesProperty is unavailable or if a
    FrequencyFailedException is raised (which is logged at debug level).
    """
    norm_prop = evaluated_sibling.contribute_property_type(NormSeriesProperty)
    if not norm_prop:
        return None

    def make_frequency_info(ip_version: int):
        return FrequencyInfo(norm_prop[ip_version])

    try:
        info4 = cls._cache_get_or(evaluated_sibling[4], make_frequency_info)
        info6 = cls._cache_get_or(evaluated_sibling[6], make_frequency_info)
        return cls(info4, info6)
    except FrequencyFailedException as e:
        log.debug(f'Failed to compute frequency for {evaluated_sibling}', exc_info=e)
        return None
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[SplineProperty]':
    """
    Build this property by creating an OffsetSpline for the 4 and 6 entries
    of PpdOutlierRemovalProperty.

    Returns None if that property is unavailable, or if either spline is
    falsy or reports that it has no data.
    """
    ppd_cleaned = evaluated_sibling.contribute_property_type(
        PpdOutlierRemovalProperty)
    if not ppd_cleaned:
        return None

    # Cannot cache because we depend on PpdOutlierRemovalProperty,
    # which uses both series
    splines = (
        OffsetSpline.from_offsets(ppd_cleaned[4]),
        OffsetSpline.from_offsets(ppd_cleaned[6]),
    )
    if all(splines) and all(spline.has_data() for spline in splines):
        return cls(*splines)
    return None
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[DenoiseProperty]':
    """
    Build this property by denoising the offsets of each IP version.

    The per-version results go through ``cls._cache_get_or``. Returns None
    if OffsetsProperty is unavailable.
    """
    offsets = evaluated_sibling.contribute_property_type(OffsetsProperty)
    if not offsets:
        return None

    def denoise_for(ip_version: int):
        return cls._denoise(offsets[ip_version])

    return cls(
        cls._cache_get_or(evaluated_sibling[4], denoise_for),
        cls._cache_get_or(evaluated_sibling[6], denoise_for),
    )
def _features_from_evaluated(self, evaluated_sibling: EvaluatedSibling) -> Dict[str, Any]:
    """
    Collect the feature values for one sibling candidate.

    Returns an empty dict if FrequencyProperty, SkewProperty or
    DynamicRangeProperty is unavailable. Otherwise returns a dict with one
    entry per feature, in which every falsy value is replaced by NaN.
    """
    # Keys taken from old evaluation.py, feature_keys['no_raw']. Order is crucial!
    # Features are defined in Starke p. 69, Table 4.4.
    # We are using the no-rawts model (the only one provided as raw data);
    # note that this is not supposed to apply to constant-offset candidates
    # that already match the Delta-tcpraw criterion.
    freq = evaluated_sibling.contribute_property_type(FrequencyProperty)
    skew = evaluated_sibling.contribute_property_type(SkewProperty)
    dyn_range = evaluated_sibling.contribute_property_type(DynamicRangeProperty)
    if not (freq and skew and dyn_range):
        return {}

    raw_features = {
        'hz_diff': freq.diff,
        'hz_rsqrdiff': freq.r_squared_diff,
        'alphadiff': skew.skew_diff,
        'rsqrdiff': skew.r_square_diff,
        'dynrange_diff': dyn_range.diff_absolute,
        'dynrange_avg': dyn_range.average,
        'dynrange_diff_rel': dyn_range.diff_relative,
    }
    # NOTE(review): the truthiness test also maps a legitimate 0 to NaN, not
    # only missing values -- confirm that this is what the model expects.
    return {
        key: (value if value else numpy.nan)
        for key, value in raw_features.items()
    }
def evaluate(self, evaluated_sibling: EvaluatedSibling) -> SiblingStatus:
    """
    Classify a sibling candidate with ``self.classifier``.

    Returns INDECISIVE if the raw timestamp difference is within
    ``StarkeTcprawEvaluator.THRESHOLD``, ERROR if no features could be
    collected, and otherwise POSITIVE or NEGATIVE depending on the
    classifier's prediction for the single feature row.
    """
    ts_diff_prop = evaluated_sibling.contribute_property_type(FirstTimestampDiffProperty)
    within_tcpraw_threshold = (
        ts_diff_prop
        and ts_diff_prop.raw_timestamp_diff <= StarkeTcprawEvaluator.THRESHOLD
    )
    if within_tcpraw_threshold:
        # The no-rawts ML model we are using is not supposed to be applied to
        # sibling candidates that already match the Delta-tcpraw criterion
        return SiblingStatus.INDECISIVE

    feature_row = self._features_from_evaluated(evaluated_sibling)
    if not feature_row:
        return SiblingStatus.ERROR

    frame = pandas.DataFrame([feature_row], columns=self._FEATURE_KEYS)
    prediction = self.classifier.predict(frame)[0]
    return SiblingStatus.POSITIVE if prediction else SiblingStatus.NEGATIVE
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[DynamicRangeProperty]':
    """
    Build this property by computing the dynamic range of the 4 and 6
    entries of PpdOutlierRemovalProperty.

    Returns None if that property is unavailable or if the computation
    raises an IndexError.
    """
    ppd_cleaned = evaluated_sibling.contribute_property_type(
        PpdOutlierRemovalProperty)
    if not ppd_cleaned:
        return None

    # Cannot cache because we depend on PpdOutlierRemovalProperty,
    # which uses both series
    try:
        range4 = cls._calc_dynamic_range(ppd_cleaned[4])
        range6 = cls._calc_dynamic_range(ppd_cleaned[6])
        return cls(range4, range6)
    except IndexError:
        return None
def provide_for(
        cls, evaluated_sibling: EvaluatedSibling
) -> 'Optional[MeanOutlierRemovalProperty]':
    """
    Build this property by running ``_remove_outliers_97`` on the denoised
    data of each IP version.

    The per-version results go through ``cls._cache_get_or``. Returns None
    if DenoiseProperty is unavailable or if either result is falsy.
    """
    denoised = evaluated_sibling.contribute_property_type(DenoiseProperty)
    if not denoised:
        return None

    def remove_outliers_for(ip_version: int):
        result = cls._remove_outliers_97(denoised[ip_version])
        # A result without data is reported as None.
        if result.has_data():
            return result
        return None

    cleaned4 = cls._cache_get_or(evaluated_sibling[4], remove_outliers_for)
    cleaned6 = cls._cache_get_or(evaluated_sibling[6], remove_outliers_for)
    if cleaned4 and cleaned6:
        return cls(cleaned4, cleaned6)
    return None
def provide_for(cls, evaluated_sibling: EvaluatedSibling) -> 'Optional[SkewProperty]':
    """
    Build this property by computing the skew angle of the 4 and 6 entries
    of PpdOutlierRemovalProperty.

    Returns None if that property is unavailable, or if the computation
    raises a ValueError about converting NaN to an integer (logged at debug
    level). Any other ValueError is re-raised.
    """
    ppd_cleaned = evaluated_sibling.contribute_property_type(PpdOutlierRemovalProperty)
    if not ppd_cleaned:
        return None

    # Cannot cache because we depend on PpdOutlierRemovalProperty,
    # which uses both series
    try:
        angle4 = cls._calc_skew_angle(ppd_cleaned[4])
        angle6 = cls._calc_skew_angle(ppd_cleaned[6])
        return cls(angle4, angle6)
    except ValueError as e:
        if "cannot convert float NaN to integer" not in str(e):
            raise
        log.debug(f'NaN in skew calculation, {evaluated_sibling.domains}')
        return None