def insert(self, bridge):
    """Insert a bridge into all appropriate sub-hashrings.

    For all sub-hashrings, the ``bridge`` will only be added iff it passes
    the filter functions for that sub-hashring.

    :type bridge: :class:`~bridgedb.Bridges.Bridge`
    :param bridge: The bridge to add.
    """
    # Non-running bridges are never handed out, so don't insert them:
    if not bridge.flags.running:
        logging.warn("Skipping hashring insertion for non-running "
                     "bridge: '%s'" % logSafely(bridge.fingerprint))
        return

    logging.debug("Inserting %s into splitter"
                  % (logSafely(bridge.fingerprint)))

    # Replace any previously-known descriptor for this bridge in place;
    # otherwise append it as a new bridge:
    replaced = False
    for position, known in enumerate(self.bridges[:]):
        if known.fingerprint == bridge.fingerprint:
            self.bridges[position] = bridge
            replaced = True
            break
    if not replaced:
        self.bridges.append(bridge)

    # Offer the bridge to every sub-hashring whose filter accepts it:
    for ringname, (filterFn, subring) in self.filterRings.items():
        if filterFn(bridge):
            subring.insert(bridge)
            logging.debug("Inserted bridge '%s' into '%s' sub hashring"
                          % (logSafely(bridge.fingerprint), ringname))
def getBridges(self, pos, N=1):
    """Return **N** bridges appearing in this hashring after a position.

    :param bytes pos: The position to jump to. Any bridges returned will
        start at this position in the hashring, if there is a bridge
        assigned to that position. Otherwise, indexing will start at the
        first position after this one which has a bridge assigned to it.
    :param int N: The number of bridges to return.
    :rtype: list
    :returns: A list of :class:`~bridgedb.bridges.Bridge`s.
    """
    # Always include bridges from the constant-distribution subrings,
    # capped at however many each subring can actually supply:
    forced = []
    for _, _, count, subring in self.subrings:
        available = len(subring)
        if available < count:
            count = available
        forced.extend(subring._getBridgeKeysAt(pos, count))

    # Deduplicate keys, preserving first-seen order:
    keys = []
    for key in forced + self._getBridgeKeysAt(pos, N):
        if key in keys:
            logging.debug(
                "Got duplicate bridge %r in main hashring for position %r."
                % (logSafely(key.encode('hex')), pos.encode('hex')))
        else:
            keys.append(key)
    keys = keys[:N]
    keys.sort()

    #Do not return bridges from the same /16
    return [self.bridges[key] for key in keys]
def test_setSafeLogging_on(self):
    """Calls to ``logSafely()`` should return ``"[scrubbed]"`` for any
    arbitrary data when ``safe_logging`` is enabled.
    """
    safelog.setSafeLogging(True)
    message = "Got a connection from %s..." % safelog.logSafely(self.sensitiveData)
    self.logger.warn(message)
    # Whatever was written, something must have reached the log:
    contents = self.logfile.value()
    self.assertIsNotNone(contents)
def test_setSafeLogging_off(self):
    """Calls to ``logSafely()`` should return the original data when
    ``safe_logging`` is disabled.
    """
    safelog.setSafeLogging(False)
    message = "Got a connection from %s..." % safelog.logSafely(self.sensitiveData)
    self.logger.warn(message)
    # Whatever was written, something must have reached the log:
    contents = self.logfile.value()
    self.assertIsNotNone(contents)
def deduplicate(descriptors, statistics=False):
    """Deduplicate some descriptors, returning only the newest for each router.

    .. note:: If two descriptors for the same router are discovered, AND both
        descriptors have the **same** published timestamp, then the router's
        fingerprint WILL BE LOGGED ON PURPOSE, because we assume that router
        to be malicious (deliberately, or unintentionally).

    :param list descriptors: A list of
        :api:`stem.descriptor.server_descriptor.RelayDescriptor`s,
        :api:`stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`s,
        or :api:`stem.descriptor.router_status_entry.RouterStatusEntryV2`s.
    :param bool statistics: If ``True``, log some extra statistics about the
        number of duplicates.
    :rtype: dict
    :returns: A dictionary mapping router fingerprints to their newest
        available descriptor.
    """
    duplicates = {}
    newest = {}

    # Group every descriptor under its router's fingerprint:
    for descriptor in descriptors:
        fingerprint = descriptor.fingerprint
        logging.debug("Deduplicating %s descriptor for router %s"
                      % (descriptor.__class__.__name__.rsplit('.', 1)[-1],
                         safelog.logSafely(fingerprint)))
        duplicates.setdefault(fingerprint, []).append(descriptor)

    # Keep only the most recently published descriptor per router; the
    # remainder stay behind in ``duplicates`` for the statistics below:
    for fingerprint, dupes in duplicates.items():
        dupes.sort(cmp=__cmp_published__)
        newest[fingerprint] = dupes.pop()
        duplicates[fingerprint] = dupes

    if statistics:
        # sorted() won't sort by values (or anything that isn't the first item
        # in its container), period, no matter what the cmp function is.
        totals = sorted([(len(v), k,) for k, v in duplicates.viewitems()])
        total = sum([k for (k, v) in totals])
        bridges = len(duplicates)
        top = 10 if bridges >= 10 else bridges
        logging.info("Number of bridges with duplicates: %5d" % bridges)
        logging.info("Total duplicate descriptors: %5d" % total)
        logging.info("Bridges with the most duplicates (Top %d):" % top)
        for i, (subtotal, bridge) in zip(range(1, top + 1), totals[:top]):
            logging.info(" #%d %s: %d duplicates" % (i, bridge, subtotal))
        logging.info("Descriptor deduplication finished.")

    return newest
def getBridges(self, pos, N=1, countryCode=None):
    """Return **N** bridges appearing in this hashring after a position.

    :param bytes pos: The position to jump to. Any bridges returned will
        start at this position in the hashring, if there is a bridge
        assigned to that position. Otherwise, indexing will start at the
        first position after this one which has a bridge assigned to it.
    :param int N: The number of bridges to return.
    :type countryCode: str or None
    :param countryCode: DOCDOC
    :rtype: list
    :returns: A list of :class:`~bridgedb.Bridges.Bridge`s.
    """
    # XXX This can be removed after we determine if countryCode is ever
    # actually being used. It seems the countryCode should be passed in
    # from bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer() in
    # order to hand out bridges which are believed to not be blocked in a
    # given country.
    if countryCode:
        logging.debug("getBridges: countryCode=%r" % countryCode)

    # Always include bridges from the constant-distribution subrings,
    # capped at however many each subring can actually supply:
    forced = []
    for _, _, count, subring in self.subrings:
        available = len(subring)
        if available < count:
            count = available
        forced.extend(subring._getBridgeKeysAt(pos, count))

    # Deduplicate keys, preserving first-seen order:
    keys = []
    for candidate in forced + self._getBridgeKeysAt(pos, N):
        if candidate in keys:
            logging.debug(
                "Got duplicate bridge %r in main hashring for position %r."
                % (logSafely(candidate.encode('hex')), pos.encode('hex')))
        else:
            keys.append(candidate)
    keys = keys[:N]
    keys.sort()

    #Do not return bridges from the same /16
    return [self.bridges[candidate] for candidate in keys]
def getBridges(self, pos, N=1, countryCode=None):
    """Return **N** bridges appearing in this hashring after a position.

    :param bytes pos: The position to jump to. Any bridges returned will
        start at this position in the hashring, if there is a bridge
        assigned to that position. Otherwise, indexing will start at the
        first position after this one which has a bridge assigned to it.
    :param int N: The number of bridges to return.
    :type countryCode: str or None
    :param countryCode: DOCDOC
    :rtype: list
    :returns: A list of :class:`~bridgedb.Bridges.Bridge`s.
    """
    # XXX This can be removed after we determine if countryCode is ever
    # actually being used. It seems the countryCode should be passed in
    # from bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer() in
    # order to hand out bridges which are believed to not be blocked in a
    # given country.
    if countryCode:
        logging.debug("getBridges: countryCode=%r" % countryCode)

    # Bridges from the constant-distribution subrings are always included,
    # each subring contributing at most as many bridges as it holds:
    forced = []
    for _, _, quota, subring in self.subrings:
        forced.extend(subring._getBridgeKeysAt(pos, min(quota, len(subring))))

    # Deduplicate keys, preserving first-seen order:
    seen = []
    for k in forced + self._getBridgeKeysAt(pos, N):
        if k in seen:
            logging.debug(
                "Got duplicate bridge %r in main hashring for position %r."
                % (logSafely(k.encode('hex')), pos.encode('hex')))
            continue
        seen.append(k)

    chosen = sorted(seen[:N])
    #Do not return bridges from the same /16
    return [self.bridges[k] for k in chosen]
def getBridges(self, pos, N=1, filterBySubnet=False):
    """Return **N** bridges appearing in this hashring after a position.

    :param bytes pos: The position to jump to. Any bridges returned will
        start at this position in the hashring, if there is a bridge
        assigned to that position. Otherwise, indexing will start at the
        first position after this one which has a bridge assigned to it.
    :param int N: The number of bridges to return.
    :rtype: list
    :returns: A list of :class:`~bridgedb.bridges.Bridge`s.
    """
    # Bridges from the constant-distribution subrings are always included,
    # each subring contributing at most as many bridges as it holds:
    forced = []
    for _, _, quota, subring in self.subrings:
        take = quota if quota <= len(subring) else len(subring)
        forced.extend(subring._getBridgeKeysAt(pos, take))

    # Oversample double the number we need, in case we need to
    # filter them and some are within the same subnet.
    keys = []
    for key in forced + self._getBridgeKeysAt(pos, 2 * N):
        if key in keys:
            logging.debug(
                "Got duplicate bridge %r in main hashring for position %r."
                % (logSafely(binascii.hexlify(key).decode('utf-8')),
                   binascii.hexlify(pos).decode('utf-8')))
        else:
            keys.append(key)
    keys.sort()

    if filterBySubnet:
        bridges = self.filterDistinctSubnets(keys)
    else:
        bridges = [self.bridges[key] for key in keys]
    bridges = bridges[:N]

    logging.debug("Caller asked for N=%d, filterBySubnet=%s bridges. "
                  "Returning %d bridges." % (N, filterBySubnet, len(bridges)))
    return bridges
def getBridges(self, pos, N=1, filterBySubnet=False):
    """Return **N** bridges appearing in this hashring after a position.

    :param bytes pos: The position to jump to. Any bridges returned will
        start at this position in the hashring, if there is a bridge
        assigned to that position. Otherwise, indexing will start at the
        first position after this one which has a bridge assigned to it.
    :param int N: The number of bridges to return.
    :rtype: list
    :returns: A list of :class:`~bridgedb.bridges.Bridge`s.
    """
    # Bridges from the constant-distribution subrings are always included,
    # each subring contributing at most as many bridges as it holds:
    forced = []
    for _, _, count, subring in self.subrings:
        forced.extend(subring._getBridgeKeysAt(pos, min(count, len(subring))))

    # Oversample double the number we need, in case we need to
    # filter them and some are within the same subnet.
    keys = []
    for k in forced + self._getBridgeKeysAt(pos, N + N):
        if k not in keys:
            keys.append(k)
        else:
            logging.debug(
                "Got duplicate bridge %r in main hashring for position %r."
                % (logSafely(k.encode('hex')), pos.encode('hex')))
    keys.sort()

    if filterBySubnet:
        chosen = self.filterDistinctSubnets(keys)
    else:
        chosen = [self.bridges[k] for k in keys]
    return chosen[:N]
def runChecks(self, client):
    """Run checks on the incoming message, and only reply if they pass.

    1. Check if the client's address is whitelisted.

    2. If it's not whitelisted, check that the domain names, taken from the
       SMTP ``MAIL FROM:`` command and the email ``'From:'`` header, can be
       :func:`canonicalized <addr.canonicalizeEmailDomain>`.

    3. Check that those canonical domains match.

    4. If the incoming message is from a domain which supports DKIM
       signing, then run :func:`bridgedb.email.dkim.checkDKIM` as well.

    .. note:: Calling this method sets the ``canonicalFromEmail`` and
        :data:``canonicalDomainRules`` attributes of the :data:`incoming`
        message.

    :param client: An :api:`twisted.mail.smtp.Address`, which contains the
        client's email address, extracted from the ``'From:'`` header from
        the incoming email.
    :rtype: bool
    :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
    """
    # If the SMTP ``RCPT TO:`` domain name couldn't be canonicalized, then
    # we *should* have bailed at the SMTP layer, but we'll reject this
    # email again nonetheless:
    if not self.incoming.canonicalFromSMTP:
        logging.warn(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                      "for email from %s") % str(client))
        return False

    # Allow whitelisted addresses through the canonicalization check:
    if str(client) in self.incoming.context.whitelist.keys():
        self.incoming.canonicalFromEmail = client.domain
        logging.info("'From:' header contained whitelisted address: %s"
                     % str(client))
    # Straight up reject addresses in the EMAIL_BLACKLIST config option:
    elif str(client) in self.incoming.context.blacklist:
        # BUGFIX: the format string previously had a ``%s`` placeholder
        # but no argument, so the blacklisted address was never actually
        # interpolated into the log message.
        logging.info("'From:' header contained blacklisted address: %s"
                     % str(client))
        return False
    else:
        logging.debug("Canonicalizing client email domain...")
        try:
            # The client's address was already checked to see if it came
            # from a supported domain and is a valid email address in
            # :meth:`getMailTo`, so we should just be able to re-extract
            # the canonical domain safely here:
            self.incoming.canonicalFromEmail = canonicalizeEmailDomain(
                client.domain, self.incoming.canon)
            logging.debug("Canonical email domain: %s"
                          % self.incoming.canonicalFromEmail)
        except addr.UnsupportedDomain as error:
            logging.info("Domain couldn't be canonicalized: %s"
                         % safelog.logSafely(client.domain))
            return False

    # The canonical domains from the SMTP ``MAIL FROM:`` and the email
    # ``From:`` header should match:
    if self.incoming.canonicalFromSMTP != self.incoming.canonicalFromEmail:
        logging.error("SMTP/Email canonical domain mismatch!")
        logging.debug("Canonical domain mismatch: %s != %s"
                      % (self.incoming.canonicalFromSMTP,
                         self.incoming.canonicalFromEmail))
        # NOTE(review): mismatches are deliberately only logged, not
        # rejected; uncomment the following to reject them:
        #return False

    self.incoming.domainRules = self.incoming.context.domainRules.get(
        self.incoming.canonicalFromEmail, list())

    # If the domain's ``domainRules`` say to check DKIM verification
    # results, and those results look bad, reject this email:
    if not dkim.checkDKIM(self.incoming.message, self.incoming.domainRules):
        return False

    # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
    # calculate the Levenshtein String Distance (see
    # :func:`~bridgedb.util.levenshteinDistance`):
    if self.incoming.context.fuzzyMatch != 0:
        for blacklistedAddress in self.incoming.context.blacklist:
            distance = levenshteinDistance(str(client), blacklistedAddress)
            if distance <= self.incoming.context.fuzzyMatch:
                logging.info("Fuzzy-matched %s to blacklisted address %s!"
                             % (self.incoming.canonicalFromEmail,
                                blacklistedAddress))
                return False

    return True
def getBridgesForIP(self, ip, epoch, N=1, countryCode=None,
                    bridgeFilterRules=None):
    """Return a list of bridges to give to a user.

    :param str ip: The user's IP address, as a dotted quad.
    :param str epoch: The time period when we got this request. This can be
        any string, so long as it changes with every period.
    :param int N: The number of bridges to try to give back. (default: 1)
    :param str countryCode: DOCDOC (default: None)
    :param list bridgeFilterRules: A list of callables used filter the
        bridges returned in the response to the client. See
        :mod:`~bridgedb.Filters`.
    :rtype: list
    :return: A list of :class:`~bridgedb.Bridges.Bridge`s to include in
        the response. See
        :meth:`bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer`
        for an example of how this is used.
    """
    logging.info("Attempting to return %d bridges to client %s..."
                 % (N, ip))

    if not bridgeFilterRules:
        bridgeFilterRules=[]

    # Without any bridges in the splitter there is nothing to hand out:
    if not len(self.splitter):
        logging.warn("Bailing! Splitter has zero bridges!")
        return []

    logging.debug("Bridges in splitter:\t%d" % len(self.splitter))
    logging.debug("Client request epoch:\t%s" % epoch)
    logging.debug("Active bridge filters:\t%s"
                  % ' '.join([x.func_name for x in bridgeFilterRules]))

    # Map the client's IP to its /24 "area", so that all clients in the
    # same area are served the same bridges:
    area = self.areaMapper(ip)
    logging.debug("IP mapped to area:\t%s"
                  % logSafely("{0}.0/24".format(area)))

    key1 = ''
    pos = 0
    n = self.nClusters

    # only one of ip categories or area clustering is active
    # try to match the request to an ip category
    for category in self.categories:
        # IP Categories
        if category.contains(ip):
            # Assign matching clients to the ring reserved for this
            # category (rings nClusters..nClusters+len(categories)-1):
            g = filterAssignBridgesToRing(self.splitter.hmac,
                                          self.nClusters +
                                          len(self.categories),
                                          n)
            bridgeFilterRules.append(g)
            logging.info("category<%s>%s", epoch, logSafely(area))
            pos = self.areaOrderHmac("category<%s>%s" % (epoch, area))
            key1 = getHMAC(self.splitter.key,
                           "Order-Bridges-In-Ring-%d" % n)
            break
        # ``n`` indexes the category rings; advance it for each
        # non-matching category:
        n += 1
    # if no category matches, use area clustering
    # (NOTE: this is a for/else — the else runs only when the loop above
    # completed without hitting ``break``)
    else:
        # IP clustering: hash the area into one of nClusters clusters.
        h = int( self.areaClusterHmac(area)[:8], 16)
        # length of numClusters
        clusterNum = h % self.nClusters
        g = filterAssignBridgesToRing(self.splitter.hmac,
                                      self.nClusters +
                                      len(self.categories),
                                      clusterNum)
        bridgeFilterRules.append(g)
        pos = self.areaOrderHmac("<%s>%s" % (epoch, area))
        key1 = getHMAC(self.splitter.key,
                       "Order-Bridges-In-Ring-%d" % clusterNum)

    # try to find a cached copy
    ruleset = frozenset(bridgeFilterRules)

    # See if we have a cached copy of the ring,
    # otherwise, add a new ring and populate it
    if ruleset in self.splitter.filterRings.keys():
        logging.debug("Cache hit %s" % ruleset)
        _,ring = self.splitter.filterRings[ruleset]
    # else create the ring and populate it
    else:
        logging.debug("Cache miss %s" % ruleset)
        ring = bridgedb.Bridges.BridgeRing(key1, self.answerParameters)
        self.splitter.addRing(ring, ruleset,
                              filterBridgesByRules(bridgeFilterRules),
                              populate_from=self.splitter.bridges)

    # get an appropriate number of bridges
    numBridgesToReturn = getNumBridgesPerAnswer(ring,
                                                max_bridges_per_answer=N)
    answer = ring.getBridges(pos, numBridgesToReturn)
    return answer
def runChecks(self, client):
    """Run checks on the incoming message, and only reply if they pass.

    1. Check if the client's address is whitelisted.

    2. If it's not whitelisted, check that the domain names, taken from the
       SMTP ``MAIL FROM:`` command and the email ``'From:'`` header, can be
       :func:`canonicalized <addr.canonicalizeEmailDomain>`.

    3. Check that those canonical domains match.

    4. If the incoming message is from a domain which supports DKIM
       signing, then run :func:`bridgedb.email.dkim.checkDKIM` as well.

    .. note:: Calling this method sets the ``canonicalFromEmail`` and
        :data:``canonicalDomainRules`` attributes of the :data:`incoming`
        message.

    :param client: An :api:`twisted.mail.smtp.Address`, which contains the
        client's email address, extracted from the ``'From:'`` header from
        the incoming email.
    :rtype: bool
    :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
    """
    # If the SMTP ``RCPT TO:`` domain name couldn't be canonicalized, then
    # we *should* have bailed at the SMTP layer, but we'll reject this
    # email again nonetheless:
    if not self.incoming.canonicalFromSMTP:
        logging.warn(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                      "for email from %s") % str(client))
        return False

    # Allow whitelisted addresses through the canonicalization check:
    if str(client) in self.incoming.context.whitelist.keys():
        self.incoming.canonicalFromEmail = client.domain
        logging.info("'From:' header contained whitelisted address: %s"
                     % str(client))
    # Straight up reject addresses in the EMAIL_BLACKLIST config option:
    elif str(client) in self.incoming.context.blacklist:
        # BUGFIX: the format string previously had a ``%s`` placeholder
        # but no argument, so the blacklisted address was never actually
        # interpolated into the log message.
        logging.info("'From:' header contained blacklisted address: %s"
                     % str(client))
        return False
    else:
        logging.debug("Canonicalizing client email domain...")
        try:
            # The client's address was already checked to see if it came
            # from a supported domain and is a valid email address in
            # :meth:`getMailTo`, so we should just be able to re-extract
            # the canonical domain safely here:
            self.incoming.canonicalFromEmail = canonicalizeEmailDomain(
                client.domain, self.incoming.canon)
            logging.debug("Canonical email domain: %s"
                          % self.incoming.canonicalFromEmail)
        except addr.UnsupportedDomain as error:
            logging.info("Domain couldn't be canonicalized: %s"
                         % safelog.logSafely(client.domain))
            return False

    # The canonical domains from the SMTP ``MAIL FROM:`` and the email
    # ``From:`` header should match:
    if self.incoming.canonicalFromSMTP != self.incoming.canonicalFromEmail:
        logging.error("SMTP/Email canonical domain mismatch!")
        logging.debug("Canonical domain mismatch: %s != %s"
                      % (self.incoming.canonicalFromSMTP,
                         self.incoming.canonicalFromEmail))
        # NOTE(review): mismatches are deliberately only logged, not
        # rejected; uncomment the following to reject them:
        #return False

    self.incoming.domainRules = self.incoming.context.domainRules.get(
        self.incoming.canonicalFromEmail, list())

    # If the domain's ``domainRules`` say to check DKIM verification
    # results, and those results look bad, reject this email:
    if not dkim.checkDKIM(self.incoming.message, self.incoming.domainRules):
        return False

    # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
    # calculate the Levenshtein String Distance (see
    # :func:`~bridgedb.util.levenshteinDistance`):
    if self.incoming.context.fuzzyMatch != 0:
        for blacklistedAddress in self.incoming.context.blacklist:
            distance = levenshteinDistance(str(client), blacklistedAddress)
            if distance <= self.incoming.context.fuzzyMatch:
                logging.info("Fuzzy-matched %s to blacklisted address %s!"
                             % (self.incoming.canonicalFromEmail,
                                blacklistedAddress))
                return False

    return True