Example #1
0
    def insert(self, bridge):
        """Insert a bridge into all appropriate sub-hashrings.

        A bridge which is not flagged as running is skipped entirely.
        Otherwise the ``bridge`` replaces any entry in ``self.bridges`` which
        has the same fingerprint (or is appended if there is none), and is
        then added to every sub-hashring whose filter function accepts it.

        :type bridge: :class:`~bridgedb.Bridges.Bridge`
        :param bridge: The bridge to add.
        """
        # The bridge must be running to insert it:
        if not bridge.flags.running:
            logging.warning(
                "Skipping hashring insertion for non-running bridge: '%s'"
                % logSafely(bridge.fingerprint))
            return

        logging.debug("Inserting %s into splitter"
                      % (logSafely(bridge.fingerprint)))

        # Replace an existing entry for the same fingerprint in place, so it
        # keeps its position in the list; append only if none was found.
        for index, old_bridge in enumerate(self.bridges):
            if bridge.fingerprint == old_bridge.fingerprint:
                self.bridges[index] = bridge
                break
        else:
            self.bridges.append(bridge)

        for ringname, (filterFn, subring) in self.filterRings.items():
            if filterFn(bridge):
                subring.insert(bridge)
                logging.debug("Inserted bridge '%s' into '%s' sub hashring"
                              % (logSafely(bridge.fingerprint), ringname))
Example #2
0
    def insert(self, bridge):
        """Insert a bridge into all appropriate sub-hashrings.

        Bridges which are not flagged as running are skipped. A running
        ``bridge`` first replaces the entry in ``self.bridges`` sharing its
        fingerprint (or is appended when no such entry exists); afterwards it
        is inserted into each sub-hashring whose filter function returns a
        true value for it.

        :type bridge: :class:`~bridgedb.Bridges.Bridge`
        :param bridge: The bridge to add.
        """
        # The bridge must be running to insert it:
        if not bridge.flags.running:
            logging.warning(
                "Skipping hashring insertion for non-running bridge: '%s'" %
                logSafely(bridge.fingerprint))
            return

        logging.debug("Inserting %s into splitter" %
                      (logSafely(bridge.fingerprint)))

        # enumerate() keeps the index in step with the element, so there is
        # no separate counter to maintain; the ``else`` runs only when no
        # entry with a matching fingerprint was found.
        for index, old_bridge in enumerate(self.bridges):
            if bridge.fingerprint == old_bridge.fingerprint:
                self.bridges[index] = bridge
                break
        else:
            self.bridges.append(bridge)

        for ringname, (filterFn, subring) in self.filterRings.items():
            if filterFn(bridge):
                subring.insert(bridge)
                logging.debug("Inserted bridge '%s' into '%s' sub hashring" %
                              (logSafely(bridge.fingerprint), ringname))
Example #3
0
    def getBridges(self, pos, N=1):
        """Return **N** bridges appearing in this hashring after a position.

        Keys forced by each of ``self.subrings`` are taken first, followed by
        keys from this hashring. Duplicate keys are dropped, the result is cut
        down to **N** keys, and those keys are sorted before being mapped to
        their bridges.

        :param bytes pos: The position to jump to. Any bridges returned will
            start at this position in the hashring, if there is a bridge
            assigned to that position. Otherwise, indexing will start at the
            first position after this one which has a bridge assigned to it.
        :param int N: The number of bridges to return.
        :rtype: list
        :returns: A list of at most **N** :class:`~bridgedb.bridges.Bridge`s.
        """
        # Imported locally: hexlify() works on both Python 2 and Python 3,
        # whereas ``.encode('hex')`` only works on Python 2.
        import binascii

        forced = []
        for _, _, count, subring in self.subrings:
            # Never ask a sub-hashring for more keys than it holds.
            count = min(count, len(subring))
            forced.extend(subring._getBridgeKeysAt(pos, count))

        keys = []
        seen = set()
        for k in forced + self._getBridgeKeysAt(pos, N):
            if k in seen:
                logging.debug(
                    "Got duplicate bridge %r in main hashring for position %r."
                    % (logSafely(binascii.hexlify(k).decode('utf-8')),
                       binascii.hexlify(pos).decode('utf-8')))
                continue
            seen.add(k)
            keys.append(k)
        keys = sorted(keys[:N])

        # NOTE: no subnet (/16) filtering is done here; every selected key is
        # mapped straight to its bridge.
        return [self.bridges[k] for k in keys]
Example #4
0
 def test_setSafeLogging_on(self):
     """With ``safe_logging`` enabled, calls to ``logSafely()`` should
     return ``"[scrubbed]"`` in place of any arbitrary data.
     """
     safelog.setSafeLogging(True)
     scrubbed = safelog.logSafely(self.sensitiveData)
     self.logger.warn("Got a connection from %s..." % scrubbed)
     # Only checks that something was captured; the logged text itself is
     # not inspected.
     logged = self.logfile.value()
     self.assertIsNotNone(logged)
Example #5
0
 def test_setSafeLogging_off(self):
     """With ``safe_logging`` disabled, calls to ``logSafely()`` should
     hand back the original data unchanged.
     """
     safelog.setSafeLogging(False)
     passedThrough = safelog.logSafely(self.sensitiveData)
     self.logger.warn("Got a connection from %s..." % passedThrough)
     # Only checks that something was captured; the logged text itself is
     # not inspected.
     logged = self.logfile.value()
     self.assertIsNotNone(logged)
Example #6
0
 def test_setSafeLogging_on(self):
     """Enabling ``safe_logging`` should make ``logSafely()`` return
     ``"[scrubbed]"`` for any arbitrary data.
     """
     safelog.setSafeLogging(True)
     message = ("Got a connection from %s..."
                % safelog.logSafely(self.sensitiveData))
     self.logger.warn(message)
     # The assertion is only that the log file yields a value; what was
     # written is not compared against anything.
     self.assertIsNotNone(self.logfile.value())
Example #7
0
 def test_setSafeLogging_off(self):
     """Disabling ``safe_logging`` should make ``logSafely()`` return the
     original data.
     """
     safelog.setSafeLogging(False)
     message = ("Got a connection from %s..."
                % safelog.logSafely(self.sensitiveData))
     self.logger.warn(message)
     # The assertion is only that the log file yields a value; what was
     # written is not compared against anything.
     self.assertIsNotNone(self.logfile.value())
Example #8
0
def deduplicate(descriptors, statistics=False):
    """Deduplicate some descriptors, returning only the newest for each router.

    Descriptors are grouped by fingerprint, each group is sorted with
    ``__cmp_published__``, and the last descriptor of the sorted group is
    kept as the newest one.

    .. note:: If two descriptors for the same router are discovered, AND both
        descriptors have the **same** published timestamp, then the router's
        fingerprint WILL BE LOGGED ON PURPOSE, because we assume that router
        to be malicious (deliberately, or unintentionally). That logging is
        not done in this function itself; presumably it happens inside
        ``__cmp_published__`` (TODO: confirm).

    :param list descriptors: A list of
        :api:`stem.descriptor.server_descriptor.RelayDescriptor`s,
        :api:`stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`s,
        or :api:`stem.descriptor.router_status_entry.RouterStatusEntryV2`s.
    :param bool statistics: If ``True``, log some extra statistics about the
        number of duplicates.
    :rtype: dict
    :returns: A dictionary mapping router fingerprints to their newest
        available descriptor.
    """
    # Imported locally: cmp_to_key() lets the cmp-style comparison function
    # be used on Python 3, where ``list.sort(cmp=...)`` no longer exists.
    import functools

    duplicates = {}
    newest = {}

    for descriptor in descriptors:
        fingerprint = descriptor.fingerprint
        logging.debug("Deduplicating %s descriptor for router %s"
                      % (descriptor.__class__.__name__.rsplit('.', 1)[-1],
                         safelog.logSafely(fingerprint)))
        duplicates.setdefault(fingerprint, []).append(descriptor)

    for fingerprint, dupes in duplicates.items():
        dupes.sort(key=functools.cmp_to_key(__cmp_published__))
        # The sort is in place, so after the pop ``duplicates[fingerprint]``
        # holds only the older, superseded descriptors.
        newest[fingerprint] = dupes.pop()

    if statistics:
        # Count only routers which actually have superseded descriptors left
        # over, and order them so that the largest counts come first.
        totals = sorted(((len(v), k) for k, v in duplicates.items() if v),
                        reverse=True)
        total = sum(subtotal for subtotal, _ in totals)
        bridges = len(totals)
        top = min(10, bridges)
        logging.info("Number of bridges with duplicates: %5d" % bridges)
        logging.info("Total duplicate descriptors:       %5d" % total)
        logging.info("Bridges with the most duplicates (Top %d):" % top)
        for i, (subtotal, bridge) in enumerate(totals[:top], 1):
            logging.info("  #%d %s: %d duplicates" % (i, bridge, subtotal))

    logging.info("Descriptor deduplication finished.")

    return newest
Example #9
0
    def getBridges(self, pos, N=1, countryCode=None):
        """Return **N** bridges appearing in this hashring after a position.

        Keys forced by each of ``self.subrings`` are taken first, followed by
        keys from this hashring. Duplicate keys are dropped, the result is cut
        down to **N** keys, and those keys are sorted before being mapped to
        their bridges.

        :param bytes pos: The position to jump to. Any bridges returned will
                          start at this position in the hashring, if there is
                          a bridge assigned to that position. Otherwise,
                          indexing will start at the first position after this
                          one which has a bridge assigned to it.
        :param int N: The number of bridges to return.
        :type countryCode: str or None
        :param countryCode: Currently only logged at debug level when given;
                            it has no effect on which bridges are returned.
        :rtype: list
        :returns: A list of at most **N**
                  :class:`~bridgedb.Bridges.Bridge`s.
        """
        # Imported locally: hexlify() works on both Python 2 and Python 3,
        # whereas ``.encode('hex')`` only works on Python 2.
        import binascii

        # XXX This can be removed after we determine if countryCode is ever
        # actually being used. It seems the countryCode should be passed in
        # from bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer() in
        # order to hand out bridges which are believed to not be blocked in a
        # given country.
        if countryCode:
            logging.debug("getBridges: countryCode=%r" % countryCode)

        forced = []
        for _, _, count, subring in self.subrings:
            # Never ask a sub-hashring for more keys than it holds.
            count = min(count, len(subring))
            forced.extend(subring._getBridgeKeysAt(pos, count))

        keys = []
        seen = set()
        for k in forced + self._getBridgeKeysAt(pos, N):
            if k in seen:
                logging.debug(
                    "Got duplicate bridge %r in main hashring for position %r."
                    % (logSafely(binascii.hexlify(k).decode('utf-8')),
                       binascii.hexlify(pos).decode('utf-8')))
                continue
            seen.add(k)
            keys.append(k)
        keys = sorted(keys[:N])

        # NOTE: no subnet (/16) filtering is done here; every selected key is
        # mapped straight to its bridge.
        return [self.bridges[k] for k in keys]
Example #10
0
    def getBridges(self, pos, N=1, countryCode=None):
        """Return **N** bridges appearing in this hashring after a position.

        The keys forced by every entry of ``self.subrings`` come first, then
        the keys of this hashring itself. Repeated keys are skipped, at most
        **N** keys are kept, and the kept keys are sorted before their bridges
        are looked up.

        :param bytes pos: The position to jump to. Any bridges returned will
                          start at this position in the hashring, if there is
                          a bridge assigned to that position. Otherwise,
                          indexing will start at the first position after this
                          one which has a bridge assigned to it.
        :param int N: The number of bridges to return.
        :type countryCode: str or None
        :param countryCode: Currently only logged at debug level when given;
                            it has no effect on which bridges are returned.
        :rtype: list
        :returns: A list of at most **N**
                  :class:`~bridgedb.Bridges.Bridge`s.
        """
        # Imported locally: ``.encode('hex')`` only works on Python 2, while
        # hexlify() is available on both Python 2 and Python 3.
        import binascii

        # XXX This can be removed after we determine if countryCode is ever
        # actually being used. It seems the countryCode should be passed in
        # from bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer() in
        # order to hand out bridges which are believed to not be blocked in a
        # given country.
        if countryCode:
            logging.debug("getBridges: countryCode=%r" % countryCode)

        forced = []
        for _, _, count, subring in self.subrings:
            # A sub-hashring cannot supply more keys than it contains.
            count = min(count, len(subring))
            forced.extend(subring._getBridgeKeysAt(pos, count))

        keys = []
        seen = set()
        for k in forced + self._getBridgeKeysAt(pos, N):
            if k in seen:
                logging.debug(
                    "Got duplicate bridge %r in main hashring for position %r."
                    % (logSafely(binascii.hexlify(k).decode('utf-8')),
                       binascii.hexlify(pos).decode('utf-8')))
                continue
            seen.add(k)
            keys.append(k)
        keys = sorted(keys[:N])

        # NOTE: bridges are not filtered by subnet (/16) here; each selected
        # key is simply mapped to its bridge.
        return [self.bridges[k] for k in keys]
Example #11
0
    def getBridges(self, pos, N=1, filterBySubnet=False):
        """Return up to **N** bridges found in this hashring after a position.

        :param bytes pos: The position to jump to. Any bridges returned will
            start at this position in the hashring, if there is a bridge
            assigned to that position. Otherwise, indexing will start at the
            first position after this one which has a bridge assigned to it.
        :param int N: The number of bridges to return.
        :param bool filterBySubnet: If ``True``, the sorted candidate keys are
            handed to :meth:`filterDistinctSubnets` instead of being looked up
            directly in ``self.bridges``.
        :rtype: list
        :returns: A list of :class:`~bridgedb.bridges.Bridge`s.
        """
        forced = []
        for _, _, wanted, subring in self.subrings:
            # A sub-hashring cannot supply more keys than it contains.
            wanted = min(wanted, len(subring))
            forced.extend(subring._getBridgeKeysAt(pos, wanted))

        # Oversample double the number we need, in case we need to
        # filter them and some are within the same subnet.
        candidates = forced + self._getBridgeKeysAt(pos, 2 * N)

        keys = []
        for key in candidates:
            if key in keys:
                logging.debug(
                    "Got duplicate bridge %r in main hashring for position %r."
                    % (logSafely(binascii.hexlify(key).decode('utf-8')),
                       binascii.hexlify(pos).decode('utf-8')))
                continue
            keys.append(key)
        keys.sort()

        if filterBySubnet:
            selected = self.filterDistinctSubnets(keys)
        else:
            selected = [self.bridges[key] for key in keys]
        selected = selected[:N]

        logging.debug("Caller asked for N=%d, filterBySubnet=%s bridges.  "
                      "Returning %d bridges." %
                      (N, filterBySubnet, len(selected)))

        return selected
Example #12
0
    def getBridges(self, pos, N=1, filterBySubnet=False):
        """Return **N** bridges appearing in this hashring after a position.

        Twice as many keys as requested are sampled from this hashring (after
        the keys forced by ``self.subrings``), duplicates are dropped, the
        keys are sorted, and the resulting bridges are cut down to **N**.

        :param bytes pos: The position to jump to. Any bridges returned will
            start at this position in the hashring, if there is a bridge
            assigned to that position. Otherwise, indexing will start at the
            first position after this one which has a bridge assigned to it.
        :param int N: The number of bridges to return.
        :param bool filterBySubnet: If ``True``, the sorted candidate keys are
            handed to :meth:`filterDistinctSubnets` instead of being looked up
            directly in ``self.bridges``.
        :rtype: list
        :returns: A list of at most **N** :class:`~bridgedb.bridges.Bridge`s.
        """
        # Imported locally: hexlify() works on both Python 2 and Python 3,
        # whereas ``.encode('hex')`` only works on Python 2.
        import binascii

        forced = []
        for _, _, count, subring in self.subrings:
            # Never ask a sub-hashring for more keys than it holds.
            count = min(count, len(subring))
            forced.extend(subring._getBridgeKeysAt(pos, count))

        keys = []
        seen = set()

        # Oversample double the number we need, in case we need to
        # filter them and some are within the same subnet.
        for k in forced + self._getBridgeKeysAt(pos, N + N):
            if k in seen:
                logging.debug(
                    "Got duplicate bridge %r in main hashring for position %r."
                    % (logSafely(binascii.hexlify(k).decode('utf-8')),
                       binascii.hexlify(pos).decode('utf-8')))
                continue
            seen.add(k)
            keys.append(k)
        keys.sort()

        if filterBySubnet:
            bridges = self.filterDistinctSubnets(keys)
        else:
            bridges = [self.bridges[k] for k in keys]

        return bridges[:N]
Example #13
0
    def runChecks(self, client):
        """Run checks on the incoming message, and only reply if they pass.

        1. Check if the client's address is whitelisted.

        2. If it's not whitelisted, reject it if it is blacklisted; otherwise
        check that the domain name taken from the email ``'From:'`` header
        can be :func:`canonicalized <addr.canonicalizeEmailDomain>`.

        3. Compare that canonical domain with the one from the SMTP
        ``MAIL FROM:`` command. A mismatch is currently only logged; it does
        not cause the message to be rejected.

        4. Run :func:`bridgedb.email.dkim.checkDKIM` with the domain's rules.

        5. If fuzzy matching is enabled, reject addresses which are within
        the configured Levenshtein distance of a blacklisted address.

        .. note:: Calling this method sets the ``canonicalFromEmail`` and
            ``domainRules`` attributes of the :data:`incoming` message.

        :param client: An :api:`twisted.mail.smtp.Address`, which contains
            the client's email address, extracted from the ``'From:'`` header
            from the incoming email.
        :rtype: bool
        :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
        """
        incoming = self.incoming
        address = str(client)

        # If the SMTP ``MAIL FROM:`` domain name couldn't be canonicalized,
        # then we *should* have bailed at the SMTP layer, but we'll reject
        # this email again nonetheless:
        if not incoming.canonicalFromSMTP:
            logging.warning(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                             "for email from %s") % address)
            return False

        context = incoming.context

        # Allow whitelisted addresses through the canonicalization check:
        if address in context.whitelist:
            incoming.canonicalFromEmail = client.domain
            logging.info("'From:' header contained whitelisted address: %s" %
                         address)
        # Straight up reject addresses in the EMAIL_BLACKLIST config option:
        elif address in context.blacklist:
            logging.info("'From:' header contained blacklisted address: %s" %
                         address)
            return False
        else:
            logging.debug("Canonicalizing client email domain...")
            try:
                # The client's address was already checked to see if it came
                # from a supported domain and is a valid email address in
                # :meth:`getMailTo`, so we should just be able to re-extract
                # the canonical domain safely here:
                incoming.canonicalFromEmail = canonicalizeEmailDomain(
                    client.domain, incoming.canon)
                logging.debug("Canonical email domain: %s" %
                              incoming.canonicalFromEmail)
            except addr.UnsupportedDomain:
                logging.info("Domain couldn't be canonicalized: %s" %
                             safelog.logSafely(client.domain))
                return False

        # The canonical domains from the SMTP ``MAIL FROM:`` and the email
        # ``From:`` header should match.
        # NOTE(review): a mismatch is logged but deliberately not rejected
        # here; confirm whether rejection should be enabled.
        if incoming.canonicalFromSMTP != incoming.canonicalFromEmail:
            logging.error("SMTP/Email canonical domain mismatch!")
            logging.debug("Canonical domain mismatch: %s != %s" %
                          (incoming.canonicalFromSMTP,
                           incoming.canonicalFromEmail))

        incoming.domainRules = context.domainRules.get(
            incoming.canonicalFromEmail, [])

        # If the domain's ``domainRules`` say to check DKIM verification
        # results, and those results look bad, reject this email:
        if not dkim.checkDKIM(incoming.message, incoming.domainRules):
            return False

        # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
        # calculate the Levenshtein String Distance (see
        # :func:`~bridgedb.util.levenshteinDistance`):
        if context.fuzzyMatch != 0:
            for blacklistedAddress in context.blacklist:
                distance = levenshteinDistance(address, blacklistedAddress)
                if distance <= context.fuzzyMatch:
                    logging.info(
                        "Fuzzy-matched %s to blacklisted address %s!" %
                        (incoming.canonicalFromEmail, blacklistedAddress))
                    return False

        return True
Example #14
0
    def getBridgesForIP(self, ip, epoch, N=1, countryCode=None,
                        bridgeFilterRules=None):
        """Return a list of bridges to give to a user.

        The request is assigned to a sub-hashring, either through the first
        IP category which contains **ip** or, when no category matches,
        through area clustering. The ring matching the resulting set of
        filters is reused from ``self.splitter.filterRings`` if present, and
        is otherwise created and populated from the splitter's bridges.

        :param str ip: The user's IP address, as a dotted quad.
        :param str epoch: The time period when we got this request.  This can
                          be any string, so long as it changes with every
                          period.
        :param int N: The number of bridges to try to give back. (default: 1)
        :param str countryCode: Not used by this method. (default: None)
        :param list bridgeFilterRules: A list of callables used to filter the
                                       bridges returned in the response to the
                                       client. See :mod:`~bridgedb.Filters`.
                                       The list passed in is not modified.
        :rtype: list
        :return: A list of :class:`~bridgedb.Bridges.Bridge`s to include in
                 the response. See
                 :meth:`bridgedb.HTTPServer.WebResource.getBridgeRequestAnswer`
                 for an example of how this is used.
        """
        # NOTE(review): the client IP is logged unscrubbed here, while the
        # area below goes through logSafely() -- confirm this is intended.
        logging.info("Attempting to return %d bridges to client %s..."
                     % (N, ip))

        # Work on a private copy: a ring-assignment filter is appended below,
        # and appending to the caller's own list would leak that filter into
        # any later use of the same list.
        bridgeFilterRules = list(bridgeFilterRules) if bridgeFilterRules else []

        if not len(self.splitter):
            logging.warning("Bailing! Splitter has zero bridges!")
            return []

        logging.debug("Bridges in splitter:\t%d" % len(self.splitter))
        logging.debug("Client request epoch:\t%s" % epoch)
        # ``__name__`` exists on both Python 2 and 3 (``func_name`` is Python
        # 2 only); fall back to repr() for callables which have no name.
        logging.debug("Active bridge filters:\t%s"
                      % ' '.join([getattr(x, '__name__', repr(x))
                                  for x in bridgeFilterRules]))

        area = self.areaMapper(ip)
        logging.debug("IP mapped to area:\t%s"
                      % logSafely("{0}.0/24".format(area)))

        # Cluster rings are numbered 0..nClusters-1, and the category rings
        # follow directly after them.
        totalRings = self.nClusters + len(self.categories)

        # only one of ip categories or area clustering is active
        # try to match the request to an ip category
        for n, category in enumerate(self.categories, self.nClusters):
            # IP Categories
            if category.contains(ip):
                g = filterAssignBridgesToRing(self.splitter.hmac,
                                              totalRings,
                                              n)
                bridgeFilterRules.append(g)
                logging.info("category<%s>%s", epoch, logSafely(area))
                pos = self.areaOrderHmac("category<%s>%s" % (epoch, area))
                key1 = getHMAC(self.splitter.key,
                               "Order-Bridges-In-Ring-%d" % n)
                break

        # if no category matches, use area clustering
        else:
            # IP clustering: the first 8 hex digits of the area's HMAC pick
            # one of the nClusters rings.
            h = int(self.areaClusterHmac(area)[:8], 16)
            clusterNum = h % self.nClusters

            g = filterAssignBridgesToRing(self.splitter.hmac,
                                          totalRings,
                                          clusterNum)
            bridgeFilterRules.append(g)
            pos = self.areaOrderHmac("<%s>%s" % (epoch, area))
            key1 = getHMAC(self.splitter.key,
                           "Order-Bridges-In-Ring-%d" % clusterNum)

        # The set of active filters is the cache key for the ring.
        ruleset = frozenset(bridgeFilterRules)

        # See if we have a cached copy of the ring,
        # otherwise, add a new ring and populate it
        if ruleset in self.splitter.filterRings:
            logging.debug("Cache hit %s" % ruleset)
            _, ring = self.splitter.filterRings[ruleset]
        else:
            logging.debug("Cache miss %s" % ruleset)
            ring = bridgedb.Bridges.BridgeRing(key1, self.answerParameters)
            self.splitter.addRing(ring,
                                  ruleset,
                                  filterBridgesByRules(bridgeFilterRules),
                                  populate_from=self.splitter.bridges)

        # get an appropriate number of bridges
        numBridgesToReturn = getNumBridgesPerAnswer(ring,
                                                    max_bridges_per_answer=N)
        return ring.getBridges(pos, numBridgesToReturn)
Example #15
0
    def runChecks(self, client):
        """Run checks on the incoming message, and only reply if they pass.

        1. Check if the client's address is whitelisted.

        2. If it's not whitelisted, reject it if it is blacklisted; otherwise
        check that the domain name taken from the email ``'From:'`` header
        can be :func:`canonicalized <addr.canonicalizeEmailDomain>`.

        3. Compare that canonical domain with the one from the SMTP
        ``MAIL FROM:`` command. A mismatch is currently only logged; it does
        not cause the message to be rejected.

        4. Run :func:`bridgedb.email.dkim.checkDKIM` with the domain's rules.

        5. If fuzzy matching is enabled, reject addresses which are within
        the configured Levenshtein distance of a blacklisted address.

        .. note:: Calling this method sets the ``canonicalFromEmail`` and
            ``domainRules`` attributes of the :data:`incoming` message.

        :param client: An :api:`twisted.mail.smtp.Address`, which contains
            the client's email address, extracted from the ``'From:'`` header
            from the incoming email.
        :rtype: bool
        :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
        """
        incoming = self.incoming
        address = str(client)

        # If the SMTP ``MAIL FROM:`` domain name couldn't be canonicalized,
        # then we *should* have bailed at the SMTP layer, but we'll reject
        # this email again nonetheless:
        if not incoming.canonicalFromSMTP:
            logging.warning(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                             "for email from %s") % address)
            return False

        context = incoming.context

        # Allow whitelisted addresses through the canonicalization check:
        if address in context.whitelist:
            incoming.canonicalFromEmail = client.domain
            logging.info("'From:' header contained whitelisted address: %s"
                         % address)
        # Straight up reject addresses in the EMAIL_BLACKLIST config option:
        elif address in context.blacklist:
            logging.info("'From:' header contained blacklisted address: %s"
                         % address)
            return False
        else:
            logging.debug("Canonicalizing client email domain...")
            try:
                # The client's address was already checked to see if it came
                # from a supported domain and is a valid email address in
                # :meth:`getMailTo`, so we should just be able to re-extract
                # the canonical domain safely here:
                incoming.canonicalFromEmail = canonicalizeEmailDomain(
                    client.domain, incoming.canon)
                logging.debug("Canonical email domain: %s"
                              % incoming.canonicalFromEmail)
            except addr.UnsupportedDomain:
                logging.info("Domain couldn't be canonicalized: %s"
                             % safelog.logSafely(client.domain))
                return False

        # The canonical domains from the SMTP ``MAIL FROM:`` and the email
        # ``From:`` header should match.
        # NOTE(review): a mismatch is logged but deliberately not rejected
        # here; confirm whether rejection should be enabled.
        if incoming.canonicalFromSMTP != incoming.canonicalFromEmail:
            logging.error("SMTP/Email canonical domain mismatch!")
            logging.debug("Canonical domain mismatch: %s != %s"
                          % (incoming.canonicalFromSMTP,
                             incoming.canonicalFromEmail))

        incoming.domainRules = context.domainRules.get(
            incoming.canonicalFromEmail, [])

        # If the domain's ``domainRules`` say to check DKIM verification
        # results, and those results look bad, reject this email:
        if not dkim.checkDKIM(incoming.message, incoming.domainRules):
            return False

        # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
        # calculate the Levenshtein String Distance (see
        # :func:`~bridgedb.util.levenshteinDistance`):
        if context.fuzzyMatch != 0:
            for blacklistedAddress in context.blacklist:
                distance = levenshteinDistance(address, blacklistedAddress)
                if distance <= context.fuzzyMatch:
                    logging.info("Fuzzy-matched %s to blacklisted address %s!"
                                 % (incoming.canonicalFromEmail,
                                    blacklistedAddress))
                    return False

        return True