Exemple #1
0
def nearby_probes_diff_ldns(svl, rmask=16):
    '''
    :param svl: list of smartvecs (objects exposing get_ldns() and get_ip())
    :param rmask: prefix length used to collapse resolvers that live in the
        same subnet (e.g., 8.8.8.8 and 8.8.4.4 -> 8.8.x.x with rmask=16)
    :return: list of groups (lists of smartvecs); each group holds probes
        that share the same ASN, /16 subnet, country and owner

    Probes whose local resolver is not public are dropped first. The
    remaining probes are grouped successively by ASN, /16 subnet, country and
    owner; groups with a None key or fewer than two probes are skipped.

    side effects: overwrites plotsdir + "nearbies.csv" with one column per
    group ("|~<ldns>__<probe ip>~|" entries) and logs the number of groups

    NOTE(review): every resolver is placed in `keep`, so a group is recorded
    whether or not its probes use different resolvers; the "has different
    resolvers!!!" print is the only place where resolver diversity is
    reported. Confirm whether the filter was meant to be stricter.
    NOTE(review): the probe subnet grouping is hard-coded to 16 bits and is
    independent of rmask -- confirm this is intentional.
    '''
    print("reducing svl to only probes with public ldns")
    svl = [sv for sv in svl if ipp.is_public(sv.get_ldns())]
    print("getting asn descriptor list...")
    asvl = vv.asn_svl(svl)

    # the resolver mask only depends on rmask, so build it once
    fmtmask = ipp.make_v4_prefix_mask(rmask)

    nearbies = list()
    strnearbies = list()

    for asn in asvl:
        if asn is None:
            continue
        ssvl = vv.subnet_svl(asvl[asn], 16)
        for subnet in ssvl:
            if subnet is None or len(ssvl[subnet]) <= 1:
                continue
            csvl = vv.country_svl(ssvl[subnet])
            for country in csvl:
                if country is None or len(csvl[country]) <= 1:
                    continue
                osvl = vv.owner_svl(csvl[country])
                for owner in osvl:
                    if owner is None or len(osvl[owner]) <= 1:
                        continue
                    resolvers = [z.get_ldns() for z in osvl[owner]]
                    # collapse redundant resolvers in the same
                    # subnet (e.g., 8.8.8.8 and 8.8.4.4 -> 8.8.x.x)
                    r2 = defaultdict(list)
                    for r in resolvers:
                        r2[r & fmtmask].append(r)
                    # keep resolvers with at least 1 probe, i.e. all of them
                    keep = list()
                    for z in r2:
                        keep += r2[z]
                    if len(r2) > 1:
                        print("has different resolvers!!!")
                    if len(keep) > 1:  # if there's stuff to compare
                        print("found some!!")
                        print("asn: " + str(asn))
                        # use a fresh name so the filtered input list `svl`
                        # is not clobbered while we are still looping
                        group = [sv for sv in osvl[owner]
                                 if sv.get_ldns() in keep]
                        nearbies.append(group)
                        strnearbies.append(
                            ["|~" + ipp.int2ip(z.get_ldns()) +
                             "__" + ipp.int2ip(z.get_ip()) + "~|"
                             for z in group])
    df.overwrite(plotsdir + "nearbies.csv", df.list2col(strnearbies))
    logger.warning("nearbies: " + str(len(nearbies)))
    return nearbies
Exemple #2
0
 def sees_private_ldns(self):
     '''
     :return: False only when probe info is available and its perceived
         IPv4 resolver is classified as "PUBLIC"; True otherwise (including
         when no probe info is available)
     '''
     info = self.get_probe_info()
     if info is None:
         # nothing known about the probe: report True, as before
         return True
     perceived = info['ipv4']['perceived_ldns']
     return IP(ipp.int2ip(perceived) + "/32").iptype() != "PUBLIC"
Exemple #3
0
    def get_owner(self):
        '''
        :return: the owner name for this probe's IP

        The value is looked up through dt.get_owner_name() the first time
        (or whenever the stored owner is missing or None) and then stored on
        the instance so later calls reuse it.
        '''
        cached = getattr(self, "owner", None)
        if cached is not None:
            return cached

        self.owner = dt.get_owner_name(ipp.int2ip(self.get_ip()))
        return self.owner
Exemple #4
0
    def __init__(self, d, mask=32, oddballs=False):
        '''
        :param d: a single subdict d from the dict dd output from
            window_to_dicts(); the keys read here are 'domains', 'ind',
            'answers', 'probe_ip', 'ldns' and '_id'
        :param mask: prefix mask to use on domain IPs
        :param oddballs: if True, will include non-public IPs (10.x.x.x, etc); if
            False, will only include public IPs

        container for a set of query results from a single probe

        NOTE: mask is only applied to domain IPs (not the probe's IP)
        NOTE: after mask is applied, original IPs are not preserved; for
            comparisons, separate sv's should be constructed
        NOTE: a domain with no answers is recorded in query_count,
            answer_count and inds, but gets no entry in vec
        '''
        self.vec = defaultdict(ddfloat)  # {domain: {ans: cum. weight}}
        fmtmask = ipp.make_v4_prefix_mask(mask)
        self.query_count = defaultdict(float)
        self.answer_count = defaultdict(float)
        self.inds = defaultdict(list)
        for i, dom in enumerate(d['domains']):
            answers = d['answers'][i]
            query_count = float(len(d['ind'][i]))
            answer_count = float(len(answers))
            self.query_count[dom] = query_count
            self.answer_count[dom] = answer_count
            self.inds[dom] = d['ind'][i]
            if answer_count == 0:
                # no answers to weight; skipping also avoids dividing by
                # zero when computing the per-answer weight below
                continue
            # each answer carries an equal share of the domain's queries
            qaratio = query_count / answer_count
            for ip in answers:
                ipm = ip & fmtmask
                ipstr = ipp.int2ip(ipm)
                if IP(ipstr + "/32").iptype() == "PUBLIC" or oddballs:
                    self.vec[dom][ipm] += qaratio
            if len(self.vec[dom]) < 1:
                # every answer was filtered out: drop the empty entry
                del self.vec[dom]
            else:
                self.vec[dom] = dict(self.vec[dom])
        # freeze into plain dicts so later lookups cannot create keys
        self.vec = dict(self.vec)
        self.mask = mask
        # only numeric entries are kept; anything else is ignored
        self.ip = {ip for ipset in d['probe_ip'] for ip in ipset
                   if isinstance(ip, numbers.Number)}
        self.ldns = list({ip for ipset in d['ldns'] for ip in ipset
                          if isinstance(ip, numbers.Number)})
        self.id = d['_id']
        self.probe_info = None
        self.owner = None
Exemple #5
0
def dom_traits(svld, anssets):
    '''
    :param svld: output from arrange_self_data(); iterated here as
        {pid: [smartvec, ...]}
    :param anssets: {dom: set of answer IPs seen for dom across all probes}
    :return: dict {<trait>: {<trait val>: {dom: [clients]}}}, where 'clients'
    refers to the list of clients that witnessed said trait value from said
    domain

    traits include:
        ansm -> number of answers a client holds for the domain
            (len(sv.vec[dom]))
        spacem -> order of magnitude (log2 bins) of the number of IPs seen
            across all probes; the domain is registered with an empty
            client list
        prefix -> number of bits in the prefix reported by dt.get_cidr()
            for each public answer IP
        shared -> bool: does dom's full IP set intersect with that of another
            dom (only public IPs count); the per-domain value is the flag
            True rather than a client list
            NOTE: this is technically difficult to know; for simplicity, I'll
            start by only comparing to what I see from my data; manual
            inspection or something else is necessary to really say this or not
        lpm -> longest prefix match between a client's answers and the other
            IPs in dom's full answer space
        spm -> shortest prefix match between a client's answers and the other
            IPs in dom's full answer space
        pilpm -> the (average) longest prefix match for EACH IP, binned by
            log2. For example, the 4 bit IPs [1100, 1101, 0110, 0111] would
            have an average of 3: some IPs match '110', and the rest match
            '011', which both have a length of 3 bits and in turn average to
            (3+3+3+3)/4 = 3 bits; the value is flagged True per domain

    Domains with a single IP have nothing to compare against, so they get no
    pilpm/lpm/spm entry; pilpm is also skipped when the average match is 0
    bits, since its log is undefined.
    '''
    # {<trait>: {<trait val>: {dom: [clients]}}}
    dtd = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    checked = set()
    for a in anssets:
        if len(anssets[a]) == 0:
            continue
        checked.add(a)
        # touching the key is enough to register the domain in its size bin
        dtd['spacem'][int(round(log(len(anssets[a]), 2)))][a]
        for b in anssets:
            if b in checked:
                continue
            # IPs present in BOTH answer spaces
            overlap = anssets[a].intersection(anssets[b])
            if any(ipp.is_public(ip) for ip in overlap):
                dtd['shared'][True][a] = True
                dtd['shared'][True][b] = True
        if a not in dtd['shared'][True]:
            dtd['shared'][False][a] = True
        mpms = list()
        for ip in anssets[a]:
            matches = [ipp.prefix_match(ip, z) for z in anssets[a]
                       if z != ip]
            if matches:
                mpms.append(max(matches))
        if mpms:
            mean_match = np.mean(mpms)
            if mean_match > 0:
                dtd['pilpm'][int(round(log(mean_match, 2)))][a] = True

    for pid in svld:
        for sv in svld[pid]:
            for dom in sv:
                answers = sv.vec[dom]
                dtd['ansm'][len(answers)][dom].append(sv)
                ipstrs = [ipp.int2ip(z) for z in answers]
                # NOTE(review): is_public() receives int2ip() output here but
                # raw answer-set members above -- confirm it accepts both
                cidrs = [dt.get_cidr(ip) for ip in ipstrs if ipp.is_public(ip)]
                masks = [int(cidr.split("/")[1]) for cidr in cidrs
                         if cidr is not None]
                for mask in masks:
                    dtd['prefix'][mask][dom].append(sv)
                pms = [ipp.prefix_match(ip, z)
                       for ip in answers
                       for z in anssets[dom] if z != ip]
                if pms:
                    dtd['lpm'][max(pms)][dom].append(sv)
                    dtd['spm'][min(pms)][dom].append(sv)

    return dtd
Exemple #6
0
def get_svl(start_time,
            duration=30000,
            mask=32,
            fmt=None,
            country_set=None,
            oddballs=True,
            maxmissing=0,
            return_ccache=True,
            ccachef=df.rightdir(statedir + "pickles/") + "ccache.pickle",
            mindomsize=2):
    '''
    :param start_time: int indicating the earliest query the window should
        include
    :param duration: int indicating the span of time covered by the window,
        in seconds
    :param mask: int, prefix mask to use over domain IPs
    :param fmt: see transform_fmt(); also passed to get_window() as the
        domain set
    :param country_set: the set of countries the window should include
        queries from. If None, then all countries will be included
    :param oddballs: if True, will include non-public IPs (10.x.x.x, etc); if
        False, will only include public IPs
    :param maxmissing: forwarded to reduce_svl() and init_ccache()
    :param return_ccache: if True, a ccache from init_ccache() is appended to
        the returned tuple
    :param ccachef: path handed to init_ccache()
    :param mindomsize: domains whose answer set has fewer entries than this
        are dropped
    :return: (svl, fmt, anssets, ccache) if return_ccache, otherwise
        (svl, fmt, anssets); svl is the list of smartvecs, fmt the sorted
        list of retained domains and anssets a plain dict keyed by domain

    Windows longer than 24 hours are delegated to get_big_svl().
    '''
    # long windows make mongo time out the cursor before processing is done,
    # so those are split into smaller jobs by get_big_svl()
    seconds_per_day = 24 * 60 * 60
    if duration > seconds_per_day:
        logger.warning(
            "using get_big_svl(), which assumes that 'duration' is a multple of 24 hours"
        )
        return get_big_svl(start_time, duration, mask, fmt, country_set,
                           oddballs, maxmissing, return_ccache, ccachef,
                           mindomsize)

    logger.info("->window...")
    window = get_window(start_time, duration,
                        country_set=country_set, domain_set=fmt)

    logger.info("->svl...")
    svl, doms, anssets = dicts_to_svl(window, mask, oddballs)
    logger.debug(str(doms))

    fmt = transform_fmt(fmt, doms)
    # drop domains with too few distinct answers (all nodes would see the
    # same thing) and domains missing from more than half of the vectors
    for dom in fmt:
        too_small = len(anssets[dom]) < mindomsize
        if too_small or \
                sum(1 for sv in svl if dom not in sv) > 0.5 * len(svl):
            del anssets[dom]
    fmt = sorted(set(anssets).intersection(fmt))
    svl = reduce_svl(svl, fmt, maxmissing)

    for dom in fmt:
        logger.debug("-----------" + dom + "-----------")
        for val in sorted(anssets[dom]):
            if type(val) is int:
                logger.debug(ipp.int2ip(val))

    if not return_ccache:
        return svl, fmt, dict(anssets)

    ccache = init_ccache(None, ccachef, start_time, duration, mask, fmt,
                         oddballs, maxmissing)
    return svl, fmt, dict(anssets), ccache
Exemple #7
0
 def get_subnet(self, mask=24):
     '''
     :param mask: prefix length passed to ipp.make_v4_prefix_mask()
     :return: ipp.int2ip() rendering of this probe's IP ANDed with the mask
     '''
     prefix_bits = ipp.make_v4_prefix_mask(mask)
     network = self.get_ip() & prefix_bits
     return ipp.int2ip(network)
Exemple #8
0
 def get_answers(self, dom):
     '''
     :param dom: domain to look up in self.vec
     :return: list with the ipp.int2ip() form of every answer stored for
         dom, or None when dom is not in self.vec
     '''
     if dom not in self.vec:
         return None
     return [ipp.int2ip(answer) for answer in self.vec[dom]]
Exemple #9
0
 def get_ip_str(self):
     '''
     :return: this probe's IP (from get_ip()) converted with ipp.int2ip()
     '''
     raw_ip = self.get_ip()
     return ipp.int2ip(raw_ip)
Exemple #10
0
 def __str__(self):
     '''
     :return: one "<dom>: <ip>, <ip>, ..." entry per domain in self.vec,
         each followed by a blank line; empty string when vec is empty
     '''
     chunks = []
     for dom in self.vec:
         answers = ", ".join(ipp.int2ip(ip) for ip in self.vec[dom])
         chunks.append(dom + ": " + answers + "\n\n")
     return "".join(chunks)
Exemple #11
0
 def __repr__(self):
     '''
     :return: "smartvec(ip=<probe ip>)", with the IP rendered by
         ipp.int2ip()
     '''
     ip_text = ipp.int2ip(self.get_ip())
     return "".join(("smartvec(ip=", ip_text, ")"))