Example #1
0
 def test_mastodon(self):
     # A complete Mastodon user agent: the HTTP client and its version come
     # first, the server name, version and URL follow in parentheses.
     want = InstanceUserAgent(
         pattern_name='mastodon',
         server='Mastodon',
         version='2.6.5',
         url='https://example.org/',
         http_client='http.rb',
         http_client_version='3.3.0',
     )
     user_agent = 'http.rb/3.3.0 (Mastodon/2.6.5; +https://example.org/)'
     got = classify_user_agent(user_agent)
     self.assertEqual(got, want)
Example #2
0
 def test_pleroma_mediaproxy(self):
     # Pleroma's media proxy reports 'MediaProxy' where a version would
     # normally be, followed by the instance URL and a contact address.
     want = InstanceUserAgent(pattern_name='pleroma_mediaproxy',
                              server='Pleroma',
                              version='MediaProxy',
                              url='https://example.org',
                              email='*****@*****.**')
     user_agent = 'Pleroma/MediaProxy; https://example.org <*****@*****.**>'
     got = classify_user_agent(user_agent)
     self.assertEqual(got, want)
Example #3
0
 def test_postactiv(self):
     # Only the server name is checked for postActiv user agents.
     user_agent = 'postActiv/1.0.3-rc1 (Genesis)'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'postActiv')
Example #4
0
 def test_misskey(self):
     # Misskey puts the instance URL in parentheses after the version.
     user_agent = 'Misskey/10.66.2 (https://example.org)'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'Misskey')
     self.assertEqual(result.url, 'https://example.org')
Example #5
0
 def test_gnu_social_no_url(self):
     # The parenthesised part is free text here, not a URL, so no URL must
     # be reported.
     user_agent = 'GNU social/1.2.1-beta1 (Not decided yet)'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'GNU social')
     self.assertIsNone(result.url)
Example #6
0
 def test_gnu_social_with_url(self):
     # Same shape as the no-URL case, but the parentheses hold a real URL.
     user_agent = 'GNU social/1.2.1-beta1 (https://example.org)'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'GNU social')
     self.assertEqual(result.url, 'https://example.org')
Example #7
0
 def test_frendica(self):
     # Friendica separates name, quoted release code name and version with
     # spaces, then appends the instance URL after a semicolon.
     user_agent = ("Friendica 'The Tazmans Flax-lily' 2018.12-rc-1291; "
                   "https://example.org")
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'Friendica')
     self.assertEqual(result.url, 'https://example.org')
Example #8
0
 def test_pleroma_probably(self):
     # A bare hackney client string is classified as Pleroma under the
     # 'pleroma_probably' pattern.
     user_agent = 'hackney/1.13.0'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'Pleroma')
     self.assertEqual(result.pattern_name, 'pleroma_probably')
Example #9
0
 def test_mastodon_probably(self):
     # A bare http.rb client string is classified as Mastodon under the
     # 'mastodon_probably' pattern.
     user_agent = 'http.rb/3.3.0'
     result = classify_user_agent(user_agent)
     self.assertEqual(result.server, 'Mastodon')
     self.assertEqual(result.pattern_name, 'mastodon_probably')
Example #10
0
def main(args: List[str]) -> None:
    """Collect evidence about instances seen in log files and dump it as YAML.

    Steps, in order:

    1. Parse every log file named in ``args[1:]`` and group request
       timestamps by source IP and classified user agent. Records without
       a user agent, or whose user agent cannot be classified, are skipped.
    2. For each user agent that carries a usable URL, record a
       ``UserAgentEvidence`` and remember the hostname and port.
    3. Reverse-resolve every source IP and record ``ReverseDNSEvidence``
       for the canonical name and each alias.
    4. Forward-resolve every remembered hostname (IPv4 only) and record
       ``ForwardDNSEvidence`` for each address.
    5. Call ``get_instance_info`` for every remembered ``(hostname, port)``
       and record ``TLSCertCheckEvidence`` / ``InstanceAPIEvidence``.
    6. Group all evidence by domain, freeze each group, and write the
       result to standard output as YAML, sorted by domain.

    DNS failures are logged as warnings and do not stop the run.

    Args:
        args: Command line in ``sys.argv`` form. ``args[0]`` is ignored;
            each remaining element is passed to ``parse_log_file``.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    all_evidence = []

    # Lazily chain the records of all input files into one stream.
    log_records_all_files: Iterable[LogRecord] = \
        itertools.chain.from_iterable(parse_log_file(path) for path in args[1:])
    # Source IP -> classified user agent -> accumulator of request times.
    # noinspection PyTypeHints
    incoming_ips: DefaultDict[bytes, DefaultDict[InstanceUserAgent, TimeWindowAcc]] = \
        DefaultDict(lambda: DefaultDict(TimeWindowAcc))

    for log_record in log_records_all_files:
        if log_record.user_agent is None:
            continue
        instance_user_agent = classify_user_agent(log_record.user_agent)
        if instance_user_agent is None:
            continue
        incoming_ips[log_record.ip][instance_user_agent].add(
            log_record.timestamp)

    possible_instance_ips: Set[bytes] = set(incoming_ips)
    possible_instance_hostnames: Set[str] = set()
    possible_instance_hostnames_and_ports: Set[Tuple[str, int]] = set()

    # Evidence from user agents that advertise a URL.
    for ip, user_agents in incoming_ips.items():
        for instance_user_agent, time_window in user_agents.items():
            if instance_user_agent.url is None:
                continue
            hostname_and_port = extract_hostname_and_port(
                instance_user_agent.url)
            if hostname_and_port is None:
                continue
            hostname, port = hostname_and_port

            possible_instance_hostnames.add(hostname)
            possible_instance_hostnames_and_ports.add(hostname_and_port)

            all_evidence.append(
                UserAgentEvidence(
                    ip=ip,
                    hostname=hostname,
                    domain=get_domain(hostname),
                    port=port,
                    instance_user_agent=instance_user_agent,
                    time_window=time_window,
                ))

    # Evidence from reverse DNS on every source IP.
    for ip in possible_instance_ips:
        ip_str = fmt_ip(ip)
        try:
            now = datetime.now(timezone.utc)
            hostname, aliases, addresses = socket.gethostbyaddr(ip_str)
            # Drop the reverse-lookup names themselves; only real hostnames
            # are useful as evidence.
            aliases = [
                alias for alias in aliases
                if not alias.endswith(('.in-addr.arpa', '.ip6.arpa'))
            ]
            if addresses != [ip_str]:
                # TODO: when would this happen?
                logger.warning(
                    '%(ip_str)s resolved to multiple IPs: %(addresses)r', {
                        'ip_str': ip_str,
                        'addresses': addresses
                    })

            for alias in [hostname] + aliases:
                all_evidence.append(
                    ReverseDNSEvidence(
                        ip=ip,
                        hostname=alias,
                        domain=get_domain(alias),
                        time=now,
                    ))
        except OSError:
            logger.warning("Exception on reverse DNS lookup for %(ip_str)s!",
                           {'ip_str': ip_str},
                           exc_info=True)

    # Evidence from forward DNS on every advertised hostname (IPv4 only).
    for hostname in possible_instance_hostnames:
        try:
            now = datetime.now(timezone.utc)
            # noinspection PyArgumentList
            for af, _, _, _, sockaddr in socket.getaddrinfo(
                    hostname,
                    None,
                    family=socket.AF_INET,
                    type=socket.SOCK_STREAM,
                    proto=socket.IPPROTO_IP):
                ip_str = sockaddr[0]
                ip = socket.inet_pton(af, ip_str)
                all_evidence.append(
                    ForwardDNSEvidence(
                        ip=ip,
                        hostname=hostname,
                        domain=get_domain(hostname),
                        time=now,
                    ))
        except (OSError, UnicodeError):
            # Hostnames come from logged User-Agent strings, i.e. untrusted
            # input. getaddrinfo IDNA-encodes the name first and raises
            # UnicodeError (not OSError) for empty or over-long labels; treat
            # that like any other failed lookup instead of aborting the run.
            logger.warning("Exception on forward DNS lookup for %(hostname)s!",
                           {'hostname': hostname},
                           exc_info=True)

    # Evidence from querying each advertised host directly.
    for hostname, port in possible_instance_hostnames_and_ports:
        logger.info("%s:%d", hostname, port)  # DEBUG
        now = datetime.now(timezone.utc)
        instance_user_agent = get_instance_info(hostname, port)

        if instance_user_agent is not None:
            # Any non-None answer is recorded as TLS evidence.
            # NOTE(review): presumably get_instance_info validates the
            # certificate while connecting -- confirm against its definition.
            all_evidence.append(
                TLSCertCheckEvidence(
                    hostname=hostname,
                    domain=get_domain(hostname),
                    port=port,
                    time=now,
                ))

            # API evidence only counts when the host identifies a known
            # server type and reports the same hostname and port we asked.
            if instance_user_agent.server != UNKNOWN_SERVER_TYPE \
                    and instance_user_agent.url is not None:
                reported_hostname_and_port = extract_hostname_and_port(
                    instance_user_agent.url)
                if reported_hostname_and_port is not None:
                    reported_hostname, reported_port = reported_hostname_and_port
                    if hostname == reported_hostname and port == reported_port:
                        all_evidence.append(
                            InstanceAPIEvidence(
                                hostname=hostname,
                                domain=get_domain(hostname),
                                port=port,
                                instance_user_agent=instance_user_agent,
                                time=now,
                            ))

    # TODO: Ignores ports: I've not seen a non-443 instance yet.

    # Map of domain to instance info accumulator.
    # noinspection PyTypeHints
    instances: DefaultDict[str, InstanceInfoAcc] = DefaultDict(InstanceInfoAcc)
    for evidence in all_evidence:
        instances[evidence.domain].add(evidence)

    # Freeze in sorted domain order so the output is stable.
    frozen: OrderedDict[str, InstanceInfoFrozen] = OrderedDict()
    for domain in sorted(instances):
        frozen[domain] = instances[domain].freeze()

    # Dump output as YAML.
    yaml = YAML()
    yaml.indent(mapping=2, sequence=2, offset=1)
    yaml.dump(CommentedMap(frozen),
              sys.stdout)  # Hack: prevents !!omap annotation in YAML output