def test_mastodon(self):
    """A full Mastodon user agent is parsed into every expected field."""
    want = InstanceUserAgent(
        pattern_name='mastodon',
        server='Mastodon',
        version='2.6.5',
        url='https://example.org/',
        http_client='http.rb',
        http_client_version='3.3.0',
    )
    raw_user_agent = 'http.rb/3.3.0 (Mastodon/2.6.5; +https://example.org/)'
    got = classify_user_agent(raw_user_agent)
    self.assertEqual(got, want)
def test_pleroma_mediaproxy(self):
    """A Pleroma MediaProxy user agent yields server, URL, and e-mail."""
    want = InstanceUserAgent(
        pattern_name='pleroma_mediaproxy',
        server='Pleroma',
        version='MediaProxy',
        url='https://example.org',
        email='*****@*****.**',
    )
    raw_user_agent = 'Pleroma/MediaProxy; https://example.org <*****@*****.**>'
    got = classify_user_agent(raw_user_agent)
    self.assertEqual(got, want)
def test_postactiv(self):
    """A postActiv user agent is classified with the postActiv server name."""
    raw_user_agent = 'postActiv/1.0.3-rc1 (Genesis)'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'postActiv')
def test_misskey(self):
    """A Misskey user agent exposes both the server name and its URL."""
    raw_user_agent = 'Misskey/10.66.2 (https://example.org)'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'Misskey')
    self.assertEqual(classified.url, 'https://example.org')
def test_gnu_social_no_url(self):
    """A GNU social user agent whose parenthetical is not a URL has no URL."""
    raw_user_agent = 'GNU social/1.2.1-beta1 (Not decided yet)'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'GNU social')
    self.assertIsNone(classified.url)
def test_gnu_social_with_url(self):
    """A GNU social user agent with a parenthesised URL reports that URL."""
    raw_user_agent = 'GNU social/1.2.1-beta1 (https://example.org)'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'GNU social')
    self.assertEqual(classified.url, 'https://example.org')
def test_frendica(self):
    """A Friendica user agent (with quoted release name) gives server and URL."""
    raw_user_agent = (
        "Friendica 'The Tazmans Flax-lily' 2018.12-rc-1291; "
        "https://example.org"
    )
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'Friendica')
    self.assertEqual(classified.url, 'https://example.org')
def test_pleroma_probably(self):
    """A bare hackney client string is attributed to Pleroma heuristically."""
    raw_user_agent = 'hackney/1.13.0'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'Pleroma')
    self.assertEqual(classified.pattern_name, 'pleroma_probably')
def test_mastodon_probably(self):
    """A bare http.rb client string is attributed to Mastodon heuristically."""
    raw_user_agent = 'http.rb/3.3.0'
    classified = classify_user_agent(raw_user_agent)
    self.assertEqual(classified.server, 'Mastodon')
    self.assertEqual(classified.pattern_name, 'mastodon_probably')
# NOTE: helper annotations below are quoted on purpose, so that they are not
# evaluated at definition time (the original only used most of these names in
# local-variable annotations, which Python never evaluates).

def _tally_user_agent_sightings(
        log_records: 'Iterable[LogRecord]',
) -> 'DefaultDict[bytes, DefaultDict[InstanceUserAgent, TimeWindowAcc]]':
    """Group classified user agents by client IP, accumulating their timestamps.

    Records without a user agent, or whose user agent
    ``classify_user_agent`` does not recognise (returns ``None``), are skipped.
    """
    # noinspection PyTypeHints
    sightings: 'DefaultDict[bytes, DefaultDict[InstanceUserAgent, TimeWindowAcc]]' = \
        DefaultDict(lambda: DefaultDict(TimeWindowAcc))
    for record in log_records:
        if record.user_agent is None:
            continue
        agent = classify_user_agent(record.user_agent)
        if agent is None:
            continue
        sightings[record.ip][agent].add(record.timestamp)
    return sightings


def _user_agent_evidence(
        sightings: 'DefaultDict[bytes, DefaultDict[InstanceUserAgent, TimeWindowAcc]]',
) -> 'Tuple[list, Set[str], Set[Tuple[str, int]]]':
    """Turn user-agent sightings that carry a usable URL into evidence.

    Returns a 3-tuple of:
      * the list of ``UserAgentEvidence`` objects,
      * the set of hostnames extracted from user-agent URLs,
      * the set of ``(hostname, port)`` pairs extracted from those URLs.
    """
    evidence = []
    hostnames: 'Set[str]' = set()
    endpoints: 'Set[Tuple[str, int]]' = set()
    for ip, agents in sightings.items():
        for agent, time_window in agents.items():
            if agent.url is None:
                continue
            hostname_and_port = extract_hostname_and_port(agent.url)
            if hostname_and_port is None:
                continue
            hostname, port = hostname_and_port
            hostnames.add(hostname)
            endpoints.add(hostname_and_port)
            evidence.append(
                UserAgentEvidence(
                    ip=ip,
                    hostname=hostname,
                    domain=get_domain(hostname),
                    port=port,
                    instance_user_agent=agent,
                    time_window=time_window,
                ))
    return evidence, hostnames, endpoints


def _reverse_dns_evidence(ips: 'Set[bytes]', logger: logging.Logger) -> list:
    """Reverse-resolve each IP and emit one evidence item per returned name.

    ``OSError`` from a lookup is logged as a warning (with traceback) and the
    remaining IPs are still processed.
    """
    evidence = []
    for ip in ips:
        ip_str = fmt_ip(ip)
        try:
            # Timestamp is taken before the lookup, not after it completes.
            looked_up_at = datetime.now(timezone.utc)
            hostname, aliases, addresses = socket.gethostbyaddr(ip_str)
            # Drop aliases that are just the reverse-lookup zone names.
            aliases = [
                alias for alias in aliases
                if not alias.endswith(('.in-addr.arpa', '.ip6.arpa'))
            ]
            if addresses != [ip_str]:
                # TODO: when would this happen?
                logger.warning(
                    '%(ip_str)s resolved to multiple IPs: %(addresses)r', {
                        'ip_str': ip_str,
                        'addresses': addresses
                    })
            for name in [hostname] + aliases:
                evidence.append(
                    ReverseDNSEvidence(
                        ip=ip,
                        hostname=name,
                        domain=get_domain(name),
                        time=looked_up_at,
                    ))
        except OSError:
            logger.warning("Exception on reverse DNS lookup for %(ip_str)s!",
                           {'ip_str': ip_str},
                           exc_info=True)
    return evidence


def _forward_dns_evidence(hostnames: 'Set[str]',
                          logger: logging.Logger) -> list:
    """Forward-resolve each hostname and emit one evidence item per address.

    Only ``AF_INET`` results are requested from ``getaddrinfo``.  ``OSError``
    from a lookup is logged as a warning (with traceback) and the remaining
    hostnames are still processed.
    """
    evidence = []
    for hostname in hostnames:
        try:
            looked_up_at = datetime.now(timezone.utc)
            # noinspection PyArgumentList
            for af, _, _, _, sockaddr in socket.getaddrinfo(
                    hostname,
                    None,
                    family=socket.AF_INET,
                    type=socket.SOCK_STREAM,
                    proto=socket.IPPROTO_IP):
                ip_str = sockaddr[0]
                evidence.append(
                    ForwardDNSEvidence(
                        ip=socket.inet_pton(af, ip_str),
                        hostname=hostname,
                        domain=get_domain(hostname),
                        time=looked_up_at,
                    ))
        except OSError:
            logger.warning("Exception on forward DNS lookup for %(hostname)s!",
                           {'hostname': hostname},
                           exc_info=True)
    return evidence


def _instance_probe_evidence(endpoints: 'Set[Tuple[str, int]]',
                             logger: logging.Logger) -> list:
    """Query each ``(hostname, port)`` via ``get_instance_info``.

    A non-``None`` result always yields a ``TLSCertCheckEvidence``.  An
    ``InstanceAPIEvidence`` is added as well when the result has a known
    server type and its self-reported URL maps back to the same hostname
    and port that was queried.
    """
    evidence = []
    for hostname, port in endpoints:
        logger.info("%s:%d", hostname, port)  # DEBUG
        probed_at = datetime.now(timezone.utc)
        agent = get_instance_info(hostname, port)
        if agent is None:
            continue
        evidence.append(
            TLSCertCheckEvidence(
                hostname=hostname,
                domain=get_domain(hostname),
                port=port,
                time=probed_at,
            ))
        if agent.server == UNKNOWN_SERVER_TYPE or agent.url is None:
            continue
        reported_hostname_and_port = extract_hostname_and_port(agent.url)
        if reported_hostname_and_port is None:
            continue
        reported_hostname, reported_port = reported_hostname_and_port
        if hostname == reported_hostname and port == reported_port:
            evidence.append(
                InstanceAPIEvidence(
                    hostname=hostname,
                    domain=get_domain(hostname),
                    port=port,
                    instance_user_agent=agent,
                    time=probed_at,
                ))
    return evidence


def _freeze_instances_by_domain(
        all_evidence: list) -> 'OrderedDict[str, InstanceInfoFrozen]':
    """Accumulate evidence per domain and freeze it, ordered by domain name."""
    # TODO: Ignores ports: I've not seen a non-443 instance yet.
    # Map of domain to instance info accumulator.
    # noinspection PyTypeHints
    instances: 'DefaultDict[str, InstanceInfoAcc]' = DefaultDict(InstanceInfoAcc)
    for evidence in all_evidence:
        instances[evidence.domain].add(evidence)
    frozen: 'OrderedDict[str, InstanceInfoFrozen]' = OrderedDict()
    for domain in sorted(instances):
        frozen[domain] = instances[domain].freeze()
    return frozen


def main(args: List[str]) -> None:
    """Collect instance evidence from log files and dump it to stdout as YAML.

    Evidence is gathered in this order: user agents found in the logs,
    reverse DNS of the client IPs, forward DNS of hostnames named in user
    agents, and finally a direct query of each named ``(hostname, port)``.
    The evidence is then grouped by domain and written to ``sys.stdout``.

    :param args: argument vector; ``args[0]`` is ignored and every remaining
        entry is passed to ``parse_log_file`` as a path.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    log_records_all_files: 'Iterable[LogRecord]' = \
        itertools.chain.from_iterable(parse_log_file(path) for path in args[1:])
    incoming_ips = _tally_user_agent_sightings(log_records_all_files)

    all_evidence, possible_hostnames, possible_hostnames_and_ports = \
        _user_agent_evidence(incoming_ips)
    all_evidence.extend(_reverse_dns_evidence(set(incoming_ips.keys()), logger))
    all_evidence.extend(_forward_dns_evidence(possible_hostnames, logger))
    all_evidence.extend(
        _instance_probe_evidence(possible_hostnames_and_ports, logger))

    frozen = _freeze_instances_by_domain(all_evidence)

    # Dump output as YAML.
    yaml = YAML()
    yaml.indent(mapping=2, sequence=2, offset=1)
    # Hack: prevents !!omap annotation in YAML output
    yaml.dump(CommentedMap(frozen), sys.stdout)