예제 #1
0
def whois(server, port, domain, ignorelist, replacelist):
    """Query a WHOIS server for *domain* and return the de-duplicated list
    of contact e-mail addresses found in the response.

    Results are cached (via _cache_get/_cache_set) under the 'whois' field.

    :param server: WHOIS server hostname or IP
    :param port: WHOIS server TCP port
    :param domain: domain, or a URL starting with 'http' (domain extracted)
    :param ignorelist: addresses to drop, forwarded to process_emails
    :param replacelist: address rewrites, forwarded to process_emails
    :return: list of unique e-mail addresses, or None if none were found
    """
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except OSError:  # only network/connection errors are expected here
        print("Connection problems - check WHOIS server")
        print("WHOIS request while problem occurred: {}".format(domain))
        print("WHOIS server: {}:{}".format(server, port))
        sys.exit(1)  # non-zero: this is an error, not a clean shutdown
    # If a full URL was given, extract the bare domain with Faup.
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    if isinstance(d, bytes):  # Faup may hand back bytes
        d = d.decode('utf-8', 'ignore')
    # WHOIS protocol: send the query terminated by CRLF, then read to EOF.
    # Sockets speak bytes on Python 3; the original str send/recv handling
    # also made the loop's `d == ''` test never match b'' (infinite loop).
    s.send('{}\r\n'.format(d).encode())
    response = b''
    while True:
        chunk = s.recv(4096)
        if not chunk:
            break
        response += chunk
    s.close()
    text = response.decode('utf-8', 'ignore')
    match = re.findall(r'[\w\.-]+@[\w\.-]+', text)
    emails = process_emails(match, ignorelist, replacelist)
    if not emails:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
예제 #2
0
def whois(server, port, domain, ignorelist, replacelist):
    """Query a WHOIS server for *domain* and return the de-duplicated list
    of contact e-mail addresses found in the response.

    Results are cached (via _cache_get/_cache_set) under the 'whois' field.

    :param server: WHOIS server hostname or IP
    :param port: WHOIS server TCP port
    :param domain: domain, or a URL starting with 'http' (domain extracted)
    :param ignorelist: addresses to drop, forwarded to process_emails
    :param replacelist: address rewrites, forwarded to process_emails
    :return: list of unique e-mail addresses, or None if none were found
    """
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except OSError:  # only network/connection errors are expected here
        print("Connection problems - check WHOIS server")
        print("WHOIS request while problem occurred: {}".format(domain))
        print("WHOIS server: {}:{}".format(server, port))
        sys.exit(1)  # non-zero: this is an error, not a clean shutdown
    # If a full URL was given, extract the bare domain with Faup.
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    if isinstance(d, bytes):  # Faup may hand back bytes
        d = d.decode('utf-8', 'ignore')
    # WHOIS protocol: send the query terminated by CRLF, then read to EOF.
    # Sockets speak bytes on Python 3; the original str send/recv handling
    # also made the loop's `d == ''` test never match b'' (infinite loop).
    s.send('{}\r\n'.format(d).encode())
    response = b''
    while True:
        chunk = s.recv(4096)
        if not chunk:
            break
        response += chunk
    s.close()
    text = response.decode('utf-8', 'ignore')
    match = re.findall(r'[\w\.-]+@[\w\.-]+', text)
    emails = process_emails(match, ignorelist, replacelist)
    if not emails:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail
예제 #3
0
def getmisp_urls(key, url, timeframe):
    """Fetch 'url'-type attributes from a MISP instance via its REST search.

    :param key: MISP API authorization key
    :param url: MISP REST search endpoint URL
    :param timeframe: value for MISP's "last" filter (e.g. '1d')
    :return: list of dicts (domain/eventid/category/timestamp); only
             entries whose extracted domain looks like an IPv4 address
             are kept, and events listed in the global ignore_eventid
             are skipped.
    """
    response_domains = []
    headers = {
        'Authorization': '{}'.format(key),
        'Content-type': 'application/json',
        'Accept': 'application/json'
    }
    payload = '{ "returnFormat": "json", "type": "url", "last": "%s", "enforceWarninglist": true }' % timeframe
    # NOTE(review): verify=False disables TLS certificate validation.
    response = requests.post(url, headers=headers, data=payload, verify=False)
    json_response = json.loads(response.text)
    fp = Faup()
    try:
        for attr in json_response['response']['Attribute']:
            # Do not shadow the `url` parameter with the attribute value.
            attr_url = attr['value']
            eventid = attr['event_id']
            if eventid not in ignore_eventid:
                category = attr['category']
                timestamp = datetime.datetime.utcfromtimestamp(
                    int(attr['timestamp'])).strftime('%Y-%m-%d')
                fp.decode(attr_url)
                domain = fp.get_domain()
                if re.match(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain):
                    response_domains.append({
                        'domain': domain,
                        'eventid': eventid,
                        'category': category,
                        'timestamp': timestamp
                    })

        return response_domains
    except (KeyError, TypeError, ValueError):
        # Malformed or empty MISP response: return what was collected so
        # far instead of swallowing every exception with a bare except.
        return response_domains
예제 #4
0
 def process(self):
     """Print one 'tld,domain,reversed-subdomains' CSV line per stored domain."""
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # Bug fix: the original applied .replace(',,', ',') to the return
         # value of print() (None on Python 3), raising AttributeError.
         # Build the line first, then print it.
         line = (fex.get_tld() + ',' + fex.get_domain() + ',' +
                 ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', ''))
         print(line.replace(',,', ','))
예제 #5
0
 def __post_init__(self):
     """Derive top_level_domain/domain/subdomain from the address when
     no domain was supplied at construction time."""
     if self.domain is not None:
         return
     # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
     parser = Faup()
     parser.decode(self.address.rsplit("@", 1)[-1])
     self.top_level_domain = parser.get_tld()
     self.domain = parser.get_domain()
     self.subdomain = parser.get_subdomain()
예제 #6
0
 def process(self):
     """Print one 'tld,domain,reversed-subdomains' CSV line per stored domain."""
     list_domains = self.db['new_domaines'].distinct('domaine')
     fex = Faup()
     for domain in list_domains:
         url = 'http://' + str(domain)
         fex.decode(url, False)
         # Bug fix: .replace(',,', ',') was chained onto print()'s return
         # value (None), raising AttributeError at runtime on Python 3.
         # Apply it to the string before printing.
         line = (fex.get_tld() + ',' + fex.get_domain() + ',' +
                 ','.join(fex.get_subdomain().split('.')[::-1]).replace('www', ''))
         print(line.replace(',,', ','))
예제 #7
0
    def __post_init__(self):
        """Populate the URL components (scheme, TLD, domain, subdomain,
        resource path) by parsing self.url."""
        # Example code at https://programtalk.com/python-examples-amp/pyfaup.faup.Faup/
        parser = Faup()
        parser.decode(self.url)

        self.scheme = parser.get_scheme()
        self.top_level_domain = parser.get_tld()
        self.domain = parser.get_domain()
        self.subdomain = parser.get_subdomain()
        self.path = parser.get_resource_path()
예제 #8
0
    misp.add_hashes(new_event, md5=h)
# Register each extracted SHA1/SHA256 hash on the new MISP event.
for h in hashlist_sha1:
    misp.add_hashes(new_event, sha1=h)
for h in hashlist_sha256:
    misp.add_hashes(new_event, sha256=h)

# If any hash was found at all, tag the event with the hash-only tags.
if (len(hashlist_md5) > 0) or (len(hashlist_sha1) > 0) or (len(hashlist_sha256)
                                                           > 0):
    for tag in hash_only_tags:
        misp.add_tag(new_event, tag)

# Add IOCs and expanded information to MISP
for entry in urllist:
    ids_flag = True
    f.decode(entry)  # f: Faup URL parser instance created elsewhere in the file
    domainname = f.get_domain().decode('utf-8', 'ignore')
    hostname = f.get_host().decode('utf-8', 'ignore')
    try:
        schema = f.get_scheme().decode('utf-8', 'ignore')
    except:  # NOTE(review): bare except — likely guarding a None scheme, but hides other errors
        schema = False
    if debug:
        syslog.syslog(domainname)
    if domainname not in excludelist:
        if domainname in internallist:
            # Internal-reference domains are stored as plain links,
            # never flagged for IDS, with distribution restricted to 0.
            misp.add_named_attribute(new_event,
                                     'link',
                                     entry,
                                     category='Internal reference',
                                     to_ids=False,
                                     distribution=0)
예제 #9
0
# Extract file-hash IOCs (MD5/SHA1/SHA256) from the raw e-mail text.
hashlist_md5 = re.findall(hashmarker.MD5_REGEX, email_data)
hashlist_sha1 = re.findall(hashmarker.SHA1_REGEX, email_data)
hashlist_sha256 = re.findall(hashmarker.SHA256_REGEX, email_data)

# Register every extracted hash on the new MISP event.
for h in hashlist_md5:
    misp.add_hashes(new_event, md5=h)
for h in hashlist_sha1:
    misp.add_hashes(new_event, sha1=h)
for h in hashlist_sha256:
    misp.add_hashes(new_event, sha256=h)

# Add IOCs and expanded information to MISP
for entry in urllist:
    ids_flag = True
    f.decode(entry)  # f: Faup URL parser instance created elsewhere in the file
    domainname = f.get_domain().lower()
    hostname = f.get_host().lower()
    if debug:
        target.write(domainname + "\n")
    if domainname not in excludelist:
        if domainname in externallist:
            # Known external-analysis domains are stored as plain links.
            misp.add_named_attribute(new_event,
                                     'link',
                                     entry,
                                     category='External analysis',
                                     to_ids=False)
        else:
            # Known-benign hosts keep their attribute but lose the IDS flag.
            if (domainname in noidsflaglist) or (hostname in noidsflaglist):
                ids_flag = False
            if debug:
                target.write(entry + "\n")
예제 #10
0
        # Python 2 fragment (print statement, unicode()); truncated at the end.
        if message is not None:
            filename, score = message.split()

            # Only re-process when the paste differs from the previous one.
            if prec_filename is None or filename != prec_filename:
                domains_list = []
                PST = Paste.Paste(filename)
                client = ip2asn()
                for x in PST.get_regex(url_regex):
                    # NOTE(review): re.search on the full content always yields
                    # the FIRST match regardless of x — looks unintended.
                    matching_url = re.search(url_regex, PST.get_p_content())
                    url = matching_url.group(0)

                    to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
                    p.populate_set_out(to_send, 'Url')

                    faup.decode(url)
                    domain = faup.get_domain()
                    subdomain = faup.get_subdomain()
                    f1 = None

                    domains_list.append(domain)

                    publisher.debug('{} Published'.format(url))

                    # NOTE(review): f1 is always None here, so this branch is dead.
                    if f1 == "onion":
                        print domain

                    # Resolve subdomain+domain with a 1-second DNS timeout.
                    hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
                    try:
                        socket.setdefaulttimeout(1)
                        ip = socket.gethostbyname(unicode(hostl))
                    except:
예제 #11
0
class Mail2MISP():
    """Parse an e-mail, extract IOCs (hashes, URLs, hostnames, attachments)
    and push them to a MISP instance as a new event.

    With offline=True the event is only built locally; with urlsonly=True
    only link attributes are added to an existing event (self.urlsonly then
    holds the target event, not a boolean False).
    """

    def __init__(self,
                 misp_url,
                 misp_key,
                 verifycert,
                 config,
                 offline=False,
                 urlsonly=False):
        # Only open a MISP connection when not running offline.
        self.offline = offline
        if not self.offline:
            self.misp = ExpandedPyMISP(misp_url,
                                       misp_key,
                                       verifycert,
                                       debug=config.debug)
        self.config = config
        self.urlsonly = urlsonly
        # DNS expansion defaults to on, but is forced off outside urlsonly mode.
        if not hasattr(self.config, 'enable_dns'):
            setattr(self.config, 'enable_dns', True)
        if self.urlsonly is False:
            setattr(self.config, 'enable_dns', False)
        self.debug = self.config.debug
        self.config_from_email_body = {}
        # Init Faup
        self.f = Faup()
        # (value, source) pairs queued until add_event() publishes them.
        self.sightings_to_add = []

    def load_email(self, pseudofile):
        """Parse the raw e-mail bytes and initialize the MISP event shell."""
        self.pseudofile = pseudofile
        self.original_mail = message_from_bytes(self.pseudofile.getvalue(),
                                                policy=policy.default)
        self.subject = self.original_mail.get('Subject')
        try:
            self.sender = self.original_mail.get('From')
        except:
            # NOTE(review): bare except — get('From') rarely raises; kept as-is.
            self.sender = "<unknown sender>"

        # Remove words from subject
        for removeword in self.config.removelist:
            self.subject = re.sub(removeword, "", self.subject).strip()

        # Initialize the MISP event
        self.misp_event = MISPEvent()
        self.misp_event.info = f'{self.config.email_subject_prefix} - {self.subject}'
        self.misp_event.distribution = self.config.default_distribution
        self.misp_event.threat_level_id = self.config.default_threat_level
        self.misp_event.analysis = self.config.default_analysis

    def sighting(self, value, source):
        # Push a single sighting to the remote MISP instance.
        if self.offline:
            raise Exception('The script is running in offline mode, ')
        # NOTE(review): the string below sits after the raise, so it is a
        # plain expression statement, not this method's docstring.
        '''Add a sighting'''
        s = MISPSighting()
        s.from_dict(value=value, source=source)
        self.misp.add_sighting(s)

    def _find_inline_forward(self):
        '''Does the body contains a forwarded email?'''
        for identifier in self.config.forward_identifiers:
            if identifier in self.clean_email_body:
                # Split once at the identifier; everything after it is
                # treated as the forwarded message.
                self.clean_email_body, fw_email = self.clean_email_body.split(
                    identifier)
                return self.forwarded_email(
                    pseudofile=BytesIO(fw_email.encode()))

    def _find_attached_forward(self):
        """Process attachments: forwarded e-mails recurse, everything else
        becomes a file/attachment object on the event."""
        forwarded_emails = []
        for attachment in self.original_mail.iter_attachments():
            attachment_content = attachment.get_content()
            # Search for email forwarded as attachment
            # I could have more than one, attaching everything.
            if isinstance(attachment_content, message.EmailMessage):
                forwarded_emails.append(
                    self.forwarded_email(
                        pseudofile=BytesIO(attachment_content.as_bytes())))
            else:
                if isinstance(attachment_content, str):
                    attachment_content = attachment_content.encode()
                filename = attachment.get_filename()
                if not filename:
                    filename = 'missing_filename'
                if self.config_from_email_body.get(
                        'attachment'
                ) == self.config.m2m_benign_attachment_keyword:
                    # Attach sane file
                    self.misp_event.add_attribute(
                        'attachment',
                        value=filename,
                        data=BytesIO(attachment_content))
                else:
                    # Build file objects (PE/ELF/Mach-O sections included).
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=BytesIO(attachment_content),
                        filename=filename,
                        standalone=False)
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        [
                            self.misp_event.add_object(section)
                            for section in sections
                        ]
        return forwarded_emails

    def email_from_spamtrap(self):
        '''The email comes from a spamtrap and should be attached as-is.'''
        raw_body = self.original_mail.get_body(preferencelist=('html',
                                                               'plain'))
        if raw_body:
            self.clean_email_body = html.unescape(
                raw_body.get_payload(decode=True).decode(
                    'utf8', 'surrogateescape'))
        else:
            self.clean_email_body = ''
        return self.forwarded_email(self.pseudofile)

    def forwarded_email(self, pseudofile: BytesIO):
        '''Extracts all possible indicators out of an email and create a MISP event out of it.
        * Gets all relevant Headers
        * Attach the body
        * Create MISP file objects (uses lief if possible)
        * Set all references
        '''
        email_object = EMailObject(pseudofile=pseudofile,
                                   attach_original_mail=True,
                                   standalone=False)
        if email_object.attachments:
            # Create file objects for the attachments
            for attachment_name, attachment in email_object.attachments:
                if not attachment_name:
                    attachment_name = 'NameMissing.txt'
                if self.config_from_email_body.get(
                        'attachment'
                ) == self.config.m2m_benign_attachment_keyword:
                    a = self.misp_event.add_attribute('attachment',
                                                      value=attachment_name,
                                                      data=attachment)
                    email_object.add_reference(a.uuid, 'related-to',
                                               'Email attachment')
                else:
                    f_object, main_object, sections = make_binary_objects(
                        pseudofile=attachment,
                        filename=attachment_name,
                        standalone=False)
                    if self.config.vt_key:
                        # Optionally enrich the file object with a VirusTotal
                        # report, keyed on the file's sha256.
                        try:
                            vt_object = VTReportObject(
                                self.config.vt_key,
                                f_object.get_attributes_by_relation(
                                    'sha256')[0].value,
                                standalone=False)
                            self.misp_event.add_object(vt_object)
                            f_object.add_reference(vt_object.uuid,
                                                   'analysed-with')
                        except InvalidMISPObject as e:
                            print(e)
                            pass
                    self.misp_event.add_object(f_object)
                    if main_object:
                        self.misp_event.add_object(main_object)
                        for section in sections:
                            self.misp_event.add_object(section)
                    email_object.add_reference(f_object.uuid, 'related-to',
                                               'Email attachment')
        self.process_body_iocs(email_object)
        # Attach the original mail itself when configured (spamtrap mode,
        # global config, or the in-body config flag).
        if self.config.spamtrap or self.config.attach_original_mail or self.config_from_email_body.get(
                'attach_original_mail'):
            self.misp_event.add_object(email_object)
        return email_object

    def process_email_body(self):
        """Decode the mail body, parse in-body config lines, and hunt for
        inline/attached forwarded e-mails."""
        mail_as_bytes = self.original_mail.get_body(
            preferencelist=('html', 'plain')).get_payload(decode=True)
        if mail_as_bytes:
            self.clean_email_body = html.unescape(
                mail_as_bytes.decode('utf8', 'surrogateescape'))
            # Check if there are config lines in the body & convert them to a python dictionary:
            #   <config.body_config_prefix>:<key>:<value> => {<key>: <value>}
            self.config_from_email_body = {
                k.strip(): v.strip()
                for k, v in re.findall(
                    f'{self.config.body_config_prefix}:(.*):(.*)',
                    self.clean_email_body)
            }
            if self.config_from_email_body:
                # ... remove the config lines from the body
                self.clean_email_body = re.sub(
                    rf'^{self.config.body_config_prefix}.*\n?',
                    '',
                    html.unescape(
                        self.original_mail.get_body(
                            preferencelist=('html', 'plain')).get_payload(
                                decode=True).decode('utf8',
                                                    'surrogateescape')),
                    flags=re.MULTILINE)
            # Check if autopublish key is present and valid
            if self.config_from_email_body.get(
                    'm2mkey') == self.config.m2m_key:
                # The sender proved knowledge of the shared key: honour the
                # event-level overrides from the mail body.
                if self.config_from_email_body.get('distribution') is not None:
                    self.misp_event.distribution = self.config_from_email_body.get(
                        'distribution')
                if self.config_from_email_body.get('threat_level') is not None:
                    self.misp_event.threat_level_id = self.config_from_email_body.get(
                        'threat_level')
                if self.config_from_email_body.get('analysis') is not None:
                    self.misp_event.analysis = self.config_from_email_body.get(
                        'analysis')
                if self.config_from_email_body.get('publish'):
                    self.misp_event.publish()

            self._find_inline_forward()
        else:
            self.clean_email_body = ''
        self._find_attached_forward()

    def process_body_iocs(self, email_object=None):
        """Extract hashes, URLs, IPs and hostnames from the (cleaned) body
        and add them as attributes/objects on the MISP event."""
        if email_object:
            body = html.unescape(
                email_object.email.get_body(
                    preferencelist=('html',
                                    'plain')).get_payload(decode=True).decode(
                                        'utf8', 'surrogateescape'))
        else:
            body = self.clean_email_body

        # Cleanup body content
        # Depending on the source of the mail, there is some cleanup to do. Ignore lines in body of message
        for ignoreline in self.config.ignorelist:
            body = re.sub(rf'^{ignoreline}.*\n?', '', body, flags=re.MULTILINE)

        # Remove everything after the stopword from the body
        body = body.split(self.config.stopword, 1)[0]

        # Add tags to the event if keywords are found in the mail
        for tag in self.config.tlptags:
            for alternativetag in self.config.tlptags[tag]:
                if alternativetag in body.lower():
                    self.misp_event.add_tag(tag)

        # Prepare extraction of IOCs
        # Refang email data
        body = refang(body)

        # Extract and add hashes
        contains_hash = False
        for h in set(re.findall(hashmarker.MD5_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'md5', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA1_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha1', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))
        for h in set(re.findall(hashmarker.SHA256_REGEX, body)):
            contains_hash = True
            attribute = self.misp_event.add_attribute(
                'sha256', h, enforceWarninglist=self.config.enforcewarninglist)
            if email_object:
                email_object.add_reference(attribute.uuid, 'contains')
            if self.config.sighting:
                self.sightings_to_add.append((h, self.config.sighting_source))

        if contains_hash:
            [
                self.misp_event.add_tag(tag)
                for tag in self.config.hash_only_tags
            ]

        # # Extract network IOCs
        urllist = []
        urllist += re.findall(urlmarker.WEB_URL_REGEX, body)
        urllist += re.findall(urlmarker.IP_REGEX, body)
        if self.debug:
            syslog.syslog(str(urllist))

        hostname_processed = []

        # Add IOCs and expanded information to MISP
        for entry in set(urllist):
            ids_flag = True
            self.f.decode(entry)

            domainname = self.f.get_domain()
            if domainname in self.config.excludelist:
                # Ignore the entry
                continue

            hostname = self.f.get_host()

            scheme = self.f.get_scheme()
            # NOTE(review): `scheme = scheme` is a no-op (same for
            # resource_path below) — possibly a leftover of a decode step.
            if scheme:
                scheme = scheme

            resource_path = self.f.get_resource_path()
            if resource_path:
                resource_path = resource_path

            if self.debug:
                syslog.syslog(domainname)

            if domainname in self.config.internallist and self.urlsonly is False:  # Add link to internal reference unless in urlsonly mode
                attribute = self.misp_event.add_attribute(
                    'link',
                    entry,
                    category='Internal reference',
                    to_ids=False,
                    enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            elif domainname in self.config.externallist or self.urlsonly is False:  # External analysis
                attribute = self.misp_event.add_attribute(
                    'link',
                    entry,
                    category='External analysis',
                    to_ids=False,
                    enforceWarninglist=False)
                if email_object:
                    email_object.add_reference(attribute.uuid, 'contains')
            # NOTE(review): together the two elif conditions above cover every
            # remaining case (urlsonly falsy or truthy), which appears to make
            # the final `else` branch below unreachable — verify intent.
            elif domainname in self.config.externallist or self.urlsonly:  # External analysis
                if self.urlsonly:
                    comment = self.subject + " (from: " + self.sender + ")"
                else:
                    comment = ""
                attribute = self.misp.add_attribute(
                    self.urlsonly, {
                        "type": 'link',
                        "value": entry,
                        "category": 'External analysis',
                        "to_ids": False,
                        "comment": comment
                    })
                for tag in self.config.tlptags:
                    for alternativetag in self.config.tlptags[tag]:
                        if alternativetag in self.subject.lower():
                            self.misp.tag(attribute["uuid"], tag)
                            new_subject = comment.replace(alternativetag, '')
                            self.misp.change_comment(attribute["uuid"],
                                                     new_subject)

            else:  # The URL is probably an indicator.
                comment = ""
                if (domainname in self.config.noidsflaglist) or (
                        hostname in self.config.noidsflaglist):
                    ids_flag = False
                    comment = "Known host (mostly for connectivity test or IP lookup)"
                if self.debug:
                    syslog.syslog(str(entry))

                if scheme:
                    # A scheme means this is a full URL, not a bare host/IP.
                    if is_ip(hostname):
                        attribute = self.misp_event.add_attribute(
                            'url',
                            entry,
                            to_ids=False,
                            enforceWarninglist=self.config.enforcewarninglist)
                        if email_object:
                            email_object.add_reference(attribute.uuid,
                                                       'contains')
                    else:
                        if resource_path:  # URL has path, ignore warning list
                            attribute = self.misp_event.add_attribute(
                                'url',
                                entry,
                                to_ids=ids_flag,
                                enforceWarninglist=False,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(
                                    attribute.uuid, 'contains')
                        else:  # URL has no path
                            attribute = self.misp_event.add_attribute(
                                'url',
                                entry,
                                to_ids=ids_flag,
                                enforceWarninglist=self.config.
                                enforcewarninglist,
                                comment=comment)
                            if email_object:
                                email_object.add_reference(
                                    attribute.uuid, 'contains')
                    if self.config.sighting:
                        self.sightings_to_add.append(
                            (entry, self.config.sighting_source))

                if hostname in hostname_processed:
                    # Hostname already processed.
                    continue

                hostname_processed.append(hostname)
                if self.config.sighting:
                    self.sightings_to_add.append(
                        (hostname, self.config.sighting_source))

                if self.debug:
                    syslog.syslog(hostname)

                comment = ''
                port = self.f.get_port()
                if port:
                    port = port
                    comment = f'on port: {port}'

                if is_ip(hostname):
                    attribute = self.misp_event.add_attribute(
                        'ip-dst',
                        hostname,
                        to_ids=ids_flag,
                        enforceWarninglist=self.config.enforcewarninglist,
                        comment=comment)
                    if email_object:
                        email_object.add_reference(attribute.uuid, 'contains')
                else:
                    # Optionally expand the hostname to its A records.
                    related_ips = []
                    if HAS_DNS and self.config.enable_dns:
                        try:
                            syslog.syslog(hostname)
                            for rdata in dns.resolver.query(hostname, 'A'):
                                if self.debug:
                                    syslog.syslog(str(rdata))
                                related_ips.append(rdata.to_text())
                        except Exception as e:
                            if self.debug:
                                syslog.syslog(str(e))

                    if related_ips:
                        hip = MISPObject(name='ip-port')
                        hip.add_attribute(
                            'hostname',
                            value=hostname,
                            to_ids=ids_flag,
                            enforceWarninglist=self.config.enforcewarninglist,
                            comment=comment)
                        for ip in set(related_ips):
                            hip.add_attribute('ip',
                                              type='ip-dst',
                                              value=ip,
                                              to_ids=False,
                                              enforceWarninglist=self.config.
                                              enforcewarninglist)
                        self.misp_event.add_object(hip)
                        if email_object:
                            email_object.add_reference(hip.uuid, 'contains')
                    else:
                        # NOTE(review): when urlsonly is truthy, `attribute`
                        # below may be unbound — the add_reference call then
                        # raises; verify against the (likely dead) branch above.
                        if self.urlsonly is False:
                            attribute = self.misp_event.add_attribute(
                                'hostname',
                                value=hostname,
                                to_ids=ids_flag,
                                enforceWarninglist=self.config.
                                enforcewarninglist,
                                comment=comment)
                        if email_object:
                            email_object.add_reference(attribute.uuid,
                                                       'contains')

    def add_event(self):
        '''Add event on the remote MISP instance.'''

        # Add additional tags depending on others
        tags = []
        for tag in [t.name for t in self.misp_event.tags]:
            if self.config.dependingtags.get(tag):
                tags += self.config.dependingtags.get(tag)

        # Add additional tags according to configuration
        for malware in self.config.malwaretags:
            if malware.lower() in self.subject.lower():
                tags += self.config.malwaretags.get(malware)
        if tags:
            [self.misp_event.add_tag(tag) for tag in tags]

        # Ensure the event carries at least the default TLP tag.
        has_tlp_tag = False
        for tag in [t.name for t in self.misp_event.tags]:
            if tag.lower().startswith('tlp'):
                has_tlp_tag = True
        if not has_tlp_tag:
            self.misp_event.add_tag(self.config.tlptag_default)

        if self.offline:
            return self.misp_event.to_json()
        event = self.misp.add_event(self.misp_event, pythonify=True)
        # Flush the queued sightings once the event exists remotely.
        if self.config.sighting:
            for value, source in self.sightings_to_add:
                self.sighting(value, source)
        return event
예제 #12
0
class Query():
    def __init__(self, loglevel: int = logging.DEBUG):
        """Redis-backed cache and lookup helpers for URL analysis."""
        self.__init_logger(loglevel)
        # URL parser instance shared by all lookup helpers.
        self.fex = Faup()
        # DB 1 on the 'cache' unix socket; stored values are JSON strings.
        self.cache = Redis(unix_socket_path=get_socket_path('cache'), db=1,
                           decode_responses=True)

    def __init_logger(self, loglevel) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def _cache_set(self, key, value, field=None):
        """Store *value* (JSON-encoded) under *key* with a one-hour TTL.

        When *field* is given, store it in a hash field of *key* instead
        of a plain string key.
        """
        if field is None:
            # redis-py >= 3 expects setex(name, time, value); the previous
            # (key, value, time) order is the legacy 2.x signature and
            # raises a DataError on modern clients.
            self.cache.setex(key, 3600, json.dumps(value))
        else:
            self.cache.hset(key, field, json.dumps(value))
            self.cache.expire(key, 3600)

    def _cache_get(self, key, field=None):
        if field is None:
            value_json = self.cache.get(key)
        else:
            value_json = self.cache.hget(key, field)
        if value_json is not None:
            return json.loads(value_json)
        return None

    def to_bool(self, s):
        """
        Converts the given string to a boolean.

        Case-insensitive; '1', 'true', 'yes' and 'on' are truthy.
        """
        truthy = ('1', 'true', 'yes', 'on')
        return s.lower() in truthy

    def get_submissions(self, url, day=None):
        """Return the submission count (zset score) of *url* for *day*.

        *day* defaults to today; otherwise it must be a date object.
        """
        day = (date.today() if day is None else day).isoformat()
        return self.cache.zscore(f'{day}_submissions', url)

    def get_mail_sent(self, url, day=None):
        """Check whether a mail was already sent for url's host on *day*."""
        day = (date.today() if day is None else day).isoformat()
        self.fex.decode(url)
        return self.cache.sismember(f'{day}_mails', self.fex.get_host())

    def set_mail_sent(self, url, day=None):
        """Record that a mail was sent for url's host (per-day Redis set)."""
        day = (date.today() if day is None else day).isoformat()
        self.fex.decode(url)
        return self.cache.sadd(f'{day}_mails', self.fex.get_host())

    def is_valid_url(self, url):
        """Normalize and validate *url*; returns (ok, normalized_url, reason).

        Also counts the submission in today's statistics zset.  Note: a
        cached positive result round-trips through JSON and therefore
        comes back as a list, not a tuple.
        """
        cached = self._cache_get(url, 'valid')
        key = f'{date.today().isoformat()}_submissions'
        self.cache.zincrby(key, 1, url)
        if cached is not None:
            return cached
        if url.startswith('hxxp'):
            # Defanged URL: hxxp(s) -> http(s)
            url = 'http' + url[4:]
        elif not url.startswith('http'):
            url = 'http://' + url
        # Use the instance logger for consistency with __init_logger
        # (previously logged through the root logger).
        self.logger.debug("Checking validity of URL: " + url)
        self.fex.decode(url)
        scheme = self.fex.get_scheme()
        host = self.fex.get_host()
        if scheme is None or host is None:
            reason = "Not a valid http/https URL/URI"
            # Negative results are deliberately not cached.
            return False, url, reason
        self._cache_set(url, (True, url, None), 'valid')
        return True, url, None

    def is_ip(self, host):
        """True when *host* parses as an IPv4 or IPv6 address literal."""
        try:
            ipaddress.ip_address(host)
        except ValueError:
            return False
        return True

    def try_resolve(self, url):
        """Resolve the URL's host to an A record; returns (ok, reason)."""
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        # IP literals need no DNS lookup.
        if self.is_ip(host):
            return True, None
        try:
            answers = dns.resolver.query(host, 'A')
        except Exception:
            return False, "DNS server problem. Check resolver settings."
        if not answers:
            return False, "Host " + host + " does not exist."
        return True, None

    def get_urls(self, url, depth=1):
        """Yield *url* plus every URL reached via HTTP or meta-refresh redirects.

        Generator.  *depth* counts meta-refresh hops; recursion stops after 5.
        """
        if depth > 5:
            print('Too many redirects.')
            return

        def meta_redirect(content):
            # Extract the target of an HTML <meta http-equiv="refresh"> tag,
            # or return None when the page has no such redirect.
            c = content.lower()
            soup = BeautifulSoup(c, "html.parser")
            for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
                if result:
                    # The attribute looks like "5; url=http://target".
                    out = result["content"].split(";")
                    if len(out) == 2:
                        wait, text = out
                        try:
                            a, url = text.split('=', 1)
                            return url.strip()
                        except Exception:
                            print(text)
            return None

        resolve, reason = self.try_resolve(url)
        if not resolve:
            # FIXME: inform that the domain does not resolve
            yield url
            return

        logging.debug(f"Making HTTP connection to {url}")

        headers = {
            'User-agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'
        }
        try:
            # verify=False: suspicious sites frequently have broken TLS.
            response = requests.get(url,
                                    allow_redirects=True,
                                    headers=headers,
                                    timeout=15,
                                    verify=False)
        except Exception:
            # That one can fail (DNS for example)
            # FIXME: inform that the get failed
            yield url
            return
        if response.history is not None:
            for h in response.history:
                # Yield the urls in the order we find them
                yield h.url

        yield response.url

        # Follow an HTML meta-refresh redirect, resolving a relative target
        # against the current URL's scheme/host/port.
        meta_redir_url = meta_redirect(response.content)
        if meta_redir_url is not None:
            depth += 1
            if not meta_redir_url.startswith('http'):
                self.fex.decode(url)
                base = '{}://{}'.format(self.fex.get_scheme(),
                                        self.fex.get_host())
                port = self.fex.get_port()
                if port is not None:
                    base += f':{port}'
                if not meta_redir_url.startswith('/'):
                    # relative redirect. resource_path has the initial '/'
                    if self.fex.get_resource_path() is not None:
                        base += self.fex.get_resource_path()
                if not base.endswith('/'):
                    base += '/'
                meta_redir_url = base + meta_redir_url
            for url in self.get_urls(meta_redir_url, depth):
                yield url

    def url_list(self, url):
        """Return the (cached) ordered, de-duplicated redirect chain of *url*."""
        cached = self._cache_get(url, 'list')
        if cached is not None:
            return cached
        chain = []
        for candidate in self.get_urls(url):
            if candidate is None or candidate in chain:
                continue
            chain.append(candidate)
        self._cache_set(url, chain, 'list')
        return chain

    def dns_resolve(self, url):
        """Resolve the URL's host to its addresses.

        Returns (ipv4_list, ipv6_list); either element is None when no
        address of that family exists.  Cached for an hour (the cached
        tuple round-trips through JSON and comes back as a list).
        """
        cached = self._cache_get(url, 'dns')
        if cached is not None:
            return cached
        self.fex.decode(url)
        host = self.fex.get_host().lower()
        ipv4 = None
        ipv6 = None
        if self.is_ip(host):
            # Host is already an IP literal: classify it without DNS.
            if ':' in host:
                try:
                    socket.inet_pton(socket.AF_INET6, host)
                    ipv6 = [host]
                except Exception:
                    pass
            else:
                try:
                    socket.inet_aton(host)
                    ipv4 = [host]
                except Exception:
                    pass
        else:
            try:
                ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
            except Exception:
                # Instance logger for consistency (was the root logger before).
                self.logger.debug("No IPv4 address assigned to: " + host)
            try:
                ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
            except Exception:
                self.logger.debug("No IPv6 address assigned to: " + host)
        self._cache_set(url, (ipv4, ipv6), 'dns')
        return ipv4, ipv6

    def phish_query(self, url, key, query):
        """Look up *query* on PhishTank; return the detail-page URL if known."""
        cached = self._cache_get(query, 'phishtank')
        if cached is not None:
            return cached
        payload = {'url': quote(query), 'format': 'json', 'app_key': key}
        res = requests.post(url, data=payload).json()
        if res["meta"]["status"] == "success" and res["results"]["in_database"]:
            detail = res["results"]["phish_detail_page"]
            self._cache_set(query, detail, 'phishtank')
            return detail
        # Unknown URLs and 'error' statuses both fall through silently.
        return None

    def sphinxsearch(self, server, port, url, query):
        """Search the Sphinx ticket index for *query*.

        Disabled: deemed too dangerous to expose on the public interface,
        so it always returns an empty string.  Note: the previous
        definition was missing ``self``, so calling it as a bound method
        with four arguments raised a TypeError.
        """
        # WARNING: too dangerous to have on the public interface
        return ''
        """
        if not sphinx:
            return None
        cached = _cache_get(query, 'sphinx')
        if cached is not None:
            return cached
        client = sphinxapi.SphinxClient()
        client.SetServer(server, port)
        client.SetMatchMode(2)
        client.SetConnectTimeout(5.0)
        result = []
        res = client.Query(query)
        if res.get("matches") is not None:
            for ticket in res["matches"]:
                ticket_id = ticket["id"]
                ticket_link = url + str(ticket_id)
                result.append(ticket_link)
        _cache_set(query, result, 'sphinx')
        return result

        """

    def vt_query_url(self, url, url_up, key, query, upload=True):
        """Query VirusTotal for *query*; returns (msg, link, positives, total)."""
        cached = self._cache_get(query, 'vt')
        # Only trust the cache once VT has reported detection counts.
        if cached is not None and cached[2] is not None:
            return cached
        payload = {"resource": query, "apikey": key}
        if upload:
            payload['scan'] = 1
        response = requests.post(url, data=payload)
        if not response.text:
            return None
        res = response.json()
        result = (res["verbose_msg"], res.get("permalink"),
                  res.get("positives"), res.get("total"))
        self._cache_set(query, result, 'vt')
        return result

    def gsb_query(self, url, query):
        """Query Google Safe Browsing; returns the response body when flagged,
        otherwise None."""
        cached = self._cache_get(query, 'gsb')
        if cached is not None:
            return cached
        body = '1\n' + query
        response = requests.post(url, data=body)
        if response.status_code != 200:
            return None
        self._cache_set(query, response.text, 'gsb')
        return response.text

    '''
    def urlquery_query(url, key, query):
        return None
        cached = _cache_get(query, 'urlquery')
        if cached is not None:
            return cached
        try:
            urlquery.url = url
            urlquery.key = key
            response = urlquery.search(query)
        except Exception:
            return None
        if response['_response_']['status'] == 'ok':
            if response.get('reports') is not None:
                total_alert_count = 0
                for r in response['reports']:
                    total_alert_count += r['urlquery_alert_count']
                    total_alert_count += r['ids_alert_count']
                    total_alert_count += r['blacklist_alert_count']
                    _cache_set(query, total_alert_count, 'urlquery')
                    return total_alert_count
            else:
                return None
    '''

    def process_emails(self, emails, ignorelist, replacelist):
        """De-duplicate *emails*, drop addresses matching any *ignorelist*
        pattern, and substitute addresses matching a *replacelist* key with
        the mapped replacement addresses.

        Returns the resulting list (order unspecified).
        """
        to_return = set(emails)
        # Iterate a snapshot so mutating the set while looping is safe
        # (the previous code mutated the list it was iterating).
        for mail in list(to_return):
            if any(re.search(pattern, mail, re.I) for pattern in ignorelist):
                to_return.discard(mail)
                continue
            for pattern, replacements in replacelist.items():
                if re.search(pattern, mail, re.I):
                    # Replace the matching address.  The previous code
                    # mistakenly removed the *pattern* string from the
                    # list instead of the matched mail.
                    to_return.discard(mail)
                    to_return.update(replacements)
        return list(to_return)

    def whois(self, server, port, domain, ignorelist, replacelist):
        """Query a WHOIS server for *domain* and return the contact e-mail
        addresses found in the response (filtered through *ignorelist* and
        *replacelist*), or None when none are found or the server is
        unreachable.  Results are cached for an hour.
        """
        cached = self._cache_get(domain, 'whois')
        if cached is not None:
            return cached
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(15)
        try:
            s.connect((server, port))
        except Exception:
            print("Connection problems - check WHOIS server")
            print(("WHOIS request while problem occurred: ", domain))
            print(("WHOIS server: {}:{}".format(server, port)))
            return None
        if domain.startswith('http'):
            # A full URL was given: query WHOIS for the bare domain only.
            self.fex.decode(domain)
            d = self.fex.get_domain().lower()
        else:
            d = domain
        s.send(("{}\r\n".format(d)).encode())
        response = b''
        # Read until the server closes the connection (empty recv).
        while True:
            d = s.recv(4096)
            response += d
            if d == b'':
                break
        s.close()
        # Crude e-mail extraction from the raw WHOIS text.
        match = re.findall(r'[\w\.-]+@[\w\.-]+', response.decode())
        emails = self.process_emails(match, ignorelist, replacelist)
        if len(emails) == 0:
            return None
        list_mail = list(set(emails))
        self._cache_set(domain, list_mail, 'whois')
        return list_mail

    def pdnscircl(self, url, user, passwd, q):
        """Passive DNS (CIRCL) lookup; returns (count, up_to_five_hosts)."""
        cached = self._cache_get(q, 'pdns')
        if cached is not None:
            return cached
        pdns = PyPDNS(url, basic_auth=(user, passwd))
        uniq_hosts = []
        # Walk the records newest-first and keep the first occurrence of
        # each host name.
        for entry in reversed(pdns.query(q)):
            host = entry['rrname'].lower()
            if host not in uniq_hosts:
                uniq_hosts.append(host)
        result = (len(uniq_hosts), uniq_hosts[:5])
        self._cache_set(q, result, 'pdns')
        return result

    def psslcircl(self, url, user, passwd, q):
        """Passive SSL (CIRCL) lookup; maps certificate SHA1 -> subject values."""
        cached = self._cache_get(q, 'pssl')
        if cached is not None:
            return cached
        pssl = PyPSSL(url, basic_auth=(user, passwd))
        certinfo = pssl.query(q).get(q)
        if certinfo is None:
            return None
        entries = {}
        for sha1 in certinfo['certificates']:
            values = []
            if certinfo['subjects'].get(sha1):
                values = list(certinfo['subjects'][sha1]['values'])
            entries[sha1] = values
        self._cache_set(q, entries, 'pssl')
        return entries

    def eupi(self, url, key, q):
        """Phishing Initiative lookup; returns the tag label for *q*.

        Unknown URLs are submitted for analysis and None is returned.
        """
        cached = self._cache_get(q, 'eupi')
        if cached is not None:
            return cached
        client = PyEUPI(key, url)
        results = client.search_url(url=q).get('results')
        if results:
            label = results[0]['tag_label']
            self._cache_set(q, label, 'eupi')
            return label
        client.post_submission(q)
        return None

    def bgpranking(self, ip):
        """Return (asn, prefix, asn_descr, rank, position, known_asns) for *ip*.

        All six values are None when the IP cannot be mapped to an ASN or
        BGP Ranking has no data for it.
        """
        cached = self._cache_get(ip, 'ipasn')
        if cached is not None:
            asn = cached['asn']
            prefix = cached['prefix']
        else:
            ipasn = IPASNHistory()
            response = ipasn.query(ip)
            if 'response' not in response:
                # Bugfix: the previous code fell through after this check
                # and raised a KeyError on response['response'] below.
                asn = None
                prefix = None
            else:
                entry = response['response'][list(response['response'].keys())[0]]
                if entry:
                    self._cache_set(ip, entry, 'ipasn')
                    asn = entry['asn']
                    prefix = entry['prefix']
                else:
                    asn = None
                    prefix = None

        if not asn or not prefix:
            # asn, prefix, asn_descr, rank, position, known_asns
            return None, None, None, None, None, None

        cached = self._cache_get(ip, 'bgpranking')
        if cached is not None:
            return cached
        bgpranking = BGPRanking()
        # Query yesterday's ranking: today's data may not be complete yet.
        response = bgpranking.query(asn,
                                    date=(date.today() -
                                          timedelta(1)).isoformat())
        if 'response' not in response or not response['response']:
            return None, None, None, None, None, None
        to_return = (asn, prefix, response['response']['asn_description'],
                     response['response']['ranking']['rank'],
                     response['response']['ranking']['position'],
                     response['response']['ranking']['total_known_asns'])
        self._cache_set(ip, to_return, 'bgpranking')
        return to_return

    def lookyloo(self, url):
        """Enqueue *url* on a Lookyloo instance; returns the permanent URL."""
        cached = self._cache_get(url, 'lookyloo')
        if cached is not None:
            return cached
        permaurl = Lookyloo().enqueue(url)
        if not permaurl:
            return None
        self._cache_set(url, permaurl, 'lookyloo')
        return permaurl

    def _deserialize_cached(self, entry):
        """Decode the cached hash for *entry* into ({field: value}, redirects)."""
        data = {}
        redirects = []
        for field, raw in self.cache.hgetall(entry).items():
            value = json.loads(raw)
            if field == 'list':
                # The redirect chain is returned separately.
                redirects = value
            else:
                data[field] = value
        return data, redirects

    def get_url_data(self, url):
        """Gather cached info for *url*, merging per-IP details from its
        cached DNS entry.  Returns ({url: data}, redirects)."""
        data, redirects = self._deserialize_cached(url)
        if data.get('dns') is not None:
            ipv4, ipv6 = data['dns']
            ip_data = {}
            for bucket in (ipv4, ipv6):
                if bucket is None:
                    continue
                for ip in bucket:
                    info, _ = self._deserialize_cached(ip)
                    ip_data[ip] = info
            if ip_data:
                data.update(ip_data)
        return {url: data}, redirects

    def cached(self, url, digest=False):
        """Assemble cached data for *url* and every URL it redirects to.

        When *digest* is true, a human-readable summary is included.
        """
        url_data, redirects = self.get_url_data(url)
        to_return = [url_data]
        for redirect in redirects:
            if redirect == url:
                continue
            data, _ = self.get_url_data(redirect)
            to_return.append(data)
        if digest:
            return {'result': to_return, 'digest': self.digest(to_return)}
        return {'result': to_return}

    def ip_details_digest(self, ips, all_info, all_asns, all_mails):
        """Render a per-IP summary block; accumulates ASNs and WHOIS mails
        into the *all_asns* / *all_mails* sets."""
        lines = ''
        for ip in ips:
            lines += '\t' + ip + '\n'
            data = all_info[ip]
            bgp = data.get('bgpranking')
            if bgp:
                # bgp is (asn, prefix, asn_descr, rank, position, known_asns)
                lines += '\t\tis announced by {} ({}). Position {}/{}.\n'.format(
                    bgp[2], bgp[0], bgp[4], bgp[5])
                all_asns.add('{} ({})'.format(bgp[2], bgp[0]))
            if data.get('whois'):
                all_mails.update(data.get('whois'))
        return lines

    def digest(self, data):
        """Build a human-readable summary of the cached analysis results.

        *data* is the 'result' list produced by ``cached()``.  Returns a
        tuple (text, all_mails, all_asns).
        """
        to_return = ''
        all_mails = set()
        all_asns = set()
        for entry in data:
            # Each URL we're redirected to
            for url, info in entry.items():
                # info contains the information we got for the URL.
                to_return += '\n{}\n'.format(url)
                if 'whois' in info:
                    all_mails.update(info['whois'])
                if 'lookyloo' in info:
                    to_return += '\tLookyloo permanent URL: {}\n'.format(
                        info['lookyloo'])
                # vt entry is (msg, link, positives, total)
                if 'vt' in info and len(info['vt']) == 4:
                    if info['vt'][2] is not None:
                        to_return += '\t{} out of {} positive detections in VT - {}\n'.format(
                            info['vt'][2], info['vt'][3], info['vt'][1])
                    else:
                        to_return += '\t{} - {}\n'.format(
                            info['vt'][0], info['vt'][1])
                if 'gsb' in info:
                    to_return += '\tKnown as malicious on Google Safe Browsing: {}\n'.format(
                        info['gsb'])
                if 'phishtank' in info:
                    to_return += '\tKnown on PhishTank: {}\n'.format(
                        info['phishtank'])

                if 'dns' in info:
                    ipv4, ipv6 = info['dns']
                    if ipv4 is not None:
                        to_return += self.ip_details_digest(
                            ipv4, info, all_asns, all_mails)
                    if ipv6 is not None:
                        to_return += self.ip_details_digest(
                            ipv6, info, all_asns, all_mails)
        return to_return, list(all_mails), list(all_asns)
예제 #13
0
class Web(AbstractModule):
    """
    Web module for AIL framework
    """

    # Used to prevent concat with empty fields due to url parsing
    def avoidNone(self, a_string):
        """Return '' when *a_string* is None, else the string unchanged."""
        return "" if a_string is None else a_string

    def __init__(self):
        """
        Init Web
        """
        super(Web, self).__init__()

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #

        # URL parser used to split detected URLs into domain/subdomain parts.
        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all uri from protocolsfile (Used for Curve)
        # Build a 'scheme1|scheme2|...' alternation, one scheme per line in
        # the file (scheme[:-1] strips the trailing newline).
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        # Matches scheme://[user[:password]@](IPv4|localhost|host.tld)[:port][/path]
        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        # Last processed item filename; lets compute() skip duplicate items.
        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        filename, score = message.split()

        # Skip items already processed (messages arrive grouped by file).
        if self.prec_filename is None or filename != self.prec_filename:
            domains_list = set()
            PST = Paste.Paste(filename)
            client = ip2asn()

            detected_urls = PST.get_regex(self.url_regex)
            if len(detected_urls) > 0:
                to_print = 'Web;{};{};{};'.format(
                    PST.p_source, PST.p_date, PST.p_name)
                self.redis_logger.info('{}Detected {} URL;{}'.format(
                    to_print, len(detected_urls), PST.p_rel_path))

            for url in detected_urls:
                self.redis_logger.debug("match regex: %s" % (url))

                # self.redis_logger.debug("match regex search: %s"%(url))

                # Forward each URL to the 'Url' queue for further processing.
                to_send = "{} {} {}".format(url, PST._get_p_date(), filename)
                self.process.populate_set_out(to_send, 'Url')
                self.redis_logger.debug("url_parsed: %s" % (to_send))

                self.faup.decode(url)
                domain = self.faup.get_domain()
                subdomain = self.faup.get_subdomain()

                self.redis_logger.debug('{} Published'.format(url))

                if subdomain is not None:
                    # TODO: # FIXME: remove me
                    # Some Faup versions return bytes instead of str.
                    try:
                        subdomain = subdomain.decode()
                    except:
                        pass

                if domain is not None:
                    # TODO: # FIXME: remove me
                    try:
                        domain = domain.decode()
                    except:
                        pass
                    domains_list.add(domain)

                hostl = self.avoidNone(subdomain) + self.avoidNone(domain)

                try:
                    socket.setdefaulttimeout(1)
                    ip = socket.gethostbyname(hostl)
                    # If the resolver is not giving any IPv4 address,
                    # ASN/CC lookup is skip.
                    l = client.lookup(ip, qType='IP')
                except ipaddress.AddressValueError:
                    # NOTE(review): assumes AddressValueError can only come
                    # from client.lookup(), i.e. after `ip` is bound — confirm.
                    self.redis_logger.debug(
                        f'ASN/CC lookup failed for IP {ip}')
                    continue
                except:
                    self.redis_logger.debug(
                        f'Resolver IPv4 address failed for host {hostl}')
                    continue

                cc = getattr(l, 'cc')
                asn = ''
                if getattr(l, 'asn') is not None:
                    asn = getattr(l, 'asn')[2:]  # remove leading b' (bytes repr)

                # EU is not an official ISO 3166 code (but used by RIPE
                # IP allocation)
                if cc is not None and cc != "EU":
                    self.redis_logger.debug('{};{};{};{}'.format(hostl, asn, cc,
                                                                 pycountry.countries.get(alpha_2=cc).name))
                    if cc == self.cc_critical:
                        to_print = 'Url;{};{};{};Detected {} {}'.format(
                            PST.p_source, PST.p_date, PST.p_name,
                            hostl, cc)
                        self.redis_logger.info(to_print)
                else:
                    self.redis_logger.debug('{};{};{}'.format(hostl, asn, cc))

            # Verify A records for all collected domains against the cache.
            A_values = lib_refine.checking_A_record(self.r_serv2,
                                                    domains_list)

            if A_values[0] >= 1:

                pprint.pprint(A_values)
                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

        self.prec_filename = filename