Exemple #1
0
def get_malwareurl_list():
    """Produce a list of malware URLs from the MalShare feed.

    Downloads the raw source list from the MalShare API, passes every line
    through clean_url() and keeps the URLs for which resolve_dns() returns
    a truthy address for the URL's host.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when the request fails, when the API answers with a
      non-200 status code, or when any other exception is raised.
    """
    try:
        payload = {'action': 'getsourcesraw', 'api_key': API_KEY}
        user_agent = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest MalShare list...')

        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the task indefinitely.
        request = requests.get('https://malshare.com/api.php',
                               params=payload,
                               headers=user_agent,
                               timeout=60)

        if request.status_code != 200:
            LOGGING.error(
                'Problem connecting to MalShare. Status code:{0}. Please try again later.'
                .format(request.status_code))
            return []

        LOGGING.info('Processing MalShare list...')

        url_list = []

        for line in request.text.split('\n'):
            url = clean_url(line.strip())

            # Skip blank lines and anything clean_url() rejected.
            if not url:
                continue

            host_name = get_host_from_url(url)
            ip_addr = resolve_dns(host_name)

            if ip_addr:
                LOGGING.info('Discovered malicious URL: {0}'.format(url))

                url_list.append(
                    MalwareUrl(host_name, ip_addr, url, 'Malshare'))

        return url_list

    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        LOGGING.warning('Problem connecting to Malshare. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to Malshare. Aborting task.')
        # One call is enough: with a standard logging.Logger, exception()
        # already appends the active traceback (which includes the type
        # and arguments of the exception).
        LOGGING.exception(e)

    return []
Exemple #2
0
def get_malwareurl_list():
    """Produce a list of malware URLs from the OTX feed.

    Starts from the URL list returned by get_otx_data() and extends it with
    the results of get_urls_for_ip() for every address the OTX domains
    resolve to and for every IP address OTX lists directly.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned if an exception is raised along the way.
    """
    try:
        ip_list, domain_list, url_list = get_otx_data()

        if domain_list:
            # Several domains may resolve to the same address; query each
            # address only once.
            seen_ips = set()

            for domain in domain_list:
                ip_addr = resolve_dns(domain)

                if not ip_addr or ip_addr in seen_ips:
                    continue

                seen_ips.add(ip_addr)
                domain_urls = get_urls_for_ip(ip_addr, NAME)

                if domain_urls:
                    url_list.extend(domain_urls)

        else:
            LOGGING.warning('OTX URL list (via domain) is empty.')

        if ip_list:
            for ip_addr in ip_list:
                ip_results = get_urls_for_ip(ip_addr, NAME)

                if ip_results:
                    url_list.extend(ip_results)

        else:
            LOGGING.warning('OTX URL list (via IP) is empty.')

        return url_list

    except Exception as e:
        # Name the feed this function actually queries so the log entry
        # does not point at an unrelated service.
        LOGGING.warning('Problem connecting to OTX. Aborting task.')
        # A single exception() call already records the traceback with a
        # standard logging.Logger.
        LOGGING.exception(e)

    return []
Exemple #3
0
def get_malwareurl_list():
    """Produce a list of malware URLs from the URLhaus feed.

    Downloads the URLhaus "online" CSV export, drops its '#' comment lines
    and walks the remaining rows. A row contributes a MalwareUrl when its
    status column is not 'offline', clean_url() accepts its URL, its date
    is newer than BASECONFIG.malware_days days ago and resolve_dns()
    returns a truthy address for the URL's host.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when the request fails, when the server answers with a
      non-200 status code, or when any other exception is raised.
    """
    try:
        user_agent = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest URLhaus list...')

        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the task indefinitely.
        request = requests.get(
            'https://urlhaus.abuse.ch/downloads/csv_online/',
            headers=user_agent,
            timeout=60)

        if request.status_code != 200:
            LOGGING.error(
                'Problem connecting to URLhaus. Status code:{0}. Please try again later.'
                .format(request.status_code))
            return []

        LOGGING.info('Processing URLhaus list...')

        # Build a new list instead of calling remove() while iterating:
        # removing during iteration skips the element after each removal,
        # so runs of consecutive comment lines were only half stripped.
        lines = [line for line in request.text.split('\n')
                 if not line.startswith('#')]

        reader = csv.reader(lines,
                            quotechar='"',
                            delimiter=',',
                            quoting=csv.QUOTE_ALL,
                            skipinitialspace=True)

        # The cut-off does not change between rows; compute it once.
        # NOTE(review): this is local time; if the feed's dates are UTC the
        # window is shifted by the local UTC offset - confirm.
        valid_since = datetime.now() - timedelta(
            days=BASECONFIG.malware_days)

        url_list = []

        # No header row is skipped here: all comment lines are gone, and an
        # uncommented header (should the feed ever carry one) is rejected
        # by the date check below instead of costing a real entry.
        for item in reader:
            # Columns read below: 1 = date, 2 = URL, 3 = status.
            if len(item) < 4:
                continue

            if item[3] == 'offline':
                continue

            url = clean_url(item[2])

            if not url:
                continue

            try:
                date = dateutil.parser.parse(item[1])

            except (ValueError, OverflowError):
                LOGGING.warning(
                    'Skipping URLhaus entry with unparseable date: {0}'
                    .format(item[1]))
                continue

            if date <= valid_since:
                # The first entry outside the window ends processing.
                # NOTE(review): presumably relies on the feed being sorted
                # newest-first - confirm.
                break

            host_name = get_host_from_url(url)
            ip_addr = resolve_dns(host_name)

            if ip_addr:
                LOGGING.info(
                    'Discovered malicious URL: {0}'.format(url))

                url_list.append(
                    MalwareUrl(host_name, ip_addr, url, NAME))

        return url_list

    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        LOGGING.warning('Problem connecting to URLhaus. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to URLhaus. Aborting task.')
        # A single exception() call already records the traceback with a
        # standard logging.Logger.
        LOGGING.exception(e)

    return []
Exemple #4
0
def get_otx_data():
    """Produce a list of IP addresses, domains and URLs from the OTX feed.

    Requests the pulses published since BASECONFIG.malware_days days ago,
    ignores pulses whose 'created' date is older than STALE_DAYS days and
    sorts the indicators of the remaining pulses by type:

    - 'URL' indicators become MalwareUrl entries when clean_url() accepts
      them and resolve_dns() returns a truthy address for their host.
    - 'domain', 'hostname' and 'IPv4' indicators go to the IP list when
      validators.ipv4() accepts the value, otherwise to the domain list.

    Returns:
    - ip_list: (type: string list) list of IP addresses.
    - domain_list: (type: string list) list of domains.
    - url_list: (type: MalwareUrl list) list of malware URLs.

    Three empty lists are returned if an exception is raised.
    """
    # Indicator types that describe a host rather than a full URL.
    host_indicators = ('domain', 'hostname', 'IPv4')

    try:
        LOGGING.info('Querying AlienVault OTX for recent pulses...')

        otx = OTXv2(API_KEY)

        pulses = otx.getsince(
            (datetime.utcnow() -
             timedelta(days=BASECONFIG.malware_days)).isoformat(),
            limit=None)

        stale_since = datetime.utcnow() - timedelta(days=STALE_DAYS)

        domain_list = []
        ip_list = []
        url_list = []

        LOGGING.info('Processing OTX pulses...')

        for pulse in pulses:
            if dateutil.parser.parse(pulse['created']) < stale_since:
                LOGGING.warning(
                    'Pulse added more than {0} days ago: {1} ({2})'.format(
                        str(STALE_DAYS), pulse['name'], pulse['id']))
                continue

            for indicator in pulse['indicators']:
                indicator_type = indicator['type']
                value = indicator['indicator']

                if indicator_type == 'URL':
                    url = clean_url(value)

                    # Treat an empty result like a rejected URL, as the
                    # other feed collectors do.
                    if not url:
                        continue

                    host_name = get_host_from_url(url)
                    ip_addr = resolve_dns(host_name)

                    if ip_addr:
                        LOGGING.info(
                            'Discovered malicious URL: {0}'.format(url))

                        url_list.append(
                            MalwareUrl(host_name, ip_addr, url, NAME))

                elif indicator_type in host_indicators:
                    if validators.ipv4(value):
                        ip_list.append(value)

                    else:
                        domain_list.append(value)

        return ip_list, domain_list, url_list

    except Exception as e:
        # Name the service this function actually queries so the log entry
        # does not point at an unrelated feed.
        LOGGING.warning('Problem connecting to OTX. Aborting task.')
        # A single exception() call already records the traceback with a
        # standard logging.Logger.
        LOGGING.exception(e)

    return [], [], []
Exemple #5
0
def get_cymon_feed(jwt, feed_id, pages):
    """Produce a list of URLs for IPs found in the feed.

    Requests `pages` batches of BATCH_SIZE entries from the Cymon feed for
    the period between BASECONFIG.malware_days days ago and today, collects
    the distinct IP addresses they mention (resolving hostname-only entries
    with resolve_dns()) and gathers get_urls_for_ip() results for each.

    Params:
    - jwt: (type: string) JWT token.
    - feed_id: (type: string) Cymon feed ID.
    - pages: (type: int) number of pages to retrieve.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when no address is found, when the connection fails, or
      when any other exception is raised.
    """
    try:
        today = datetime.utcnow()
        threshold = today - timedelta(days=BASECONFIG.malware_days)

        headers = {'Authorization': 'Bearer {0}'.format(jwt)}

        LOGGING.info('Fetching data from Cymon feed: {0}'.format(feed_id))

        # ip_list keeps discovery order; seen_ips gives O(1) duplicate checks.
        ip_list = []
        seen_ips = set()

        for n in range(1, pages + 1):
            payload = {
                'startDate': threshold.strftime('%Y-%m-%d'),
                'endDate': today.strftime('%Y-%m-%d'),
                'size': BATCH_SIZE,
                # Zero-based offset of the first entry of page n.
                'from': BATCH_SIZE * (n - 1)}

            # NOTE(review): verify=False disables TLS certificate checks
            # while a bearer token is being sent; kept as in the original,
            # but it should be removed once the endpoint's certificate is
            # confirmed valid.
            # The timeout bounds the wait so a stalled server cannot hang
            # the task indefinitely (requests has no default timeout).
            request = requests.get(
                'https://api.cymon.io/v2/ioc/search/feed/{0}'.format(feed_id),
                params=payload,
                headers=headers,
                verify=False,
                timeout=60)

            if request.status_code != 200:
                LOGGING.error(
                    'Problem connecting to Cymon. Status code:{0}. Please try again later.'.format(
                        request.status_code))
                continue

            LOGGING.info('Request successful!')

            response = json.loads(request.text)

            if 'hits' not in response:
                continue

            for feed_entry in response['hits']:
                if 'ioc' not in feed_entry:
                    continue

                ioc = feed_entry['ioc']

                # The hostname fallback must be the alternative to a
                # missing 'ip' key. It used to hang off the duplicate check
                # instead, so hostname-only entries were never resolved.
                if 'ip' in ioc:
                    mal_ip = ioc['ip']

                elif 'hostname' in ioc:
                    mal_ip = resolve_dns(ioc['hostname'])

                else:
                    continue

                if mal_ip and mal_ip not in seen_ips:
                    seen_ips.add(mal_ip)
                    ip_list.append(mal_ip)

        if not ip_list:
            LOGGING.warning('No hosts of interest.')
            return []

        url_list = []

        for ip_addr in ip_list:
            ip_results = get_urls_for_ip(ip_addr, 'Cymon')

            if ip_results:
                url_list.extend(ip_results)

        return url_list

    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        LOGGING.warning('Problem connecting to Cymon. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to Cymon. Aborting task.')
        # A single exception() call already records the traceback with a
        # standard logging.Logger.
        LOGGING.exception(e)

    return []