Example #1
    def GetInboundURLs(this):
        domain = this.URL.Hostname
        URLs = this.Tree.xpath('(//a)[contains(@href, "' + domain +
                               '") or not(contains(@href, "http"))]/@href')

        urls_found = []
        result = []
        for url in URLs:
            try:
                excluded = False
                for exclude in this.Excludes:
                    if exclude in url.lower():
                        excluded = True
                        break
                if not excluded:
                    if domain in url:
                        url = BooterURL(url)
                        url = url.Hostname + url.Path
                        if url.endswith('/'):
                            url = url[:-1]
                        if url not in urls_found:
                            result.append(url)
                    else:
                        url = (this.RelativeURL + '/' + url).replace('//', '/')
                        if url.endswith('/'):
                            url = url[:-1]
                        if url not in urls_found:
                            result.append(url)
                    urls_found.append(url)  # to check for duplicates
            except Exception:
                # ignore malformed hrefs instead of aborting the whole scrape
                pass

        return result
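
BooterURL is used throughout these examples but never shown; a minimal sketch, assuming it mainly wraps hostname/path splitting (the helper name split_url and the scheme-defaulting behaviour are hypothetical, inferred from how .Hostname and .Path are used above):

# Hypothetical stand-in for the hostname/path splitting BooterURL appears to do.
from urllib.parse import urlsplit

def split_url(url):
    # urlsplit only fills in netloc when a scheme is present, so default to http
    if '://' not in url:
        url = 'http://' + url
    parts = urlsplit(url)
    return parts.netloc.lower(), parts.path

print(split_url('http://booter.xyz/register.php'))  # ('booter.xyz', '/register.php')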
Example #2
    def Crawl(this, max_results=100):
        keywords = ['Booter', 'DDOSer', 'Stresser']

        nr_pages = int(max_results / 100)
        this.PrintUpdate('initiating crawling procedures: Google')

        for keyword in keywords:
            this.PrintDivider()
            this.PrintNote('KEYWORD: ' + keyword)
            this.PrintDivider()
            for i in range(0, nr_pages):
                counter = 0
                # dynamically generate search query
                query = "&q=" + keyword + '&start=' + str(
                    i * 100) + '&filter=0'
                url = this.Target + query

                this.PrintDebug('crawling: ' + query)
                # fetch the page and parse the returned HTML
                response = this.JSCrawl(url)
                tree = html.fromstring(response.text)

                urls = tree.xpath('//*/div[@class="r"]/a/@href')

                split = 10
                for url in urls:
                    try:
                        # parse url
                        if '/url?q=' in url:
                            url = url[7:].split('&sa')[0]
                        this.AddToList(BooterURL(url), 'Google')

                        if counter % split == 0:
                            this.PrintDivider()
                        counter = counter + 1
                    except Exception as ex:
                        this.PrintError('EXCEPTION: ' + str(ex))
                this.Sleep()

        this.PrintUpdate('DONE; found ' + str(len(this.URLs)) +
                         ' potential Booters')
        this.PrintDivider()
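
The loop above unwraps Google's "/url?q=...&sa=..." redirect links by slicing off the first seven characters and splitting on '&sa'. A sketch of the same unwrapping done with urllib.parse, which also decodes percent-escaped characters (the helper name unwrap_google_redirect is illustrative, not part of the crawler):

# Sketch: unwrap a Google "/url?q=..." redirect link via query-string parsing.
from urllib.parse import urlparse, parse_qs

def unwrap_google_redirect(href):
    parsed = urlparse(href)
    if parsed.path == '/url':
        params = parse_qs(parsed.query)
        if 'q' in params:
            return params['q'][0]
    return href  # not a redirect link; leave untouched

print(unwrap_google_redirect('/url?q=http://booter.example/page&sa=U&ved=abc'))
# -> http://booter.example/page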
Example #3
    def AddToList(this, URL, source='?'):
        if not this.IsDuplicate(URL) and not this.IsExcluded(URL):
            try:
                # get status/response-code and resolved URL of URL
                status = this.GetStatus(URL.Full_URL)
                URL = BooterURL(status[2])
                # save in database
                Crawler.crawler_api.storage.SaveURL(URL, source, status[0])
                # then save in list on a proper response, otherwise report the code
                if status[1] in (200, 202, 403):
                    URL.Status = status  # attach status to the URL object for later use
                    this.URLs.append(URL)
                    this.PrintLine('CRAWLER: ' + URL.Full_URL, Fore.BLUE)
                    if status[1] in (202, 403):
                        print('Website blocked crawler; manually verify!')
                else:
                    this.PrintNote('incorrect response code [' +
                                   str(status[1]) + ']: ' + URL.Full_URL)
            except Exception as ex:
                this.PrintError('EXCEPTION: ' + str(ex))
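
GetStatus is not shown in these examples; judging from the code above, it returns a tuple holding a stored status label (status[0]), the HTTP response code (status[1]) and the resolved post-redirect URL (status[2]). A hedged sketch under that assumption, using requests (the 'on'/'off' labels are a guess, loosely matching the status != 'off' filter used in Example #7):

# Hypothetical GetStatus-style helper; the tuple layout is inferred from AddToList.
import requests

def get_status(full_url, timeout=10):
    try:
        response = requests.get(full_url, timeout=timeout, allow_redirects=True)
        label = 'on' if response.status_code == 200 else 'off'
        return (label, response.status_code, response.url)
    except requests.RequestException:
        # unreachable site: report it as off and keep the original URL
        return ('off', 0, full_url)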
Example #4
    def __init__(this, response):
        this.URL = BooterURL(response.url)
        this.HTML = response.text
        this.Tree = html.fromstring(this.HTML)

        # create relative path for further URL queries
        path = this.URL.Path
        if len(path) > 0 and path[0] == '/':
            this.RelativeURL = (this.URL.Hostname).replace('//', '/')
        else:
            # drop the trailing path segment (everything after the last '/')
            for char in reversed(path):
                if char == '/':
                    break
                path = path[:-1]
            this.RelativeURL = (this.URL.Hostname + path).replace('//', '/')
        # print('relative:' + this.RelativeURL)

        # substrings; any link containing one of these is NOT scraped
        this.Excludes = {
            '#',
            'mailto',
            '.pdf',
            '.doc',
            '.rar',
            '.zip',
            '.png',
            '.jpeg',
            '.jpg',
            '.gif',
            '.bmp',
            '.atom',
            '.rss',
            'skype:',
            'javascript:',
            'facebook',
            'twitter',
            '.tar.gz',
            '.exe',
            '.apk',
        }
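
The Excludes entries are matched as substrings against the lower-cased href (see the check in GetInboundURLs in Example #1). A small usage sketch with a few of the entries:

# Usage sketch: substring matching against the exclusion set.
excludes = {'#', 'mailto', '.pdf', 'javascript:', 'facebook'}

def is_excluded(href, excludes):
    href = href.lower()
    return any(token in href for token in excludes)

print(is_excluded('mailto:admin@booter.xyz', excludes))     # True
print(is_excluded('http://booter.xyz/register', excludes))  # False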
Example #5
    def Crawl(this, max_results=100):
        keywords = ['online booter', 'stresser', 'ddoser']

        nr_pages = int(max_results / 10)

        this.PrintUpdate('initiating crawling procedures: Youtube')

        for keyword in keywords:
            this.PrintDivider()
            this.PrintNote('KEYWORD: ' + keyword)
            for i in range(0, nr_pages):
                counter = 0
                try:
                    # dynamically generate search query
                    query = '&search_query="' + keyword + '"&page=' + str(i)
                    url = this.Target + query
                    this.PrintDivider()
                    this.PrintDebug('crawling: ' + query)
                    # fetch the page and parse the returned HTML
                    response = this.JSCrawl(url)
                    tree = html.fromstring(response.text)
                    split = 10

                    urls_found = []

                    descriptions = tree.xpath(
                        '(//div)[contains(@class, "yt-lockup-description")]/descendant-or-self::*/text()'
                    )
                    for description in descriptions:
                        # skip descriptions that clearly do not advertise a Booter
                        if this.StopSearching(description):
                            continue

                        # find all urls in description
                        urls = re.findall(
                            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                            description)
                        for url in urls:
                            urls_found.append(url)

                    # also check for explicit urls in descriptions
                    urls = tree.xpath(
                        '(//div)[contains(@class, "yt-lockup-description")]//a/@href'
                    )
                    for url in urls:
                        if url not in urls_found:
                            urls_found.append(url)

                    this.PrintDivider()
                    this.PrintUpdate('obtained ' + str(len(urls_found)) +
                                     ' potential URLs; extracting...')
                    this.PrintDivider()

                    # resolve each url and add returned url to final urls
                    for url in urls_found:
                        this.AddToList(BooterURL(url), 'Youtube')
                        counter = counter + 1
                        if counter % split == 0:
                            this.PrintDivider()

                    this.Sleep()
                except Exception as ex:
                    this.PrintError('EXCEPTION: ' + str(ex))
                    this.Sleep()

        this.PrintUpdate('DONE; found ' + str(len(this.URLs)) +
                         ' potential Booters')
        this.PrintDivider()
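
For reference, the URL-extraction regex from the description loop above, run on a stand-alone, made-up description:

# The same pattern as in the Crawl method, applied to sample text.
import re

URL_PATTERN = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

description = 'Best stresser of 2015: https://example-stresser.net/register - 10% off!'
print(re.findall(URL_PATTERN, description))
# ['https://example-stresser.net/register']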
Example #6
    connection.execute(query)
    connection.commit()


# utility function for easy update statements
def Update(table, key, column, value):
    query = 'UPDATE ' + table + ' SET ' + column + ' = \'' + str(
        value) + '\' WHERE domainName = \'' + key + '\''
    connection.execute(query)
    connection.commit()


def Select(query):
    result = []
    for row in connection.execute(query):
        result.append(row)
    return result


def CloseConnection():
    connection.close()


# when run directly as the main module, do some debugging operations
if __name__ == "__main__":
    # store in database
    url = BooterURL('http://booter.xyz/register.php')
    SaveURL(url, 'joeydevries.com', 'Y')
    url = BooterURL('http://www.booter.io')
    SaveURL(url, 'learnopengl.com')
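
The Update helper above builds its SQL by string concatenation, which breaks on values containing quotes and is open to SQL injection. A hedged sketch of a parameterised variant, assuming a DB-API connection (sqlite3 is used here with a minimal stand-in schema; table and column names cannot be bound as parameters, so only the values go through placeholders):

# Parameterised variant of Update (sketch; sqlite3 used as a stand-in backend).
import sqlite3

connection = sqlite3.connect(':memory:')
connection.execute('CREATE TABLE urls (domainName TEXT PRIMARY KEY, notes TEXT)')

def update(table, key, column, value):
    # only values are bound; identifiers still come from trusted code
    query = 'UPDATE {} SET {} = ? WHERE domainName = ?'.format(table, column)
    connection.execute(query, (str(value), key))
    connection.commit()

connection.execute('INSERT INTO urls VALUES (?, ?)', ('booter.xyz', ''))
update('urls', 'booter.xyz', 'notes', "manual; O'Brien")  # embedded quote is handled safely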
Example #7
## SCRAPE AND GENERATE SCORES                                                ##
###############################################################################
crawler.PrintDivider()
crawler.PrintUpdate('INITIATING SCRAPING PROCEDURES;')
crawler.PrintDivider()

# query all to-scrape URLs
# from_date    = datetime.datetime(2015, 8, 1).strftime('%Y-%m-%d %H:%M:%S') # test_scores
# from_date    = datetime.datetime(2015, 8, 19).strftime('%Y-%m-%d %H:%M:%S') #test_scores2
from_date = datetime.datetime(2015, 8, 20, 13, 30).strftime('%Y-%m-%d %H:%M:%S')  # test_scores3
delay_period = 7
for url in Crawler.crawler_api.storage.Select(
        'SELECT fullURL FROM urls WHERE status != \'off\' AND timeUpdate >= \'' + str(from_date) + '\''):
    delay = delay_period + random.randint(0, 14)  # add slight randomness to the delay to spread out the workload
    delay = 1  # fixed 1-second delay overrides the randomised value (appears to be a testing leftover)
    crawler.Scrape(BooterURL(url[0]), delay)

crawler.PrintDivider()
crawler.PrintUpdate('DONE;')
crawler.PrintDivider()

# TESTING SCRAPE ALGORITHM
# crawler.Scrape(BooterURL('http://joeydevries.com'))
# crawler.Scrape(BooterURL('k-stress.pw'))
# crawler.Scrape(BooterURL('http://impulse101.org/'))
# crawler.Scrape(BooterURL('http://www.davidairey.com/'))
# crawler.Scrape(BooterURL('booter.org'))
# crawler.Scrape(BooterURL('joeyfladderak.com/'))
# crawler.Scrape(BooterURL('layer7.pw'))
# crawler.Scrape(BooterURL('tweakers.net'))
# crawler.Scrape(BooterURL('ragebooter.com'))
Example #8
    def Crawl(self, max_date):
        # crawling hackforums.net is executed in three steps:
        # 1. retrieve all relevant forum posts
        # 2. extract potential Booter URLs from these posts
        # 3. collect all related evidence and calculate scores
        #    for later evaluation
        target_url = self.Target + 'forumdisplay.php?fid=232&page='
        ### step 1. retrieve all relevant forum posts
        forum_items = []
        current_page = 1
        max_pages = 1
        max_date_reached = False
        self.PrintUpdate('initiating crawling procedures: HackForums')
        self.PrintDivider()

        # crawl first forum page and parse into XML tree
        response = self.Session.post(target_url + str(current_page), headers=self.Header)
        tree = html.fromstring(response.text)

        # analyze structure and get relevant properties (via XPATH)
        self.PrintUpdate('analyzing structure and retrieving Booter candidates')
        self.PrintDivider()
        max_pages = int(tree.xpath('//a[@class="pagination_last"]/text()')[0])
        # max_pages = 1 # for debug
        # now start crawling
        while current_page <= max_pages and not max_date_reached:
            self.PrintUpdate('crawling page ' + str(current_page) + '/' + str(max_pages))
            self.PrintDivider()
            # get forum items
            forum_titles = tree.xpath(
                '//td[contains(@class,"forumdisplay_")]/div/span[1]//a[contains(@class, " subject_")]/text()')
            forum_urls = tree.xpath(
                '//td[contains(@class,"forumdisplay_")]/div/span[1]//a[contains(@class, " subject_")]/@href')
            forum_dates = tree.xpath('//td[contains(@class,"forumdisplay_")]/span/text()[1]')
            # get data of each forum item
            for i in range(len(forum_titles)):
                item = ForumItem(forum_titles[i], self.Target + forum_urls[i], forum_dates[i])
                if item.IsPotentialBooter():
                    forum_items.append(item)
                    print(item)
                # check if max date is reached
                if item.Date < max_date:
                    max_date_reached = True
                    self.PrintDivider()
                    self.PrintUpdate('date limit reached; aborting...')
                    self.PrintDivider()
                    break
            # print a divider after each forum page
            self.PrintDivider()
            # get url of next page and re-iterate
            current_page = current_page + 1
            next_url = target_url + str(current_page)
            response = self.Session.post(next_url, headers=self.Header)
            tree = html.fromstring(response.text)

            if current_page <= max_pages:
                self.Sleep()
        # forum crawling is complete, print (sub)results
        self.PrintUpdate('items found: ' + str(len(forum_items)))
        self.PrintDivider()

        ### step 2. extract potential Booters from target forum posts
        self.PrintUpdate('attempting to obtain Booter URLs')
        self.PrintDivider()
        # start crawling each forum item
        counter = 0
        for item in forum_items:
            # parse html
            response = self.Session.post(item.URL, headers=self.Header)
            tree = html.fromstring(response.text)
            url = ''
            # check for URLs inside image tags
            tree_image = tree.xpath(
                '(//tbody)[1]//div[contains(@class,"post_body")]//a[.//img and not(contains(@href, "hackforums.net")) and not(contains(@href, ".jpg") or contains(@href, ".png") or contains(@href, ".jpeg") or contains(@href, "gif"))]/@href')
            if tree_image:
                url = tree_image[0]
            else:
                # otherwise check for URL in the post's content
                tree_links = tree.xpath(
                    '(//tbody)[1]//div[contains(@class,"post_body")]//a[not(@onclick) and not(contains(@href, "hackforums.net")) and not(contains(@href, ".jpg") or contains(@href, ".png") or contains(@href, ".jpeg") or contains(@href, ".gif"))]/@href')
                if tree_links:
                    url = tree_links[0]

            # add found url to list
            if url != '':
                self.AddToList(BooterURL(url), item.URL)

            # print a divider line every 10 results (to keep things organized)
            counter = counter + 1
            if counter % 10 == 0:
                self.PrintDivider()

        # finished, print results
        self.PrintDivider()
        self.PrintUpdate('DONE; Resolved: ' + str(len(self.URLs)) + ' Booter URLs')
        self.PrintDivider()
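
The post-body XPath from step 2 can be checked offline against a hand-written page (the markup below is made up; only the post_body class and the filtering logic mirror the code above):

# Offline sketch of the step-2 link extraction on a synthetic forum post.
from lxml import html

page = '''<html><body><table><tbody><tr><td>
<div class="post_body">
  <a href="https://hackforums.net/member.php?uid=1">profile</a>
  <a href="http://example-booter.net"><img src="banner.png"/></a>
</div>
</td></tr></tbody></table></body></html>'''

tree = html.fromstring(page)
hits = tree.xpath(
    '(//tbody)[1]//div[contains(@class,"post_body")]'
    '//a[.//img and not(contains(@href, "hackforums.net"))]/@href')
print(hits)  # ['http://example-booter.net']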
Example #9
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    return ch
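
Only the tail of getch() appears above; for reference, a sketch of the standard termios/tty single-keypress reader this finally/return shape belongs to (Unix-only; the start of the function is cut off, so the body below is an assumption):

# Standard Unix single-keypress reader (sketch of the truncated getch above).
import sys
import termios
import tty

def getch():
    fd = sys.stdin.fileno()
    old_settings = termios.tcgetattr(fd)
    try:
        tty.setraw(fd)          # switch the terminal to raw mode
        ch = sys.stdin.read(1)  # read a single character
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    return ch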


from_date = datetime.datetime(2015, 8, 10).strftime('%Y-%m-%d %H:%M:%S')  # date of specific test items

# open chrome driver
driver = webdriver.Chrome()

# get all domains to check
for url in Crawler.crawler_api.storage.Select('SELECT fullURL FROM urls WHERE timeAdd >= \'' + str(from_date) + '\''):
    function_key_pressed = False

    while not function_key_pressed:
        booterURL = BooterURL(url[0])
        driver.get(booterURL.Full_URL)

        ch = getch()
        if ch == 'n':
            print('N PRESSED')
            Crawler.crawler_api.storage.Update('urls', booterURL.UniqueName(), '[booter?]', 'N')
            Crawler.crawler_api.storage.Update('urls', booterURL.UniqueName(), 'notes', 'manual')
            function_key_pressed = True
        elif ch == 'y':
            print('Y PRESSED')
            Crawler.crawler_api.storage.Update('urls', booterURL.UniqueName(), '[booter?]', 'Y')
            Crawler.crawler_api.storage.Update('urls', booterURL.UniqueName(), 'notes', 'manual')
            function_key_pressed = True
        elif ch == 'q':
            print('Q PRESSED')