Example #1
0
class TestUrlManager(unittest.TestCase):
    """Tests for UrlManager's OSVDB search-page scraping helpers."""

    def setUp(self):
        # Fresh UrlManager per test so no state leaks between tests.
        self.urlmanager = UrlManager()

    def test_getList(self):
        """getList should return the vulnerability ids found on the
        search results page, in page order."""
        expected = [92445, 89684, 89659, 89878, 81401, 90453, 76466,
                    73954, 71943, 71936, 71938, 70588, 90044, 7128]
        actual = self.urlmanager.getList('http://www.osvdb.org/search/search?search%5Bvuln_title%5D=&search%5Btext_type%5D=titles&search%5Bs_date%5D=&search%5Be_date%5D=May+13%2C+2009&search%5Brefid%5D=&search%5Breferencetypes%5D=&search%5Bvendors%5D=&search%5Bcvss_score_from%5D=&search%5Bcvss_score_to%5D=&search%5Bcvss_av%5D=L&search%5Bcvss_ac%5D=*&search%5Bcvss_a%5D=M&search%5Bcvss_ci%5D=*&search%5Bcvss_ii%5D=*&search%5Bcvss_ai%5D=*&location_local=1&kthx=search')
        # Compare as strings: getList may yield ids as str or int.
        self.assertEqual(len(expected), len(actual))
        for want, got in zip(expected, actual):
            self.assertEqual(str(want), str(got))

    def test_getVulnerability(self):
        """getVulnerability should build a Vulnerability object from the
        vulnerability's detail page."""
        v = Vulnerability(7128,'MySQL show database Database Name Exposure',719,'2002-01-01','MySQL contains a flaw that may lead to an unauthorized information disclosure.The issue is triggered when an attacker issues the "show databases" command. In multiuser environments, this may expose the names of every databaseresulting in a loss of confidentiality.','Local Access Required, Remote / Network Access','Information Disclosure','Loss of Confidentiality',None,'Exploit Public',None,'Concern','Upgrade to version 4.0.2 or higher, as it has been reported to fix this vulnerability. An upgrade is required as there are no known workarounds.','Unknown or Incomplete',None,None,None,None,None,None,None)
        # Relies on Vulnerability defining __eq__; assertEqual gives a
        # useful diff on failure, unlike a bare assert.
        self.assertEqual(self.urlmanager.getVulnerability(7128), v)

    def tearDown(self):
        pass
Example #2
0
 def url_manager_proc(self, root_url, urlQueue, parseQueue):
     """Controller-node process for a distributed crawler.

     Starting from root_url, feeds new URLs to worker nodes via url_q,
     absorbs URLs discovered by workers from conn_q, and after more
     than 2000 URLs have been crawled sends an 'end' sentinel and
     persists the URL sets to disk before returning.
     """
     url_manager = UrlManager.UrlManager()
     url_manager.add_new_url(root_url)
     while True:
         while (url_manager.has_new_url()):
             # Fetch the next new URL from the URL manager.
             new_url = url_manager.get_new_url()
             # Hand the new URL to a worker node.
             url_q.put(new_url)
             print('old_url=', url_manager.old_url_size())
             # Stop once more than 2000 links have been crawled, and
             # save progress before shutting down.
             if (url_manager.old_url_size() > 2000):
                 # Notify the crawler nodes that work is finished.
                 url_q.put('end')
                 print('控制节点发起结束通知!')
                 # Shut down the manager node, persisting both URL sets.
                 url_manager.save_progress('new_urls.txt',
                                           url_manager.new_urls)
                 url_manager.save_progress('old_urls.txt',
                                           url_manager.old_urls)
                 return
         # Add URLs received from result_solve_proc to the URL manager.
         try:
             if not conn_q.empty():
                 urls = conn_q.get()
                 url_manager.add_new_urls(urls)
         # NOTE(review): BaseException is very broad (also catches
         # KeyboardInterrupt); presumably intended to survive transient
         # queue errors — confirm before narrowing.
         except BaseException as e:
             time.sleep(0.1)  # brief back-off before retrying
Example #3
0
 def __init__(self, opts):
     """Wire up the crawler components, sharing one opts config object.

     :param opts: configuration object passed to every component
     """
     self.opts = opts
     self.urlmanager = UrlManager.UrlManager()   # URL bookkeeping
     self.downloader = DownLoader.DownLoader(opts)
     self.parser = Parser.Parser(opts)
     self.db = DB.Db(opts)                        # persistence layer
     self.logger = Log.Logger(opts)
Example #4
0
 def __init__(self):
     '''
     Initialize the collaborating components and the Chrome webdriver.
     '''
     self.UM = UrlManager.UrlManager()      # URL manager
     self.HP = HtmlParser.HtmlParser()      # HTML parser
     self.DA = DataArranger.DataArrange()   # data arranger
     self.driver = webdriver.Chrome()       # Selenium-driven browser
Example #5
0
 def test_urlManager(self):
     """loadURL should restore the saved URL set so hasUrl answers
     membership queries correctly."""
     urlSavedFileName = "cfm20180514.cfm"
     # Skip explicitly when the fixture file is absent; the original
     # guard made the test pass silently without running any assertion.
     if not os.path.isfile(urlSavedFileName):
         self.skipTest("fixture file %s not found" % urlSavedFileName)
     urlmanager = UrlManager.UrlManager()
     urlmanager.urlSavedFileName = urlSavedFileName
     urlmanager.loadURL()
     self.assertTrue(urlmanager.hasUrl("www.google.com"))
     self.assertFalse(urlmanager.hasUrl("www.yahoo.com"))
Example #6
0
 def __init__(self):
     '''
     Initialize the crawler modules.
     '''
     self.DA = DataArranger.DataArranger()      # data arranger
     self.HD = HtmlDownloader.HtmlDownloader()  # page downloader
     self.HP = HtmlParser.HtmlParser()          # HTML parser
     self.UM = UrlManager.UrlManager()          # URL manager
Example #7
0
    def __init__(self, parseClass):
        """Assemble the crawler pipeline and its counters/settings.

        :param parseClass: parser class, instantiated per crawler
        """
        self.urls = UrlManager.UrlManager()  # URL manager
        self.comms = UrlManager.UrlManager()  # manager for community (小区) URLs
        self.downloader = Downloader.Downloader()  # downloader
        self.parser = parseClass()
        self.outputer = Outputer.Outputer()
        self.rqBuilder = ReqBuilder.ReqBuilder()

        self.headers = {}  # request headers, built up later
        self.HTTP404 = 0  # counter: number of 404 responses seen
        self.HTTP404_stop = 3  # setting: pause after this many cumulative 404s
        self.retry_times = 3  # setting: retries after a failed download
        self.count = 1  # counter: pages downloaded
        self.delay = 0  # setting: delay (seconds) between page downloads
        self.total = 0  # counter: records successfully written to the database

        self.nodata = 0  # counter: consecutive pages yielding no parsed data
        self.nodata_stop = 4  # setting: max repeats for a page yielding no data
Example #8
0
    def __init__(self, *args, **kw):
        """Build the main frame: keyboard accelerators, and a horizontal
        splitter holding the file-manager panel (top) and url-manager
        panel (bottom), both sharing one UrlManager instance.
        """
        super(ChoboFileManagerFrame, self).__init__(*args, **kw)
        self.Bind(wx.EVT_CLOSE, self.onCloseApp)

        # Keyboard shortcuts as data: (modifier, key, handler). Each is
        # bound as a menu event and registered in one accelerator table,
        # replacing ten copy-pasted NewIdRef/Bind pairs plus a parallel
        # hand-maintained accelerator list that had to stay in sync.
        shortcuts = [
            (wx.ACCEL_CTRL, 'D', self.onFocusOnFileCMD),
            (wx.ACCEL_CTRL, 'F', self.onFind),
            (wx.ACCEL_CTRL, 'G', self.onGoFolder),
            (wx.ACCEL_CTRL, 'L', self.onFocusOnUrl),
            (wx.ACCEL_CTRL, 'P', self.onRunPaint),
            (wx.ACCEL_CTRL, 'R', self.onRun),
            (wx.ACCEL_CTRL, 'U', self.onFocusOnUrlCMD),
            (wx.ACCEL_CTRL, 'Q', self.onClose),
            (wx.ACCEL_ALT, 'F', self.onFocusOnFileList),
            (wx.ACCEL_ALT, 'U', self.onFocusOnUrlList),
        ]
        entries = []
        for modifier, key, handler in shortcuts:
            item_id = wx.NewIdRef()
            self.Bind(wx.EVT_MENU, handler, id=item_id)
            entries.append((modifier, ord(key), item_id))
        self.SetAcceleratorTable(wx.AcceleratorTable(entries))

        self.splitter = wx.SplitterWindow(self, -1, wx.Point(0, 0),
                                          wx.Size(800, 800),
                                          wx.SP_3D | wx.SP_BORDER)
        # NOTE(review): attribute keeps the original "urlManger" spelling
        # (missing 'a') in case other modules reference it by that name.
        self.urlManger = UrlManager.UrlManager()
        self.fileManagerPanel = ChoboFileManagerPanel.ChoboFileManagerPanel(
            self.splitter)
        self.fileManagerPanel.setUrlManager(self.urlManger)
        self.urlManagerPanel = ChoboUrlManagerPanel.ChoboUrlManagerPanel(
            self.splitter)
        self.urlManagerPanel.setUrlManager(self.urlManger)
        self.urlManagerPanel.drawUI()
        self.splitter.SplitHorizontally(self.fileManagerPanel,
                                        self.urlManagerPanel)
        self.splitter.SetMinimumPaneSize(20)

        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(self.splitter, 1, wx.EXPAND)
        self.SetSizer(sizer)
Example #9
0
 def __init__(self):
     """Set up the crawler components and crawl depth."""
     # crawl depth (number of pages)
     self.maxPageDeep = 1
     # URL manager
     self.UrlsManager = UrlManager.UrlManager()
     # downloader
     self.Downloader = HtmlDownloader.HtmlDownloader()
     # parser
     self.Parser = HtmlParser.HtmlParser()
     # outputer
     self.Outputer = HtmlOutputer.HtmlOutputer()
Example #10
0
    def __init__(self):
        """Set up the image-crawler components and progress counters."""
        #self.login = login()
        self.urls = UrlManager.url_manager()
        self.downloader = HtmlDownloader.htmldownloader()
        self.parser = HtmlParser.htmlparser()
        self.imgdownloader = ImgDownloader.imgdownloader()

        # Page-URL progress: full list, its size, and current position.
        self.url_list = self.get_url_list()
        self.url_list_num = len(self.url_list)
        self.url_list_cnt = 0

        # Image-URL progress; populated later during parsing.
        self.img_list = None
        self.img_list_num = 0
        self.img_list_cnt = 0
    def url_manager_proc(self, root_url, urlQueue, parseQueue):
        """URL-manager process: feed new URLs to downloaders via
        urlQueue (an 'END' sentinel when none remain) and absorb URLs
        parsed by workers from parseQueue. Loops forever.
        """
        url_manager = UrlManager.UrlManager()
        url_manager.add_new_urls(root_url)
        while True:
            if url_manager.has_new_url():
                urlQueue.put(url_manager.get_new_url())
            else:
                # No pending work: tell the downloaders.
                urlQueue.put('END')

            try:
                if not parseQueue.empty():
                    # Collect the freshly parsed URLs.
                    url_manager.add_new_urls(parseQueue.get())
                else:
                    time.sleep(0.1)
            # Fix: "except BaseException, e:" is Python-2-only syntax
            # (SyntaxError on Python 3); this form works on 2 and 3,
            # and the unused binding is dropped.
            except BaseException:
                time.sleep(0.1)
    def __init__(self):
        """Wire up the crawler pipeline: URL manager, parser,
        downloader, and data persistence."""
        self.urlManager = UrlManager.UrlManager()
        self.htmlParse = HtmlParse()
        self.htmlDownload = HtmlDownloader.HtmlDownload()
        self.dataSave = DataSave.DataSave()
Example #13
0
 def __init__(self):
     """Wire up the crawler pipeline components."""
     self.manager = UrlManager.UrlManager()
     self.downloader = HtmlDownloader.HtmlDownloader()
     self.parser = HtmlParser.HtmlParser()
     self.output = DataOutput.DataOutput()
Example #14
0
 def __init__(self, path, reg):
     """Store the crawl configuration.

     :param path: output/working path
     :param reg: presumably a regex/filter pattern — TODO confirm usage
     """
     self.index_url = UrlManager.UrlManager()
     self.path = path
     self.reg = reg
Example #15
0
            count += 1
            new_url = url.get_new_url()
            print('正在爬第' + str(count) + '条:' + new_url)
            mutex.release()
            html = downloader.download(new_url)
            url_list = parser.parser(html)
            url.add_new_urls(url_list)
    except:
        print('未知异常')


if __name__ == '__main__':

    # 添加新的URL,并开始一轮爬取
    url.add_new_url(
        'https://baike.baidu.com/item/%E6%99%BA%E6%85%A7%E5%9C%B0%E7%90%83/1071533'
    )
    crawl()

    # 添加旧的URL,在进行了一轮爬取后再添加旧的
    print('已经添加旧的url')
    all_url_path = './all_url.txt'
    all_url_file = open(all_url_path, mode='r', encoding='utf-8')
    url_list = all_url_file.readlines()
    for url_1 in url_list:
        if url_1.find('http') == 0:
            url.add_old_url(url_1[:-1])

    # 开启多线程
    while url.has_new_url():
        t = None
Example #16
0
 def __init__(self, root, threadNum):
     """Set up the threaded crawler.

     :param root: root URL to start crawling from
     :param threadNum: number of worker threads
     """
     self.urls = UrlManager.UrlManager()
     self.download = Downloader.Downloader()
     self.root = root
     self.threadNum = threadNum
Example #17
0
 def __init__(self):
     """Wire up the spider's URL manager, downloader, parser and outputer."""
     super(SpiderMain, self).__init__()
     self.urls = UrlManager.UrlManager()
     self.downloader = HtmlDownloader.HtmlDownloader()
     self.parser = HtmlParser.HtmlParser()
     self.outputer = HtmlOutputer.HtmlOutputer()
Example #18
0
 def __init__(self):
     """Wire up the spider's URL manager, downloader, parser and outputer."""
     self.urls = UrlManager.UrlManager()
     self.downloader = HtmlDownloader.HtmlDownloader()
     self.parser = HtmlParser.HtmlParser()
     self.outputer = HtmlOutputer.HtmlOutputer()
Example #19
0
 def test_urlManager_updateWithFilteR(self):
     # NOTE(review): this test only constructs a UrlManager and asserts
     # nothing — it looks unfinished. The "FilteR" typo in the name is
     # kept because renaming would change the discovered test id.
     urlmanager = UrlManager.UrlManager()
Example #20
0
 def setUp(self):
     # Fresh UrlManager per test so no state leaks between tests.
     self.urlmanager = UrlManager()
Example #21
0
 def __init__(self):
     """Wire up the crawler's URL manager, downloader and parser."""
     self.manager = UrlManager.UrlManager()
     self.downloader = HtmlDownloader.HtmlDownloader()
     self.parser = HtmlParser.HtmlParser()
Example #22
0
 def test_urlManager_exportHtml(self):
     """exportToHtml with an empty filter should create the output file."""
     outfile = "test1.htm"
     # Remove any stale copy first; otherwise a leftover from a previous
     # run makes the assertion pass even if exportToHtml writes nothing.
     if os.path.isfile(outfile):
         os.remove(outfile)
     urlmanager = UrlManager.UrlManager()
     urlmanager.exportToHtml("", outfile)
     self.assertTrue(os.path.isfile(outfile))
Example #23
0
 def __init__(self):
     """Wire up the URL manager and the document handler."""
     self.urlManager = UrlManager.UrlManager()
     self.doc = doc()
Example #24
0
#Main driver file for the program. Handles input from the user and the interaction between the classes

from UrlManager import *
from DatabaseManager import *
import re

#if __name __ == "__main__"

# Interactive driver: repeatedly prompt for an OSVDB search URL, scrape
# the listed vulnerabilities and store each one in the database.
print("***********OSDVB Parser**************")

database = DatabaseManager()
urlManager = UrlManager()
# Renamed from `exit` to avoid shadowing the builtin; never set True,
# so the prompt loops until the process is interrupted (as before).
done = False
while not done:
    # NOTE(review): raw_input is Python 2; kept as-is since this script
    # appears to target py2 — switch to input() when porting to py3.
    userUrl = raw_input("Enter Url String from OSDVB search: ")
    if not userUrl.startswith('http://www.osvdb.org/search/'):  #not the best validation
        print('Invalid Url Input...')
        continue
    vulList = urlManager.getList(userUrl)
    if vulList is None:
        print('Url cannot be parsed. Please check input...')
        continue
    for v in vulList:
        vul = urlManager.getVulnerability(v)
        if vul is None:
            # str(v): ids may be ints; '+' with an int would raise TypeError.
            print('Failed in adding vulnerability ' + str(v) + ' to database')
            continue
        # addVulnerability returns a truthy failure flag.
        if database.addVulnerability(vul):
            print('Failed in adding vulnerability ' + str(v) + ' to database')
            continue
Example #25
0
 def __init__(self):
     """Wire up the spider's URL manager, downloader and page parser."""
     self.url = UrlManager.UrlManager()
     self.downloader = Downloader.Downloader()
     self.htmlpar = SpiderPar.SpiderPar()
Example #26
0
 def __init__(self):
     """Wire up the crawler modules (URL manager, downloader, parser,
     data arranger)."""
     self.UM = UM.UrlManager()
     self.HD = HD.HtmlDownloader()
     self.HP = HP.HtmlParser()
     self.DA = DA.DataArrange()
Example #27
0
 def __init__(self):
     """Wire up the crawler pipeline: URL manager, downloader, parser
     and file output."""
     self.urlManager = UrlManager.UrlManager()
     self.htmlDownloader = HtmlDownloader.HtmlDownloader()
     self.htmlParser = HtmlParser.HtmlParser()
     self.fileOutputer = FileOutputer.FileOutputer()
Example #28
0
 def test_urlManager_exportHtmlWithFilter(self):
     """exportToHtml with a "com" filter should create the output file."""
     outfile = "test2.htm"
     # Remove any stale copy first; otherwise a leftover from a previous
     # run makes the assertion pass even if exportToHtml writes nothing.
     if os.path.isfile(outfile):
         os.remove(outfile)
     urlmanager = UrlManager.UrlManager()
     urlmanager.exportToHtml("com", outfile)
     self.assertTrue(os.path.isfile(outfile))
Example #29
0
 def __init__(self, root, thread_num):
     """Set up the threaded crawler.

     :param root: root URL to start crawling from
     :param thread_num: number of worker threads
     """
     self.urls = UrlManager.UrlManager()
     self.downloader = Downloader.Downloader()
     self.root = root
     self.thread_num = thread_num
     # set of files/pages already seen, to avoid duplicates
     self.web_file = set()