Ejemplo n.º 1
0
    def Get_URL_SRC(self,URL_Tuple,Flag,que):  #得到一个页面的源码,que,Flag
        if  Flag == 2:
            Url = urllib2.Request(url=URL_Tuple,headers=self.__headers__)
            Mid_SRC = urllib2.urlopen(Url).read()
            return Mid_SRC
        else:
            if re.search('.*(png|jpg)$', URL_Tuple,re.MULTILINE):
                print u'不是网页,获取不到源码\n',URL_Tuple
            else:
                try:
                    Url = urllib2.Request(url=URL_Tuple,headers=self.__headers__)
                    Mid_SRC = urllib2.urlopen(Url).read()
                    Url_SRC = re.search('<html lang="zh_CN">(.*?)在周四更新</p>', Mid_SRC, re.S).group(1)
    #                 Url_dict = {URL_Tuple:Url_SRC}
                    if Flag == 1:
                        Data = Url_SRC.replace('\'', '').replace('\n', '').replace('\r', '')
                        mmy = My_Save('xiaozhang')
                        mmy.command("insert into Content(content) values('%s')"%Data)
#                         if que.put(Url_SRC):
#                             print u'failed'
#                             print URL_Tuple
                    else:
                        return Url_SRC
                except urllib2.HTTPError,e:
                    print '\n_________________________________\n',e,u'\n丢弃'
                except urllib2.URLError,e:
                    print u'\n打开失败,稍后重新获取\n'
                except AttributeError,e:
                    print u'\n过滤',URL_Tuple,'\n'
Ejemplo n.º 2
0
def Find_exist(value, FF):
    """Record ``value`` in ``adddate`` unless the ``url`` table already has it.

    value -- URL string, interpolated directly into both SQL statements.
    FF    -- running counter supplied by the caller.

    Returns ``FF + 1`` when the lookup in ``url`` comes back falsy (the URL
    is then inserted into ``adddate``), and 0 when the lookup returns
    something truthy.
    """
    # NOTE(review): ``value`` is placed into the SQL text unescaped.
    lookup = My_Save('xiaozhang')
    already_known = lookup.command(
        "select Flag from url where url = '%s'" % value, type='return')
    if already_known:
        return 0

    # A fresh My_Save is created for the second statement, as the rest of
    # this file does -- presumably one object serves one command; confirm.
    writer = My_Save('xiaozhang')
    writer.command("insert into adddate(url) values('%s')" % value)
    FF += 1
    return FF
Ejemplo n.º 3
0
def Find_Key_value(Key):
    """Copy URLs containing the encoded keyword into ``temp`` and count them.

    Key -- the search keyword; it is URL-encoded before being used in the
           ``like`` pattern.

    Returns the unicode message ``u'查找到%s'`` formatted with whatever the
    ``select count(*) from temp`` command returns.
    """
    inserter = My_Save('xiaozhang')
    print
    # urlencode yields 'w=<encoded keyword>'; the regex keeps what follows
    # the first '='.
    encoded_pair = urllib.urlencode({'w': Key})
    encoded_key = re.search('=(.*?)$', encoded_pair).group(1)
    copy_sql = "insert into xiaozhang.temp(url) select url from adddate where url like '%%%s%%'" % encoded_key
    inserter.command(copy_sql, type='save')

    counter = My_Save('xiaozhang')
    value = counter.command('select count(*) from temp', type='return')
    return u'查找到%s' % value
Ejemplo n.º 4
0
def Load_Data(Key, Table):
    """Run ``select <Key> from <Table>`` and return the command's result.

    Key   -- column expression placed after ``select``.
    Table -- table name placed after ``from``.

    Both arguments are interpolated into the SQL text as-is.
    """
    store = My_Save('xiaozhang')
    query = "select %s from %s" % (Key, Table)
    rows = store.command(query, type='return')
    return rows
Ejemplo n.º 5
0
# Interactive entry point: asks a series of y/n questions and runs the
# matching stage for each. Every name assigned below is a module-level
# global; functions defined elsewhere in this file may read them.
if __name__ == '__main__':
        Tuple_All = []
        URL_Tuple = []
        # Analyse and Url are defined outside this section.
        U1 = Analyse(Url)
        Que1 = Queue()
        Que2 = Queue()
        ttt = []
        FF = 0
#         print U1.Get_URL_SRC(urls, 0, Que1)

        choice = raw_input(u'检查更新y/n:')          # update the links in the database
        if choice == 'y':
            Update_Url(URL_Tuple)
        choice = raw_input(u'查找y/n:')
        if choice  =='y':
            # Value is not read again in this section.
            Value = Find_Key_value(raw_input(u'输入关键字:'))
        choice = raw_input(u'是否开始分析源码y/n:')       # take the links from the database
        if choice == 'y':
            Many_process_analyse_web(URL_Tuple,Que1)
            # Drain the queue: strip quotes and line breaks from each item
            # and insert it into the Content table.
            while not Que1.empty():
                Data = Que1.get().replace('\'', '').replace('\n', '').replace('\r', '')
                mmy = My_Save('xiaozhang')
                mmy.command("insert into Content(content) values('%s')"%Data)
        # NOTE(review): the answer to this prompt is never checked --
        # Down_load() and the three truncations below always run.
        choice = raw_input(u'获取数据y/n:')
        Down_load()
        # Empty the Content, temp and adddate tables; a new My_Save is
        # created for each statement.
        M = My_Save('xiaozhang')
        M.command('truncate table Content;',type='save')
        M = My_Save('xiaozhang')
        M.command('truncate table temp;',type='save')
        M = My_Save('xiaozhang')
        M.command('truncate table adddate;',type='save')