Example #1
 def add_index(self, domain, soup):
     url_encrypt = md5(domain)
     # Skip pages that have already been indexed
     if UrlInfo.isIndexed(url_encrypt):
         return
     # Insert the URL
     urlinfo = UrlInfo.objects.create(url=domain, url_encrypt=url_encrypt, createtime=time.time())
     # Extract the title
     if soup.title:
         text_title = soup.title.get_text().strip()
         text_title_list = self.separatewords(text_title)
         self.add_location(urlinfo, text_title_list, 1)
     # Extract the meta keywords (the text lives in the tag's content attribute)
     keywords = soup.find("meta", attrs={"name": "keywords"})
     if keywords and keywords.get("content"):
         text_keywords_list = self.separatewords(keywords["content"].strip())
         self.add_location(urlinfo, text_keywords_list, 2)
     # Extract the meta description
     description = soup.find("meta", attrs={"name": "description"})
     if description and description.get("content"):
         text_description_list = self.separatewords(description["content"].strip())
         self.add_location(urlinfo, text_description_list, 3)
     # Extract the body text
     if soup.body:
         text_content = self.get_content(soup.body, 0)
         text_content_list = self.separatewords(text_content)
         self.add_location(urlinfo, text_content_list, 4)
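
This example leans on several helpers that are not shown: md5, separatewords, add_location and get_content. A minimal sketch of the first two, assuming md5 is a project-local wrapper around hashlib that returns a hex digest and separatewords is a simple tokenizer (the bodies are guesses, shown here as free functions):

    import re
    import hashlib

    def md5(text):
        # Assumed helper: hex digest of the UTF-8 encoded string,
        # matching the url_encrypt values stored on UrlInfo.
        return hashlib.md5(text.encode("utf-8")).hexdigest()

    def separatewords(text):
        # Assumed helper: lowercase tokens split on non-word
        # characters, with empty strings dropped.
        return [w.lower() for w in re.split(r"\W+", text) if w]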
Example #2
 def add_link(self, from_url, to_url, link_text):
     # Both endpoints must already be indexed
     try:
         url_from = UrlInfo.objects.get(url_encrypt=md5(from_url))
         url_to = UrlInfo.objects.get(url_encrypt=md5(to_url))
     except UrlInfo.DoesNotExist:
         return
     # Skip self-links and links that were already recorded
     if url_from == url_to or LinkInfo.isExist(url_from, url_to):
         return
     linkinfo = LinkInfo.objects.create(from_url=url_from, to_url=url_to, createtime=time.time())
     if link_text:
         # Associate each word of the anchor text with the link
         for word in self.separatewords(link_text):
             wordinfo = self.get_word(word)
             if not wordinfo:
                 continue
             LinkWords.objects.create(link=linkinfo, word=wordinfo)
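
The Django models behind these examples are not included. A rough sketch consistent with the fields and classmethods used above; the field types, the WordInfo model, and the isIndexed/isExist bodies are assumptions (on Django 2.0+ the ForeignKey fields would also need on_delete):

    from django.db import models

    class UrlInfo(models.Model):
        url = models.TextField()
        url_encrypt = models.CharField(max_length=32, unique=True)
        createtime = models.FloatField()

        @classmethod
        def isIndexed(cls, url_encrypt):
            return cls.objects.filter(url_encrypt=url_encrypt).exists()

    class WordInfo(models.Model):
        # Assumed model returned by self.get_word()
        word = models.CharField(max_length=64, unique=True)

    class LinkInfo(models.Model):
        from_url = models.ForeignKey(UrlInfo, related_name="outlinks")
        to_url = models.ForeignKey(UrlInfo, related_name="inlinks")
        createtime = models.FloatField()

        @classmethod
        def isExist(cls, url_from, url_to):
            return cls.objects.filter(from_url=url_from, to_url=url_to).exists()

    class LinkWords(models.Model):
        link = models.ForeignKey(LinkInfo)
        word = models.ForeignKey(WordInfo)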
Example #3
    def main(self, urllist, dep=0):
        for domain in urllist:
            # Crawl the page
            try:
                c = urllib2.urlopen(domain)
            except Exception:
                continue
            # Skip pages that redirected to another URL
            if c.geturl() != domain:
                continue
            soup = BeautifulSoup(c.read(), "html.parser")
            self.add_index(domain, soup)

            # Collect the outgoing links
            filter_links = ["", "javascript:;", "#", "javascript:void(0);"]
            newpages = []
            for link in soup.find_all("a"):
                if "href" in link.attrs and link["href"] not in filter_links:
                    url = urljoin(domain, link["href"])
                    if "'" in url:
                        continue
                    # Drop the fragment
                    url = url.split("#")[0]
                    if url.startswith("http") and url != domain:
                        if not UrlInfo.isIndexed(md5(url)):
                            newpages.append(url)
                        link_text = self.get_content(link, 0)
                        # Record the link source
                        self.add_link(domain, url, link_text)
            # Recurse, no more than 3 levels deep
            if newpages and dep < 3:
                self.main(newpages, dep + 1)
        return True
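
For completeness, the imports the three snippets rely on and a hypothetical driver; the Crawler class name and seed URL are illustrative, not part of the original code:

    # -*- coding: utf-8 -*-
    import time
    import urllib2                 # Python 2; urllib.request on Python 3
    from urlparse import urljoin   # urllib.parse.urljoin on Python 3
    from bs4 import BeautifulSoup

    if __name__ == "__main__":
        crawler = Crawler()        # assumed class holding the methods above
        crawler.main(["http://example.com/"])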