コード例 #1
0
ファイル: appcoda.py プロジェクト: virgoC0der/CoderNewsDjango
def getAppcoda():
    """Scrape the AppCoda iOS tutorials index and store each article.

    Thumbnail URLs are pulled from the inline ``style`` attribute of the
    "post-thumbnail" divs, then paired by position with the title/link
    found in each "post-content" div.  Every article is wrapped in a
    dataModel and persisted best-effort via updateToInfo().

    Returns:
        list: dataModel instances for the scraped articles.
    """
    url = "https://www.appcoda.com/tutorials/ios/"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")

    # The thumbnail lives in a CSS background-image URL inside the style
    # attribute.  NOTE(review): r'https://.*g' is greedy and relies on the
    # URL ending in ...g (jpg/png/svg); kept to preserve match behavior.
    pattern = re.compile(r'https://.*g')  # hoisted out of the loop
    imageArr = []
    for div in soup.find_all("div", class_="post-thumbnail"):
        style_text = div.find("div").get("style")
        # findall() returns a list; str() of it yields "['url']", which is
        # trimmed back to the bare URL with [2:-2] below.
        imageArr.append(str(pattern.findall(style_text)))

    dataArray = []
    for i, div in enumerate(soup.find_all("div", class_="post-content")):
        title = div.find("h2").get_text()
        eachUrl = div.find("h2").find("a").get("href")
        # Guard: the two div sets may differ in length; the original
        # raised IndexError here.
        imageUrl = imageArr[i][2:-2] if i < len(imageArr) else ""
        model = dataModel(title, eachUrl, imageUrl, "swift")
        # Bug fix: the original appended a model holding the un-trimmed
        # "['url']" string to dataArray while persisting the trimmed one;
        # append the same model that is printed and stored.
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
    return dataArray
コード例 #2
0
def getHackeingWithSwift():
    """Scrape the raywenderlich.com article search page and store results.

    NOTE(review): despite the name, this function targets
    raywenderlich.com (a same-named function elsewhere in this project
    scrapes hackingwithswift.com); the name is kept for callers.

    Titles, links and thumbnails are collected as three parallel lists
    and paired by position; only the first 10 entries are stored.

    Returns:
        list: dataModel instances for the stored articles.
    """
    dataArray = []
    websiteUrl = "https://www.raywenderlich.com"
    url = "https://www.raywenderlich.com/library/search?section_id=49&domain_ids%5B%5D=1&content_types%5B%5D=article&category_ids%5B%5D=156&category_ids%5B%5D=159&category_ids%5B%5D=157&category_ids%5B%5D=151&category_ids%5B%5D=145&category_ids%5B%5D=161&category_ids%5B%5D=143&category_ids%5B%5D=147&category_ids%5B%5D=155&category_ids%5B%5D=144&category_ids%5B%5D=158&category_ids%5B%5D=148&category_ids%5B%5D=150&category_ids%5B%5D=152&category_ids%5B%5D=160&category_ids%5B%5D=149&category_ids%5B%5D=153&category_ids%5B%5D=154&category_ids%5B%5D=146&sort_order=released_at&page=1"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")

    titleArray = [span.getText()
                  for span in soup.find_all("span", class_="c-tutorial-item__title")]
    # Robustness fix: a.get("href") is None for anchors without an href
    # attribute, which made the original crash with a TypeError.
    urlArray = [websiteUrl + a.get("href")
                for a in soup.find_all("a") if a.get("href")]
    imageurlArray = [img.get("src")
                     for img in soup.find_all(class_="c-tutorial-item__art-image--primary")]

    # Clamp to the shortest list so a scrape mismatch cannot raise
    # IndexError (the original used a fixed range(0, 10)).
    count = min(10, len(titleArray), len(urlArray), len(imageurlArray))
    for index in range(count):
        # The original built two identical dataModels; one is enough.
        model = dataModel(titleArray[index], urlArray[index],
                          imageurlArray[index], "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
コード例 #3
0
ファイル: appso.py プロジェクト: virgoC0der/CoderNewsDjango
def getAppso():
    """Scrape the ifanr.com AppSolution listing and store each article.

    Cover images are embedded in the ``style`` attribute of the cover
    anchors and extracted with a regex, then paired by position with the
    title/link divs.  Each entry is persisted best-effort.
    """
    url = "https://www.ifanr.com/app"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")

    # NOTE(review): r'https://.*260' relies on the image URL ending in
    # "260" (a size suffix, presumably) — kept to preserve matching.
    pattern = re.compile(r'https://.*260')  # hoisted out of the loop
    imageArray = []
    for cover in soup.find_all("a", class_="article-link cover-block"):
        style_text = str(cover['style'])
        # str(findall(...)) yields "['url']"; [2:-2] below trims it back.
        imageArray.append(str(pattern.findall(style_text)))

    for i, div in enumerate(soup.find_all("div", class_="article-info")):
        title = div.find("h3").find("a").get_text()
        eachUrl = div.find("h3").find("a").get("href")
        # Guard: the cover and info lists may differ in length; the
        # original raised IndexError here.
        imageUrl = imageArray[i][2:-2] if i < len(imageArray) else ""
        model = dataModel(title, eachUrl, imageUrl, 'AppSolution')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #4
0
def getSouhuArticles():
    """Scrape Sohu IT channel news items and store each article.

    Items whose class list contains "txt" are text-only cards with no
    image; those get a None image URL.
    """
    url = "http://it.sohu.com/882?spm=smpc.ch30.fd-ctag.24.1556018818504jdhz"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for item in soup.find_all(attrs={"data-role": "news-item"}):
        # Idiom fix: the original compiled the regex r'txt' per
        # iteration; it is a plain substring test.
        if 'txt' in str(item['class']):
            imageUrl = None  # text-only card, no thumbnail
        else:
            imageUrl = "https:" + item.find("img").get("src")
        title = item.find("h4").find("a").get_text().strip()
        # hrefs are protocol-relative ("//..."); prepend the scheme.
        eachUrl = "https:" + item.find("h4").find("a").get("href")
        model = dataModel(title, eachUrl, imageUrl, 'TechnologyArticles')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #5
0
def getSwiftOrg():
    """Scrape the Swift.org blog index and store each post.

    Returns:
        list: dataModel instances for the scraped posts.
    """
    from urllib.parse import urljoin  # local import keeps the snippet self-contained

    dataArray = []
    url = "https://swift.org/blog/"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for heading in soup.find_all("h1", class_="title"):
        title = heading.get_text()
        # Bug fix: the original called heading.get_text("href"), which
        # treats "href" as a *separator* between text nodes and never
        # reads the link.  Read the anchor's href attribute instead.
        # NOTE(review): assumes each title <h1> wraps an <a>; falls back
        # to the title text when no anchor/href is present — confirm
        # against the live markup.
        link = heading.find("a")
        href = link.get("href") if link is not None else None
        # urljoin handles both relative slugs and absolute "/blog/..."
        # paths without doubling the prefix.
        eachUrl = urljoin(url, href) if href else url + title
        model = dataModel(title, eachUrl, "", "swift")
        # Consistency fix: the original appended a second model with a
        # None image while printing/persisting one with "" — append the
        # same model that is stored.
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
コード例 #6
0
def getHackeingWithSwift():
    """Scrape hackingwithswift.com/articles and store each article.

    Article cards are anchors containing an <h3> title; anchors without
    one (nav links, etc.) are skipped.

    Returns:
        list: dataModel instances for the scraped articles.
    """
    dataArray = []
    base = "https://www.hackingwithswift.com"
    url = "https://www.hackingwithswift.com/articles"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    for anchor in soup.find_all("a"):
        heading = anchor.find("h3")
        if heading is None:
            continue  # not an article card
        title = heading.get_text()
        eachUrl = base + anchor.get("href")
        # Robustness fix: cards without an <img> made the original crash
        # with AttributeError; store an empty image URL instead.
        img = anchor.find("img")
        src = img.get("src") if img is not None else None
        imageUrl = base + src if src else ""
        # The original built two identical dataModels; one is enough.
        model = dataModel(title, eachUrl, imageUrl, "swift")
        dataArray.append(model)
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
    return dataArray
コード例 #7
0
def getSspai():
    """Fetch sspai.com's recommended-apps JSON feed and store each post.

    The API returns {"list": [{"title", "id", "banner", ...}, ...]}.
    """
    url = "https://sspai.com/api/v1/articles?offset=0&limit=20&has_tag=1&tag=%E5%BA%94%E7%94%A8%E6%8E%A8%E8%8D%90&include_total=false&type=recommend_to_home"
    # Idiom fixes: the original bound the response to `list`, shadowing
    # the builtin, and iterated with range(len(...)).
    payload = requests.get(url).json()
    for entry in payload['list']:
        title = entry['title']
        eachUrl = "https://sspai.com/post/" + str(entry['id'])
        imageUrl = "https://cdn.sspai.com/" + entry['banner']
        model = dataModel(title, eachUrl, imageUrl, 'ResourceRecommend')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #8
0
ファイル: zuori.py プロジェクト: virgoC0der/CoderNewsDjango
def getZuori():
    """Scrape the anquanke.com front page and store each security article.

    Markup note (from the selectors below): the matched div is the image
    column of a news card; its next sibling holds the title anchor, while
    the div itself wraps the article link and the lazy-loaded thumbnail
    (URL kept in the "data-original" attribute).
    """
    url = "https://www.anquanke.com/"
    data = requests.get(url)
    soup = BeautifulSoup(data.text, "html.parser")
    div_set = soup.find_all("div", class_="col col-9 col-xs-9 col-sm-8 col-md-8 col-lg-6 col-xl-6 common-item-left")
    for div in div_set:
        # Title lives in the sibling column, nested two divs deep.
        title = div.find_next_sibling().find("div").find("div").find("a").get_text()
        eachUrl = "https://www.anquanke.com" + div.find("a").get("href")
        imageUrl = div.find("a").find("div").find("div").find("img").get("data-original")
        model = dataModel(title, eachUrl, imageUrl, 'NetworkSecurity')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:  # narrowed from a bare except
            continue
コード例 #9
0
def getNshipster():
    """Scrape the "recent" section of nshipster.com and store each article."""
    url = "https://nshipster.com"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    recent = soup.find("section", id="recent")
    for li in recent.find_all("li"):
        # Look the anchor up once (the original re-ran li.find("a") and
        # also extracted an unused paragraph text).
        link = li.find("a")
        title = link.get_text()
        eachUrl = url + link.get("href")  # hrefs are site-relative paths
        model = dataModel(title, eachUrl, "", "swift")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #10
0
def getPythonGithubTrending():
    """Scrape GitHub's daily trending Python repos and store each one."""
    url = "https://github.com/trending/python?since=daily"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    for project in soup.findAll("article", class_="Box-row"):
        # href is "/owner/repo"; strip the leading slash.
        title = project.find('h1').find('a').get("href")[1:]
        project_url = "https://github.com/" + title
        # Repos without a description fall back to just the repo name.
        # (Explicit None check replaces the original's bare try/except
        # around the missing <p> element.)
        blurb = project.find('p', class_="col-9 text-gray my-1 pr-4")
        if blurb is not None:
            describe = blurb.get_text().strip() + " " + title
        else:
            describe = title
        model = dataModel(describe, project_url, "", "python")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue
コード例 #11
0
def getPconline():
    """Scrape mobile.pconline.com.cn phone news and store each article."""
    url = "https://mobile.pconline.com.cn/pry/"
    result = requests.get(url)
    result.encoding = "gbk"  # the site serves GBK; set before reading .text
    soup = BeautifulSoup(result.text, "html.parser")
    anchors = soup.find("div", class_="art-list art-list-cut").find_all(
        "a", class_="img-area")
    for anchor in anchors:
        img = anchor.find("img")
        title = img.get("alt")
        eachUrl = anchor.get("href")
        # "#src" is presumably the site's lazy-load attribute (compare
        # getZol's ".src" handling) — TODO confirm.  Robustness fix: fall
        # back to the plain src so a missing "#src" no longer raises
        # TypeError on the concatenation below.
        src = img.get("#src") or img.get("src") or ""
        imageUrl = "https:" + src
        model = dataModel(title, eachUrl, imageUrl, 'Phone')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #12
0
ファイル: zol.py プロジェクト: virgoC0der/CoderNewsDjango
def getZol():
    """Scrape ZOL's network-security article list and store each entry."""
    url = "http://safe.zol.com.cn/more/2_1628.shtml"
    data = requests.get(url)
    soup = BeautifulSoup(data.text, "html.parser")
    for div in soup.find_all("div", class_="info-mod clearfix"):
        # Hoist the repeated .find("a").find("img") chains.
        anchor = div.find("a")
        img = anchor.find("img")
        title = img.get("alt")
        eachUrl = anchor.get("href")
        # Lazy-loaded images keep the real URL in ".src"; fall back to
        # the normal src attribute.  (`is None` replaces `!= None`.)
        imageUrl = img.get(".src")
        if imageUrl is None:
            imageUrl = img.get("src")
        model = dataModel(title, eachUrl, imageUrl, 'NetworkSecurity')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #13
0
def getPcbeta():
    """Scrape pcbeta.com news (GBK-encoded) and store each item.

    Uses urllib with a spoofed User-Agent header; the rest of the file
    uses requests, but this fetch path is kept to preserve behavior.
    """
    url = "http://www.pcbeta.com/news/"
    headers = ("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45")
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
    data = opener.open(url).read().decode('gbk')
    soup = BeautifulSoup(data, "html.parser")
    for anchor in soup.find_all("a", class_="thumb"):
        img = anchor.find("img")
        title = img.get("title")
        if not title:
            continue  # guard clause replaces the original nested if
        model = dataModel(title, anchor.get("href"), img.get("src"),
                          'Computer')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue
コード例 #14
0
ファイル: igao7.py プロジェクト: virgoC0der/CoderNewsDjango
def getIgao7():
    """Scrape m.igao7.com and store each article with its thumbnail.

    Thumbnails ("pic" divs) and name blocks ("name clr" divs) are two
    parallel lists in the markup, paired here by position.
    """
    url = "http://m.igao7.com/category/all"
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    thumbs = [pic.find("img").get("src")
              for pic in soup.find_all("div", class_="pic")]
    names = soup.find_all("div", class_="name clr")
    # zip() pairs the two lists and stops at the shorter, so a count
    # mismatch can no longer raise IndexError (the original indexed
    # thumbs with a manual counter).
    for div, imageUrl in zip(names, thumbs):
        title = div.find("span", class_="hd").get_text()
        # The parent element carries the article link (href attribute).
        eachUrl = div.parent.get("href")
        model = dataModel(title, eachUrl, imageUrl, 'Phone')
        try:
            model.updateToInfo()
            model.printIt()
        except Exception:
            # Best-effort persistence: skip records that fail to save.
            continue