def get_ibm():
    """Scrape the IBM Developer Java hub page and persist its articles.

    Saves the featured ("hub") link plus every card link on the page as
    dataModel entries tagged "java". Persistence errors from
    updateToInfo() are treated as duplicates and skipped (best-effort).
    """
    url = "https://developer.ibm.com/technologies/java/"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")

    # Featured hub entry at the top of the page (no image available).
    home = soup.find("a", class_="ibm--hub__block_link")
    home_title = home.find("h3", class_="ibm--hub__title").find("span").get_text()
    home_url = "https://developer.ibm.com" + home.get("href")
    model = dataModel(home_title, home_url, "", "java")
    try:
        model.updateToInfo()
    except Exception:
        # Assumed to mean the entry already exists — keep going.
        print("Duplicate!")
    print(home_title, home_url)

    # Regular article cards; image is optional on these.
    article_list = soup.find_all("a", class_="ibm--card__block_link")
    for article in article_list:
        title = article.find("h3").get_text()
        article_url = "https://developer.ibm.com" + article.get("href")
        img = article.find("img")  # look up once instead of twice
        image_url = img.get("src") if img is not None else ""
        model = dataModel(title, article_url, image_url, "java")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this article
def get_java_world():
    """Scrape the JavaWorld front page and persist its articles.

    Stores the home feature article and every "river-well article" entry
    as dataModel records tagged "java". updateToInfo() failures are
    treated as duplicates and ignored (best-effort).
    """
    url = "https://www.javaworld.com"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")

    # Featured article at the top of the page.
    home_article = soup.find("div", class_="home-feature")
    home_link = home_article.find("div", class_="post-cont").find("a")  # one lookup, reused
    home_title = home_link.get_text()
    home_url = url + home_link.get("href")
    home_image_url = home_article.find(
        "figure", class_="feature-img").find("a").find("img").get("data-original")
    model = dataModel(home_title, home_url, home_image_url, "java")
    try:
        model.updateToInfo()
    except Exception:
        # Assumed to mean the entry already exists — keep going.
        print("Duplicate!")

    # Remaining articles in the "river" list.
    article_list = soup.find_all("div", class_="river-well article")
    for article in article_list:
        a = article.find("div", class_="post-cont").find("a")
        title = a.get_text()
        article_url = url + a.get("href")
        image_url = article.find(
            "figure", class_="well-img").find("a").find("img").get("data-original")
        model = dataModel(title, article_url, image_url, "java")
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this article
def getjrebel():
    """Scrape the JRebel RebelLabs blog listing and persist its posts.

    Each post is stored as a dataModel record tagged "java"; the image
    URL is included only when the post has an <img>. updateToInfo()
    failures are treated as duplicates and ignored (best-effort).
    """
    url = "https://jrebel.com/rebellabs/"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find_all("div", class_="post")
    for article in article_list:
        link = article.find("h1").find("a")  # one lookup, reused
        title = link.get("title")
        article_url = link.get("href")
        # Test for the image explicitly instead of relying on an
        # AttributeError when find("img") returns None.
        img = article.find("img")
        if img is not None:
            image_url = "https://jrebel.com" + img.get("src")
            model = dataModel(title, article_url, image_url, "java")
            print(title, article_url, image_url, "img")
        else:
            model = dataModel(title, article_url, "", "java")
            print(title, article_url)
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this post
def get_article():
    """Scrape the GitHub Explore page and persist its featured articles.

    Stores each <article> in the explore grid as a dataModel record
    tagged "github". updateToInfo() failures are treated as duplicates
    and ignored (best-effort).

    NOTE(review): this uses dataModel.dataModel(...) while other
    scrapers in this file call dataModel(...) directly — presumably the
    module exposes both; verify the import style at the top of the file.
    """
    url = "https://github.com/explore"
    # `head` is a module-level headers dict defined elsewhere in this file.
    res = requests.get(url, headers=head)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find(
        "div", class_="d-lg-flex gutter-lg-condensed").find_all("article")
    for article in article_list:
        article_url = article.find("a").get("href")
        img_url = article.find("img").get("src")
        title = article.find("h1").get_text()
        # .strip() replaces the redundant .lstrip().rstrip() chain.
        model = dataModel.dataModel(
            title.strip(), article_url.lstrip(), img_url, "github")
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this article
def get_import_new():
    """Scrape the ImportNew all-posts page and persist its articles.

    Stores each post thumbnail entry as a dataModel record tagged
    "java". Network failures are reported; updateToInfo() failures are
    treated as duplicates and ignored (best-effort).
    """
    url = "http://www.importnew.com/all-posts"
    try:
        res = requests.get(url)
    except requests.exceptions.RequestException:
        # Narrowed from a bare except: only network errors should be
        # reported as a connection problem.
        print("connection error")
        return
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find_all("div", class_="post-thumb")
    for article in article_list:
        a = article.find("a")
        title = a.get("title")
        article_url = a.get("href")
        image_url = a.find("img").get("src")
        model = dataModel(title, article_url, image_url, "java")
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this post
def get_overops():
    """Scrape the OverOps blog front page and persist its articles.

    Stores each <article> as a dataModel record tagged "java"; the
    "image" URL is taken from the featured-image anchor's href.
    updateToInfo() failures are treated as duplicates and ignored.
    """
    url = "https://blog.overops.com/"
    # `head` is a module-level headers dict defined elsewhere in this file.
    res = requests.get(url, headers=head)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find_all("article")
    for article in article_list:
        # Look up the title anchor once instead of walking the same
        # div > h2 > a chain twice.
        title_link = article.find(
            "div", class_="col-md-7 col-sm-7 right-post-content").find(
            "h2").find("a")
        title = title_link.get_text()
        article_url = title_link.get("href")
        img_url = article.find(
            "div", class_="col-md-5 col-sm-5 post-featured-image").find(
            "a").get("href")
        print(title, article_url, img_url)
        model = dataModel(title, article_url, img_url, "java")
        try:
            model.updateToInfo()
        except Exception:
            continue  # duplicate or storage error — skip this article
def github():
    """Scrape GitHub trending pages for several languages and persist them.

    For each language category, fetches the daily trending list and
    stores every repository (description + "owner/repo" as the title)
    as a dataModel record tagged with that category. updateToInfo()
    failures are treated as duplicates and ignored (best-effort).
    """
    trending_url = "https://github.com/trending/"
    categories = ["python", "c++", "c", "swift", "java", "javascript"]
    for category in categories:
        url = trending_url + category + "?since=daily"
        # `head` is a module-level headers dict defined elsewhere in this file.
        res = requests.get(url, headers=head)
        soup = BeautifulSoup(res.text, "html.parser")
        # find_all is the modern BS4 spelling of the deprecated findAll.
        project_list = soup.find_all(
            "li", class_="col-12 d-block width-full py-4 border-bottom")
        for project in project_list:
            # href is "/owner/repo"; drop the leading slash.
            title = project.find('h3').find('a').get("href")[1:]
            project_url = "https://github.com/" + title
            try:
                describe = project.find(
                    'p', class_="col-9 d-inline-block text-gray m-0 pr-4"
                ).get_text().rstrip().lstrip() + " " + title
            except Exception:
                # No description paragraph — fall back to the repo name.
                describe = title
            model = dataModel.dataModel(describe, project_url, "", category)
            model.printIt()
            try:
                model.updateToInfo()
            except Exception:
                continue  # duplicate or storage error — skip this project