def sync(obj: Base, *args, **kwargs):
    """Scrape the landing page and register one text task per linked card.

    The page exposes one "banner" card plus a list of regular cards; both
    kinds are reduced to an anchor whose href (joined against the base URL)
    becomes the task's unique key, URL, and payload text.

    Fix: the banner extraction and the per-card extraction were duplicated
    verbatim; they are unified into a single loop over anchors.
    """
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)

    # Banner card first, then the remaining cards reversed — same processing
    # order as before the refactor.
    banner_div = soup.find('div', {'class': 'card-body m-0 p-0 pt-3'})
    anchors = [banner_div.find('a')]
    divs = soup.find_all('div', {'class': "card-body m-0 p-0 mt-2"})[::-1]
    anchors.extend(div.find('a') for div in divs)

    for a in anchors:
        link = urllib.parse.urljoin(obj.sync_type.base_url, a.get('href').strip())
        name = a.text.strip()
        obj.add_text_task(unique_key=link, name=name, url=link,
                          data=dict(title=name, text=link))
def sync(obj: Base, *args, **kwargs):
    """Fetch current weather for a city and register a single text task.

    City resolution: explicit ``city`` kwarg, else the WEATHER_DEFAULT_CITY
    environment variable.  Requires WEATHER_API_KEY in the environment.

    Fix: removed a large commented-out formatting block and the unused
    ``icon_url``/``img_tag`` locals (``icon_url`` was also built from
    ``weather[0]['main']`` instead of the API's ``icon`` code, so it would
    have produced a broken image URL had it been used).
    """
    city = kwargs.get('city') or os.environ.get("WEATHER_DEFAULT_CITY")
    url = obj.sync_type.base_url
    params = {
        "units": "metric",  # request Celsius temperatures
        "q": city,
        "APPID": os.environ.get('WEATHER_API_KEY'),
    }
    data = Internet.get(url=url, return_json=True, params=params)

    # One-line summary used as the task name.
    temp_data = (
        f"{data['main']['temp']} "
        f"(feels like: {data['main']['feels_like']}), "
        f"{data['weather'][0]['description']}"
    )

    # Every sync produces a fresh reading, so the key is a random UUID
    # rather than something derived from the payload.
    obj.add_text_task(
        unique_key=str(uuid4()),
        name=f"{city}: {temp_data}",
        url=obj.sync_type.extras.get("weather_url").format(city_id=data['id']),
        data=data,
    )
def sync(obj: Base, *args, **kwargs):
    """Walk the project grid and register one text task per card, oldest-first."""
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)
    grid = soup.find('div', {'id': 'project-grid'})

    # Reverse so the oldest card is registered first.
    for card in reversed(grid.find_all('div', {'class': ['card']})):
        post_id = card.find(
            'div', {'class': 'simplefavorite-button has-count'}
        ).get('data-postid').strip()
        title = card.find("h4", {"class": "card-title"}).text.strip()
        page_url = card.find('a').get('href')
        download = Bfy.get_download_link(obj, post_id)
        payload = {
            'name': title,
            'url': page_url,
            'download_url': download,
        }
        payload["text"] = (
            f"Name: {title}\n"
            f"URL: {page_url}\n"
            f"Link: {download}\n"
        )
        obj.add_text_task(
            unique_key=post_id,
            name=title,
            url=page_url,
            data=payload,
        )
def sync(obj: Base, *args, **kwargs):
    """Register one text task per trending repository.

    Fixes: the ``since`` parameter was misspelled ("dialy"), and the task was
    registered with ``task_id=`` while every other sync in this module uses
    ``unique_key=``.
    """
    repos = Internet.get(
        url=obj.sync_type.base_url,
        return_json=True,
        params={"since": "daily"},  # was "dialy"
    )
    for repo in repos:
        obj.add_text_task(unique_key=repo['url'], data=dict(text=repo['url']))
def sync(obj: Base, *args, **kwargs):
    """Register a text task for each entry title on the page, oldest-first."""
    page = Internet.html_get(obj.sync_type.base_url)
    for heading in reversed(page.html.find(".entry-title")):
        anchor = heading.find("a", first=True)
        href = anchor.attrs.get('href')
        title = anchor.text.strip()
        obj.add_text_task(unique_key=href, name=title, url=href, data={})
def sync(obj: Base, *args, **kwargs):
    """Register a text task for each trending deepstash link.

    Fix: the original was left in a debug state — it printed link counts and
    titles but never registered any tasks.  Its double reversal (``[::-1]``
    twice) cancelled out, leaving everything except the last anchor in page
    order; that same selection is kept here, now registered like the sibling
    syncs in this module.
    """
    # NOTE(review): hard-coded URL; siblings use obj.sync_type.base_url — confirm.
    url = "https://deepstash.com/trending"
    soup = Internet.get_soup_phjs(url)
    # [::-1] applied twice is a no-op; the net effect was dropping the last anchor.
    links = soup.find_all('a', {'class': 'css-rgh4li'})[:-1]
    for a in links:
        link = urllib.parse.urljoin(url, a.get('href'))
        name = a.text.strip()
        obj.add_text_task(unique_key=link, name=name, url=link, data=dict(text=link))
def sync(obj: Base, *args, **kwargs):
    """Register a text task per article link, filtered by cat/typ kwargs."""
    category = kwargs.get("cat", "")
    article_type = kwargs.get("typ", "")
    page = Internet.html_get(
        obj.sync_type.base_url.format(cat=category, typ=article_type)
    )
    anchors = page.html.xpath("/html/body/main/section/article[*]/h2/a")
    # Oldest-first so task order matches publication order.
    for anchor in reversed(anchors):
        # Drop any query string so the key stays stable across visits.
        clean_url = anchor.attrs.get("href").split("?")[0]
        obj.add_text_task(
            unique_key=clean_url,
            name=anchor.text.strip(),
            url=clean_url,
            data={},
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task for each post-card title, oldest-first.

    Fix: removed a leftover debug ``print`` of every title and link.
    """
    base_url = obj.sync_type.base_url
    res = Internet.html_get(base_url)
    for h2 in res.html.find(".post-card-title")[::-1]:
        a = h2.find("a", first=True)
        link = urllib.parse.urljoin(base_url, a.attrs.get('href'))
        obj.add_text_task(unique_key=link, name=a.text.strip(), url=link, data={})
def sync(obj: Base, *args, **kwargs):
    """Register a text task per article node returned by the JSON endpoint."""
    endpoint = obj.sync_type.base_url
    article_base = obj.sync_type.extras.get('article_url')
    response = Internet.post_phjs(endpoint, return_json=True)
    nodes = response['result']['data']['nodes']
    for node in reversed(nodes):
        # The absolute article URL doubles as the payload text.
        node['text'] = urllib.parse.urljoin(article_base, node.get('articleLink'))
        obj.add_text_task(
            unique_key=node.get('id'),
            name=node['title'],
            url=node['text'],
            data=node,
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task per post in the posts-container list, oldest-first."""
    soup = Internet.get_soup(obj.sync_type.base_url)
    container = soup.find('ul', {'id': 'posts-container'})
    for item in reversed(container.find_all('li')):
        anchor = item.find('h3').find('a')
        href = anchor.get('href').strip()
        title = anchor.text.strip()
        payload = {"text": href, "title": title}
        obj.add_text_task(unique_key=href, name=title, url=href, data=payload)
async def test_url(url_test: UrlTest):
    """Fetch the given URL and report its status, body, and headers.

    Any failure is caught at this boundary and the exception's string form
    is returned in place of an UrlTestOut payload.
    """
    try:
        response = Internet.html_get(url_test.url)
        result = UrlTestOut(
            url=url_test.url,
            status_code=response.status_code,
            body=response.content,
            headers=response.headers,
        )
    except Exception as exc:
        result = str(exc)
    return result
def sync(obj: Base, *args, **kwargs):
    """Register a text task for every anchor matched by the section XPath."""
    page = Internet.html_get(obj.sync_type.base_url)
    anchors = page.html.xpath(
        '/html/body/div[1]/div[2]/div/div[4]/div[2]/section/div[*]/div[*]/div[2]/a'
    )
    # Oldest-first; query strings are stripped so keys stay stable.
    for anchor in reversed(anchors):
        clean_url = anchor.attrs.get('href').split("?")[0]
        obj.add_text_task(
            unique_key=clean_url,
            name=anchor.text.strip(),
            url=clean_url,
            data=dict(text=clean_url),
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task for each matching anchor, oldest-first."""
    base = obj.sync_type.base_url
    soup = Internet.get_soup_phjs(base)
    for anchor in reversed(soup.find_all('a', {'class': 'o-eZTujG o-fyWCgU'})):
        absolute = urllib.parse.urljoin(base, anchor.get('href'))
        obj.add_text_task(
            unique_key=absolute,
            name=anchor.text.strip(),
            url=absolute,
            data=dict(text=absolute),
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task per tagged post (default tag: "programming")."""
    tag = kwargs.get("tag", "programming")
    listing_url = obj.sync_type.base_url.format(tag=tag)
    post_base = obj.sync_type.extras.get("post_url")
    soup = Internet.get_soup_phjs(listing_url)
    for wrapper in reversed(soup.find_all('div', {'class': 'title-wrapper'})):
        anchor = wrapper.find('a')
        title = anchor.text.strip()
        link = urllib.parse.urljoin(post_base, anchor.get('href').strip())
        obj.add_text_task(
            unique_key=link,
            name=title,
            url=link,
            data=dict(title=title, text=link),
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task for every headline anchor found via XPath."""
    base = obj.sync_type.base_url
    page = Internet.html_get(base)
    anchors = page.html.xpath(
        "/html/body/form/div[4]/div[3]/div/div[1]/div[*]/div/div[1]/h3/a")
    for anchor in reversed(anchors):
        absolute = urllib.parse.urljoin(base, anchor.attrs.get('href'))
        obj.add_text_task(
            unique_key=absolute,
            name=anchor.text.strip(),
            url=absolute,
            data=dict(text=absolute),
        )
def sync(obj: Base, *args, **kwargs):
    """Register a text task per story title, optionally scoped to a category."""
    category = kwargs.get("cat", "")
    listing_url = urllib.parse.urljoin(obj.sync_type.base_url, category)
    page = Internet.html_get(listing_url)
    for heading in reversed(page.html.find(".crayons-story__title")):
        anchor = heading.find('a', first=True)
        story_url = urllib.parse.urljoin(
            obj.sync_type.base_url, anchor.attrs.get('href'))
        # The anchor's DOM id — not the URL — serves as the stable key here.
        obj.add_text_task(
            unique_key=anchor.attrs.get('id').strip(),
            name=anchor.text.strip(),
            url=story_url,
            data=dict(text=story_url),
        )
def sync(obj: Base, *args, **kwargs):
    """Collect anchors from every configured XPath and register text tasks."""
    page = Internet.html_get(obj.sync_type.base_url)
    # Unlike syncs that stop at the first matching XPath, this one
    # accumulates matches from all configured expressions.
    matched = []
    for expression in obj.sync_type.extras.get("xp"):
        hits = page.html.xpath(expression)
        if hits:
            matched += hits
    for anchor in reversed(matched):
        # Strip query strings so keys stay stable across visits.
        clean_url = anchor.attrs.get('href').split("?")[0]
        obj.add_text_task(
            unique_key=clean_url,
            name=anchor.text.strip(),
            url=clean_url,
            data={},
        )
def sync(obj: Base, *args, **kwargs):
    """Register text tasks using the first configured XPath that matches."""
    page = Internet.html_get(obj.sync_type.base_url)
    anchors = []
    for expression in obj.sync_type.extras.get("xp"):
        anchors = page.html.xpath(expression)
        if anchors:
            break  # first XPath with hits wins
    article_base = obj.sync_type.extras.get("article_url")
    for anchor in reversed(anchors):
        absolute = urllib.parse.urljoin(article_base, anchor.attrs.get('href'))
        obj.add_text_task(
            unique_key=absolute,
            name=anchor.text.strip(),
            url=absolute,
            data={},
        )
def sync(obj: Base, *args, **kwargs):
    """Register photo/video tasks for posts in a group feed.

    Group and type fall back to the NG_DEFAULT_GROUP / NG_DEFAULT_TYPE
    environment variables.  Posts whose type is neither "Photo" nor
    "Animated" are skipped.
    """
    group = kwargs.get("group") or os.environ.get("NG_DEFAULT_GROUP")
    feed_type = kwargs.get("type") or os.environ.get("NG_DEFAULT_TYPE")
    feed_url = obj.sync_type.base_url.format(grp=group, typ=feed_type)
    response = Internet.post_phjs(url=feed_url, return_json=True)
    # Oldest-first so tasks are registered in publication order.
    for post in response.get("data").get("posts")[::-1]:
        payload = {
            "caption": "{}\n{}".format(post.get("title"), post.get("url")),
            "title": post.get("title"),
            "nsfw": post.get("nsfw"),
            "post_url": post.get("url"),
            "content_type": post.get("type"),
            "up_vote": post.get("upVoteCount"),
            "down_vote": post.get("downVoteCount"),
            "description": post.get("description"),
            "comments_count": post.get("commentsCount"),
        }
        if post["type"] == "Photo":
            payload["url"] = post.get("images").get("image700").get("url")
            obj.add_photo_task(unique_key=post.get("id"), name=post['title'],
                               url=payload["url"], data=payload)
        elif post["type"] == "Animated":
            payload["url"] = post.get("images").get("image460sv").get("url")
            obj.add_video_task(unique_key=post.get("id"), name=post['title'],
                               url=payload["url"], data=payload)
def sync(obj: Base, *args, **kwargs):
    """Register a text task per entry, attaching its summary when present.

    Fix: ``desc`` was assigned only when a summary div existed, so an entry
    without one either raised NameError (first iteration) or silently reused
    the previous entry's description (later iterations).  It now defaults to
    an empty string per entry.
    """
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)
    divs = soup.find_all('div', {'class': 'entry-grid-content hgrid-span-12'})
    for div in divs[::-1]:
        a = div.find("h2", {"class": "entry-title"}).find('a')
        url = a.get('href')
        name = a.text.strip()
        desc_div = div.find("div", {"class": "entry-summary"})
        desc = desc_div.text.strip() if desc_div else ""
        obj.add_text_task(unique_key=url, name=name, url=url,
                          data=dict(text=url, desc=desc))
def sync(obj: Base, *args, **kwargs):
    """Register photo/video tasks for posts from the JSON feed.

    Fix: tasks were registered with ``url=post["url"]`` (the post's page)
    rather than the resolved media URL, unlike the sibling group-feed sync
    in this module which downloads from the image/video URL.  Also renamed
    the loop payload so it no longer shadows the response variable.
    Posts that are neither "Photo" nor "Animated" are skipped.
    """
    # NOTE(review): the original comments referenced a groww.in scheme-search
    # endpoint (sort_by 1: rating high→low, 2: low→high, 3: popularity), but
    # the code consumes a posts feed — confirm which endpoint base_url holds.
    response = Internet.post_phjs(url=obj.sync_type.base_url,
                                  return_json=True)['content']
    for post in response.get("data").get("posts"):
        payload = {
            "caption": "{}\n{}".format(post.get("title"), post.get("url")),
            "title": post.get("title"),
            "nsfw": post.get("nsfw"),
            "post_url": post.get("url"),
            "content_type": post.get("type"),
            "up_vote": post.get("upVoteCount"),
            "down_vote": post.get("downVoteCount"),
            "description": post.get("description"),
            "comments_count": post.get("commentsCount"),
        }
        if post["type"] == "Photo":
            payload["url"] = post.get("images").get("image700").get("url")
            obj.add_photo_task(unique_key=post.get("id"), name=post['title'],
                               url=payload["url"], data=payload)
        elif post["type"] == "Animated":
            payload["url"] = post.get("images").get("image460sv").get("url")
            obj.add_video_task(unique_key=post.get("id"), name=post['title'],
                               url=payload["url"], data=payload)
def sync(obj: Base, *args, **kwargs):
    """Register a text task per article, trying layout-specific XPaths in turn."""
    category = kwargs.get("cat", "")
    page = Internet.html_get(obj.sync_type.base_url.format(cat=category))
    # Two known page layouts; the first XPath producing matches wins.
    candidate_xpaths = [
        "/html/body/main/div[2]/div/div/div[1]/div/div[2]/div/article[*]/div/div[2]/a",
        "/html/body/main/div[2]/div/div/div[1]/div/article[*]/div/div[2]/a",
    ]
    anchors = []
    for expression in candidate_xpaths:
        anchors = page.html.xpath(expression)
        if anchors:
            break
    link_base = obj.sync_type.extras.get("base_url")
    for anchor in reversed(anchors):
        link = urljoin(link_base, anchor.attrs.get("href"))
        # Newlines inside titles are flattened to keep names single-line.
        title = anchor.text.strip().replace("\n", "--")
        obj.add_text_task(unique_key=link, name=title, url=link, data={})
def sync(obj: Base, *args, **kwargs):
    """Register a text task per listing item for a category (default "Startup")."""
    category = kwargs.get("category", "Startup")
    soup = Internet.get_soup_phjs(obj.sync_type.base_url.format(category=category))
    # Per the original note, list items appear twice in the markup; the
    # empty-title check below presumably filters out the duplicates.
    for item in soup.find_all('li', {'class': 'sc-hMFtBS gpleaq'})[::-1]:
        anchor = item.find('a')
        title_anchor = item.find('div', {'class': 'sc-gqPbQI iIXuvz'}).find("a")
        title = title_anchor.text.strip()
        if title == "":
            continue
        link = urllib.parse.urljoin(
            obj.sync_type.base_url, anchor.get('href').strip())
        obj.add_text_task(unique_key=link, name=title, url=link,
                          data=dict(text=link))