def sync(obj: Base, *args, **kwargs):
    city = kwargs.get('city') or os.environ.get("WEATHER_DEFAULT_CITY")
    url = obj.sync_type.base_url
    params = {
        "units": "metric",
        "q": city,
        "APPID": os.environ.get('WEATHER_API_KEY')
    }
    data = Internet.get(url=url, return_json=True, params=params)
    # data["text"] = (
    #     f"city: {city}\n"
    #     f"temp: {data['main']['temp']}\n"
    #     f"feels like: {data['main']['feels_like']}\n"
    #     f"temp min: {data['main']['temp_min']}\n"
    #     f"temp_max: {data['main']['temp_max']}\n"
    #     f"pressure: {data['main']['pressure']}\n"
    #     f"humidity: {data['main']['humidity']}\n"
    #     f"visibility: {data['visibility']}\n"
    #     f"wind speed: {data['wind']['speed']}\n"
    #     f"weather: {data['weather'][0]['main']} ({data['weather'][0]['description']})\n"
    #     f"sunrise: {datetime.datetime.fromtimestamp(data['sys']['sunrise']).isoformat()}\n"
    #     f"sunset: {datetime.datetime.fromtimestamp(data['sys']['sunset']).isoformat()}\n"
    # )
    # OpenWeatherMap icon images are keyed by the 'icon' code (e.g. "01d"), not the 'main' label
    icon_url = f"http://openweathermap.org/img/wn/{data['weather'][0]['icon']}@2x.png"
    img_tag = f'<img src="{icon_url}" class="img-fluid" alt="Responsive image">'
    temp_data = (
        f"{data['main']['temp']} (feels like: {data['main']['feels_like']}), "
        f"{data['weather'][0]['description']}"
    )
    obj.add_text_task(
        unique_key=str(uuid4()),
        name=f"{city}: {temp_data}",
        url=obj.sync_type.extras.get("weather_url").format(city_id=data['id']),
        data=data
    )
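# A minimal sketch (not part of this repo) of the OpenWeatherMap "current weather"
# payload shape that the sync above indexes into; only the keys actually read by the
# code are shown, and all values are illustrative assumptions.
EXAMPLE_WEATHER_RESPONSE = {
    "id": 1273294,  # city id, used to build the weather_url link
    "main": {"temp": 28.4, "feels_like": 30.1, "temp_min": 27.0,
             "temp_max": 29.5, "pressure": 1008, "humidity": 74},
    "visibility": 10000,
    "wind": {"speed": 3.6},
    "weather": [{"main": "Clouds", "description": "scattered clouds", "icon": "03d"}],
    "sys": {"sunrise": 1700000000, "sunset": 1700040000},
}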
def sync(obj: Base, *args, **kwargs):
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)

    # banner post at the top of the page
    banner_div = soup.find('div', {'class': 'card-body m-0 p-0 pt-3'})
    a = banner_div.find('a')
    link = urllib.parse.urljoin(obj.sync_type.base_url, a.get('href').strip())
    name = a.text.strip()
    obj.add_text_task(unique_key=link, name=name, url=link,
                      data=dict(title=name, text=link))

    # remaining post divs, oldest first
    divs = soup.find_all('div', {'class': "card-body m-0 p-0 mt-2"})[::-1]
    for div in divs:
        a = div.find('a')
        link = urllib.parse.urljoin(obj.sync_type.base_url, a.get('href').strip())
        name = a.text.strip()
        obj.add_text_task(unique_key=link, name=name, url=link,
                          data=dict(title=name, text=link))
def sync(obj: Base, *args, **kwargs):
    # number of posts to check
    # no_of_posts = int(kwargs.get("no_of_posts", 10))
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)
    all_cards = soup.find('div', {'id': 'project-grid'})
    posts = all_cards.find_all('div', {'class': ['card']})[::-1]
    for post in posts:
        pid = post.find('div', {'class': 'simplefavorite-button has-count'}).get('data-postid').strip()
        data = {
            'name': post.find("h4", {"class": "card-title"}).text.strip(),
            'url': post.find('a').get('href'),
            'download_url': Bfy.get_download_link(obj, pid)
        }
        data["text"] = (
            f"Name: {data.get('name')}\n"
            f"URL: {data.get('url')}\n"
            f"Link: {data.get('download_url')}\n"
        )
        obj.add_text_task(
            unique_key=pid,
            name=data['name'],
            url=data['url'],
            data=data
        )
def sync(obj: Base, *args, **kwargs):
    repos = Internet.get(url=obj.sync_type.base_url, return_json=True,
                         params={"since": "daily"})
    for repo in repos:
        obj.add_text_task(task_id=repo['url'], data=dict(text=repo['url']))
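# A minimal sketch, assuming the trending endpoint returns a JSON list of repo
# objects; only the 'url' key is relied on by the sync above, the other keys and
# all values are illustrative assumptions.
EXAMPLE_TRENDING_RESPONSE = [
    {"url": "https://github.com/example/repo", "name": "repo", "stars": 1234},
]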
def sync(obj: Base, *args, **kwargs):
    r = Internet.html_get(obj.sync_type.base_url)
    links = r.html.find(".entry-title")
    for h2 in links[::-1]:
        a = h2.find("a", first=True)
        url = a.attrs.get('href')
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data={})
def sync(obj: Base, *args, **kwargs):
    cat = kwargs.get("cat", "")
    typ = kwargs.get("typ", "")
    url = obj.sync_type.base_url.format(cat=cat, typ=typ)
    res = Internet.html_get(url)
    links = res.html.xpath("/html/body/main/section/article[*]/h2/a")
    for a in links[::-1]:
        link = a.attrs.get("href").split("?")[0]
        name = a.text.strip()
        obj.add_text_task(unique_key=link, name=name, url=link, data={})
def sync(obj: Base, *args, **kwargs):
    base_url = obj.sync_type.base_url
    res = Internet.html_get(base_url)
    h2_list = res.html.find(".post-card-title")[::-1]
    for h2 in h2_list:
        a = h2.find("a", first=True)
        link = urllib.parse.urljoin(base_url, a.attrs.get('href'))
        obj.add_text_task(unique_key=link, name=a.text.strip(), url=link, data={})
def sync(obj: Base, *args, **kwargs):
    soup = Internet.get_soup(obj.sync_type.base_url)
    ul = soup.find('ul', {'id': 'posts-container'})
    for li in ul.find_all('li')[::-1]:
        data = {
            "text": li.find('h3').find('a').get('href').strip(),
            "title": li.find('h3').find('a').text.strip()
        }
        obj.add_text_task(unique_key=data['text'], name=data['title'],
                          url=data['text'], data=data)
def sync(obj: Base, *args, **kwargs):
    url = obj.sync_type.base_url
    article_url = obj.sync_type.extras.get('article_url')
    res = Internet.post_phjs(url, return_json=True)
    posts = res['result']['data']['nodes'][::-1]
    for post in posts:
        post['text'] = urllib.parse.urljoin(article_url, post.get('articleLink'))
        obj.add_text_task(unique_key=post.get('id'), name=post['title'],
                          url=post['text'], data=post)
def sync(obj: Base, *args, **kwargs):
    r = Internet.html_get(obj.sync_type.base_url)
    links = r.html.xpath(
        '/html/body/div[1]/div[2]/div/div[4]/div[2]/section/div[*]/div[*]/div[2]/a'
    )
    for a in links[::-1]:
        url = a.attrs.get('href').split("?")[0]
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data=dict(text=url))
def sync(obj: Base, *args, **kwargs):
    price_trackers = obj.get_price_trackers()
    for pt in price_trackers:
        # print("syncing product:", pt.title)
        info = Tracker.get_poduct_info(pt.productUrl)
        obj.updated_price_tracker(info)
        content = (
            f"Title: {info.title}\n"
            f"Current Price: {info.curr_price}\n"
            f"High Price: {info.high_price}\n"
            f"Low Price: {info.low_price}\n"
            f"URL: {info.productUrl}\n"
        )
        obj.add_text_task(task_id=f"{obj.job.id}:{info.title}",
                          data=dict(text=content))
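# A hypothetical sketch of the product-info object the price-tracker sync expects
# back from Tracker.get_poduct_info(); the attribute names mirror the ones read
# above, but this dataclass is an assumption for illustration, not the repo's class.
from dataclasses import dataclass

@dataclass
class ProductInfoSketch:
    title: str
    curr_price: float
    high_price: float
    low_price: float
    productUrl: str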
def sync(obj: Base, *args, **kwargs):
    soup = Internet.get_soup_phjs(obj.sync_type.base_url)
    links = soup.find_all('a', {'class': 'o-eZTujG o-fyWCgU'})
    for a in links[::-1]:
        link = a.get('href')
        url = urllib.parse.urljoin(obj.sync_type.base_url, link)
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data=dict(text=url))
def sync(obj: Base, *args, **kwargs):
    tag = kwargs.get("tag", "programming")
    url = obj.sync_type.base_url.format(tag=tag)
    post_url = obj.sync_type.extras.get("post_url")
    soup = Internet.get_soup_phjs(url)
    posts = soup.find_all('div', {'class': 'title-wrapper'})[::-1]
    for post in posts:
        a = post.find('a')
        link = urllib.parse.urljoin(post_url, a.get('href').strip())
        obj.add_text_task(unique_key=link, name=a.text.strip(), url=link,
                          data=dict(title=a.text.strip(), text=link))
def sync(obj: Base, *args, **kwargs):
    client = NewsApiClient(api_key=os.environ.get('NEWS_API_KEY'))
    # supported countries: https://github.com/mattlisiv/newsapi-python/blob/master/newsapi/const.py
    country = kwargs.get("country", "in")
    top_headlines = client.get_top_headlines(language="en", country=country)
    articles = top_headlines.get('articles')[::-1]
    for article in articles:
        article['text'] = article['url']
        obj.add_text_task(unique_key=article.get("url"), name=article['title'],
                          url=article['url'], data=article)
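# A minimal sketch of the NewsAPI top-headlines response consumed above; only
# 'title' and 'url' are used by the sync, and the values here are illustrative.
EXAMPLE_TOP_HEADLINES = {
    "status": "ok",
    "totalResults": 1,
    "articles": [
        {"title": "Example headline",
         "url": "https://example.com/story",
         "publishedAt": "2023-01-01T00:00:00Z"},
    ],
}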
def sync(obj: Base, *args, **kwargs):
    res = Internet.html_get(obj.sync_type.base_url)
    links = res.html.xpath(
        "/html/body/form/div[4]/div[3]/div/div[1]/div[*]/div/div[1]/h3/a")
    for a in links[::-1]:
        link = a.attrs.get('href')
        url = urllib.parse.urljoin(obj.sync_type.base_url, link)
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data=dict(text=url))
def sync(obj: Base, *args, **kwargs):
    cat = kwargs.get("cat", "")
    url = urllib.parse.urljoin(obj.sync_type.base_url, cat)
    res = Internet.html_get(url)
    h2_list = res.html.find(".crayons-story__title")
    for h2 in h2_list[::-1]:
        a = h2.find('a', first=True)
        url = urllib.parse.urljoin(obj.sync_type.base_url, a.attrs.get('href'))
        obj.add_text_task(
            unique_key=a.attrs.get('id').strip(),
            name=a.text.strip(),
            url=url,
            data=dict(text=url)
        )
def sync(obj: Base, *args, **kwargs):
    r = Internet.html_get(obj.sync_type.base_url)
    found_links = []
    # collect links from every configured xpath
    xpaths = obj.sync_type.extras.get("xp")
    for xpath in xpaths:
        links = r.html.xpath(xpath)
        if links:
            found_links.extend(links)
    for a in found_links[::-1]:
        url = a.attrs.get('href').split("?")[0]
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data={})
def sync(obj: Base, *args, **kwargs):
    r = Internet.html_get(obj.sync_type.base_url)
    links = []
    # try each configured xpath until one matches
    xpaths = obj.sync_type.extras.get("xp")
    for xpath in xpaths:
        links = r.html.xpath(xpath)
        if links:
            break
    article_url = obj.sync_type.extras.get("article_url")
    for a in links[::-1]:
        path = a.attrs.get('href')
        url = urllib.parse.urljoin(article_url, path)
        name = a.text.strip()
        obj.add_text_task(unique_key=url, name=name, url=url, data={})
def sync(obj: Base, *args, **kwargs):
    url = obj.sync_type.base_url
    soup = Internet.get_soup_phjs(url)
    divs = soup.find_all('div', {'class': 'entry-grid-content hgrid-span-12'})
    for div in divs[::-1]:
        h2 = div.find("h2", {"class": "entry-title"})
        a = h2.find('a')
        url = a.get('href')
        name = a.text.strip()
        # guard against posts without a summary so desc is never unbound
        desc_div = div.find("div", {"class": "entry-summary"})
        desc = desc_div.text.strip() if desc_div else ""
        obj.add_text_task(unique_key=url, name=name, url=url,
                          data=dict(text=url, desc=desc))
def sync(obj: Base, *args, **kwargs):
    cat = kwargs.get("cat", "")
    url = obj.sync_type.base_url.format(cat=cat)
    res = Internet.html_get(url)
    xpaths = [
        "/html/body/main/div[2]/div/div/div[1]/div/div[2]/div/article[*]/div/div[2]/a",
        "/html/body/main/div[2]/div/div/div[1]/div/article[*]/div/div[2]/a",
    ]
    links = []
    for xpath in xpaths:
        links = res.html.xpath(xpath)
        if links:
            break
    f_url = obj.sync_type.extras.get("base_url")
    for a in links[::-1]:
        link = urljoin(f_url, a.attrs.get("href"))
        name = a.text.strip().replace("\n", "--")
        obj.add_text_task(unique_key=link, name=name, url=link, data={})
def sync(obj: Base, *args, **kwargs):
    category = kwargs.get("category", "Startup")
    url = obj.sync_type.base_url.format(category=category)
    soup = Internet.get_soup_phjs(url)
    # list items are returned in duplicate, so entries with an empty title are skipped
    lis = soup.find_all('li', {'class': 'sc-hMFtBS gpleaq'})[::-1]
    for li in lis:
        a = li.find('a')
        div = li.find('div', {'class': 'sc-gqPbQI iIXuvz'})
        title_a = div.find("a")
        name = title_a.text.strip()
        if not name:
            continue
        link = urllib.parse.urljoin(obj.sync_type.base_url, a.get('href').strip())
        obj.add_text_task(unique_key=link, name=name, url=link, data=dict(text=link))
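# A rough, hypothetical harness for exercising any of the sync functions above
# without the real scheduler: FakeSyncTarget mimics the small surface the syncs
# rely on (sync_type.base_url / extras and add_text_task). These class names are
# assumptions for illustration, not part of the repo's Base class.
class FakeSyncType:
    def __init__(self, base_url, extras=None):
        self.base_url = base_url
        self.extras = extras or {}

class FakeSyncTarget:
    def __init__(self, base_url, extras=None):
        self.sync_type = FakeSyncType(base_url, extras)
        self.tasks = []

    def add_text_task(self, **task):
        # collect tasks instead of queueing them, so a sync run can be inspected locally
        self.tasks.append(task)

# usage sketch: target = FakeSyncTarget("https://example.com"); sync(target); print(target.tasks)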