Example #1
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every ``.post-card-title`` entry on the base page.

        Fetches ``obj.sync_type.base_url``, walks the matched elements from
        last to first, and registers the first anchor of each one through
        ``obj.add_text_task``. The anchor's ``href`` is resolved against the
        base URL and doubles as the task's unique key.

        Args:
            obj: Sync context providing ``sync_type.base_url`` and
                ``add_text_task``.
            *args: Unused.
            **kwargs: Unused.
        """
        base_url = obj.sync_type.base_url
        res = Internet.html_get(base_url)
        title_nodes = res.html.find(".post-card-title")[::-1]

        for title_node in title_nodes:
            a = title_node.find("a", first=True)
            link = urllib.parse.urljoin(base_url, a.attrs.get('href'))
            # Strip once and reuse; the previous debug print to stdout has
            # been dropped so this grabber is as quiet as its siblings.
            name = a.text.strip()
            obj.add_text_task(unique_key=link,
                              name=name,
                              url=link,
                              data={})
Example #2
0
    def sync(obj: Base, *args, **kwargs):
        """Queue one text task per ``<li>`` inside the ``posts-container`` list.

        The page at ``obj.sync_type.base_url`` is parsed, the ``<ul>`` with id
        ``posts-container`` is located, and its ``<li>`` children are handled
        from last to first. For each item the anchor inside its ``<h3>``
        supplies the link (``text``) and the title.
        """
        page = Internet.get_soup(obj.sync_type.base_url)
        container = page.find('ul', {'id': 'posts-container'})

        for item in reversed(container.find_all('li')):
            anchor = item.find('h3').find('a')
            payload = {
                "text": anchor.get('href').strip(),
                "title": anchor.text.strip(),
            }

            obj.add_text_task(
                unique_key=payload['text'],
                name=payload['title'],
                url=payload['text'],
                data=payload,
            )
Example #3
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every node in the JSON response of the base URL.

        Nodes are read from ``result.data.nodes`` of the JSON returned by
        ``Internet.post_phjs`` and processed from last to first. Each node is
        given a ``text`` entry holding its ``articleLink`` resolved against the
        ``article_url`` extra, and the whole node is stored as the task data.
        """
        endpoint = obj.sync_type.base_url
        article_base = obj.sync_type.extras.get('article_url')

        payload = Internet.post_phjs(endpoint, return_json=True)
        nodes = payload['result']['data']['nodes']

        for node in nodes[::-1]:
            # The node itself is enriched in place and then used as task data.
            node['text'] = urllib.parse.urljoin(article_base,
                                                node.get('articleLink'))
            obj.add_text_task(
                unique_key=node.get('id'),
                name=node['title'],
                url=node['text'],
                data=node,
            )
Example #4
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every ``div.title-wrapper`` post of a tag page.

        Keyword Args:
            tag: Value substituted into the ``{tag}`` placeholder of
                ``obj.sync_type.base_url``; defaults to ``"programming"``.

        Post links are resolved against the ``post_url`` extra, and posts are
        handled from last to first.
        """
        listing_url = obj.sync_type.base_url.format(
            tag=kwargs.get("tag", "programming"))
        link_base = obj.sync_type.extras.get("post_url")
        page = Internet.get_soup_phjs(listing_url)
        wrappers = page.find_all('div', {'class': 'title-wrapper'})

        for wrapper in reversed(wrappers):
            anchor = wrapper.find('a')
            link = urllib.parse.urljoin(link_base, anchor.get('href').strip())
            obj.add_text_task(
                unique_key=link,
                name=anchor.text.strip(),
                url=link,
                data={"title": anchor.text.strip(), "text": link},
            )
Example #5
0
 def sync(obj: Base, *args, **kwargs):
     """Refresh every price tracker and queue a text summary for each one.

     For each tracker returned by ``obj.get_price_trackers()`` the product
     info is fetched from the tracker's ``productUrl``, handed to
     ``obj.updated_price_tracker`` and then rendered into a multi-line text
     that becomes the task payload.
     """
     for tracker in obj.get_price_trackers():
         info = Tracker.get_poduct_info(tracker.productUrl)
         obj.updated_price_tracker(info)

         summary = (
             f"Title: {info.title}\n"
             f"Current Price: {info.curr_price}\n"
             f"High Price: {info.high_price}\n"
             f"Low Price: {info.low_price}\n"
             f"URL: {info.productUrl}\n"
         )
         # The task id combines the current job id with the product title.
         task_id = f"{obj.job.id}:{info.title}"
         obj.add_text_task(task_id=task_id, data={"text": summary})
Example #6
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for each anchor carrying the listing's CSS classes.

        Anchors with class ``o-eZTujG o-fyWCgU`` are collected from the page
        at ``obj.sync_type.base_url`` and handled from last to first. Each
        ``href`` is resolved against the base URL and used as both the task
        URL and its unique key.
        """
        page = Internet.get_soup_phjs(obj.sync_type.base_url)
        anchors = page.find_all('a', {'class': 'o-eZTujG o-fyWCgU'})

        for anchor in reversed(anchors):
            target = urllib.parse.urljoin(obj.sync_type.base_url,
                                          anchor.get('href'))
            title = anchor.text.strip()

            obj.add_text_task(unique_key=target,
                              name=title,
                              url=target,
                              data={"text": target})
Example #7
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every anchor matched by a fixed XPath.

        The page at ``obj.sync_type.base_url`` is fetched and the anchors
        selected by the absolute XPath below are handled from last to first.
        Everything from the first ``?`` onwards is dropped from each ``href``
        before it is used as task URL and unique key.
        """
        anchor_xpath = (
            '/html/body/div[1]/div[2]/div/div[4]/div[2]/section/div[*]/div[*]/div[2]/a'
        )
        response = Internet.html_get(obj.sync_type.base_url)
        anchors = response.html.xpath(anchor_xpath)

        for anchor in reversed(anchors):
            # Keep only the part of the link before the query string.
            target = anchor.attrs.get('href').split("?")[0]
            title = anchor.text.strip()

            obj.add_text_task(unique_key=target,
                              name=title,
                              url=target,
                              data={"text": target})
Example #8
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for each English top headline of a country.

        Keyword Args:
            country: Country code passed to ``get_top_headlines``; defaults
                to ``"in"``. Accepted codes are listed at
                https://github.com/mattlisiv/newsapi-python/blob/master/newsapi/const.py

        The API key is read from the ``NEWS_API_KEY`` environment variable.
        Articles are handled from last to first, and each article dict (with
        an added ``text`` entry equal to its URL) becomes the task data.
        """
        api = NewsApiClient(api_key=os.environ.get('NEWS_API_KEY'))
        headlines = api.get_top_headlines(
            language="en",
            country=kwargs.get("country", "in"),
        )

        for entry in headlines.get('articles')[::-1]:
            entry['text'] = entry['url']
            obj.add_text_task(
                unique_key=entry.get("url"),
                name=entry['title'],
                url=entry['url'],
                data=entry,
            )
Example #9
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every heading anchor matched by a fixed XPath.

        The anchors selected on the page at ``obj.sync_type.base_url`` are
        handled from last to first. Each ``href`` is resolved against the
        base URL, and the resulting address is the task URL and unique key.
        """
        base_url = obj.sync_type.base_url
        anchor_xpath = (
            "/html/body/form/div[4]/div[3]/div/div[1]/div[*]/div/div[1]/h3/a")

        response = Internet.html_get(base_url)

        for anchor in reversed(response.html.xpath(anchor_xpath)):
            target = urllib.parse.urljoin(obj.sync_type.base_url,
                                          anchor.attrs.get('href'))
            title = anchor.text.strip()

            obj.add_text_task(unique_key=target,
                              name=title,
                              url=target,
                              data={"text": target})
Example #10
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for the anchors matched by all configured XPaths.

        Every expression in the ``xp`` extra is evaluated against the page at
        ``obj.sync_type.base_url`` and the matches are pooled in evaluation
        order. The pooled anchors are then handled from last to first, with
        everything from the first ``?`` onwards removed from each ``href``.
        """
        page = Internet.html_get(obj.sync_type.base_url)

        anchors = []
        for expression in obj.sync_type.extras.get("xp"):
            matched = page.html.xpath(expression)
            if matched:
                anchors += matched

        for anchor in reversed(anchors):
            target = anchor.attrs.get('href').split("?")[0]
            obj.add_text_task(
                unique_key=target,
                name=anchor.text.strip(),
                url=target,
                data={},
            )
Example #11
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every ``.crayons-story__title`` entry.

        Keyword Args:
            cat: Path joined onto ``obj.sync_type.base_url`` to build the
                listing address; defaults to the empty string.

        Entries are handled from last to first. The first anchor of each
        entry supplies the title, its ``id`` attribute is the unique key,
        and its ``href`` is resolved against the base URL.
        """
        site_root = obj.sync_type.base_url
        listing_url = urllib.parse.urljoin(site_root, kwargs.get("cat", ""))
        response = Internet.html_get(listing_url)

        for heading in reversed(response.html.find(".crayons-story__title")):
            anchor = heading.find('a', first=True)
            story_url = urllib.parse.urljoin(obj.sync_type.base_url,
                                             anchor.attrs.get('href'))

            obj.add_text_task(unique_key=anchor.attrs.get('id').strip(),
                              name=anchor.text.strip(),
                              url=story_url,
                              data={"text": story_url})
Example #12
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for the anchors of the first XPath that matches.

        The expressions in the ``xp`` extra are tried in order against the
        page at ``obj.sync_type.base_url``, and evaluation stops at the first
        one that yields a non-empty result. Those anchors are handled from
        last to first, with each ``href`` resolved against the
        ``article_url`` extra.
        """
        page = Internet.html_get(obj.sync_type.base_url)

        anchors = []
        for expression in obj.sync_type.extras.get("xp"):
            anchors = page.html.xpath(expression)
            if anchors:
                # The first expression with results wins.
                break

        link_base = obj.sync_type.extras.get("article_url")

        for anchor in reversed(anchors):
            target = urllib.parse.urljoin(link_base, anchor.attrs.get('href'))
            obj.add_text_task(
                unique_key=target,
                name=anchor.text.strip(),
                url=target,
                data={},
            )
Example #13
0
    def sync(obj: Base, *args, **kwargs):
        """Queue photo or video tasks for the posts of a group listing.

        Keyword Args:
            group: Value for the ``{grp}`` placeholder of the base URL; falls
                back to the ``NG_DEFAULT_GROUP`` environment variable.
            type: Value for the ``{typ}`` placeholder of the base URL; falls
                back to the ``NG_DEFAULT_TYPE`` environment variable.

        Posts are read from ``data.posts`` of the JSON response and handled
        from last to first. ``Photo`` posts are queued with their
        ``image700`` URL via ``add_photo_task``, ``Animated`` posts with
        their ``image460sv`` URL via ``add_video_task``. Any other post type
        is ignored.
        """
        group = kwargs.get("group") or os.environ.get("NG_DEFAULT_GROUP")
        typ = kwargs.get("type") or os.environ.get("NG_DEFAULT_TYPE")

        listing_url = obj.sync_type.base_url.format(grp=group, typ=typ)
        response = Internet.post_phjs(url=listing_url, return_json=True)

        for post in response.get("data").get("posts")[::-1]:
            payload = {
                "caption": "{}\n{}".format(post.get("title"), post.get("url")),
                "title": post.get("title"),
                "nsfw": post.get("nsfw"),
                "post_url": post.get("url"),
                "content_type": post.get("type"),
                "up_vote": post.get("upVoteCount"),
                "down_vote": post.get("downVoteCount"),
                "description": post.get("description"),
                "comments_count": post.get("commentsCount"),
            }

            # Pick the image variant that belongs to the post type.
            kind = post["type"]
            if kind == "Photo":
                image_key = "image700"
            elif kind == "Animated":
                image_key = "image460sv"
            else:
                continue

            payload["url"] = post.get("images").get(image_key).get("url")
            add_task = (obj.add_photo_task
                        if kind == "Photo" else obj.add_video_task)
            add_task(unique_key=post.get("id"),
                     name=post['title'],
                     url=payload["url"],
                     data=payload)
Example #14
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for every entry card on the base page.

        Entry cards are the ``div`` elements with class
        ``entry-grid-content hgrid-span-12``, handled from last to first.
        The anchor inside each card's ``h2.entry-title`` supplies the link
        and the title. The text of the optional ``div.entry-summary`` is
        stored as ``desc``.

        Args:
            obj: Sync context providing ``sync_type.base_url`` and
                ``add_text_task``.
            *args: Unused.
            **kwargs: Unused.
        """
        soup = Internet.get_soup_phjs(obj.sync_type.base_url)

        divs = soup.find_all('div',
                             {'class': 'entry-grid-content hgrid-span-12'})

        for div in divs[::-1]:

            h2 = div.find("h2", {"class": "entry-title"})
            a = h2.find('a')

            link = a.get('href')
            name = a.text.strip()

            # Reset the description for every entry. Without this, a card
            # with no summary would either raise UnboundLocalError (first
            # entry) or inherit the previous card's description.
            desc_div = div.find("div", {"class": "entry-summary"})
            desc = desc_div.text.strip() if desc_div is not None else ""

            obj.add_text_task(unique_key=link,
                              name=name,
                              url=link,
                              data=dict(text=link, desc=desc))
Example #15
0
class Sync(object):
    """Run the grabber registered for one sync type as a single job.

    Each instance generates a job id, wraps the sync type in a ``Base``
    object, and on ``start`` executes the matching entry of
    ``SYNC_GRABBERS`` while holding the sync type's lock.
    """

    def __init__(self, sync_type: str, db: Session, request: Request, *args, **kwargs):
        """Prepare a job for ``sync_type``.

        Args:
            sync_type: Key of the grabber in ``SYNC_GRABBERS``.
            db: Database session forwarded to ``Base``.
            request: Request object forwarded to ``Base``.
            *args: Positional arguments forwarded to the grabber.
            **kwargs: Keyword arguments forwarded to the grabber.
        """
        self.sync_type = sync_type
        self.job_id = f"{sync_type}:{self.get_current_time()}:{uuid4()}"
        self.obj = Base(sync_type, self.job_id, db, request)
        self.args = args
        self.kwargs = kwargs

    def start(self):
        """
        execute the sync method

        Nothing happens when the sync type is already locked. Otherwise the
        lock is taken, the grabber runs and its tasks are written (only if
        the sync type is enabled), the job outcome is recorded, and the lock
        is always released.
        """
        import logging

        # Another job holds the lock: skip this run entirely.
        if self.obj.sync_type.locked:
            return

        try:
            self.obj.lock()
            # sync only if its enabled
            if self.obj.sync_type.enabled:
                SYNC_GRABBERS[self.sync_type](self.obj, *self.args, **self.kwargs)
                self.obj.write_tasks()

        except Exception:
            # Keep the traceback; previously the error was dropped and only
            # the failed state was recorded.
            logging.getLogger(__name__).exception(
                "sync job %s failed", self.job_id)
            self.run(self.obj.job_failed)

        else:
            self.run(self.obj.job_success)
            self.obj.notify()

        finally:
            self.obj.release()

    def run(self, func, *args, **kwargs):
        """
        runs given function only when the sync type is enabled
        """
        if self.obj.sync_type.enabled:
            func(*args, **kwargs)

    def get_current_time(self):
        """Return the current time formatted as ``%H.%M-%D``.

        The time is expressed in the zone named by the ``TZ`` environment
        variable, falling back to UTC when ``TZ`` is unset or empty. The old
        code passed ``None`` to ``pytz.timezone`` in that case and failed.
        """
        # NOTE(review): %D is a C-library strftime extension (typically
        # mm/dd/yy); kept unchanged so job ids keep their format.
        fmt = "%H.%M-%D"
        tz_name = os.environ.get("TZ") or "UTC"
        # An aware "now" in the target zone replaces the naive utcnow() +
        # replace() + astimezone() chain.
        return datetime.now(pytz.timezone(tz_name)).strftime(fmt)
Example #16
0
    def sync(obj: Base, *args, **kwargs):
        """Queue photo or video tasks for the posts in the ``content`` response.

        Posts are read from ``content.data.posts`` of the JSON returned for
        ``obj.sync_type.base_url`` and handled in response order. ``Photo``
        posts go to ``add_photo_task`` and ``Animated`` posts to
        ``add_video_task``, both with the post's own ``url`` as task URL.
        The selected image URL is stored under ``url`` in the task data.
        Any other post type is ignored.
        """
        # Reference query left by the original author:
        # https://groww.in/slr/v1/search/derived/scheme?available_for_investment=true&doc_type=scheme&page=0&plan_type=Direct&q=&size=16&sort_by=3
        #   sort_by 1: rating, high to low
        #   sort_by 2: rating, low to high
        #   sort_by 3: popularity
        # NOTE(review): the fields read below (nsfw, upVoteCount, images) do
        # not obviously belong to that endpoint -- confirm the response schema.
        response = Internet.post_phjs(url=obj.sync_type.base_url,
                                      return_json=True)
        content = response['content']

        for post in content.get("data").get("posts"):
            payload = {
                "caption": "{}\n{}".format(post.get("title"), post.get("url")),
                "title": post.get("title"),
                "nsfw": post.get("nsfw"),
                "post_url": post.get("url"),
                "content_type": post.get("type"),
                "up_vote": post.get("upVoteCount"),
                "down_vote": post.get("downVoteCount"),
                "description": post.get("description"),
                "comments_count": post.get("commentsCount"),
            }

            # Pick the image variant that belongs to the post type.
            kind = post["type"]
            if kind == "Photo":
                image_key = "image700"
            elif kind == "Animated":
                image_key = "image460sv"
            else:
                continue

            payload["url"] = post.get("images").get(image_key).get("url")
            add_task = (obj.add_photo_task
                        if kind == "Photo" else obj.add_video_task)
            add_task(unique_key=post.get("id"),
                     name=post['title'],
                     url=post.get("url"),
                     data=payload)
Example #17
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for the article anchors of a category page.

        Keyword Args:
            cat: Value for the ``{cat}`` placeholder of
                ``obj.sync_type.base_url``; defaults to the empty string.

        Two absolute XPaths are tried in order and the first one that yields
        anchors is used. Anchors are handled from last to first. Each
        ``href`` is resolved against the ``base_url`` extra, and newlines in
        the anchor text are replaced by ``--``.
        """
        candidate_xpaths = (
            "/html/body/main/div[2]/div/div/div[1]/div/div[2]/div/article[*]/div/div[2]/a",
            "/html/body/main/div[2]/div/div/div[1]/div/article[*]/div/div[2]/a",
        )

        page_url = obj.sync_type.base_url.format(cat=kwargs.get("cat", ""))
        response = Internet.html_get(page_url)

        anchors = []
        for expression in candidate_xpaths:
            anchors = response.html.xpath(expression)
            if anchors:
                # The first layout that matches wins.
                break

        link_base = obj.sync_type.extras.get("base_url")

        for anchor in reversed(anchors):
            target = urljoin(link_base, anchor.attrs.get("href"))
            title = anchor.text.strip().replace("\n", "--")

            obj.add_text_task(unique_key=target,
                              name=title,
                              url=target,
                              data={})
Example #18
0
    def sync(obj: Base, *args, **kwargs):
        """Queue a text task for each titled list item of a category page.

        Keyword Args:
            category: Value for the ``{category}`` placeholder of
                ``obj.sync_type.base_url``; defaults to ``"Startup"``.

        List items with class ``sc-hMFtBS gpleaq`` are handled from last to
        first. The title comes from the anchor inside the item's
        ``sc-gqPbQI iIXuvz`` div, and the link from the item's first anchor,
        resolved against the unformatted base URL. Items whose title is
        empty are skipped.
        """
        page_url = obj.sync_type.base_url.format(
            category=kwargs.get("category", "Startup"))
        page = Internet.get_soup_phjs(page_url)

        # The page yields every item twice; the empty-title check below drops
        # the untitled ones.
        items = page.find_all('li', {'class': 'sc-hMFtBS gpleaq'})

        for item in reversed(items):
            link_anchor = item.find('a')
            title_box = item.find('div', {'class': 'sc-gqPbQI iIXuvz'})
            title = title_box.find("a").text.strip()

            if not title:
                continue

            target = urllib.parse.urljoin(obj.sync_type.base_url,
                                          link_anchor.get('href').strip())
            obj.add_text_task(unique_key=target,
                              name=title,
                              url=target,
                              data={"text": target})
Example #19
0
 def __init__(self, sync_type: str, db: Session, request: Request, *args, **kwargs):
     """Prepare a sync job: build its id and wrap the sync type in ``Base``.

     The job id has the form ``<sync_type>:<current time>:<uuid4>``. Extra
     positional and keyword arguments are stored unchanged on the instance.
     """
     self.sync_type = sync_type

     started_at = self.get_current_time()
     token = uuid4()
     self.job_id = f"{sync_type}:{started_at}:{token}"

     self.obj = Base(sync_type, self.job_id, db, request)
     self.args, self.kwargs = args, kwargs