Example #1
0
 def RunExtraction(self, language: str = 'ar') -> str:
     """Scrape SkyNews search results, store them, and push each item to Telegram.

     Args:
         language: Target language; only 'ar' (case-insensitive) is
             supported, any other value is a no-op.

     Returns:
         An empty string in all cases (fix: the original implicitly
         returned None on the non-'ar' path despite the `-> str` hint).
     """
     if language.lower() != 'ar':
         # Guard clause: unsupported language — do nothing, but still
         # honour the declared str return type.
         return ''
     json_data = self.MakeRequest(target=self.AR_searchEngine,
                                  json=True)
     # `or []` guards against a missing/None 'contentItems' key, which
     # previously raised TypeError in the for-loop.
     for data in json_data.get('contentItems') or []:
         title = data.get('headline')
         published_date = data.get('date')
         category = data.get('category')
         link = self.CreateNewsLink(data.get('id'),
                                    data.get('sectionUrl'),
                                    data.get('urlFriendlySuffix'))
         self.Results.get("SkyNews").append(
             dict(title=title,
                  published_date=published_date,
                  category=category,
                  link=link))
         if config.DEBUG:
             print("title: ", title)
             print("published date: ", published_date)
             print("category: ", category)
             print("Link: ", link)
         # Fire-and-forget: send the news item to the Telegram channel.
         threading.Thread(target=SendToChannel,
                          args=(title, published_date, category,
                                link)).start()
     write_json(config.EnvironmentPath(), 'skynews', self.Results)
     return ''
Example #2
0
 def performDataExtraction(self, links: list):
     """Fetch every link on its own thread, then persist the combined results.

     Fix: the original routed each link through a `queue.Queue` only to
     `get()` it back on the very next line — the queue contributed
     nothing, so links are now handed to the worker threads directly.

     Args:
         links: Iterable of article URLs to pass to `self.extractData`.
     """
     workers = [
         threading.Thread(target=self.extractData, args=(link, ))
         for link in links
     ]
     for worker in workers:
         worker.start()
     # Wait for all fetches to finish before writing results to disk.
     for worker in workers:
         worker.join()
     write_json(config.EnvironmentPath(), 'alarabiya', self.ResultsData)
Example #3
0
 def performDataExtraction(self, links: list):
     """Fetch every link on its own thread, then persist the results.

     Fixes: (1) the original pushed each link through a `queue.Queue`
     and immediately `get()` it back — the queue added nothing and is
     removed; (2) `except BaseException` also swallowed
     KeyboardInterrupt/SystemExit, so the catch is narrowed to
     `Exception`.

     Args:
         links: Iterable of article URLs to pass to `self.extractData`.
     """
     try:
         workers = [
             threading.Thread(target=self.extractData, args=(link, ))
             for link in links
         ]
         for worker in workers:
             worker.start()
         # Block until every fetch completes before writing to disk.
         for worker in workers:
             worker.join()
     except Exception as e:
         config.debug(level=1, data=e)
     write_json(config.EnvironmentPath(), 'rt', self.Results)
Example #4
0
 def EN_CNN_Search(self, query: str):
     """Query the English CNN search API and record every matching article.

     Args:
         query: Free-text search term; surrounding whitespace is stripped
             before interpolation into the API URL.
     """
     results = self.MakeRequest(target=self.API_CNN_EN.format(
         query.strip()),
                                json=True)
     # `or []` guards against a missing/None 'result' key, which
     # previously raised TypeError in the for-loop.
     for news in results.get('result') or []:
         title = news.get('headline')
         tags = news.get('section')
         published_date = news.get('firstPublishDate')
         link = news.get('url')
         self.Results.get("cnn").append(
             dict(title=title,
                  tags=tags,
                  published_date=published_date,
                  link=link))
     write_json(config.EnvironmentPath(), 'cnn', self.Results)
Example #5
0
 def parseResults(self):
     """
     Parse the JSON API response into news records and persist them.

     Fixes: (1) the `item.get('pagemap').get('metatags')[0]` chain was
     repeated five times — hoisted into `meta`; (2) a duplicated nested
     `if config.DEBUG:` check is collapsed into one; (3) `except
     BaseException` narrowed to `Exception` so KeyboardInterrupt and
     SystemExit still propagate.
     """
     try:
         # JSON object from the API response.
         json_data = self.convertToJson()
         # Iterate over the news list ('items' may be absent → None).
         for item in json_data.get('items') or []:
             # Skip anything the category rule classifies as non-news.
             if self.Ensure_Rules(link=item.get('link'),
                                  rule='category'):
                 continue
             # Metatag dict used for date and category extraction.
             meta = item.get('pagemap').get('metatags')[0]
             link = item.get('link')
             title = item.get('title')
             published_date = meta.get('dcterms.created')
             # News category can live under two possible metatag keys.
             category = None
             raw_tags = meta.get('classification-tags')
             if raw_tags:
                 category = raw_tags.split(',')
             else:
                 raw_isa = meta.get('classification-isa')
                 # hasattr check kept from the original: the value is
                 # only split when it is string-like.
                 if hasattr(raw_isa, 'split'):
                     category = raw_isa.split(',')
             self.Results.get("foxnews").append(
                 dict(title=title,
                      published_date=published_date,
                      category=category,
                      link=link))
             if config.DEBUG:
                 print("Link", link)
                 print("Title", title)
                 print("Categories", category)
                 print("Published date:", published_date)
     except Exception as e:
         print(e)
     write_json(config.EnvironmentPath(), 'foxnews', self.Results)
Example #6
0
 def performDataExtraction(self, links: list, language):
     """Fetch every link on its own thread (with language), then persist.

     Fixes: (1) the original pushed each link through a `queue.Queue`
     only to `get()` it straight back — the queue is removed and links
     are passed directly; (2) `except BaseException` narrowed to
     `Exception` so KeyboardInterrupt/SystemExit still propagate.

     Args:
         links: Iterable of article URLs to pass to `self.extractData`.
         language: Forwarded unchanged as the second worker argument.
     """
     try:
         workers = [
             threading.Thread(target=self.extractData,
                              args=(link, language))
             for link in links
         ]
         for worker in workers:
             worker.start()
         # Wait for every fetch to complete before writing to disk.
         for worker in workers:
             worker.join()
     except Exception as e:
         print(e)
     write_json(config.EnvironmentPath(), 'aljazeera', self.ResultsData)