Example #1
0
 def read_keywords(self):
     """ Load the keywords table into self.keywords.
     The table is read as JSON from self.keywords_path.
     result:
         self.keywords: DataFrame parsed by pd.read_json,
         or an empty DataFrame (after logging a warning) when
         self.keywords_path does not exist.
         The method itself returns None."""
     path = self.keywords_path
     # Happy path first: the file is there, parse it and stop.
     if os.path.exists(path):
         self.keywords = pd.read_json(path)
         return
     # Missing file: warn and fall back to an empty table.
     logger.warning(f'Keywords path does not exist.')
     self.keywords = pd.DataFrame()
Example #2
0
 def get_titles_by_keyword(self, keyword):
     """ Look up the titles tagged with [keyword].
     [keyword] is matched against the columns of self.keywords;
     a title is selected when its cell in that column equals 1.
     output:
         A list of index labels (titles) of [keyword],
         or an empty list (after logging a warning) if [keyword]
         is not a column of self.keywords. """
     table = self.keywords
     if keyword in table:
         # Keep only the rows flagged with 1 in this keyword's column
         column = table[keyword]
         flagged = column[column == 1]
         return list(flagged.index)
     # Unknown keyword: nothing to report
     logger.warning(f'Keyword {keyword} not found.')
     return []
Example #3
0
 def read_descriptions(self):
     """ Load the descriptions table into self.descriptions.
     The table is read with pd.read_excel from self.descriptions_path
     and then re-indexed by its 'Unnamed: 0' column
     (presumably the unnamed first column holding the titles).
     result:
         self.descriptions: the re-indexed DataFrame,
         or an empty DataFrame (after logging a warning) when
         self.descriptions_path does not exist.
         The method itself returns None."""
     path = self.descriptions_path
     if os.path.exists(path):
         # Store the raw sheet first, then swap in the re-indexed version
         self.descriptions = pd.read_excel(path)
         reindexed = self.descriptions.set_index('Unnamed: 0', drop=True)
         self.descriptions = reindexed
         return
     # Missing file: warn and fall back to an empty table.
     logger.warning(f'descriptions path does not exist.')
     self.descriptions = pd.DataFrame()
Example #4
0
 def get_keywords_by_title(self, title):
     """ Get keywords by [title],
     [title] is matched against the index of self.keywords;
     a keyword is selected when its cell in that row equals 1.
     outputs:
         keywords: A list of column labels (keywords) of [title],
         or an empty list (after logging a warning) if [title]
         is not in the index of self.keywords. """
     # Return empty list if [title] not found
     if title not in self.keywords.index:
         logger.warning(f'Title {title} not found.')
         return []
     # Select the row directly by label; this avoids transposing
     # (and copying) the whole table on every lookup.
     allkeywords = self.keywords.loc[title]
     return list(allkeywords[allkeywords == 1].index)
Example #5
0
 def papers_get_by_title(self, title, fields=None):
     """
     Get paper and its contents by [title].
     inputs:
         title: Title passed to self.papers_server.get_by_title.
         fields: Fields to request; defaults to
                 ['keywords', 'descriptions'] when None.
     outputs:
         Whatever self.papers_server.get_by_title returns
         (per the original note: a dict of bits, keywords and
         descriptions in the format of json).
         None if failed: an AssertionError from the server is logged
         as a warning (title does not exist), any other exception is
         logged as an error.
     """
     # Build the default per call so that no list object is shared
     # between calls (a mutable default would leak mutations).
     if fields is None:
         fields = ['keywords', 'descriptions']
     try:
         return self.papers_server.get_by_title(title, fields=fields)
     except AssertionError:
         logger.warning(
             f'WORKER papers_get_by_title cannot get not existing title: {title}.'
         )
         return None
     except Exception as e:
         logger.error(f'WORKER papers_get_by_title failed: {e}')
         return None