def get_articles(self, process: bool = True) -> \
        Union[pd.DataFrame, Response]:
    """
    List the articles held in the institutional instance.

    See:
    https://docs.figshare.com/#private_institution_articles

    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested and wrapped in a DataFrame; when False the
      result of ``redata_request`` is handed back untouched.
      Default: True

    :return: DataFrame with one row per article, or the full
      ``requests.Response`` when ``process=False``
    """
    # Only the first page is requested. Figshare caps a page at 1000
    # records, so institutions with more articles still need pagination.
    query = {'page': 1, 'page_size': 1000}

    result = redata_request('GET', self.endpoint("articles"), self.headers,
                            params=query, process=process)

    return pd.DataFrame(result) if process else result
def doi_check(self, article_id: int, process: bool = True) -> \
        Union[Tuple[bool, str], Response]:
    """
    Report whether ``article_id`` already has a DOI present/reserved.

    Uses: https://docs.figshare.com/#private_article_details

    :param article_id: Figshare article ID
    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested and inspected; when False the result of
      ``redata_request`` is returned as-is. Default: True

    :return: ``(flag, doi)`` where ``flag`` is the truthiness of the
      article's ``doi`` field and ``doi`` is that field's value
      (empty string if not reserved). The full ``requests.Response``
      is returned instead when ``process=False``
    """
    details = redata_request(
        'GET',
        self.endpoint(f"articles/{article_id}", institute=False),
        self.headers,
        process=process,
    )

    if not process:
        return details

    doi = details['doi']
    # A non-empty DOI string means a DOI has already been reserved
    return bool(doi), doi
def reserve_doi(self, article_id: int) -> str:
    """
    Reserve a DOI for ``article_id`` unless one is already reserved.

    The user is prompted on standard input before any reservation is
    made; only an answer that lower-cases to ``yes`` triggers the
    request.

    See: https://docs.figshare.com/#private_article_reserve_doi

    :param article_id: Figshare article ID

    :return: The existing DOI if one is reserved, the newly minted DOI
      if the user confirms, otherwise the value returned by
      ``doi_check`` for the unreserved article
    """
    reserve_url = self.endpoint(f"articles/{article_id}/reserve_doi",
                                institute=False)

    # Nothing to do when a DOI is already attached to the article
    already_reserved, current_doi = self.doi_check(article_id)
    if already_reserved:
        self.log.info("DOI already reserved! Skipping... ")
        return current_doi

    self.log.info(
        "PROMPT: DOI reservation has not occurred! Do you wish to reserve?"
    )
    answer = input("PROMPT: Type 'Yes'/'yes'. Anything else will skip : ")
    self.log.info(f"RESPONSE: {answer}")

    # Any response other than "yes" (case-insensitive) declines
    if answer.lower() != 'yes':
        self.log.warning("Skipping... ")
        return current_doi

    self.log.info("Reserving DOI ... ")
    minted = redata_request('POST', reserve_url, self.headers)
    self.log.info(f"DOI minted : {minted['doi']}")
    return minted['doi']
def get_user_collections(self, account_id: int, process: bool = True) \
        -> Union[pd.DataFrame, Response]:
    """
    Fetch the collections of a user by impersonating ``account_id``.

    See: https://docs.figshare.com/#private_collections_list

    :param account_id: Figshare *institute* account ID
    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested and wrapped in a DataFrame; when False the
      result of ``redata_request`` is handed back untouched.
      Default: True

    :return: DataFrame of the collections owned by the user, or the
      full ``requests.Response`` when ``process=False``
    """
    # Figshare API is limited to a maximum of 1000 per page;
    # the account is passed through the ``impersonate`` query parameter
    query = {
        'page': 1,
        'page_size': 1000,
        'impersonate': account_id,
    }

    result = redata_request('GET',
                            self.endpoint("collections", institute=False),
                            self.headers, params=query, process=process)

    return pd.DataFrame(result) if process else result
def get_other_account_details(self, account_id: int) -> dict:
    """
    Fetch the full account record (ORCID and Figshare account
    information, among other metadata) for ``account_id``.

    See: https://docs.figshare.com/#private_account_institution_user

    :param account_id: Figshare *institute* account ID

    :return: Dictionary with full account details
    """
    return redata_request(
        'GET',
        self.endpoint(f"users/{account_id}", institute=True),
        self.headers,
    )
def get_curation_list(self, article_id: Optional[int] = None,
                      status: Optional[str] = "",
                      process: bool = True) \
        -> Union[pd.DataFrame, Response]:
    """
    Retrieve list of curation records for ``article_id``.
    If not specified, all curation records are retrieved.

    See: https://docs.figshare.com/#account_institution_curations

    :param article_id: Figshare article ID. ``None`` (default) applies
      no article filter
    :param status: Filter by status of review. Options are:
      ['', 'pending', 'approved', 'rejected', 'closed'].
      An empty string or ``None`` applies no status filter
    :param process: Returns JSON content from ``redata_request``,
      otherwise the full request is provided. Default: True

    :raises ValueError: If ``status`` is not one of the accepted options

    :return: Relational database of all curation records or
      the full ``requests.Response``
    """
    status_list = ['', 'pending', 'approved', 'rejected', 'closed']

    # The signature advertises Optional[str]; treat None as "no filter"
    # instead of rejecting it as an unknown status.
    if status is None:
        status = ''

    # Validate before any request is built or sent
    if status not in status_list:
        raise ValueError(
            f"Incorrect status input. Must be one of {status_list}")

    url = self.endpoint("reviews")

    # Only the first 1000 records are requested
    params = {'offset': 0, 'limit': 1000}
    if article_id is not None:
        params['article_id'] = article_id
    if status:
        params['status'] = status

    curation_list = redata_request('GET', url, self.headers,
                                   params=params, process=process)

    if process:
        return pd.DataFrame(curation_list)
    return curation_list
def get_account_group_roles(self, account_id: int, process: bool = True) \
        -> Union[dict, Response]:
    """
    Look up the group roles assigned to ``account_id``.

    See:
    https://docs.figshare.com/#private_institution_account_group_roles

    :param account_id: Figshare *institute* account ID
    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested; otherwise the full request is provided.
      Default: True

    :return: Python dictionary of all group roles for the user, or the
      full ``requests.Response`` when ``process=False``
    """
    # The result of ``redata_request`` is returned without post-processing
    return redata_request('GET', self.endpoint(f"roles/{account_id}"),
                          self.headers, process=process)
def get_account_list(self, process: bool = True) -> \
        Union[pd.DataFrame, Response]:
    """
    Return pandas DataFrame of user accounts.

    When ``self.ignore_admin`` is set, rows whose ``email`` matches any
    pattern in ``self.admin_filter`` are removed and the index is reset.

    See: https://docs.figshare.com/#private_institution_accounts_list

    :param process: Returns JSON content from ``redata_request``,
      otherwise the full request is provided. Default: True

    :return: Relational database of all user accounts for an institution
      (without the ``institution_id`` column) or the full
      ``requests.Response``
    """
    url = self.endpoint("accounts")

    # Figshare API is limited to a maximum of 1000 per page
    params = {'page': 1, 'page_size': 1000}
    accounts = redata_request('GET', url, self.headers,
                              params=params, process=process)

    if not process:
        return accounts

    accounts_df = pd.DataFrame(accounts)
    # errors='ignore': an empty response yields a frame with no columns,
    # which would otherwise raise KeyError here
    accounts_df = accounts_df.drop(columns='institution_id',
                                   errors='ignore')

    if self.ignore_admin:
        self.log.info("Excluding administrative and test accounts")

        # Skip filtering when there is no email column to match against
        if 'email' in accounts_df.columns:
            is_admin = pd.Series(False, index=accounts_df.index,
                                 dtype=bool)
            for pattern in self.admin_filter:
                # na=False: a missing email never matches, and keeps the
                # mask strictly boolean so it can be used for indexing
                is_admin |= accounts_df['email'].str.contains(pattern,
                                                              na=False)

            if is_admin.any():
                accounts_df = accounts_df[~is_admin].reset_index(drop=True)

    return accounts_df
def get_curation_comments(self, curation_id: int, process: bool = True) \
        -> Union[dict, Response]:
    """
    Fetch the comments attached to the curation ``curation_id``.

    See:
    https://docs.figshare.com/#account_institution_curation_comments

    :param curation_id: Figshare curation ID
    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested; otherwise the full request is provided.
      Default: True

    :return: Python dictionary with curation comments, or the full
      ``requests.Response`` when ``process=False``
    """
    # The result of ``redata_request`` is returned without post-processing
    return redata_request('GET',
                          self.endpoint(f"review/{curation_id}/comments"),
                          self.headers, process=process)
def get_groups(self, process: bool = True) -> \
        Union[pd.DataFrame, Response]:
    """
    List the groups defined within the institutional instance.

    See: https://docs.figshare.com/#private_institution_groups_list

    :param process: Forwarded to ``redata_request``. When True the JSON
      content is requested and wrapped in a DataFrame; when False the
      result of ``redata_request`` is handed back untouched.
      Default: True

    :return: DataFrame of all Figshare groups for the institution, or
      the full ``requests.Response`` when ``process=False``
    """
    result = redata_request('GET', self.endpoint("groups"), self.headers,
                            process=process)

    return pd.DataFrame(result) if process else result