def find_qualtrics_readme(self, dn: DepositorName):
    """Find the Qualtrics README response that belongs to a deposit.

    The README survey responses are filtered to rows whose ``article_id``
    or ``curation_id`` column equals the matching value from
    ``dn.name_dict`` (both values are compared as strings).

    :param dn: Depositor record; ``dn.name_dict`` must provide the
        ``article_id`` and ``curation_id`` keys.
    :return: Tuple ``(ResponseId, response_df)`` where ``response_df`` is
        the single matching row.
    :raises ValueError: If no row matches, or if more than one row matches.
    """
    dn_dict = dn.name_dict
    qualtrics_df = self.get_survey_responses(self.readme_survey_id)

    # First perform search via article_id or curation_id
    self.log.info("Attempting to identify using article_id or curation_id ...")
    article_id = str(dn_dict['article_id'])
    curation_id = str(dn_dict['curation_id'])

    try:
        response_df = qualtrics_df[(qualtrics_df['article_id'] == article_id) |
                                   (qualtrics_df['curation_id'] == curation_id)]
    except KeyError:
        # One of the ID columns is missing from the survey table; treat
        # this the same as "nothing matched".
        self.log.warning("article_id and curation_id not in qualtrics survey !")
        response_df = pd.DataFrame()

    if response_df.empty:
        self.log.warning("Empty DataFrame")
        raise ValueError(
            "No Qualtrics README response found for "
            f"article_id={article_id} or curation_id={curation_id}")

    self.log.info("Unique match based on article_id or curation_id !")
    self.pandas_write_buffer(response_df[readme_cols_order])

    if response_df.shape[0] != 1:
        self.log.warning("Multiple entries found")
        raise ValueError(
            f"{response_df.shape[0]} Qualtrics README responses found for "
            f"article_id={article_id} or curation_id={curation_id}")

    response_dict = df_to_dict_single(response_df)
    self.log.info("Only one entry found!")
    self.log.info(f"Survey completed on {response_dict['date_completed']}")
    self.log.info(f" ... for {response_dict['article_id']}")
    return response_dict['ResponseId'], response_df
def find_deposit_agreement(self, dn_dict):
    """Get the ResponseId of the deposit agreement matching a depositor.

    Search order:

    1. Rows whose ``article_id`` or ``curation_id`` column equals the
       value in ``dn_dict`` (compared as strings).
    2. If that finds nothing, rows where ``Q4_1`` equals the depositor's
       ``fullName`` or ``simplify_fullName``, or ``Q4_2`` equals
       ``depositor_email``.  When ``dn_dict['self_deposit']`` is false the
       result is narrowed to rows whose ``Q6_1`` equals the first entry of
       ``dn_dict['authors']``, provided that narrowing leaves any rows.

    Progress and candidate tables are written with ``print``.

    :param dn_dict: Depositor dictionary with the keys ``article_id``,
        ``curation_id``, ``fullName``, ``simplify_fullName``,
        ``depositor_email``, ``self_deposit`` and ``authors``.
    :return: The ``ResponseId`` of the single matching response.
    :raises ValueError: If no response matches, or if more than one does.
    """
    qualtrics_df = self.get_survey_responses()

    # First perform search via article_id or curation_id
    print("Attempting to identify using article_id or curation_id")
    article_id = str(dn_dict['article_id'])
    curation_id = str(dn_dict['curation_id'])

    try:
        response_df = qualtrics_df[
            (qualtrics_df['article_id'] == article_id) |
            (qualtrics_df['curation_id'] == curation_id)]
    except KeyError:
        # ID columns are absent from the survey table; fall through to the
        # name-based search below.
        print("article_id and curation_id not in qualtrics survey")
        response_df = pd.DataFrame()

    if not response_df.empty:
        print("Unique match based on article_id or curation_id !")
        if response_df.shape[0] != 1:
            print("More than one entries found !!!")
        print(response_df[cols_order].to_markdown())
    else:
        print("Unable to identify based article_id or curation_id.")
        print("Attempting to identify with name")

        response_df = qualtrics_df[
            (qualtrics_df['Q4_1'] == dn_dict['fullName']) |
            (qualtrics_df['Q4_1'] == dn_dict['simplify_fullName']) |
            (qualtrics_df['Q4_2'] == dn_dict['depositor_email'])]

        # Identify corresponding author cases if different from depositor name
        if not dn_dict['self_deposit'] and not response_df.empty:
            print(
                "Not self-deposit. Identifying based on corresponding author as well"
            )
            df_select = response_df[
                response_df['Q6_1'] == dn_dict['authors'][0]]
            if df_select.empty:
                # Keep the depositor-based candidates rather than discarding
                # them when the author filter matches nothing.
                print("Unable to identify based on corresponding author")
                print("Listing all deposit agreements based on Depositor")
                print(response_df[cols_order].to_markdown())
            else:
                response_df = df_select

    if response_df.empty:
        print("Empty DataFrame")
        raise ValueError(
            "No deposit agreement found for article_id={}, "
            "curation_id={}".format(article_id, curation_id))

    if response_df.shape[0] != 1:
        print("Multiple entries found")
        print(response_df[cols_order].to_markdown())
        raise ValueError(
            "{} deposit agreements found for article_id={}, "
            "curation_id={}".format(response_df.shape[0], article_id,
                                    curation_id))

    response_dict = df_to_dict_single(response_df)
    print("Only one entry found!")
    print("Survey completed on {} for {}".format(
        response_dict['Date Completed'], response_dict['Q7']))
    return response_dict['ResponseId']
def get_curation_id(self):
    """Return the ``id`` field of the curation record for this article.

    The curation list for ``self.article_id`` is requested through
    ``self.fs_admin`` and collapsed to one dict by ``df_to_dict_single``.
    """
    # Basic curation information for the article (this includes all curation)
    curation_listing = self.fs_admin.get_curation_list(
        article_id=self.article_id)

    # NOTE(review): the original author's comment says the most recent
    # curation is the one picked by default; that selection happens outside
    # this method, so confirm against df_to_dict_single / get_curation_list.
    return df_to_dict_single(curation_listing)['id']
def get_name_dict(self):
    """Build the dictionary describing the depositor and the deposit.

    The depositor's account row is looked up by
    ``self.curation_dict['account_id']`` in the account list returned by
    ``self.fs_admin``.  The result combines full and "simplified" (first
    space-separated token only) name forms, the item's author names, a
    ``self_deposit`` flag, and the article/curation IDs, e-mail and title.

    :return: dict with the keys ``surName``, ``firstName``,
        ``simplify_firstName``, ``simplify_surName``, ``fullName``,
        ``simplify_fullName``, ``authors``, ``self_deposit``,
        ``article_id``, ``curation_id``, ``depositor_email``, ``title``.
    """
    if self.verbose:
        print(f"Retrieving depositor_name for {self.article_id} ... ")

    depositor_account_id = self.curation_dict['account_id']
    accounts = self.fs_admin.get_account_list()
    account = df_to_dict_single(
        accounts.loc[accounts['id'] == depositor_account_id])

    last = account['last_name']    # full last name
    first = account['first_name']  # full first name

    # "Simplified" forms keep only the text before the first space
    first_token = first.split(' ')[0]
    last_token = last.split(' ')[0]

    full = f"{first} {last}"
    short_full = f"{first_token} {last_token}"

    author_names = [
        entry['full_name'] for entry in self.curation_dict['item']['authors']
    ]

    return {
        'surName': last,
        'firstName': first,
        'simplify_firstName': first_token,
        'simplify_surName': last_token,
        'fullName': full,
        'simplify_fullName': short_full,
        'authors': author_names,
        # Self-deposit when the depositor appears among the item's authors
        # under either the full or the simplified name
        'self_deposit': full in author_names or short_full in author_names,
        # Additional information about the deposit: article and curation
        # IDs, email, and title
        'article_id': self.article_id,
        'curation_id': self.curation_id,
        'depositor_email': account['email'],
        'title': self.curation_dict['item']['title'],
    }
def retrieve_qualtrics_readme(self, dn=None, ResponseId='', browser=True,
                              save_metadata: bool = False):
    """Retrieve the response to the Qualtrics README form as a dict.

    Steps:

    1. Obtain the README response, either directly from ``ResponseId`` or
       by searching with ``find_qualtrics_readme(dn)``.  If the search
       raises ``ValueError`` the user is prompted for a ResponseId when
       ``self.interactive`` is true; with no ResponseId a README URL is
       generated and logged, and an empty response is used.
    2. Convert the response to a dict restricted to
       ``readme_custom_content`` (every field set to ``'nan'`` plus an
       empty ``references`` list when the response is empty).
    3. Append the corresponding-author fields (``Q6_1``, ``Q6_2``,
       ``Q6_3``) from the Deposit Agreement response, locating that
       response first if ``self.da_response_id`` is not set.

    :param dn: Depositor record handed to ``find_qualtrics_readme``,
        ``generate_readme_url``, ``find_deposit_agreement`` and
        ``save_metadata``; ``dn.name_dict`` is read when
        ``save_metadata`` is true.
    :param ResponseId: README response ID; when empty a search is done.
    :param browser: Not used by this method; kept so existing callers
        that pass it keep working.
    :param save_metadata: When true, write the resulting dict through
        ``self.save_metadata``.
    :return: dict of README content plus ``corr_author_fullname``,
        ``corr_author_email`` and ``corr_author_affil``.
    :raises ValueError: Propagated from ``find_deposit_agreement`` when
        the Deposit Agreement cannot be identified.
    """
    if ResponseId:
        response_df = self.get_survey_response(self.readme_survey_id,
                                               ResponseId)
    else:
        try:
            ResponseId, response_df = self.find_qualtrics_readme(dn)
            self.log.info(f"Qualtrics README ResponseID : {ResponseId}")
        except ValueError:
            self.log.warning("Error with retrieving ResponseId")
            self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
            if self.interactive:
                ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
                self.log.info(f"RESPONSE: {ResponseId}")
            else:
                self.log.info("Interactive mode disabled. Skipping manual input")
                ResponseId = ''

            if ResponseId:
                response_df = self.get_survey_response(self.readme_survey_id,
                                                       ResponseId)
            else:
                response_df = pd.DataFrame()
                readme_url = self.generate_readme_url(dn)
                self.log.info(f"README URL: {readme_url}")

    if response_df.empty:
        self.log.warning("Empty DataFrame")
        self.log.info("Filling with empty content")
        qualtrics_dict = dict.fromkeys(readme_custom_content, 'nan')
        qualtrics_dict['references'] = []
    else:
        qualtrics_dict = df_to_dict_single(response_df[readme_custom_content])

        # Float values are stringified so that every field can be compared
        # against the 'nan' placeholder string below.
        for key, value in qualtrics_dict.items():
            if isinstance(value, float):
                qualtrics_dict[key] = str(value)

        # Separate cite, contrib for list style
        for field in ['cite', 'contrib']:
            if qualtrics_dict[field] != 'nan':
                qualtrics_dict[field] = qualtrics_dict[field].split('\n')

        # Markdown files, materials
        for field in ['files', 'materials']:
            value = qualtrics_dict[field]
            # startswith() instead of value[0]: an empty string must not
            # raise IndexError
            if value != 'nan' and value.startswith("'"):
                qualtrics_dict[field] = value[1:]
                self.log.debug(f"Removing extra single quote in {field} entry")

    # Retrieve corresponding author info and append
    self.log.info("Appending Deposit Agreement's Corresponding Author metadata")
    if not self.da_response_id:
        self.log.info("NO METADATA - Retrieving Deposit Agreement metadata")
        self.find_deposit_agreement(dn)
    else:
        self.log.info(f"Parsed ResponseId : {self.da_response_id}")
        self.log.info(f"Parsed SurveyID : {self.da_survey_id}")

    DA_response_df = self.get_survey_response(self.da_survey_id,
                                              self.da_response_id)
    DA_dict = df_to_dict_single(DA_response_df)
    qualtrics_dict['corr_author_fullname'] = DA_dict['Q6_1']
    qualtrics_dict['corr_author_email'] = DA_dict['Q6_2']
    qualtrics_dict['corr_author_affil'] = DA_dict['Q6_3']

    # Save Qualtrics README metadata
    if save_metadata:
        out_file_prefix = "qualtrics_readme_original_" + \
            f"{dn.name_dict['article_id']}"
        self.save_metadata(qualtrics_dict, dn,
                           out_file_prefix=out_file_prefix)

    return qualtrics_dict
def find_deposit_agreement(self, dn: DepositorName):
    """Get the Deposit Agreement ResponseId and SurveyID for a depositor.

    Search order over the table returned by ``self.merge_survey()``:

    1. Rows whose ``article_id`` or ``curation_id`` column equals the
       value in ``dn.name_dict`` (compared as strings).
    2. If that finds nothing, rows where ``Q4_1`` equals the depositor's
       ``fullName`` or ``simplify_fullName``, or ``Q4_2`` equals
       ``depositor_email``.  When ``self_deposit`` is false the result is
       narrowed to rows whose ``Q6_1`` equals the first listed author,
       provided that narrowing leaves any rows.

    On a single match the response is saved through ``self.save_metadata``
    and ``self.da_response_id`` / ``self.da_survey_id`` are set.

    :param dn: Depositor record; ``dn.name_dict`` must provide
        ``article_id``, ``curation_id``, ``fullName``,
        ``simplify_fullName``, ``depositor_email``, ``self_deposit`` and
        ``authors``.
    :return: Tuple ``(ResponseId, SurveyID)`` of the matching response.
    :raises ValueError: If no response matches, or if more than one does.
    """
    merged_df = self.merge_survey()
    dn_dict = dn.name_dict

    # First perform search via article_id or curation_id
    self.log.info("Attempting to identify using article_id or curation_id ...")
    article_id = str(dn_dict['article_id'])
    curation_id = str(dn_dict['curation_id'])

    try:
        response_df = merged_df[(merged_df['article_id'] == article_id) |
                                (merged_df['curation_id'] == curation_id)]
    except KeyError:
        # ID columns are absent from the merged table; fall through to the
        # name-based search below.
        self.log.warning("article_id and curation_id not in qualtrics survey !")
        response_df = pd.DataFrame()

    if not response_df.empty:
        self.log.info("Unique match based on article_id or curation_id !")
        if response_df.shape[0] != 1:
            self.log.warning("More than one entries found !!!")
    else:
        self.log.info("Unable to identify based on article_id or curation_id ...")
        self.log.info("Attempting to identify with name ...")

        response_df = merged_df[(merged_df['Q4_1'] == dn_dict['fullName']) |
                                (merged_df['Q4_1'] == dn_dict['simplify_fullName']) |
                                (merged_df['Q4_2'] == dn_dict['depositor_email'])]

        # Identify corresponding author cases if different from depositor name
        if not dn_dict['self_deposit'] and not response_df.empty:
            self.log.info("Not self-deposit. Identifying based on corresponding author as well ...")
            df_select = response_df[response_df['Q6_1'] == dn_dict['authors'][0]]
            if df_select.empty:
                # Keep the depositor-based candidates rather than
                # discarding them when the author filter matches nothing.
                self.log.warning("Unable to identify based on corresponding author")
                self.log.info("Listing all deposit agreements based on Depositor")
                self.pandas_write_buffer(response_df[cols_order])
            else:
                response_df = df_select

    if response_df.empty:
        self.log.warning("Empty DataFrame")
        raise ValueError(
            "No deposit agreement found for "
            f"article_id={article_id}, curation_id={curation_id}")

    if response_df.shape[0] != 1:
        self.log.warning("Multiple entries found")
        self.pandas_write_buffer(response_df[cols_order])
        raise ValueError(
            f"{response_df.shape[0]} deposit agreements found for "
            f"article_id={article_id}, curation_id={curation_id}")

    response_dict = df_to_dict_single(response_df)
    self.save_metadata(response_dict, dn,
                       out_file_prefix=f'deposit_agreement_original_{article_id}')
    self.pandas_write_buffer(response_df[cols_order])
    self.log.info("Only one entry found!")
    self.log.info(f"Survey completed on {response_dict['Date Completed']}")
    self.log.info(f" ... for {response_dict['Q7']}")
    survey_shortname = \
        self.lookup_survey_shortname(response_dict['SurveyID'])
    self.log.info(f"Survey name: {survey_shortname}")

    # Remember the match on the instance for later steps that need the
    # Deposit Agreement response
    self.da_response_id = response_dict['ResponseId']
    self.da_survey_id = response_dict['SurveyID']
    return response_dict['ResponseId'], response_dict['SurveyID']