Python df_to_dict_single 예제들, ldcoolp.curation.df_to_dict_single Python 예제들

예제 #1

0

파일 보기

    def find_qualtrics_readme(self, dn: DepositorName):
        """
        Get the Qualtrics README ResponseId via an article_id/curation_id search

        The README survey responses are filtered for rows whose ``article_id``
        or ``curation_id`` column equals the stringified identifiers taken
        from ``dn.name_dict``.

        :param dn: Depositor information. ``dn.name_dict`` must provide the
          ``article_id`` and ``curation_id`` keys
        :return: Tuple of the matching ``ResponseId`` and the single-row
          DataFrame that it was read from
        :raises ValueError: If no response, or more than one response, matches
        """

        dn_dict = dn.name_dict
        qualtrics_df = self.get_survey_responses(self.readme_survey_id)

        # First perform search via article_id or curation_id.
        # Identifiers are compared as strings.
        # NOTE(review): assumes the survey columns hold strings - confirm
        self.log.info("Attempting to identify using article_id or curation_id ...")
        article_id = str(dn_dict['article_id'])
        curation_id = str(dn_dict['curation_id'])

        try:
            response_df = qualtrics_df[(qualtrics_df['article_id'] == article_id) |
                                       (qualtrics_df['curation_id'] == curation_id)]
        except KeyError:
            # Survey export lacks the ID columns; treat it as "no match"
            self.log.warning("article_id and curation_id not in qualtrics survey !")
            response_df = pd.DataFrame()

        if response_df.empty:
            self.log.warning("Empty DataFrame")
            raise ValueError(
                "No Qualtrics README response found for "
                f"article_id={article_id} or curation_id={curation_id}")

        # Only report a unique match when exactly one row was selected
        is_unique = response_df.shape[0] == 1
        if is_unique:
            self.log.info("Unique match based on article_id or curation_id !")

        # The matches are written out in both the unique and ambiguous case
        self.pandas_write_buffer(response_df[readme_cols_order])

        if not is_unique:
            self.log.warning("Multiple entries found")
            raise ValueError(
                f"{response_df.shape[0]} Qualtrics README responses found for "
                f"article_id={article_id} or curation_id={curation_id}")

        response_dict = df_to_dict_single(response_df)
        self.log.info("Only one entry found!")
        self.log.info(f"Survey completed on {response_dict['date_completed']}")
        self.log.info(f" ... for {response_dict['article_id']}")
        return response_dict['ResponseId'], response_df

예제 #2

0

파일 보기

    def find_deposit_agreement(self, dn_dict):
        """
        Get the Deposit Agreement ResponseId for a depositor

        The survey responses are searched first by ``article_id`` or
        ``curation_id``. If that gives nothing, the search falls back to the
        depositor's name (``Q4_1``) or email (``Q4_2``). For deposits that
        are not self-deposits, the name/email matches are narrowed to those
        whose ``Q6_1`` entry equals the first listed author, when such a
        match exists.

        :param dn_dict: Depositor metadata with the keys ``article_id``,
          ``curation_id``, ``fullName``, ``simplify_fullName``,
          ``depositor_email``, ``self_deposit`` and ``authors``
        :return: ``ResponseId`` of the single matching survey response
        :raises ValueError: If no response, or more than one response, matches
        """

        qualtrics_df = self.get_survey_responses()

        # First perform search via article_id or curation_id
        print("Attempting to identify using article_id or curation_id")
        article_id = str(dn_dict['article_id'])
        curation_id = str(dn_dict['curation_id'])

        try:
            response_df = qualtrics_df[
                (qualtrics_df['article_id'] == article_id) |
                (qualtrics_df['curation_id'] == curation_id)]
        except KeyError:
            # Survey export lacks the ID columns; treat it as "no match"
            print("article_id and curation_id not in qualtrics survey")
            response_df = pd.DataFrame()

        if not response_df.empty:
            # Only claim a unique match when exactly one row was selected
            if response_df.shape[0] == 1:
                print("Unique match based on article_id or curation_id !")
            else:
                print("More than one entries found !!!")
            print(response_df[cols_order].to_markdown())
        else:
            print("Unable to identify based article_id or curation_id.")
            print("Attempting to identify with name")

            response_df = qualtrics_df[
                (qualtrics_df['Q4_1'] == dn_dict['fullName']) |
                (qualtrics_df['Q4_1'] == dn_dict['simplify_fullName']) |
                (qualtrics_df['Q4_2'] == dn_dict['depositor_email'])]

            # Identify corresponding author cases if different from depositor name
            if not dn_dict['self_deposit'] and not response_df.empty:
                print(
                    "Not self-deposit.  Identifying based on corresponding author as well"
                )
                authors = dn_dict['authors']
                if authors:
                    df_select = response_df[response_df['Q6_1'] == authors[0]]
                else:
                    # No author to compare against: nothing can be narrowed,
                    # so use an empty selection instead of failing on [0]
                    df_select = response_df.iloc[0:0]

                if df_select.empty:
                    print("Unable to identify based on corresponding author")
                    print("Listing all deposit agreements based on Depositor")
                    print(response_df[cols_order].to_markdown())
                else:
                    response_df = df_select

        if response_df.empty:
            print("Empty DataFrame")
            raise ValueError(
                "No deposit agreement found for "
                f"article_id={article_id} or curation_id={curation_id}")

        if response_df.shape[0] != 1:
            print("Multiple entries found")
            print(response_df[cols_order].to_markdown())
            raise ValueError(
                f"{response_df.shape[0]} deposit agreements found for "
                f"article_id={article_id} or curation_id={curation_id}")

        response_dict = df_to_dict_single(response_df)
        print("Only one entry found!")
        print("Survey completed on {} for {}".format(
            response_dict['Date Completed'], response_dict['Q7']))
        return response_dict['ResponseId']

예제 #3

0

파일 보기

    def get_curation_id(self):
        """
        Look up the curation ID associated with this object's article

        :return: The ``id`` entry of the record that ``df_to_dict_single``
          produces from the article's curation list
        """
        # Basic curation information for the article (includes all curation)
        curation_records = self.fs_admin.get_curation_list(
            article_id=self.article_id)

        # Per the original author's note, this selects the most recent
        # record by default - NOTE(review): depends on df_to_dict_single
        return df_to_dict_single(curation_records)['id']

예제 #4

0

파일 보기

    def get_name_dict(self):
        """
        Assemble the depositor's name information and deposit metadata

        The depositor is the account whose ``id`` equals the ``account_id``
        of ``self.curation_dict``. "Simplified" names keep only the text
        before the first space of the first and last name.

        :return: Dictionary with the keys ``surName``, ``firstName``,
          ``simplify_firstName``, ``simplify_surName``, ``fullName``,
          ``simplify_fullName``, ``authors``, ``self_deposit``,
          ``article_id``, ``curation_id``, ``depositor_email`` and ``title``
        """
        if self.verbose:
            print("Retrieving depositor_name for {} ... ".format(
                self.article_id))

        # Locate the depositor's account record
        depositor_id = self.curation_dict['account_id']
        accounts = self.fs_admin.get_account_list()
        account = df_to_dict_single(accounts.loc[accounts['id'] == depositor_id])

        last = account['last_name']  # full last name
        first = account['first_name']  # full first name

        # Keep only the leading word of each name part
        short_first = first.partition(' ')[0]
        short_last = last.partition(' ')[0]

        full = f"{first} {last}"
        short_full = f"{short_first} {short_last}"

        author_names = [
            entry['full_name']
            for entry in self.curation_dict['item']['authors']
        ]

        return {
            'surName': last,
            'firstName': first,
            'simplify_firstName': short_first,
            'simplify_surName': short_last,
            'fullName': full,
            'simplify_fullName': short_full,
            'authors': author_names,
            # Self-deposit when the depositor appears in the author list
            'self_deposit': full in author_names or short_full in author_names,
            # Additional information about the deposit: article and
            # curation IDs, email, and title
            'article_id': self.article_id,
            'curation_id': self.curation_id,
            'depositor_email': account['email'],
            'title': self.curation_dict['item']['title'],
        }

예제 #5

0

파일 보기

    def retrieve_qualtrics_readme(self, dn=None, ResponseId='', browser=True,
                                  save_metadata: bool = False):
        """
        Retrieve response to Qualtrics README form

        When ``ResponseId`` is not given, the response is searched for with
        ``find_qualtrics_readme``. If that search fails, the user is prompted
        for a ResponseId (interactive mode only). Without any ResponseId a
        custom README URL is logged and placeholder content is returned.
        Corresponding author metadata from the Deposit Agreement response is
        appended in every case.

        :param dn: Depositor information. Used for the search, the README
          URL, the Deposit Agreement lookup, and when saving metadata
        :param ResponseId: Qualtrics ResponseId of the README response.
          An empty string triggers the search
        :param browser: Not used within this method
        :param save_metadata: Save the resulting dictionary via
          ``self.save_metadata`` when True
        :return: Dictionary of README content with the additional keys
          ``corr_author_fullname``, ``corr_author_email`` and
          ``corr_author_affil``
        """

        if ResponseId:
            response_df = self.get_survey_response(self.readme_survey_id, ResponseId)
        else:
            try:
                ResponseId, response_df = self.find_qualtrics_readme(dn)
                self.log.info(f"Qualtrics README ResponseID : {ResponseId}")
            except ValueError:
                self.log.warning("Error with retrieving ResponseId")
                self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
                if self.interactive:
                    ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
                    self.log.info(f"RESPONSE: {ResponseId}")
                else:
                    self.log.info("Interactive mode disabled. Skipping manual input")
                    ResponseId = ''

                if ResponseId:
                    response_df = self.get_survey_response(self.readme_survey_id, ResponseId)
                else:
                    # Nothing to retrieve: provide a link to the README form
                    response_df = pd.DataFrame()
                    readme_url = self.generate_readme_url(dn)
                    self.log.info(f"README URL: {readme_url}")

        if response_df.empty:
            self.log.warning("Empty DataFrame")
            self.log.info("Filling with empty content")
            # 'nan' is the placeholder used for missing content below
            qualtrics_dict = {field: 'nan' for field in readme_custom_content}
            qualtrics_dict['references'] = []
        else:
            qualtrics_dict = df_to_dict_single(response_df[readme_custom_content])

            # Float entries are stringified so that every field can be
            # compared against the 'nan' placeholder.
            # Only existing keys are reassigned, so iterating is safe.
            for key, value in qualtrics_dict.items():
                if isinstance(value, float):
                    qualtrics_dict[key] = str(value)

            # Separate cite, contrib for list style
            for field in ['cite', 'contrib']:
                if qualtrics_dict[field] != 'nan':
                    qualtrics_dict[field] = qualtrics_dict[field].split('\n')

            # Markdown files, materials
            for field in ['files', 'materials']:
                value = qualtrics_dict[field]
                # startswith() (unlike value[0]) is safe for an empty string;
                # the 'nan' placeholder never starts with a quote
                if isinstance(value, str) and value.startswith("'"):
                    qualtrics_dict[field] = value[1:]
                    self.log.debug(f"Removing extra single quote in {field} entry")

        # Retrieve corresponding author info and append
        self.log.info("Appending Deposit Agreement's Corresponding Author metadata")
        if not self.da_response_id:
            self.log.info("NO METADATA - Retrieving Deposit Agreement metadata")
            # NOTE(review): the return value is discarded; this presumably
            # relies on the call setting self.da_response_id and
            # self.da_survey_id - confirm
            self.find_deposit_agreement(dn)
        else:
            self.log.info(f"Parsed ResponseId : {self.da_response_id}")
            self.log.info(f"Parsed SurveyID : {self.da_survey_id}")

        DA_response_df = self.get_survey_response(self.da_survey_id, self.da_response_id)
        DA_dict = df_to_dict_single(DA_response_df)
        qualtrics_dict['corr_author_fullname'] = DA_dict['Q6_1']
        qualtrics_dict['corr_author_email'] = DA_dict['Q6_2']
        qualtrics_dict['corr_author_affil'] = DA_dict['Q6_3']

        # Save Qualtrics README metadata
        if save_metadata:
            out_file_prefix = "qualtrics_readme_original_" + \
                              f"{dn.name_dict['article_id']}"
            self.save_metadata(qualtrics_dict, dn,
                               out_file_prefix=out_file_prefix)

        return qualtrics_dict

예제 #6

0

파일 보기

    def find_deposit_agreement(self, dn: DepositorName):
        """
        Get the Deposit Agreement ResponseId and SurveyID for a depositor

        The merged survey responses are searched first by ``article_id`` or
        ``curation_id``. If that gives nothing, the search falls back to the
        depositor's name (``Q4_1``) or email (``Q4_2``). For deposits that
        are not self-deposits, the name/email matches are narrowed to those
        whose ``Q6_1`` entry equals the first listed author, when such a
        match exists.

        On a unique match the response metadata is saved, and
        ``self.da_response_id`` and ``self.da_survey_id`` are set.

        :param dn: Depositor information. ``dn.name_dict`` must provide the
          keys ``article_id``, ``curation_id``, ``fullName``,
          ``simplify_fullName``, ``depositor_email``, ``self_deposit`` and
          ``authors``
        :return: Tuple of ``ResponseId`` and ``SurveyID`` of the match
        :raises ValueError: If no response, or more than one response, matches
        """

        merged_df = self.merge_survey()

        dn_dict = dn.name_dict

        # First perform search via article_id or curation_id
        self.log.info("Attempting to identify using article_id or curation_id ...")
        article_id = str(dn_dict['article_id'])
        curation_id = str(dn_dict['curation_id'])

        try:
            response_df = merged_df[(merged_df['article_id'] == article_id) |
                                    (merged_df['curation_id'] == curation_id)]
        except KeyError:
            # Survey export lacks the ID columns; treat it as "no match"
            self.log.warning("article_id and curation_id not in qualtrics survey !")
            response_df = pd.DataFrame()

        if not response_df.empty:
            # Only report a unique match when exactly one row was selected
            if response_df.shape[0] == 1:
                self.log.info("Unique match based on article_id or curation_id !")
            else:
                self.log.warning("More than one entries found !!!")
        else:
            self.log.info("Unable to identify based on article_id or curation_id ...")
            self.log.info("Attempting to identify with name ...")

            response_df = merged_df[(merged_df['Q4_1'] == dn_dict['fullName']) |
                                    (merged_df['Q4_1'] == dn_dict['simplify_fullName']) |
                                    (merged_df['Q4_2'] == dn_dict['depositor_email'])]

            # Identify corresponding author cases if different from depositor name
            if not dn_dict['self_deposit'] and not response_df.empty:
                self.log.info("Not self-deposit. Identifying based on corresponding author as well ...")
                authors = dn_dict['authors']
                if authors:
                    df_select = response_df[response_df['Q6_1'] == authors[0]]
                else:
                    # No author to compare against: nothing can be narrowed,
                    # so use an empty selection instead of failing on [0]
                    df_select = response_df.iloc[0:0]

                if df_select.empty:
                    self.log.warning("Unable to identify based on corresponding author")
                    self.log.info("Listing all deposit agreements based on Depositor")

                    self.pandas_write_buffer(response_df[cols_order])
                else:
                    response_df = df_select

        if response_df.empty:
            self.log.warning("Empty DataFrame")
            raise ValueError(
                "No deposit agreement found for "
                f"article_id={article_id} or curation_id={curation_id}")

        if response_df.shape[0] != 1:
            self.log.warning("Multiple entries found")

            self.pandas_write_buffer(response_df[cols_order])

            raise ValueError(
                f"{response_df.shape[0]} deposit agreements found for "
                f"article_id={article_id} or curation_id={curation_id}")

        response_dict = df_to_dict_single(response_df)
        self.save_metadata(response_dict, dn, out_file_prefix=
                           f'deposit_agreement_original_{article_id}')
        self.pandas_write_buffer(response_df[cols_order])
        self.log.info("Only one entry found!")
        self.log.info(f"Survey completed on {response_dict['Date Completed']}")
        self.log.info(f" ... for {response_dict['Q7']}")
        survey_shortname = \
            self.lookup_survey_shortname(response_dict['SurveyID'])
        self.log.info(f"Survey name: {survey_shortname}")

        # Remember the match so that later steps can reuse it
        self.da_response_id = response_dict['ResponseId']
        self.da_survey_id = response_dict['SurveyID']
        return response_dict['ResponseId'], response_dict['SurveyID']