Ejemplo n.º 1
0
 def assert_schema(self):
     """
     Check whether the schema is initialized and rebuild it if it is not.

     If is_initialized() reports a complete schema only an info message is logged.
     Otherwise drop_all() is called, followed by run_schema().
     :return:
     """
     if not self.is_initialized():
         log.warning("The current schema is incomplete. Starting migration.")
         # TODO: Backup && Restore as soon as the database has state
         self.drop_all()
         self.run_schema()
         return

     log.info("Schema is already initialized")
Ejemplo n.º 2
0
    def process_profile_documents(self):
        """
        Walk through the documents of every unified profile name.

        Each document is added to the overall document list and grouped by its unified title.
        The unified title is recorded in the authored documents of the unified profile name,
        and the document's authors and tags are passed to analyze_author and analyze_field_tag.
        :return:
        """
        for unified_name, linked_profiles in self._unified_name_to_profiles.items():
            if len(linked_profiles) == 0:
                log.warning("There were no profiles for the unified name %s" %
                            unified_name)
                continue

            # Gather the documents of every profile that maps to this unified name
            collected_docs = []
            for linked_profile in linked_profiles:
                profile_docs = self._profile_docs[linked_profile.identifier]
                message = "Used {len_x} documents from id {mendeley_id} for unified name {name}".format(
                    len_x=len(profile_docs),
                    mendeley_id=linked_profile.identifier,
                    name=unify_profile_name(linked_profile.first_name,
                                            linked_profile.last_name))
                log.debug(message)
                collected_docs.extend(profile_docs)

            for document in collected_docs:
                # Keep track of every document that has been seen
                self._documents.append(document)

                # Only the unified form of the title is needed here
                unified_title, _real_title = unify_document_title(
                    document.core_title)

                # Group documents that share the same unified title
                self._unified_document_title_to_documents.setdefault(
                    unified_title, []).append(document)

                # Remember that this unified profile name authored the title
                self._unified_name_to_authored_documents[unified_name].add(
                    unified_title)

                # Every listed author is analyzed to find participants
                for author in document.core_authors:
                    self.analyze_author(unified_title, author)

                # Every tag is analyzed to find research fields
                for tag in document.tags:
                    self.analyze_field_tag(unified_title, tag)
        log.info("Profile documents have been analyzed")
Ejemplo n.º 3
0
    def process_profile_documents(self):
        """
        Analyze the documents that belong to each unified profile name.

        For every unified name the documents of all linked profiles are gathered. Each gathered document
        is stored in the overall document list and under its unified title, the title is added to the
        authored documents of the unified name, and the document's authors and tags are handed to
        analyze_author and analyze_field_tag.
        :return:
        """
        for unified_name in self._unified_name_to_profiles:
            linked = self._unified_name_to_profiles[unified_name]
            if len(linked) == 0:
                log.warning("There were no profiles for the unified name %s" % unified_name)
                continue

            # Merge the documents of all profiles behind this unified name into one list
            documents = []
            for member in linked:
                member_docs = self._profile_docs[member.identifier]
                log.debug(
                    "Used {len_x} documents from id {mendeley_id} for unified name {name}".format(
                        len_x=len(member_docs),
                        mendeley_id=member.identifier,
                        name=unify_profile_name(member.first_name, member.last_name)
                    )
                )
                documents += member_docs

            for document in documents:
                # Register the document in the overall list
                self._documents.append(document)

                # The second element of the result is not used by this method
                unified_title, _real_title = unify_document_title(document.core_title)

                # File the document under its unified title
                if unified_title not in self._unified_document_title_to_documents:
                    self._unified_document_title_to_documents[unified_title] = [document]
                else:
                    self._unified_document_title_to_documents[unified_title].append(document)

                # Record the title as authored by the current unified profile name
                self._unified_name_to_authored_documents[unified_name].add(unified_title)

                # Authors are analyzed to find participants
                for author in document.core_authors:
                    self.analyze_author(unified_title, author)

                # Tags are analyzed to find research fields
                for tag in document.tags:
                    self.analyze_field_tag(unified_title, tag)
        log.info("Profile documents have been analyzed")