def assert_schema(self):
    """
    Make sure the schema is initialized, rebuilding it when it is not.

    When ``self.is_initialized()`` is truthy only an info message is logged.
    Otherwise a warning is logged and ``self.drop_all()`` followed by
    ``self.run_schema()`` is executed.
    :return:
    """
    # Nothing to do when the schema is already in place.
    if self.is_initialized():
        log.info("Schema is already initialized")
        return

    log.warning("The current schema is incomplete. Starting migration.")
    # TODO: Backup && Restore as soon as the database has state
    # Order matters: everything is dropped before the schema is run again.
    self.drop_all()
    self.run_schema()
def process_profile_documents(self):
    """
    Analyze the documents of every profile, grouped by unified profile name.

    For each unified name in ``self._unified_name_to_profiles`` the documents
    of all linked profiles are first gathered from ``self._profile_docs``.
    Every gathered document is then:

    * appended to ``self._documents``,
    * filed under its unified title in
      ``self._unified_document_title_to_documents`` (documents sharing a
      unified title end up in the same list),
    * registered, by unified title, in the authored documents of the
      unified profile name,
    * handed author by author to ``self.analyze_author`` and tag by tag to
      ``self.analyze_field_tag``.

    Unified names without any profile are logged as a warning and skipped.
    :return:
    """
    for profile_unified in self._unified_name_to_profiles:
        linked_profiles = self._unified_name_to_profiles[profile_unified]
        if len(linked_profiles) == 0:
            log.warning("There were no profiles for the unified name %s" %
                        profile_unified)
            continue

        # Gather the documents of every profile behind this unified name
        # before any of them is processed.
        gathered_docs = []
        for profile in linked_profiles:
            profile_docs = self._profile_docs[profile.identifier]
            doc_count = len(profile_docs)
            mendeley_id = profile.identifier
            unified_name = unify_profile_name(profile.first_name,
                                              profile.last_name)
            log.debug(
                "Used {len_x} documents from id {mendeley_id} for unified name {name}"
                .format(len_x=doc_count,
                        mendeley_id=mendeley_id,
                        name=unified_name))
            gathered_docs += profile_docs

        for doc in gathered_docs:
            # Keep track of every document that was seen
            self._documents.append(doc)

            # Only the unified form of the title is needed below
            doc_unified, _ = unify_document_title(doc.core_title)

            # Group documents that share the same unified title
            self._unified_document_title_to_documents.setdefault(
                doc_unified, []).append(doc)

            # Remember that this unified profile name authored the document
            self._unified_name_to_authored_documents[profile_unified].add(
                doc_unified)

            # The core_authors field is analyzed to find participants
            for author in doc.core_authors:
                self.analyze_author(doc_unified, author)

            # The tags field is analyzed to find research fields
            for tag in doc.tags:
                self.analyze_field_tag(doc_unified, tag)

    log.info("Profile documents have been analyzed")
def process_profile_documents(self):
    """
    Go through the documents of all profiles and analyze them.

    The work is done per unified profile name: the documents of all
    profiles linked to that name are collected from ``self._profile_docs``
    and afterwards processed one by one. Processing a document means adding
    it to ``self._documents``, grouping it by its unified title in
    ``self._unified_document_title_to_documents``, adding the unified title
    to the authored documents of the unified profile name and passing its
    authors and tags to ``self.analyze_author`` and
    ``self.analyze_field_tag``.

    A unified name that has no profiles only produces a warning.
    :return:
    """
    for profile_unified in self._unified_name_to_profiles:
        candidates = self._unified_name_to_profiles[profile_unified]
        if len(candidates) == 0:
            log.warning("There were no profiles for the unified name %s" % profile_unified)
            continue

        # --- Step 1: collect the documents of all linked profiles ---
        pending = []
        for profile in candidates:
            docs_of_profile = self._profile_docs[profile.identifier]
            debug_message = "Used {len_x} documents from id {mendeley_id} for unified name {name}".format(
                len_x=len(docs_of_profile),
                mendeley_id=profile.identifier,
                name=unify_profile_name(profile.first_name, profile.last_name)
            )
            log.debug(debug_message)
            pending += docs_of_profile

        # --- Step 2: process the collected documents ---
        for doc in pending:
            # Every document ends up in the global document list
            self._documents.append(doc)

            # The second element returned for the title is not used here
            title_parts = unify_document_title(doc.core_title)
            doc_unified, _unused_real_title = title_parts

            # First document for a unified title opens a new list,
            # later ones are appended to the existing list
            if doc_unified not in self._unified_document_title_to_documents:
                self._unified_document_title_to_documents[doc_unified] = [doc]
            else:
                same_title_docs = self._unified_document_title_to_documents[doc_unified]
                same_title_docs.append(doc)

            # Link the unified title to the unified profile name
            titles_of_author = self._unified_name_to_authored_documents[profile_unified]
            titles_of_author.add(doc_unified)

            # Participants are derived from the core_authors field
            for author in doc.core_authors:
                self.analyze_author(doc_unified, author)

            # Research fields are derived from the tags field
            for tag in doc.tags:
                self.analyze_field_tag(doc_unified, tag)

    log.info("Profile documents have been analyzed")