def load_terms(df: DataFrame) -> int:
    """Create missing ``Term`` records from ``df`` and update project configs.

    Exact duplicate rows are dropped, terms whose "Case Sensitive" value
    equals ``False`` are lower-cased, and rows are then de-duplicated by
    "Term" with missing terms removed. Each remaining term is stripped of
    surrounding whitespace and, if no ``Term`` with that text is stored yet,
    queued for a single ``bulk_create``. Afterwards, for every
    ``ProjectTermConfiguration``, ``cache_term_stems`` is called with its
    ``project_id`` and the newly created terms are passed to ``config.add``.

    Args:
        df: Input rows providing the "Term", "Case Sensitive",
            "Term Category" and "Term Locale" columns. The caller's object
            is not modified.

    Returns:
        The number of rows left after de-duplication and removal of missing
        terms. This counts terms that were already stored as well, so it is
        not the number of records created.
    """
    # Work on a private copy so the caller's DataFrame is neither
    # de-duplicated nor lower-cased behind its back.
    frame = df.drop_duplicates().copy()

    case_insensitive = frame["Case Sensitive"].eq(False)
    frame.loc[case_insensitive, "Term"] = (
        frame.loc[case_insensitive, "Term"].str.lower())
    frame = frame.drop_duplicates(subset="Term").dropna(subset=["Term"])

    terms = []
    # The de-duplication above compares unstripped text, so two rows can still
    # collapse to the same term once stripped. Track what is already queued to
    # avoid inserting the same term twice in one batch.
    queued = set()
    for _, row in frame.iterrows():
        term = row["Term"].strip()
        if term in queued or Term.objects.filter(term=term).exists():
            continue
        queued.add(term)
        terms.append(Term(term=term,
                          source=row["Term Category"],
                          definition_url=row["Term Locale"]))

    # cache "global" term stems step - should be cached here via model manager
    Term.objects.bulk_create(terms)

    # update existing ProjectTermConfiguration objects for all projects
    # across loaded terms (imports kept local, as in the original)
    from apps.extract.dict_data_cache import cache_term_stems
    from apps.project.models import ProjectTermConfiguration

    for config in ProjectTermConfiguration.objects.all():
        cache_term_stems(config.project_id)
        # NOTE(review): `add` is defined on ProjectTermConfiguration outside
        # this view; the call is kept exactly as it was.
        config.add(terms)

    return len(frame)
def _prepare_term(
        dataframe_row: Series,
        term_creation_mode: _TermCreationMode) -> Union[Term, None]:
    """Build an unsaved Term from one input row, honouring the creation mode.

    The term text is the row's 'Term' value with surrounding whitespace
    stripped. If a Term with that text is already stored, REPLACE deletes
    the stored record(s) and IGNORE makes this function return nothing; any
    other mode leaves the stored records untouched.

    Args:
        dataframe_row (pandas.Series): Row providing the 'Term',
            'Term Category' and 'Term Locale' values.
        term_creation_mode (_TermCreationMode): How to treat a term that is
            already stored.

    Returns:
        Term: A new, unsaved Term built from the row.
        None: Only when the term is already stored and the mode is IGNORE.
    """
    term_text = dataframe_row['Term'].strip()
    stored_matches = Term.objects.filter(term=term_text)

    # Built up front, before any mode handling, so the row's columns are
    # always read regardless of the outcome.
    candidate = Term(
        term=term_text,
        source=dataframe_row['Term Category'],
        definition_url=dataframe_row['Term Locale'],
    )

    # Nothing stored under this text: the mode is irrelevant.
    if not stored_matches.exists():
        return candidate

    if term_creation_mode == _TermCreationMode.IGNORE:
        return None

    if term_creation_mode == _TermCreationMode.REPLACE:
        stored_matches.delete()

    return candidate
def load_terms(df: DataFrame) -> int:
    """Create ``Term`` records for the terms in ``df`` that are not stored yet.

    Exact duplicate rows are dropped, terms whose "Case Sensitive" value
    equals ``False`` are lower-cased, and rows are then de-duplicated by
    "Term" with missing terms removed. Each remaining term is stripped of
    surrounding whitespace and, if no ``Term`` with that text is stored yet,
    queued for a single ``bulk_create``.

    Args:
        df: Input rows providing the "Term", "Case Sensitive",
            "Term Category" and "Term Locale" columns. The caller's object
            is not modified.

    Returns:
        The number of rows left after de-duplication and removal of missing
        terms. This counts terms that were already stored as well, so it is
        not the number of records created.
    """
    # Work on a private copy so the caller's DataFrame is neither
    # de-duplicated nor lower-cased behind its back.
    frame = df.drop_duplicates().copy()

    case_insensitive = frame["Case Sensitive"].eq(False)
    frame.loc[case_insensitive, "Term"] = (
        frame.loc[case_insensitive, "Term"].str.lower())
    frame = frame.drop_duplicates(subset="Term").dropna(subset=["Term"])

    terms = []
    # The de-duplication above compares unstripped text, so two rows can still
    # collapse to the same term once stripped. Track what is already queued to
    # avoid inserting the same term twice in one batch.
    queued = set()
    for _, row in frame.iterrows():
        term = row["Term"].strip()
        if term in queued or Term.objects.filter(term=term).exists():
            continue
        queued.add(term)
        terms.append(Term(term=term,
                          source=row["Term Category"],
                          definition_url=row["Term Locale"]))

    Term.objects.bulk_create(terms)
    return len(frame)