def load_terms(df: DataFrame) -> int:
    """Create a ``Term`` record for every term in ``df`` that is not stored yet.

    The frame must provide the columns ``Term``, ``Case Sensitive``,
    ``Term Category`` and ``Term Locale``. Terms whose ``Case Sensitive``
    value equals ``False`` are lower-cased, rows without a term are dropped,
    and each remaining term is stripped of surrounding whitespace before it
    is looked up. After the new terms are inserted, every
    ``ProjectTermConfiguration`` has its term stems re-cached and receives
    the newly created terms.

    The caller's DataFrame is left untouched; all normalisation happens on a
    private copy.

    Args:
        df: Raw term rows to import.

    Returns:
        Number of rows left after de-duplication and removal of empty terms.
        This counts the rows considered, not the ``Term`` objects created.
    """
    # drop_duplicates() without inplace, followed by copy(), yields an
    # independent frame, so the assignment below never leaks into the
    # caller's data.
    frame = df.drop_duplicates().copy()
    case_insensitive = frame["Case Sensitive"] == False  # noqa: E712 - element-wise comparison
    frame.loc[case_insensitive, "Term"] = frame.loc[
        case_insensitive, "Term"].str.lower()
    frame = frame.drop_duplicates(subset="Term").dropna(subset=["Term"])

    terms = []
    # Stripping happens after the pandas de-duplication, so " foo" and "foo"
    # can both reach this loop. Remember what is already queued so the same
    # term is never inserted twice by a single bulk_create() call.
    queued = set()
    for _, row in frame.iterrows():
        term = row["Term"].strip()
        if term in queued or Term.objects.filter(term=term).exists():
            continue
        queued.add(term)
        terms.append(Term(term=term,
                          source=row["Term Category"],
                          definition_url=row["Term Locale"]))

    # cache "global" term stems step - should be cached here via model manager
    Term.objects.bulk_create(terms)

    # update existing ProjectTermConfiguration objects for all projects across loaded terms
    # NOTE(review): imported locally, presumably to avoid a circular import - confirm.
    from apps.extract.dict_data_cache import cache_term_stems
    from apps.project.models import ProjectTermConfiguration
    for config in ProjectTermConfiguration.objects.all():
        cache_term_stems(config.project_id)
        config.add(terms)

    return len(frame)
Example #2
0
    def _prepare_term(
            dataframe_row: Series,
            term_creation_mode: _TermCreationMode) -> Union[Term, None]:
        """Build an unsaved Term from one row of the input data.

        Args:
            dataframe_row (pandas.Series): Row providing the 'Term',
                'Term Category' and 'Term Locale' values.
            term_creation_mode (_TermCreationMode): Controls what happens when
                a Term with the same (stripped) text is already stored.

        Returns:
            Term: The newly instantiated Term; it is not saved here.
            None: If a matching Term is already stored and the mode is IGNORE.
        """
        term_text = dataframe_row['Term'].strip()
        stored_matches = Term.objects.filter(term=term_text)

        # Build the candidate up front; whether it is handed back depends on
        # the creation mode below.
        candidate = Term(
            term=term_text,
            source=dataframe_row['Term Category'],
            definition_url=dataframe_row['Term Locale'],
        )

        # Nothing stored under this text: the mode is irrelevant.
        if not stored_matches.exists():
            return candidate

        # A Term with this text already exists: apply the creation mode.
        if term_creation_mode == _TermCreationMode.REPLACE:
            stored_matches.delete()
            return candidate
        if term_creation_mode == _TermCreationMode.IGNORE:
            return None
        return candidate
Example #3
0
def load_terms(df: DataFrame) -> int:
    """Create a ``Term`` record for every term in ``df`` that is not stored yet.

    The frame must provide the columns ``Term``, ``Case Sensitive``,
    ``Term Category`` and ``Term Locale``. Terms whose ``Case Sensitive``
    value equals ``False`` are lower-cased, rows without a term are dropped,
    and each remaining term is stripped of surrounding whitespace before it
    is looked up.

    The caller's DataFrame is left untouched; all normalisation happens on a
    private copy.

    Args:
        df: Raw term rows to import.

    Returns:
        Number of rows left after de-duplication and removal of empty terms.
        This counts the rows considered, not the ``Term`` objects created.
    """
    # drop_duplicates() without inplace, followed by copy(), yields an
    # independent frame, so the assignment below never leaks into the
    # caller's data.
    frame = df.drop_duplicates().copy()
    case_insensitive = frame["Case Sensitive"] == False  # noqa: E712 - element-wise comparison
    frame.loc[case_insensitive, "Term"] = frame.loc[
        case_insensitive, "Term"].str.lower()
    frame = frame.drop_duplicates(subset="Term").dropna(subset=["Term"])

    terms = []
    # Stripping happens after the pandas de-duplication, so " foo" and "foo"
    # can both reach this loop. Remember what is already queued so the same
    # term is never inserted twice by a single bulk_create() call.
    queued = set()
    for _, row in frame.iterrows():
        term = row["Term"].strip()
        if term in queued or Term.objects.filter(term=term).exists():
            continue
        queued.add(term)
        terms.append(Term(term=term,
                          source=row["Term Category"],
                          definition_url=row["Term Locale"]))

    Term.objects.bulk_create(terms)
    return len(frame)