def load_terms(df: DataFrame) -> int:
    """Normalize a dataframe of dictionary terms and bulk-insert the new ones.

    Expected columns: "Term", "Case Sensitive", "Term Category",
    "Term Locale".  Terms flagged case-insensitive are lower-cased before
    de-duplication.  NOTE: the incoming ``df`` is mutated in place
    (``drop_duplicates(inplace=True)`` and the ``.loc`` assignment), so
    callers should not reuse it afterwards.

    :param df: source dataframe of term rows.
    :return: number of unique, non-null term rows processed
             (NOT the number of newly created Term objects).
    """
    df.drop_duplicates(inplace=True)
    # lower-case case-insensitive terms so duplicates differing only by case collapse
    insensitive = df["Case Sensitive"] == False  # noqa: E712 - pandas elementwise compare
    df.loc[insensitive, "Term"] = df.loc[insensitive, "Term"].str.lower()
    df = df.drop_duplicates(subset="Term").dropna(subset=["Term"])

    # Fetch all existing terms once instead of issuing one exists() query
    # per row (avoids an N+1 query pattern over the whole dataframe).
    existing_terms = set(Term.objects.values_list("term", flat=True))

    terms = []
    for _, row in df.iterrows():
        term = row["Term"].strip()
        if term not in existing_terms:
            # also guards against rows that dedupe only after strip()
            existing_terms.add(term)
            lt = Term()
            lt.term = term
            lt.source = row["Term Category"]
            lt.definition_url = row["Term Locale"]
            terms.append(lt)

    # cache "global" term stems step - should be cached here via model manager
    Term.objects.bulk_create(terms)

    # update existing ProjectTermConfiguration objects for all projects across loaded terms
    from apps.extract.dict_data_cache import cache_term_stems
    from apps.project.models import ProjectTermConfiguration
    for config in ProjectTermConfiguration.objects.all():
        cache_term_stems(config.project_id)
        # NOTE(review): presumably attaches the new terms to the project
        # configuration — confirm ProjectTermConfiguration.add() API.
        config.add(terms)

    return len(df)
def cache_term(instance, **kwargs):
    """Signal handler: refresh term-stem caches after a single Term changes."""
    from apps.extract.dict_data_cache import cache_term_stems

    # refresh the global (project-independent) cache first
    cache_term_stems()

    # then refresh the cache of every project configuration referencing this term
    project_ids = instance.projecttermconfiguration_set.values_list(
        'project_id', flat=True)
    for pid in project_ids:
        cache_term_stems(pid)
    def upload_df(self, df: pd.DataFrame) -> None:
        """Load terms from *df* (no-op if terms already exist), then rebuild caches."""
        # idempotency guard: never re-upload over an existing term set
        if Term.objects.exists():
            print('Terms data already uploaded')
            return

        print('Uploading terms...')
        with transaction.atomic():
            terms_count = load_terms(df)
        print('Detected %d terms' % terms_count)

        print('Caching terms config for Locate tasks...')
        dict_data_cache.cache_term_stems()
def terms_loader(zip_file: ZipFile, files: list) -> None:
    """Extract term CSVs from *zip_file*, load them, and rebuild stem caches."""
    # idempotency guard: never re-upload over an existing term set
    if Term.objects.exists():
        print('Terms data already uploaded')
        return

    print('Uploading terms...')
    frame = load_csv_files(zip_file, files)
    # load atomically so a partial failure leaves no half-inserted terms
    with transaction.atomic():
        terms_count = load_terms(frame)
    print('Detected %d terms' % terms_count)

    print('Caching terms config for Locate tasks...')
    dict_data_cache.cache_term_stems()
# Ejemplo n.º 5
# 0
def cache_terms(instance, action, pk_set, **kwargs):
    """m2m_changed handler: refresh a project's term cache after real changes."""
    # Act only on post_* signals, and only when the term set actually
    # changed (pk_set is empty/None when nothing was added or removed).
    if not (action.startswith('post') and pk_set):
        return
    from apps.extract.dict_data_cache import cache_term_stems
    cache_term_stems(instance.project.pk)
def delete_cached_term(instance, **kwargs):
    """Signal handler: a Term was removed, so rebuild the global stem cache."""
    from apps.extract.dict_data_cache import cache_term_stems as rebuild_global_cache
    rebuild_global_cache()
 def bulk_create(self, objs, **kwargs):
     """Create *objs* in bulk, then rebuild the global term-stem cache.

     Keeps cached terms fresh when data is loaded via fixtures.

     :param objs: model instances to insert.
     :return: the created objects, as Django's QuerySet.bulk_create does
              (the original discarded this return value — restored here,
              backward compatible with callers that ignored it).
     """
     created = super().bulk_create(objs, **kwargs)
     from apps.extract.dict_data_cache import cache_term_stems
     cache_term_stems()
     return created
# Ejemplo n.º 8
# 0
def cache_term_stems(apps, schema_editor):
    """Data-migration hook: rebuild the global term-stem cache.

    ``apps`` and ``schema_editor`` are supplied by Django's migration
    framework but are unused here — the cache rebuild needs no schema state.
    """
    dict_data_cache.cache_term_stems()