def update_cache_profiles(self, unified_name_to_profiles: dict):
    """
    Given a unified_profile_name to profiles map, merges the profiles
    and creates the FK references.

    :param unified_name_to_profiles: maps a unified profile name to the
        list of Profile objects sharing that unified name
    :return: None
    """
    sql = self._update_cache_profiles[0]
    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache profiles")
        for profile_list in unified_name_to_profiles.values():
            # Flatten the profile list down to one profile: keep the one
            # with the longest display name as the canonical reference.
            reference_profile = None  # type: Profile
            for profile in profile_list:
                if reference_profile is None or len(profile.display_name) > len(reference_profile.display_name):
                    reference_profile = profile
            # If we found at least one reference_profile (which we should),
            # insert its unified id / real name pair into the cache table.
            if reference_profile is not None:
                u, r = unify_profile_name(reference_profile.first_name, reference_profile.last_name)
                b64u = generate_id(u)
                log.info("inserting %s, %s" % (b64u, sanitize_text(r)))
                conn.execute(sql, (b64u, sanitize_text(r)))
    log.info("Cache profiles have been updated")
def insert_profile(conn: Connection, insert: str, p: Profile):
    """
    Insert a single profile row using the given insert statement.

    :param conn: open database connection to execute against
    :param insert: parameterized SQL insert statement
    :param p: the Profile to persist
    :return: None
    """
    unified_name, _ = unify_profile_name(p.first_name, p.last_name)
    # Assemble the sanitized parameter tuple before executing.
    row = (
        sanitize_text(p.identifier),
        generate_id(unified_name),
        sanitize_text(p.first_name),
        sanitize_text(p.last_name),
        sanitize_text(p.display_name),
        sanitize_text(p.link),
    )
    conn.execute(insert, row)
def update_cache_documents(self, unified_document_title_to_documents: dict):
    """
    Given a unified_document_title to documents map, merges the documents
    and creates the FK references.

    :param unified_document_title_to_documents: maps a unified document
        title to the list of Document objects sharing that title
    :return: None
    """
    sql = self._update_cache_documents[0]
    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache documents")
        for doc_list in unified_document_title_to_documents.values():
            # Flatten the document list down to one document: keep the
            # most recently modified one as the canonical reference.
            reference_doc = None  # type: Document
            for doc in doc_list:
                if reference_doc is None or doc.core_last_modified > reference_doc.core_last_modified:
                    reference_doc = doc
            # If we found at least one reference_doc (which we should),
            # insert its unified id / real title pair into the cache table.
            if reference_doc is not None:
                u, r = unify_document_title(reference_doc.core_title)
                b64u = generate_id(u)
                conn.execute(sql, (b64u, sanitize_text(r)))
    log.info("Cache documents have been updated")
def update_cache_fields(self, unified_field_title_to_field: dict):
    """
    Given a unified_field_title to field map, updates the fields.

    :param unified_field_title_to_field: maps a unified field title to
        the field object carrying that title
    :return: None
    """
    sql = self._update_cache_fields[0]
    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache fields")
        for field in unified_field_title_to_field.values():
            b64u = generate_id(field.unified_title)
            conn.execute(sql, (b64u, sanitize_text(field.title)))
    log.info("Cache fields have been updated")
def insert_doc(conn: Connection, insert: str, doc: Document):
    """
    Insert a single document row using the given insert statement.

    :param conn: open database connection to execute against
    :param insert: parameterized SQL insert statement
    :param doc: the Document to persist
    :return: None
    """
    unified_title, _ = unify_document_title(doc.core_title)
    doc_uid = generate_id(unified_title)
    # Collapse list-valued attributes into comma-separated strings.
    authors_joined = ", ".join(
        "{first} {last}".format(first=author[0], last=author[1])
        for author in doc.core_authors
    )
    keywords_joined = ", ".join(doc.core_keywords)
    tags_joined = ", ".join(doc.tags)
    # Create bibtex
    bibtex = generate_bibtex(doc)
    # Insert tuple
    conn.execute(
        insert,
        (
            sanitize_text(doc.core_id),
            doc_uid,
            sanitize_text(doc.core_profile_id),
            sanitize_text(doc.core_title),
            sanitize_text(doc.core_type),
            datetime_to_sqltime(doc.core_created),
            datetime_to_sqltime(doc.core_last_modified),
            sanitize_text(doc.core_abstract),
            sanitize_text(doc.core_source),
            doc.core_year,
            sanitize_text(authors_joined),
            sanitize_text(keywords_joined),
            sanitize_text(tags_joined),
            sanitize_text(doc.doc_website),
            sanitize_text(doc.conf_website),
            doc.conf_month,
            sanitize_text(doc.conf_pages),
            sanitize_text(doc.conf_city),
            sanitize_text(bibtex),
        ),
    )