Exemple #1
0
def create_user(username, first_name, last_name, email, affiliation, language,
                role):
    """
    Create a user with the given properties, generate a password for it and
    send a welcome email to the given address.

    Both welcome templates are rendered with ``locals()``, so every local
    name of this function that exists at that point (the arguments,
    ``password``, ``u`` and, for the text template, ``html``) is handed to
    the template. Renaming or removing a local therefore changes what the
    templates receive.

    Returns: the object returned by ``_create_user``.

    Raises: smtplib.SMTPException, django.db.utils.DatabaseError
    """
    # Password is generated here, not supplied by the caller.
    password = toolkit.random_alphanum(7)
    log.info("Creating new user: {username}".format(**locals()))

    u = _create_user(username,
                     first_name,
                     last_name,
                     email,
                     affiliation,
                     language,
                     role,
                     password=password)

    log.info("Created new user, sending email...")
    # HTML and plain-text variants of the same welcome message.
    html = render(get_request(), "welcome_email.html", locals()).content
    text = render(get_request(), "welcome_email.txt", locals()).content
    sendmail.sendmail(settings.DEFAULT_FROM_EMAIL, email, 'Welcome to AmCAT!',
                      html, text)
    log.info("Email sent, done!")
    return u
Exemple #2
0
    def highlight_fragments(self,
                            query: str,
                            fields: Sequence[str],
                            mark="mark",
                            add_filter=False,
                            number_of_fragments=3,
                            fragment_size=150) -> Dict[int, Dict[str, List[str]]]:
        """
        Run a highlighting query and return only the highlighted fragments per article.

        @param query: Lucene query
        @param fields: fields to highlight
        @param mark: html tag wrapped around highlighted terms in the returned fragments
        @param add_filter: passed on to highlight(). If True, only documents with
                           highlighting will be returned.
        @param number_of_fragments: number of fragments to request per field
        @param fragment_size: size of fragments in characters (bytes/unicode codepoints, not
                              formalized by elasticsearch..); also used as "no_match_size"
        @return: an ordered dictionary mapping an article id to a dictionary mapping field
                 names to a list of html-escaped fragments
        """
        # The regular highlighter builds the base query; a random placeholder tag is used
        # instead of `mark` so that the real tags can be told apart from article content.
        placeholder = toolkit.random_alphanum(20)
        highlighted = self.highlight(query, fields=fields, mark=placeholder, add_filter=add_filter)
        dsl = highlighted.get_query(highlighted.highlights[-1])

        # Every field gets its own copy of the same fragment options.
        fragment_options = {
            "number_of_fragments": number_of_fragments,
            "fragment_size": fragment_size,
            "no_match_size": fragment_size
        }
        for field in fields:
            dsl["highlight"]["fields"][field] = dict(fragment_options)

        # Collect the raw fragments per article, keeping the order of the hits.
        articles = collections.OrderedDict(
            (hit["_source"]["id"], {field: hit["highlight"][field] for field in fields})
            for hit in highlighted._do_query(dsl)["hits"]["hits"]
        )

        # HACK: Elastic does not escape html tags *in the article*. The placeholder tags are
        # reduced to bare tokens, everything is escaped, and the tokens are then turned into
        # the requested tags. The closing token (placeholder twice) must be restored first,
        # because it contains the opening token.
        opening_tag = "<{}>".format(placeholder)
        closing_tag = "</{}>".format(placeholder)
        closing_token = placeholder + placeholder
        mark_open = "<{}>".format(mark)
        mark_close = "</{}>".format(mark)

        def _escape(fragment):
            fragment = fragment.replace(opening_tag, placeholder)
            fragment = fragment.replace(closing_tag, closing_token)
            fragment = html.escape(fragment)
            fragment = fragment.replace(closing_token, mark_close)
            return fragment.replace(placeholder, mark_open)

        for fragments_by_field in articles.values():
            for fragments in fragments_by_field.values():
                # Slice assignment keeps the in-place update of the fragment lists.
                fragments[:] = [_escape(fragment) for fragment in fragments]

        return articles
Exemple #3
0
    def highlight_fragments(self,
                            query: str,
                            fields: Sequence[str],
                            mark="mark",
                            add_filter=False,
                            number_of_fragments=3,
                            fragment_size=150) -> Dict[int, Dict[str, List[str]]]:
        """
        Highlight articles, returning just the matching fragments for each field.

        @param query: Lucene query
        @param fields: fields to highlight
        @param mark: html tag that surrounds highlighted terms in the result
        @param add_filter: forwarded to highlight(). If True, only documents with
                           highlighting will be returned.
        @param number_of_fragments: number of fragments to request per field
        @param fragment_size: size of fragments in characters (bytes/unicode codepoints, not
                              formalized by elasticsearch..); reused as "no_match_size"
        @return: an ordered dictionary mapping an article id to a dictionary mapping field
                 names to a list of html-escaped fragments
        """
        # Build the default query through the "normal" highlighter, using a random tag name
        # that can later be distinguished from markup occurring in the article text.
        temp_tag = toolkit.random_alphanum(20)
        base = self.highlight(query, fields=fields, mark=temp_tag, add_filter=add_filter)
        dsl = base.get_query(base.highlights[-1])

        # Override the per-field highlight settings so fragments are returned.
        field_settings = dsl["highlight"]["fields"]
        for field in fields:
            field_settings[field] = {
                "number_of_fragments": number_of_fragments,
                "fragment_size": fragment_size,
                "no_match_size": fragment_size
            }

        # Gather fragments per article id; the id is the first value of the "id" field.
        articles = collections.OrderedDict()
        hits = base._do_query(dsl)["hits"]["hits"]
        for hit in hits:
            found = hit["highlight"]
            articles[hit["fields"]["id"][0]] = {field: found[field] for field in fields}

        # HACK: Elastic does not escape html tags *in the article*. Swap the temporary tags
        # for bare tokens, escape the text, then swap the tokens for the requested tags.
        # Pairs are applied in order; the doubled (closing) token is restored before the
        # single (opening) one because the former contains the latter.
        closing_token = temp_tag + temp_tag
        before_escape = (
            ("<{}>".format(temp_tag), temp_tag),
            ("</{}>".format(temp_tag), closing_token),
        )
        after_escape = (
            (closing_token, "</{}>".format(mark)),
            (temp_tag, "<{}>".format(mark)),
        )
        for per_field in articles.values():
            for fragments in per_field.values():
                for position, fragment in enumerate(fragments):
                    for old, new in before_escape:
                        fragment = fragment.replace(old, new)
                    fragment = html.escape(fragment)
                    for old, new in after_escape:
                        fragment = fragment.replace(old, new)
                    fragments[position] = fragment

        return articles
Exemple #4
0
 def test_random_alphanum(self):
     """random_alphanum returns the requested length and differs between calls."""
     for length in (1000, 100, 80, 60):
         self.assertEqual(len(toolkit.random_alphanum(length)), length)
     # Two independent 100-character strings are expected to differ.
     first = toolkit.random_alphanum(100)
     second = toolkit.random_alphanum(100)
     self.assertNotEqual(first, second)
Exemple #5
0
def create_user(username, first_name, last_name, email,  password=None):
    """
    Create a user with the given properties and send a welcome email to the
    given address.

    If ``password`` is None, a password is generated with
    ``toolkit.random_alphanum(7)``. The welcome email is sent in either case.

    Both welcome templates are rendered with ``locals()``, so every local
    name of this function that exists at that point (the arguments,
    ``email_password``, ``u`` and, for the text template, ``html``) is handed
    to the template. Renaming or removing a local therefore changes what the
    templates receive.

    Returns: the object returned by ``_create_user``.

    Raises: smtplib.SMTPException, django.db.utils.DatabaseError
    """
    # True when the password was generated here rather than supplied. Not read
    # again in this function; only reachable through locals() below --
    # presumably the templates use it to decide whether to show the password
    # (TODO confirm against the templates).
    email_password = (password is None)
    if password is None:
        password = toolkit.random_alphanum(7)
        
    log.info("Creating new user: {username}".format(**locals()))

    u = _create_user(username, first_name, last_name, email, password=password)

    log.info("Created new user, sending email...")
    # HTML and plain-text variants of the same welcome message.
    html = render(get_request(), "welcome_email.html", locals()).content
    text = render(get_request(), "welcome_email.txt", locals()).content
    sendmail.sendmail(settings.DEFAULT_FROM_EMAIL, email, 'Welcome to AmCAT!',
                      html, text)
    log.info("Email sent, done!")
    return u
Exemple #6
0
def get_secret():
    """
    Get or create a secret key to sign cookies with.

    ~/.cookie-secret will be used to store the secret key. If the file
    exists its content is returned unchanged; otherwise a new 40-character
    key is generated with random_alphanum, written to the file and returned.

    Raises:
        IOError: if the existing file cannot be read (re-raised after
            printing a diagnostic), or if the new file cannot be written.
        Exception: if ~/.cookie-secret exists but is not a regular file.
    """
    secret_path = os.path.expanduser("~/.cookie-secret")

    if os.path.exists(secret_path):
        if not os.path.isfile(secret_path):
            message = "%r is not a file." % secret_path
            print(message)
            raise Exception(message)
        try:
            # Context manager so the handle is closed right after reading.
            with open(secret_path) as secret_file:
                return secret_file.read()
        except IOError:
            print("%r is not readable!" % secret_path)
            raise

    secret = random_alphanum(40)
    # Create the file readable and writable by the owner only: it holds a signing key.
    descriptor = os.open(secret_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(descriptor, "w") as secret_file:
        secret_file.write(secret)

    # The value just written is what a re-read would return; no recursion needed.
    return secret
Exemple #7
0
def get_fragments(query: str, article_ids: Sequence[int], fragment_size=150, number_of_fragments=3):
    """
    Return the articles with the given ids, with text and title replaced by highlighted fragments.

    An empty query is replaced by a random 20-character string. When
    number_of_fragments is 0, the text of each article in the highlight
    result is set to "" and nothing else on it is changed.

    @param query: query passed to highlight_fragments
    @param article_ids: ids of the articles to fetch; also determines the order of the result
    @param fragment_size: forwarded to highlight_fragments
    @param number_of_fragments: forwarded to highlight_fragments
    @return: list of articles, in the order of article_ids
    """
    if not query:
        query = toolkit.random_alphanum(20)

    articles = Article.objects.defer("text", "title").in_bulk(article_ids)
    fragments = ESQuerySet().filter(id__in=article_ids).highlight_fragments(
        query, ("text", "title"),
        mark="em",
        fragment_size=fragment_size,
        number_of_fragments=number_of_fragments,
    )

    for article_id, fields in fragments.items():
        article = articles[article_id]

        if number_of_fragments == 0:
            article.text = ""
            continue

        article._highlighted = True  # Disable save()
        for field, highlights in fields.items():
            if len(highlights) > 1:
                # Several fragments: one paragraph each, newlines flattened to spaces.
                cleaned = (h.strip().replace("\n", " ") for h in highlights)
                fragment = "<p>... {} ...</p>".format(" ...</p><p>... ".join(cleaned))
            else:
                fragment = highlights[0]
            setattr(article, field, fragment)

    return [articles[article_id] for article_id in article_ids]
Exemple #8
0
def get_fragments(query: str,
                  article_ids: Sequence[int],
                  fragment_size=150,
                  number_of_fragments=3):
    """
    Fetch the given articles and replace their text and title by highlighted fragments.

    An empty query is replaced by a random 20-character string.

    @param query: query passed to highlight_fragments
    @param article_ids: ids of the articles to fetch
    @param fragment_size: forwarded to highlight_fragments
    @param number_of_fragments: forwarded to highlight_fragments
    @return: the values view of the id -> article mapping returned by in_bulk
    """
    search_terms = query if query else toolkit.random_alphanum(20)

    articles = Article.objects.defer("text", "title").in_bulk(article_ids)
    queryset = ESQuerySet().filter(id__in=article_ids)
    fragments = queryset.highlight_fragments(
        search_terms, ("text", "title"),
        mark="em",
        fragment_size=fragment_size,
        number_of_fragments=number_of_fragments,
    )

    for article_id, fields in fragments.items():
        article = articles[article_id]
        article._highlighted = True  # Disable save()
        for field, highlights in fields.items():
            if len(highlights) > 1:
                # Several fragments: one paragraph each, newlines flattened to spaces.
                paragraphs = [h.strip().replace("\n", " ") for h in highlights]
                fragment = "<p>... " + " ...</p><p>... ".join(paragraphs) + " ...</p>"
            else:
                fragment = highlights[0]
            setattr(article, field, fragment)

    return articles.values()
Exemple #9
0
from collections import ChainMap
from typing import Iterable, Any, Union, Sequence, Dict, Tuple, List
from typing import Optional

from django.conf import settings
from django.db.models import QuerySet
from django.http import QueryDict

from amcat.models import get_used_properties_by_articlesets, ArticleSet, Project
from amcat.tools import queryparser
from amcat.tools import toolkit
from amcat.tools.amcates import ALL_FIELDS, ES, get_property_primitive_type
from amcat.tools.queryparser import Term


# Token that tokenize_highlighted_text substitutes for the opening highlight tag.
# Generated once, at import time.
TOKEN_START = toolkit.random_alphanum(16)
# Pattern string of the "unicode_letters_digits" tokenizer taken from the ES settings.
TOKENIZER_PATTERN = settings.ES_SETTINGS["analysis"]["tokenizer"]["unicode_letters_digits"]["pattern"]
# Same pattern with every "^" removed and "+" appended -- presumably this turns a negated
# character class into one matching runs of token characters; TODO confirm against the setting.
TOKENIZER_INV = regex.compile(TOKENIZER_PATTERN.replace("^", "") + "+")
# Compiled tokenizer pattern; tokenize_highlighted_text splits text on it.
TOKENIZER = regex.compile(TOKENIZER_PATTERN)


def tokenize_highlighted_text(text: str, marker: str):
    """
    Yield, for each non-empty token of text, whether it starts with a highlight.

    Opening "<marker>" tags are replaced by TOKEN_START and closing
    "</marker>" tags are removed. The result is split on TOKENIZER, and one
    boolean is yielded per non-empty piece: True when it starts with
    TOKEN_START.
    """
    opening_tag = "<{}>".format(marker)
    closing_tag = "</{}>".format(marker)

    # Swap the html tags for a unique token, so splitting cannot break them apart.
    flagged = text.replace(opening_tag, TOKEN_START)
    flagged = flagged.replace(closing_tag, "")

    yield from (
        piece.startswith(TOKEN_START)
        for piece in TOKENIZER.split(flagged)
        if piece
    )
Exemple #10
0
from collections import ChainMap
from typing import Iterable, Any, Union, Sequence, Dict, Tuple, List
from typing import Optional

from django.conf import settings
from django.db.models import QuerySet
from django.http import QueryDict

from amcat.models import get_used_properties_by_articlesets, ArticleSet, Project
from amcat.tools import queryparser
from amcat.tools import toolkit
from amcat.tools.amcates import ALL_FIELDS, ES, get_property_primitive_type
from amcat.tools.queryparser import Term


# Token that tokenize_highlighted_text substitutes for the opening highlight tag.
# Generated once, at import time.
TOKEN_START = toolkit.random_alphanum(16)
# Pattern string of the "unicode_letters_digits" tokenizer taken from the ES settings.
TOKENIZER_PATTERN = settings.ES_SETTINGS["analysis"]["tokenizer"]["unicode_letters_digits"]["pattern"]
# Same pattern with every "^" removed and "+" appended -- presumably this turns a negated
# character class into one matching runs of token characters; TODO confirm against the setting.
TOKENIZER_INV = regex.compile(TOKENIZER_PATTERN.replace("^", "") + "+")
# Compiled tokenizer pattern; tokenize_highlighted_text splits text on it.
TOKENIZER = regex.compile(TOKENIZER_PATTERN)


def tokenize_highlighted_text(text: str, marker: str):
    """
    Generate one boolean per non-empty token of text: True if the token starts with a highlight.

    The opening "<marker>" tag is replaced by TOKEN_START and the closing
    "</marker>" tag is dropped before the text is split on TOKENIZER.
    """
    # Replace the html tags by a unique token (opening) or nothing (closing).
    replacements = (
        ("<{}>".format(marker), TOKEN_START),
        ("</{}>".format(marker), ""),
    )
    for tag, substitute in replacements:
        text = text.replace(tag, substitute)

    for piece in TOKENIZER.split(text):
        if not piece:
            continue
        yield piece.startswith(TOKEN_START)
Exemple #11
0
 def test_random_alphanum(self):
     """random_alphanum honours the requested length and varies between calls."""
     requested_lengths = [1000, 100, 80, 60]
     for requested in requested_lengths:
         generated = toolkit.random_alphanum(requested)
         self.assertEqual(len(generated), requested)
     # Two independent 100-character strings are expected to differ.
     one = toolkit.random_alphanum(100)
     other = toolkit.random_alphanum(100)
     self.assertNotEqual(one, other)
Exemple #12
0
# password=secret
# host=localhost
# port=5432
#
# [caching-default]
# backend=django.core.cache.backends.memcached.MemcachedCache
# location=127.0.0.1:11211

def sections(identifier):
    """
    Yield the sections of ~/.amcatrc3 named ``<identifier>-<name>``.

    Only section names consisting of exactly two dash-separated parts, the
    first of which equals ``identifier``, are considered.

    Yields: ``(name, items)`` tuples, where ``items`` is the list of
    ``(option, value)`` pairs of that section.

    Raises: IOError (OSError) if ~/.amcatrc3 cannot be opened; configparser
    errors if it cannot be parsed.
    """
    parser = configparser.ConfigParser()
    # `file()` only exists on Python 2 and `readfp` is deprecated; open the file
    # explicitly and make sure the handle is closed after parsing.
    with open(os.path.expanduser('~/.amcatrc3')) as rc_file:
        parser.read_file(rc_file)

    for section in parser.sections():
        parts = section.split('-')
        # Compare by value: identity checks on ints (`is 2`) are not reliable.
        if len(parts) == 2 and parts[0] == identifier:
            yield parts[1], parser.items(section)

def filldict(vals, dic):
    """
    Store each (name, options) pair of vals in dic and return dic.

    For every pair, dic[name] is set to a new dictionary that maps the
    upper-cased option key to its value. An existing entry with the same
    name is replaced.
    """
    for name, options in vals:
        dic[name] = {key.upper(): value for key, value in options}
    return dic

# Database settings: one entry per "db-<name>" section of ~/.amcatrc3, with the option
# names upper-cased by filldict.
DATABASES = filldict(sections('db'), dict())
# A non-empty DJANGO_DB_ENGINE environment variable overrides the engine of the "default"
# database. Raises KeyError when no "default" entry was produced above.
if os.environ.get("DJANGO_DB_ENGINE"):
    DATABASES["default"]["ENGINE"] = os.environ.get("DJANGO_DB_ENGINE")
# Cache settings: one entry per "caching-<name>" section, built the same way.
CACHES = filldict(sections('caching'), dict())

# NOTE(review): random_alphanum(30) is evaluated at import time, so the key presumably
# differs between processes and restarts -- confirm this is intended.
SECRET_KEY = random_alphanum(30)