def create_user(username, first_name, last_name, email, affiliation, language, role):
    """
    Create a user with the given properties and send them a welcome email.

    A random 7-character alphanumeric password is generated for the new
    user. The welcome email is rendered from "welcome_email.html" and
    "welcome_email.txt" and sent to `email`.

    Raises: smtplib.SMTPException, django.db.utils.DatabaseError
    """
    password = toolkit.random_alphanum(7)
    log.info("Creating new user: {username}".format(**locals()))
    u = _create_user(username, first_name, last_name, email, affiliation, language, role, password=password)
    log.info("Created new user, sending email...")
    # NOTE: locals() is used as the template context, so every local name
    # defined up to this point (the parameters, `password`, `u`) is visible to
    # the templates. Renaming or reordering locals changes that context.
    html = render(get_request(), "welcome_email.html", locals()).content
    # The plain-text render additionally sees `html` in its context.
    text = render(get_request(), "welcome_email.txt", locals()).content
    sendmail.sendmail(settings.DEFAULT_FROM_EMAIL, email, 'Welcome to AmCAT!', html, text)
    log.info("Email sent, done!")
    return u
def highlight_fragments(self, query: str, fields: Sequence[str], mark="mark", add_filter=False, number_of_fragments=3, fragment_size=150) -> Dict[int, Dict[str, List[str]]]:
    """
    Highlight articles, returning only the highlighted fragments.

    @param query: Lucene query
    @param fields: fields to highlight
    @param mark: html tag used to mark highlights.
    @param add_filter: Indicates whether you also want to *filter* documents on this
                       highlight. If True, only documents with highlighting will be returned.
    @param number_of_fragments: Number of fragments to include
    @param fragment_size: size of fragments in characters (bytes/unicode codepoints,
                          not formalized by elasticsearch..)
    @return: A dictionary mapping an article id to a dictionary mapping fieldnames
             to a list of fragments
    """
    # Let the "normal" highlighter build the query, using a random placeholder
    # as the highlight tag so we can recognise it after escaping.
    placeholder = toolkit.random_alphanum(20)
    highlighted = self.highlight(query, fields=fields, mark=placeholder, add_filter=add_filter)
    dsl = highlighted.get_query(highlighted.highlights[-1])

    # Override the per-field highlight options so that fragments are returned
    for field in fields:
        dsl["highlight"]["fields"][field] = dict(
            number_of_fragments=number_of_fragments,
            fragment_size=fragment_size,
            no_match_size=fragment_size,
        )

    # Collect the fragments per article, preserving the order of the hits
    articles = collections.OrderedDict(
        (hit["_source"]["id"], {field: hit["highlight"][field] for field in fields})
        for hit in highlighted._do_query(dsl)["hits"]["hits"]
    )

    # HACK: Elastic does not escape html tags *in the article*. We therefore pass a random
    # marker and use it to escape ourselves.
    closing_placeholder = placeholder + placeholder

    def escape_fragment(fragment):
        # Swap the placeholder tags for bare placeholders, escape everything
        # else, then turn the placeholders into the requested tag.
        fragment = fragment.replace("<{}>".format(placeholder), placeholder)
        fragment = fragment.replace("</{}>".format(placeholder), closing_placeholder)
        fragment = html.escape(fragment)
        fragment = fragment.replace(closing_placeholder, "</{}>".format(mark))
        return fragment.replace(placeholder, "<{}>".format(mark))

    for fragments_by_field in articles.values():
        for fragments in fragments_by_field.values():
            # Slice assignment keeps the original list object, updated in place
            fragments[:] = [escape_fragment(fragment) for fragment in fragments]

    return articles
def highlight_fragments(self, query: str, fields: Sequence[str], mark="mark", add_filter=False, number_of_fragments=3, fragment_size=150) -> Dict[int, Dict[str, List[str]]]:
    """
    Highlight articles, returning only the highlighted fragments.

    @param query: Lucene query
    @param fields: fields to highlight
    @param mark: html tag used to mark highlights.
    @param add_filter: Indicates whether you also want to *filter* documents on this
                       highlight. If True, only documents with highlighting will be returned.
    @param number_of_fragments: Number of fragments to include
    @param fragment_size: size of fragments in characters (bytes/unicode codepoints,
                          not formalized by elasticsearch..)
    @return: A dictionary mapping an article id to a dictionary mapping fieldnames
             to a list of fragments
    """
    # Let the "normal" highlighter build the query, using a random placeholder
    # as the highlight tag so we can recognise it after escaping.
    placeholder = toolkit.random_alphanum(20)
    highlighted = self.highlight(query, fields=fields, mark=placeholder, add_filter=add_filter)
    dsl = highlighted.get_query(highlighted.highlights[-1])

    # Override the per-field highlight options so that fragments are returned
    for field in fields:
        dsl["highlight"]["fields"][field] = dict(
            number_of_fragments=number_of_fragments,
            fragment_size=fragment_size,
            no_match_size=fragment_size,
        )

    # Collect the fragments per article, preserving the order of the hits.
    # The article id is read from the first value of the hit's "id" field.
    articles = collections.OrderedDict(
        (hit["fields"]["id"][0], {field: hit["highlight"][field] for field in fields})
        for hit in highlighted._do_query(dsl)["hits"]["hits"]
    )

    # HACK: Elastic does not escape html tags *in the article*. We therefore pass a random
    # marker and use it to escape ourselves.
    closing_placeholder = placeholder + placeholder

    def escape_fragment(fragment):
        # Swap the placeholder tags for bare placeholders, escape everything
        # else, then turn the placeholders into the requested tag.
        fragment = fragment.replace("<{}>".format(placeholder), placeholder)
        fragment = fragment.replace("</{}>".format(placeholder), closing_placeholder)
        fragment = html.escape(fragment)
        fragment = fragment.replace(closing_placeholder, "</{}>".format(mark))
        return fragment.replace(placeholder, "<{}>".format(mark))

    for fragments_by_field in articles.values():
        for fragments in fragments_by_field.values():
            # Slice assignment keeps the original list object, updated in place
            fragments[:] = [escape_fragment(fragment) for fragment in fragments]

    return articles
def test_random_alphanum(self):
    """random_alphanum returns strings of the requested length that differ between calls."""
    # The generated string must have exactly the requested length
    for length in (1000, 100, 80, 60):
        generated = toolkit.random_alphanum(length)
        self.assertEqual(len(generated), length)

    # Two independent calls should not produce the same string
    first = toolkit.random_alphanum(100)
    second = toolkit.random_alphanum(100)
    self.assertNotEqual(first, second)
def create_user(username, first_name, last_name, email, password=None):
    """
    Create a user with the given properties and send them a welcome email.

    If no `password` is given, a random 7-character alphanumeric password is
    generated. The welcome email is rendered from "welcome_email.html" and
    "welcome_email.txt" and is sent to `email` in both cases.

    Raises: smtplib.SMTPException, django.db.utils.DatabaseError
    """
    # True when the password was generated here rather than supplied by the
    # caller. Not referenced again in this function; it reaches the templates
    # through locals() below.
    # NOTE(review): presumably the templates use it to decide whether to
    # include the password in the email - verify against the templates.
    email_password = (password is None)
    if password is None:
        password = toolkit.random_alphanum(7)
    log.info("Creating new user: {username}".format(**locals()))
    u = _create_user(username, first_name, last_name, email, password=password)
    log.info("Created new user, sending email...")
    # NOTE: locals() is used as the template context, so every local name
    # defined up to this point (the parameters, `email_password`, `u`) is
    # visible to the templates. Renaming or reordering locals changes that
    # context.
    html = render(get_request(), "welcome_email.html", locals()).content
    # The plain-text render additionally sees `html` in its context.
    text = render(get_request(), "welcome_email.txt", locals()).content
    sendmail.sendmail(settings.DEFAULT_FROM_EMAIL, email, 'Welcome to AmCAT!', html, text)
    log.info("Email sent, done!")
    return u
def get_secret():
    """
    Get or create a secret key to sign cookies with.

    ~/.cookie-secret is used to store the secret key. If the file exists its
    contents are returned as-is. Otherwise a new 40-character alphanumeric
    secret is generated, written to the file and returned.

    Raises:
        IOError: if the existing file cannot be read (re-raised after printing
            a diagnostic).
        Exception: if ~/.cookie-secret exists but is not a regular file.
    """
    path = os.path.expanduser("~/.cookie-secret")

    if os.path.exists(path):
        if not os.path.isfile(path):
            message = "%r is not a file." % path
            print(message)
            raise Exception(message)

        try:
            # Context manager so the handle is closed deterministically
            with open(path) as secret_file:
                return secret_file.read()
        except IOError:
            print("%r is not readable!" % path)
            raise

    secret = random_alphanum(40)

    # The file holds a signing secret: create it readable and writable by the
    # owner only instead of relying on the process umask.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as secret_file:
        secret_file.write(secret)

    return secret
def get_fragments(query: str, article_ids: Sequence[int], fragment_size=150, number_of_fragments=3):
    """
    Return the articles in `article_ids`, in that order, with their text and
    title replaced by highlighted fragments for `query`.

    When `number_of_fragments` is 0, the text of every article for which
    fragments were returned is set to the empty string instead.
    """
    # An empty query is replaced by a random string
    query = query or toolkit.random_alphanum(20)

    articles = Article.objects.defer("text", "title").in_bulk(article_ids)
    fragments = ESQuerySet().filter(id__in=article_ids).highlight_fragments(
        query,
        ("text", "title"),
        mark="em",
        fragment_size=fragment_size,
        number_of_fragments=number_of_fragments,
    )

    for article_id, fields in fragments.items():
        article = articles[article_id]

        if number_of_fragments == 0:
            article.text = ""
            continue

        article._highlighted = True  # Disable save()

        for field, highlights in fields.items():
            if len(highlights) > 1:
                # Multiple fragments: one paragraph each, surrounded by ellipses
                flattened = (h.strip().replace("\n", " ") for h in highlights)
                fragment = "<p>... " + " ...</p><p>... ".join(flattened) + " ...</p>"
            else:
                fragment = highlights[0]
            setattr(article, field, fragment)

    return [articles[article_id] for article_id in article_ids]
def get_fragments(query: str, article_ids: Sequence[int], fragment_size=150, number_of_fragments=3):
    """
    Return the articles in `article_ids` with their text and title replaced
    by highlighted fragments for `query`.
    """
    # An empty query is replaced by a random string
    query = query or toolkit.random_alphanum(20)

    articles = Article.objects.defer("text", "title").in_bulk(article_ids)
    fragments = ESQuerySet().filter(id__in=article_ids).highlight_fragments(
        query,
        ("text", "title"),
        mark="em",
        fragment_size=fragment_size,
        number_of_fragments=number_of_fragments,
    )

    for article_id, fields in fragments.items():
        article = articles[article_id]
        article._highlighted = True  # Disable save()

        for field, highlights in fields.items():
            if len(highlights) > 1:
                # Multiple fragments: one paragraph each, surrounded by ellipses
                flattened = (h.strip().replace("\n", " ") for h in highlights)
                fragment = "<p>... " + " ...</p><p>... ".join(flattened) + " ...</p>"
            else:
                fragment = highlights[0]
            setattr(article, field, fragment)

    return articles.values()
from collections import ChainMap
from typing import Iterable, Any, Union, Sequence, Dict, Tuple, List
from typing import Optional

from django.conf import settings
from django.db.models import QuerySet
from django.http import QueryDict

from amcat.models import get_used_properties_by_articlesets, ArticleSet, Project
from amcat.tools import queryparser
from amcat.tools import toolkit
from amcat.tools.amcates import ALL_FIELDS, ES, get_property_primitive_type
from amcat.tools.queryparser import Term

# Random prefix that replaces the opening highlight tag before tokenizing
TOKEN_START = toolkit.random_alphanum(16)
TOKENIZER_PATTERN = settings.ES_SETTINGS["analysis"]["tokenizer"]["unicode_letters_digits"]["pattern"]
TOKENIZER_INV = regex.compile(TOKENIZER_PATTERN.replace("^", "") + "+")
TOKENIZER = regex.compile(TOKENIZER_PATTERN)


def tokenize_highlighted_text(text: str, marker: str):
    """
    Yield one boolean per non-empty token of `text`: True if the token starts
    with the highlight prefix, i.e. it directly followed an opening
    `<marker>` tag.
    """
    opening_tag = "<{}>".format(marker)
    closing_tag = "</{}>".format(marker)

    # Opening tags become a unique prefix, closing tags are dropped
    untagged = text.replace(opening_tag, TOKEN_START)
    untagged = untagged.replace(closing_tag, "")

    yield from (
        piece.startswith(TOKEN_START)
        for piece in TOKENIZER.split(untagged)
        if piece
    )
# password=secret
# host=localhost
# port=5432
#
# [caching-default]
# backend=django.core.cache.backends.memcached.MemcachedCache
# location=127.0.0.1:11211


def sections(identifier):
    """
    Yield ``(name, items)`` for each section of ~/.amcatrc3 that is named
    ``<identifier>-<name>``.

    `items` is the list of ``(option, value)`` pairs of that section. Sections
    whose name does not split into exactly two parts on "-" are ignored.
    """
    parser = configparser.ConfigParser()
    # open() + read_file() replace the Python 2 only file() builtin and the
    # deprecated readfp(); the context manager closes the handle.
    with open(os.path.expanduser('~/.amcatrc3')) as rc_file:
        parser.read_file(rc_file)

    for section in parser.sections():
        parts = section.split('-')
        # Compare by value: `is` on integers only works by accident
        if parts[0] == identifier and len(parts) == 2:
            yield parts[1], parser.items(section)


def filldict(vals, dic):
    """
    Store every ``(name, options)`` pair of `vals` in `dic` as
    ``dic[name] = {OPTION: value}`` with upper-cased option names, and return
    `dic`.
    """
    for name, opts in vals:
        dic[name] = {key.upper(): value for key, value in opts}
    return dic


DATABASES = filldict(sections('db'), dict())

# The DJANGO_DB_ENGINE environment variable overrides the configured engine
# of the "default" database (which must be present in ~/.amcatrc3).
if os.environ.get("DJANGO_DB_ENGINE"):
    DATABASES["default"]["ENGINE"] = os.environ.get("DJANGO_DB_ENGINE")

CACHES = filldict(sections('caching'), dict())

# NOTE(review): a new random key is generated every time this module is
# imported, so values signed with SECRET_KEY do not survive a restart and are
# not shared between processes - confirm this is intended.
SECRET_KEY = random_alphanum(30)