Example #1
0
def add():
    """Handle the "add URL" form.

    On a valid POST the URL is created unless it already exists or, on a
    first submission (``add1``), unless similar URLs are found, in which case
    they are handed back so the user can confirm. Every non-redirect path
    returns a dict (presumably the template context; the rendering layer is
    not visible here).
    """
    form = AddUrlForm()

    # Plain GET, or a POST that fails validation: just show the form again.
    # Submissions come either from add1 (first submission) or add2 (similar
    # matches were shown and the user confirmed).
    if request.method != 'POST' or not form.validate():
        return {'form': form}

    submitted_url = form.url.data
    if MongoURL.exists(submitted_url):
        return {'form': form, 'url_exists': True}

    if form.add1.data:
        # First submission: look for similar matches on the URL stripped of
        # its query string and of one trailing slash.
        # TODO use urllib.urlparse
        stem = submitted_url.partition('?')[0]
        stem = stem[:-1] if stem.endswith('/') else stem
        candidates = MongoURL.objects(url__icontains=stem)
        similar_urls = candidates.paginate(page=1, per_page=10)
        if similar_urls.total > 0:
            return {'form': form, 'similar_urls': similar_urls}

    user_source = Source(type_=SourceType.USER, extra=current_user.id)
    mu = MongoURL.create(submitted_url, source=user_source)  #.save()
    confirmation = Markup(
        f'The url has been added: <a href=""><code>{mu.id}</code></a>')
    flash_success(confirmation)
    # Redirect to the same endpoint so a refresh does not resubmit the form.
    return redirect(url_for(request.endpoint))
Example #2
0
def _delete_url(mu: MongoURL):
    """Mark every sentence of ``mu`` as deleted, remove the URL and blacklist it.

    Returns the number of sentences found for ``mu.url``.
    """
    # TODO: not working !!! changes not saved in Mongo ???

    # Step 1: flag all sentences coming from this URL (only ids are fetched).
    url_sentences = MongoSentence.objects(url=mu.url)
    sentence_ids = url_sentences.fields(id=True)
    MongoSentence.mark_deleted(sentence_ids, current_user.id)

    # Step 2: drop the URL document if it exists, then blacklist the URL.
    MongoURL.try_delete(id=mu.id)
    MongoBlacklist.add_url(mu.url)

    return len(sentence_ids)
Example #3
0
def view():
    """Search URLs (GET) or bulk-delete the URLs matching the search (POST).

    GET: builds the search form from the query string and, unless the form is
    blank, returns one page (20 items) of matching URLs sorted as requested.

    POST: when the delete form's ``go`` field is set, deletes every URL
    matching the current search (best effort: a failure on one URL is logged
    and the loop continues), flashes a summary and redirects back. Any other
    POST is converted to a GET via ``SearchUrlsForm.redirect_as_get()``.
    """
    # Local import: the file-level import block is not visible from here.
    import logging

    if request.method == 'GET':
        form = SearchUrlsForm.from_get()
        page = form.get_page_and_reset()  # get the parameter, then reset
        if form.is_blank():
            return dict(form=form, urls=[], collapse=False)

        # A falsy sort_order means descending, hence the '-' prefix.
        sort_prefix = '' if form.sort_order.data else '-'
        urls = MongoURL.objects(**form.get_mongo_params()) \
            .order_by(f'{sort_prefix}{form.sort.data}') \
            .paginate(page, per_page=20)
        collapse = urls.total > 0

        return dict(form=form,
                    delete_form=DeleteUrlsForm(),
                    urls=urls,
                    collapse=collapse)

    delete_form = DeleteUrlsForm()
    if not delete_form.go.data:
        return SearchUrlsForm.redirect_as_get()

    # The delete form is only available after a search, so the url should
    # contain some query parameters.
    if not delete_form.validate():
        # ensure the checkbox is selected to avoid silly mistakes
        flash_error('You must know what you are doing.')
        return redirect(request.url)

    form = SearchUrlsForm.from_get()
    if not form.search.data:
        # don't let the user delete everything
        flash_error('You must have some filtering before delete.')
        return redirect(request.url)

    urls = MongoURL.objects(**form.get_mongo_params())
    deleted_urls, deleted_sentences = 0, 0
    for url in urls:
        try:
            deleted_sentences += _delete_url(url)
            deleted_urls += 1
        except Exception:
            # Best effort: keep going, but record the traceback and which URL
            # failed instead of printing a bare message to stdout.
            logging.getLogger(__name__).exception(
                'Failed to delete url %s', url.id)

    flash_success(
        f'Delete {deleted_urls}/{len(urls)} urls ({deleted_sentences} sentences).')
    return redirect(request.url)
Example #4
0
def details(id):
    """Show one URL with its sentences, or blacklist it on a valid POST.

    Responds with 404 (via ``get_or_404``) when no URL has this ``id``.
    On a valid POST the URL is deleted/blacklisted through ``_delete_url``,
    a confirmation is flashed and the user is redirected to ``next`` (query
    parameter) or to the ``.view`` endpoint. Otherwise returns a dict with the
    form, the URL and one page (20 items) of its sentences, newest first.
    """
    form = DeleteUrlForm()
    mu = MongoURL.objects(id=id).get_or_404()

    if request.method == 'POST' and form.validate():
        num_sentences = _delete_url(mu)
        # mu.url is user-submitted content: let Markup.format escape it rather
        # than interpolating it raw into trusted HTML.
        msg = Markup(
            'URL <small>{}</small> (id: <code>{}</code>) has been blacklisted.'
        ).format(mu.url, mu.id)
        if num_sentences > 0:
            msg += Markup('<br>{} sentence(s) deleted.').format(num_sentences)
        flash_success(msg)
        # NOTE(review): 'next' is taken from the query string unchecked;
        # confirm whether it should be restricted to same-site targets.
        return redirect(request.args.get('next') or url_for('.view'))

    # type=int falls back to the default instead of raising on a bad value.
    page = request.args.get('page', 1, type=int)

    sentences = MongoSentence.objects(
        url=mu.url).order_by('-date_added').paginate(page=page, per_page=20)
    return dict(form=form, url=mu, sentences=sentences)
Example #5
0
def label_from_url():
    """Label all sentences of one URL, identified by the ``uid`` query parameter.

    Without ``uid`` the user is redirected to the single-sentence labelling
    view. A POST saves the submitted labels (when the form validates and
    ``save`` is set) and redirects back as a GET. A GET returns a dict with
    the form, ``param_sid``, the URL string and the first 50 sentences of that
    URL ordered by ``date_added``; if the URL does not exist or has no
    sentence left, a warning is flashed and the user is redirected to the
    single-sentence labelling view.
    """
    param_uid = request.args.get('uid', '')
    param_sid = request.args.get('sid', '')

    if not param_uid:
        # no url specified: redirect to single labelling view
        flash_error('No url specified.')
        return redirect(url_for('.label_one', id=param_sid))

    form = FromUrlPostForm()
    if request.method == 'POST':  # we have a post: save the labels
        if not form.validate():
            flash_form_errors(form)
        elif form.save.data:
            saved_count = _label_all_from_form(request.form)
            flash_success("Labelled %d sentences." % saved_count)
        # redirect as GET
        return redirect(
            url_for(request.endpoint, dialect=form.dialect.data,
                    uid=param_uid))

    # This is a GET: display the form to label all sentences from the uid.
    # First, get the URL object.
    mu = MongoURL.get(id=param_uid)
    if mu is None:
        # non-existent URL: redirect to single labelling view.
        # param_uid comes straight from the query string, so it is escaped
        # by Markup.format instead of being interpolated into raw HTML.
        flash_warning(
            Markup('The URL <code>{}</code> does not exist.').format(
                param_uid))
        return redirect(url_for('.label_one'))

    # Now, get the sentences from the url, ordered by date_added
    sentences = MongoSentence.objects(**base_mongo_params(), url=mu.url) \
        .order_by('date_added') \
        .paginate(page=1, per_page=50)
    if sentences.total == 0:
        # no sentences, redirect to single labelling view
        flash_warning(
            Markup(
                'No sentence left to label for URL  <code>{}</code>.<br>'
                '<small>Either they are not validated or you already labelled them</small>.'
            ).format(param_uid))
        return redirect(url_for('.label_one'))

    # set default dialect and return
    form.dialect.data = request.args.get('dialect', '')
    return dict(form=form,
                param_sid=param_sid,
                url=mu.url,
                sentences=sentences)
Example #6
0
def validate():
    """Show sentences the current user has not validated yet, and validate them.

    A valid POST adds the current user to ``validated_by`` for every sentence
    id listed in the form, then redirects to the same endpoint with the same
    query parameters. Otherwise up to ``_per_page`` non-deleted sentences not
    yet validated by the user are returned (restricted to one URL when a valid
    ``uid`` query parameter is given), together with the form and the URL
    string (``None`` when no valid ``uid`` was supplied).
    """
    form = ValidationForm()
    param_uid = request.args.get('uid', None)
    url = None  # set if the uid parameter is present AND valid

    if request.method == 'POST' and form.validate():
        ss = MongoSentence.objects(id__in=form.sentences_ids.data.split(","))
        ss.update(add_to_set__validated_by=current_user.id)
        flash.flash_success('%d sentences validated.' % ss.count())
        return redirect(
            url_for(request.endpoint,
                    **request.args))  # avoid form resubmission on refresh

    mongo_params = dict(validated_by__nin=[current_user.id],
                        deleted__exists=False)

    if param_uid is not None:
        # add a filter to the Mongo Query (only if the uid is valid)
        mu = MongoURL.get(id=param_uid)
        if mu is None:
            # Report the requested uid: mu is None here, so mu.id would raise.
            # The uid is user input, hence escaped through Markup.format.
            flash.flash_error(
                Markup('url <code>{}</code> does not exist.').format(
                    param_uid))
        else:
            url = mu.url
            mongo_params['url'] = url

    sentences = MongoSentence.objects(**mongo_params) \
        .order_by('url', 'date_added') \
        .limit(_per_page)

    if not sentences and param_uid:
        # we were displaying sentences from a given URL, but they are all
        # validated now... Redirect to the main view
        flash.flash_warning(
            Markup(
                'No more sentences to validate from URL <code>{}</code>.'
            ).format(param_uid))
        return redirect(url_for(request.endpoint))

    form.sentences_ids.data = ",".join(s.id for s in sentences)
    return dict(form=form, sentences=sentences, url=url)
Example #7
0
def details(id):
    """Show one seed with the URLs it produced, or delete it on a valid POST.

    Responds with 404 (via ``get_or_404``) when no seed has this ``id``, for
    both GET and POST. On a valid POST, a seed with a search history is only
    marked as deleted (and the details page is shown again), while a seed
    without history is removed and the user is sent to the ``.view`` endpoint.
    Otherwise returns a dict with the form, the seed and one page (10 items)
    of URLs whose ``source.extra`` equals ``id``, newest first.
    """
    form = DeleteSeedForm()
    # Fetch once, up front: a POST for an unknown id must yield a 404 rather
    # than failing on attribute access of a missing seed.
    seed = MongoSeed.objects(id=id).get_or_404()

    if request.method == 'POST' and form.validate():
        if seed.search_history:
            seed.mark_deleted(seed, current_user.id, form.comment.data)
            return redirect(url_for(request.endpoint, id=id))
        seed.delete()
        return redirect(url_for('.view'))

    # TODO : show pertinence information here as well ?
    # from .forms import get_default_seeds_pipeline
    # pipeline = get_default_seeds_pipeline()
    # pipeline.append({'$match': {'_id': id}})
    # seed_2 = MongoSeed.objects.aggregate(*pipeline).next()

    # type=int falls back to the default instead of raising on a bad value.
    page = request.args.get('page', 1, type=int)
    urls = MongoURL.objects(source__extra=id).order_by('-date_added').paginate(
        page=page, per_page=10)
    return dict(form=form, s=seed, urls=urls)
Example #8
0
def details(id):
    """Return the text with this ``id`` and its URLs (404 if the text is unknown)."""
    mongo_text = MongoText.objects(id=id).get_or_404()
    # The URL query is wrapped in a SimpleNamespace exposing ``items`` to
    # simulate a pagination result, so the urls/_table.html template can be
    # reused to show the urls.
    linked_urls = MongoURL.objects(id__in=mongo_text.urls)
    return {
        'mongo_text': mongo_text,
        'urls': SimpleNamespace(items=linked_urls),
    }