Example no. 1
0
def test_update_page(mock_model, client, admin):
	"""PUT on an existing page updates its fields and sanitizes the HTML."""
	# Given: a persisted page
	existing = Page(name='Page', text='coucou')
	existing.create()

	payload = json.dumps({
		"_id": "page",
		"name": "Super Page",
		"text": "<b>coucou</b> <script>lol()</script>",
	})

	# When: updating it through the API
	response = client.put(
		url_for('api.update_page', page_id='page'),
		data=payload,
		content_type='application/json',
	)
	body = response.json

	# Then: 200, fields updated, <script> escaped while <b> is kept
	assert response.status_code == 200
	assert body['data']['name'] == 'Super Page'
	assert body['data']['_id'] == 'page'
	assert body['data']['text'] == '<b>coucou</b> &lt;script&gt;lol()&lt;/script&gt;'
def get_pages_from_json(json) -> Dict[str, Page]:
    """Build Page objects keyed by URL from a search-engine style response.

    :param json: raw response; hits are extracted via ``get_hits`` and each
        hit is expected to carry an ``_id`` and a ``_source`` dict
    :return: mapping of page URL -> Page; hits whose ``raw_url`` fails
        ``is_url_valid`` are logged and skipped
    """
    categorizer = Categorizer()
    pages_to_return = {}
    for hit in get_hits(json):
        page_info = hit.get("_source")
        url = page_info.get("raw_url")
        if not is_url_valid(url):
            # No usable URL for this hit: log and skip it.
            print("cannot find url for source: {}".format(page_info))
            continue
        content = page_info.get("content")
        # Fix: the original ran content through
        # bytes(content, 'utf-8').decode('utf-8', 'ignore') — a no-op for
        # any valid str (and a crash for unencodable ones), so categorize
        # the content directly.
        category = Category(
            name=categorizer.categorize(content),
            occurrence=1,
        )
        pages_to_return[url] = Page(
            id=hit.get("_id"),
            url=url,
            title=page_info.get("title"),
            last_updated=page_info.get("updated_on"),
            links=get_links_from_json(page_info.get("links")),
            content=content,
            categories=[category],
        )
    return pages_to_return
Example no. 3
0
def create_pages(self, data: dict, uuid: str):
    """
    Take formatted JSON log data and save it to a log.
    :param self: Task instance, supplied by Celery.
    :param data: Formatted data.
    :param uuid: Log uuid.
    :return: Log uuid.
    """
    progress = WebSocketProgressRecorder(self)

    messages = data.pop('messages')

    # Split the messages into chunks of 1000, reporting progress per chunk.
    offsets = range(0, len(messages), 1000)
    total = len(offsets)
    chunks = []
    for done, start in enumerate(offsets):
        progress.set_progress(done, total)
        chunks.append(messages[start:start + 1000])

    # NOTE(review): polls with no timeout — presumably another task creates
    # the Log row; if it never appears this loop spins forever.
    while not Log.objects.filter(uuid=uuid).exists():
        time.sleep(1)
    log = Log.objects.update_or_create(
        uuid=uuid, defaults={'users': data.pop('users')})[0]
    pages = Page.objects.bulk_create(
        [Page(log=log, messages=chunk, index=index)
         for index, chunk in enumerate(chunks)])
    log.pages.set(pages)
    progress.set_progress(total, total)

    return uuid
Example no. 4
0
def make_custom_page(url: str,
                     links: List[str] = (),
                     category: str = 'Social',
                     content: str = 'Social page content') -> Page:
    """Build a Page with one Category and one Link per entry in *links*.

    :param url: the page URL
    :param links: iterable of outgoing link URLs; a Link(occurrences=1)
        is created for each entry
    :param category: name of the single Category (occurrence=1)
    :param content: page content text
    :return: the constructed Page
    """
    # Fix: the default was a shared mutable list ([]); an immutable empty
    # tuple avoids the mutable-default pitfall and iterates identically.
    return Page(url=url,
                categories=[Category(name=category, occurrence=1)],
                content=content,
                links=[Link(link=lin, occurrences=1) for lin in links])
Example no. 5
0
def create_page_document(db_session: Session, document_id: uuid.UUID):
    """Persist a fresh Page attached to *document_id* and return its id."""
    new_page = Page(document=document_id)
    db_session.add(new_page)
    db_session.commit()
    return new_page.id
Example no. 6
0
def create_page_old(db_session: Session, page_id: uuid.UUID):
    """
    New page with old page data copied
    """
    # Only pages without a processed copy qualify; in SQLAlchemy
    # `== None` compiles to `IS NULL`.
    source = (db_session.query(Page)
              .filter(Page.id == page_id)
              .filter(Page.processed_page == None)
              .first())
    if not source:
        raise NotFoundException
    copy = Page(rotation=source.rotation, document=source.document)
    db_session.add(copy)
    db_session.flush()  # flush so copy.id is populated before linking
    source.processed_page = copy.id

    db_session.commit()
    return copy.id
Example no. 7
0
def guarantee_pages_for_links(pages: Dict[str, Page]) -> Dict[str, Page]:
    """
    :param pages: Original pages from the database with possible links to non-existent pages
    :type pages: Dict[Page]
    :return: Pages with links to guaranteed pages
    :rtype: Dict[Page]
    """
    # Work on a copy; add a stub Page for every link target that is missing,
    # so every link resolves to some entry in the returned mapping.
    guaranteed = dict(pages)
    for page in pages.values():
        for link in page.links:
            target = link.link
            if not guaranteed.get(target):
                guaranteed[target] = Page(id=target, url=target)

    return guaranteed
Example no. 8
0
 def create_from(cls, procedure_id, display_index):
     """Build a Page for *procedure_id* at position *display_index*."""
     # NOTE(review): `cls` is accepted but ignored — a subclass calling this
     # still gets a plain Page; confirm whether `cls(**...)` was intended.
     return Page(
         procedure_id=procedure_id,
         display_index=display_index,
     )
Example no. 9
0
def page():
    """Fixture: a clean page with simple, already-safe HTML text."""
    return Page(
        name='Super Page',
        text='<p>Youpi !</p>',
    )
Example no. 10
0
def dirty_page():
    """Fixture: a page whose name/text carry markup expected to be sanitized."""
    dirty = Page(
        name='<script>niark()</script> Super Page',
        text='<p>Youpi !</p> <abc>super</abc>',
    )
    return dirty
Example no. 11
0
def page_summary(request):
  """Read (GET) or update (POST) the stored summary for a page URL.

  :param request: request carrying 'url' (and 'summary' on POST)
  :return: dict with 'success', 'errors' and 'data'; 'data.summary' holds
      the text, last editor and a locally-formatted timestamp.
  """
  success = False
  user = request.user
  data = {}
  errors = {}
  errors['page_summary'] = []
  data['summary'] = {
    'summary': '',
  }

  if request.GET:
    url = process_url(request.GET.get('url'))

    try:
      p = Page.objects.get(url=url)
      s, s_created = Summary.objects.get_or_create(page=p)

      # Stored dates are treated as UTC and rendered in the local zone.
      from_zone = tz.tzutc()
      to_zone = tz.tzlocal()

      date = s.date.replace(tzinfo=from_zone)
      local = date.astimezone(to_zone)

      # NOTE(review): '%b %m' prints the month twice (name then number) —
      # '%d' (day) was probably intended; left unchanged to preserve output.
      data['summary'] = {
        'summary': s.summary,
        'user': s.last_editor.username,
        'date': local.strftime('%b %m, %Y,  %I:%M %p'),
      }
      success = True
    except Exception:  # was a bare except; don't swallow SystemExit etc.
      errors['page_summary'].append('Could not get page ' + url)

  if request.POST:
    url = process_url(request.POST.get('url'))
    domain = '{uri.netloc}'.format(uri=urlparse(url))
    summary = request.POST.get('summary')

    try:
      d, d_created = Domain.objects.get_or_create(url=domain)
      d.save()

      if len(Page.objects.filter(url=url)) == 0:
        p = Page(url=url, domain=d)
        p.save()
      else:
        p = Page.objects.get(url=url)

      s, s_created = Summary.objects.get_or_create(page=p)
      prev_summary = s.summary
      sh = SummaryHistory(user=user, new_summary=summary, previous_summary=prev_summary, summary=s)
      sh.save()

      s.summary = summary
      s.last_editor = user
      # NOTE(review): datetime.now() is naive local time, yet it is tagged
      # as UTC below — confirm the intended storage zone.
      s.date = datetime.now()
      s.save()

      from_zone = tz.tzutc()
      to_zone = tz.tzlocal()

      # Fix: `to_zone` was computed but never applied here, unlike the GET
      # branch, so the POST response showed an unconverted timestamp.
      local = s.date.replace(tzinfo=from_zone).astimezone(to_zone)

      data['summary'] = {
        'summary': summary,
        'user': user.username,
        'date': local.strftime('%b %m, %Y,  %I:%M %p'),
      }
      success = True
    except Exception:
      errors['page_summary'].append('Could not get page ' + url)

  return {
    'success': success,
    'errors': errors,
    'data': data,
  }
Example no. 12
0
def initialize_page(request):
  """Create or refresh the Page for a posted URL and collect its tags.

  :param request: request whose POST carries 'url', optional 'favIconUrl',
      'domain_name', 'title' and 'add_usertags'
  :return: dict with 'success', 'errors', 'tags' and 'highlights' for POST
      requests; non-POST requests fall through and return None
      (pre-existing behavior, kept for compatibility).
  """
  tags = {}
  errors = {}
  user = request.user
  count_tags = False
  highlights = 0
  # Fix: `success` was only assigned inside the "no add_page errors" branch,
  # so the error path hit a NameError at the return statement.
  success = False

  if request.POST:
    url = process_url(request.POST.get('url'))
    favIconUrl = request.POST.get('favIconUrl')
    domain_name = request.POST.get('domain_name')
    title = request.POST.get('title')
    add_usertags = request.POST.get('add_usertags')

    domain = '{uri.netloc}'.format(uri=urlparse(url))
    errors['add_page'] = []

    title = url if title == "" else title

    # Add (or rename) the domain
    d, d_created = Domain.objects.get_or_create(url=domain)
    if domain_name is not None:
      d.name = domain_name
    d.save()

    # Add the page, updating the title if it already exists.
    try:
      p = Page.objects.get(url=url)
      p.title = title
      p.save()
    except Exception:  # covers DoesNotExist and MultipleObjectsReturned
      if len(Page.objects.filter(url=url)) == 0:
        p = Page(url=url, domain=d)
        p.title = title
        p.save()
        count_tags = True
      else:
        errors['add_page'].append("More than one page exists")

    if len(errors['add_page']) == 0:
      highlights = len(Highlight.objects.filter(page__url=url))
      vts = Tag.objects.filter(page__url=url, highlight=None)
      if len(vts) == 0:
        count_tags = True

      for vt in vts:
        vt_info = {
          'user_voted': False,
          'name': vt.common_tag.name,
          'color': vt.common_tag.color,
          'description': vt.common_tag.description,
          'is_private': vt.is_private,
          'vote_count': len(Vote.objects.filter(tag=vt)),
        }

        tags[vt.common_tag.name] = vt_info

        # Add tag to user
        if add_usertags == "true":
          uti, created = UserTagInfo.objects.get_or_create(user=user, page=p, tag=vt)
          uti.save()

      if count_tags:
        errors['get_tc'] = []
        try:
          tc = TagCollection.objects.get(subscribers=user)
          trie = json.loads(tc.trie_blob)
        except Exception:  # was a bare except
          errors['get_tc'].append('User not subscribed')

        if len(errors['get_tc']) == 0:
          # Count value tags for the page.
          # SECURITY(review): verify=False disables TLS certificate checks
          # while fetching a user-supplied URL — confirm this is intended.
          r = requests.get(url, verify=False)
          emotes = countEmote(r.text, trie)
          ts = [(e, emotes[e]) for e in emotes if e]
          # Fix: `sorted(ts, ...)` returned a new list that was discarded,
          # so the "top 3" selection below ran on an unsorted list; sort
          # in place so the most frequent tags really are picked first.
          ts.sort(key=lambda x: x[1], reverse=True)

          errors['add_valuetags'] = []

          if len(ts) == 0:
            errors['add_valuetags'].append('No tags counted')

          count = 3
          for tag in ts:
            if tag[1] > 2 and count > 0:
              count -= 1
              name = tag[0]

              # Add tag to page
              try:
                vt = Tag.objects.get(page__url=url, common_tag__name=name, highlight=None)
              except Tag.DoesNotExist:
                try:
                  common_tag = CommonTag.objects.get(name=name)
                  vt = Tag(page=p, common_tag=common_tag, word_count=tag[1])
                  vt.save()

                  # Add tag to user
                  if add_usertags == "true":
                    uti, created = UserTagInfo.objects.get_or_create(user=user, page=p, tag=vt)
                    uti.save()
                except CommonTag.DoesNotExist:
                  errors['add_valuetags'].append("Could not get base tag")

              if len(errors['add_valuetags']) == 0:
                tags[name] = {
                  'name': name,
                  'color': vt.common_tag.color,
                  'description': vt.common_tag.description,
                }

      success = True
      for error_field in errors:
        if errors[error_field] != []:
          success = False

    return {
      'success': success,
      'errors': errors,
      'tags': tags,
      'highlights': highlights,
    }
Example no. 13
0
def get_linked_groups(pages: Dict[str, Page],
                      parent_group_id: str = None) -> List[Group]:
    """
    :param parent_group_id:  id of the group of which pages the subgroups will be created
    :type parent_group_id: str
    :param pages: the original pages from db
    :type pages: Dict[str, Page]
    :return: a list of groups with links to other groups with all isolates as a separate group
    :rtype: List[Group]
    """

    # Iteratively coarsen the page graph: each pass partitions the current
    # graph into linked groups, then rebuilds the graph with one synthetic
    # Page per group, until the partition count drops to MAX_PARTITION_COUNT
    # or stops changing between consecutive passes.
    mined_data = dict(pages)
    partition_count_last_run = -1
    # Seeded above the threshold so the while-loop runs at least once.
    partition_count = MAX_PARTITION_COUNT + 1
    group_id_to_pages = {}
    number_of_runs = 0
    isolates = []
    linked_groups = []
    original_table_to_alias, original_table_to_original_keys = create_hash_tables(
        mined_data)
    while partition_count > MAX_PARTITION_COUNT and partition_count_last_run != partition_count:
        number_of_runs += 1
        partition_count_last_run = partition_count
        table_to_alias, table_to_original = create_hash_tables(mined_data)
        page_originals_partition, isolated_nodes = get_groups_without_links_and_isolates(
            mined_data, table_to_alias, table_to_original)
        # Keep only the first non-empty set of isolated nodes across passes.
        if not isolates and isolated_nodes:
            isolates = isolated_nodes
        linked_groups = get_linked_groups_from_ids(mined_data,
                                                   page_originals_partition,
                                                   parent_group_id)

        # Build the next, coarser graph (one synthetic Page per group) while
        # tracking which original pages each group has absorbed so far.
        new_mined_data = {}
        partition_count = 0
        new_group_ids_with_pages = {}
        for group in linked_groups:
            partition_count += 1
            new_mined_data[group.id] = Page(id=group.id,
                                            url=group.id,
                                            links=group.links,
                                            categories=group.categories)
            new_group_ids_with_pages[group.id] = []
            for subgroup_id in group.members:
                if subgroup_id in group_id_to_pages:
                    # Member is a group from a previous pass: inherit its pages.
                    new_group_ids_with_pages[
                        group.id] += group_id_to_pages[subgroup_id]
                else:
                    # Member is an original page: record it directly.
                    new_group_ids_with_pages[group.id].append(
                        group.members[subgroup_id])

        group_id_to_pages = new_group_ids_with_pages
        mined_data = new_mined_data

    # Re-key each final group's members by the original pages' URLs.
    for group in linked_groups:
        group.members = {x.url: x for x in group_id_to_pages[group.id]}

    # Only the top-level call attaches the isolated pages as their own group.
    if parent_group_id is None:
        linked_groups = insert_isolated_nodes_group(
            linked_groups, isolates, pages, original_table_to_original_keys)

    return linked_groups