def testTextNotEmptyStrict(self):
     """Check tag extraction and default (strict) markup for non-empty text.

     The tag list is expected to hold only 'test', and the generated
     markup is expected to wrap that word in a link carrying the
     configured CSS class.
     """
     expected_markup = 'This is a <a href="/test" class="taggable">test</a> '
     tagger = AutoTagify()
     tagger.text = 'This is a test'
     tagger.css = 'taggable'
     self.assertEqual(tagger.tag_list(), ['test'])
     self.assertEqual(tagger.generate(), expected_markup)
 def testTextNotEmptyNotStrict(self):
     """Check tag extraction and non-strict markup for non-empty text.

     The tag list is expected to be ['are', 'test'], while the markup
     produced with ``strict=False`` links the words 'are' and 'tests'.
     """
     expected_markup = 'These <a href="/are" class="taggable">are</a> my <a href="/tests" class="taggable">tests</a> '
     tagger = AutoTagify()
     tagger.text = 'These are my tests'
     tagger.css = 'taggable'
     self.assertEqual(tagger.tag_list(), ['are', 'test'])
     self.assertEqual(tagger.generate(strict=False), expected_markup)
def window():
    """Auto-tag and summarise an uploaded file, store the result, render it.

    The file name comes from the ``type`` query parameter and is resolved
    relative to ``UPLOAD_FOLDER``.  For ``.pdf`` files the text of every
    page is extracted with PyPDF2; any other file is read as text.  The
    de-duplicated tags and the summary are inserted as one row into the
    ``Tag`` table of ``TAGS.db`` and the tags are passed to
    ``window.html``.

    Side effects:
        Updates the module-level ``lastFilename`` and writes to ``TAGS.db``.

    Returns:
        The rendered ``window.html`` template with ``F`` set to the file
        name and ``L`` set to the list of unique tags.

    Raises:
        TypeError: if the ``type`` query parameter is missing
            (``os.path.splitext`` rejects ``None``).
    """
    global lastFilename
    # NOTE(review): the file name is taken verbatim from the request and
    # joined onto UPLOAD_FOLDER; a value containing "../" can escape that
    # folder.  Consider sanitising it (e.g. werkzeug's secure_filename).
    filenamenew = request.args.get('type')
    # splitext() is only needed for the extension: name + ext is always the
    # original string.
    ext = os.path.splitext(filenamenew)[1]
    lastFilename = filenamenew
    status = 1
    path = UPLOAD_FOLDER + "/" + filenamenew

    if ext == ".pdf":
        # The context manager closes the file even if PyPDF2 raises.
        with open(path, 'rb') as pdf_file:
            pdfReader = PyPDF2.PdfFileReader(pdf_file)
            print(pdfReader.numPages)
            # getPage() is zero-based, so start at 0; starting at 1 would
            # silently drop the first page of every document.
            text = "".join(
                pdfReader.getPage(page_no).extractText()
                for page_no in range(pdfReader.numPages)
            )
    else:
        # Read-only access is sufficient; the file is never written here.
        with open(path, "r") as text_file:
            text = text_file.read()

    # Auto tagging; dict.fromkeys() removes duplicates while keeping the
    # order in which tags first appear.
    t = AutoTagify()
    t.text = text
    e_words = list(dict.fromkeys(t.tag_list()))

    # Summarization.
    # NOTE(review): the meaning of the second argument (5) is defined by
    # generate_summary, which is not visible here.
    summary = generate_summary(path, 5)

    conn = sqlite3.connect('TAGS.db')
    try:
        # Manual_tag starts out as an empty list; lists are stored as their
        # str() representation.
        conn.execute(
            '''INSERT INTO Tag (Filename,Auto_tag,Manual_tag,Summary,status) VALUES (?,?,?,?,?)''',
            (filenamenew, str(e_words), str([]), str(summary), status),
        )
        conn.commit()
    finally:
        # Release the connection even when the insert or commit fails.
        conn.close()

    return render_template('window.html', F=filenamenew, L=e_words)
Example #4
0
# -*- coding: utf-8 -*-
import base64
import os
import random
import time

from auto_tagify import AutoTagify
from boto.s3.key import Key
from PIL import Image
from pymongo import DESCENDING
from pymongo.objectid import ObjectId

import settings

# MIME type string for JPEG images.
CONTENT_TYPE = 'image/jpeg'
# Module-wide AutoTagify instance, created once at import time.
ATAG = AutoTagify()
# NOTE(review): presumably the URL prefix used for generated tag links —
# confirm against the auto_tagify documentation.
ATAG.link = "/tag"
# NOTE(review): presumably the maximum number of recent items to return —
# its use is not visible in this part of the file.
RECENT_LIMIT = 12


class Snappy(object):
    """Snapshot functionality bound to an environment-specific database."""

    def __init__(self):
        """Start in the 'dev' environment with an empty token."""
        self.token = ''
        self.env = 'dev'
        self.db = settings.DATABASE

    def set_environment(self, env='dev'):
        """Switch to the test database when ``env`` is 'test'.

        Any other value leaves the current environment and database
        untouched.
        """
        if env != 'test':
            return
        self.env = env
        self.db = settings.TEST_DATABASE
Example #5
0
import urllib
import math
import re
import web
from web import session
from datetime import datetime
from redis import Redis
from auto_tagify import AutoTagify

# Module-wide AutoTagify instance, created once at import time.
tag = AutoTagify()
# NOTE(review): presumably the URL prefix used for generated tag links —
# confirm against the auto_tagify documentation.
tag.link = '/tags'
# Character class covering punctuation plus newline, carriage return and tab;
# presumably used to strip those characters from words (usage not visible
# here).  Written as a raw string so the regex escapes (\[, \], \+) reach
# `re` unchanged instead of being invalid Python string escapes.
clean_word = re.compile(r'[\[\],().:"\'?!*<>/\+={}`~\n\r\t]')
# Matches the URL-encoded apostrophe (%27).
# NOTE(review): the second alternative is an empty group, which matches the
# empty string at every position; a literal character was probably lost
# here — confirm against the original source before changing it.
clean_quotes = re.compile('(%27)|()')
# Redis client created with the library's default connection arguments.
r = Redis()
# NOTE(review): presumably an upper bound on a record's length — its use is
# not visible in this part of the file.
record_max_length = 50

class TapeChatTag(object):
  def __init__(self):
    """Set every instance attribute to its initial value."""
    # NOTE(review): the two *_entries attributes start as empty strings;
    # what they accumulate is not visible in this part of the file.
    self.all_entries = ''
    # Page counters; generate() resets next_page, prev_page and page_value
    # to 0 whenever they are negative.
    self.next_page = 1
    self.prev_page = 0
    self.page_value = 0
    self.start_pos = 0
    self.text_entries = ''  
    
  def generate(self,user_id):
    if self.next_page < 0: self.next_page = 0
    if self.prev_page < 0: self.prev_page = 0
    if self.page_value < 0: self.page_value = 0
     
    try:
import feedparser
import re
import urllib
from redis import Redis
from BeautifulSoup import BeautifulSoup
from auto_tagify import AutoTagify
from tapechat_tag import TapeChatTag
from time import time

# HTML tag names left untouched; every other tag found in an entry summary
# gets its `hidden` attribute set in the loop below.
VALID_TAGS = ['p']
# Module-wide AutoTagify instance, created once at import time.
tag = AutoTagify()
# NOTE(review): presumably the URL prefix used for generated tag links —
# confirm against the auto_tagify documentation.
tag.link = '/tags'
tapechat_tag = TapeChatTag()
# Redis client created with the library's default connection arguments.
r = Redis()
text_unique = []

# Contents of the "global:feeds" key, sorted in descending order.
# This runs a Redis command at import time.
feeds = r.sort("global:feeds",desc=True)
# Matches a bullet written either as the HTML entity &#8226; or as the
# UTF-8 byte sequence of U+2022.
circle_sym = re.compile('(&#8226;)|(\xe2\x80\xa2)')
# Matches opening and closing paragraph tags.
p_tags = re.compile('(<p>)|(</p>)')

# Walk every registered feed and process the entries not seen before.
# NOTE(review): this view of the loop is truncated after `text_id`; the code
# that stores the generated text is not visible here.
for feed_id in feeds:
  # The feed URL is stored under "fid:<id>:url".
  feed = r.get("fid:" + str(feed_id) + ":url")
  rss = feedparser.parse(feed)
  for entry in rss.entries:
    # An existing "guid:<guid>:fid" key marks the entry as already handled.
    if not r.exists("guid:" + str(entry.guid) + ":fid"):
      clean_text = BeautifulSoup(entry.summary)
      # Flag every tag that is not whitelisted.
      # NOTE(review): presumably BeautifulSoup then renders the tag's
      # contents without the tag itself — confirm against BeautifulSoup 3.
      for t in clean_text.findAll(True):
        if t.name not in VALID_TAGS: t.hidden = True
      # Replace <p> / </p> with spaces before handing the text to the tagger.
      tag.text = p_tags.sub(' ',clean_text.renderContents())
      sanitized_text = tag.generate()
      # Increment the "global:nextTextId" counter to obtain a new text id.
      text_id = r.incr("global:nextTextId")
 def testTagsNotEmpty(self):
     """Check that the expected tags are extracted from a sentence."""
     expected_tags = ['test', 'other', 'valid', 'tag']
     tagger = AutoTagify()
     tagger.text = 'This is a test with other valid tags'
     self.assertEqual(tagger.tag_list(), expected_tags)
 def testTextEmpty(self):
     """Check that generating without any text yields an empty string."""
     tagger = AutoTagify()
     generated = tagger.generate()
     self.assertEqual(generated, '')