def window():
    """Tag and summarise an uploaded file, store the result, and render it.

    The file name is taken from the ``type`` query parameter and looked up
    inside ``UPLOAD_FOLDER``. For ``.pdf`` files the text of every page is
    extracted with PyPDF2; any other file is read as text. The text is
    handed to ``AutoTagify`` to obtain tags, ``generate_summary`` is called
    on the file path, and one row is inserted into the ``Tag`` table of
    ``TAGS.db``.

    Side effects:
        Sets the module-level ``lastFilename`` to the requested file name.

    Returns:
        The rendered ``window.html`` template (``F`` = file name,
        ``L`` = de-duplicated tag list), or a ``(message, 400)`` tuple when
        the ``type`` parameter is missing, empty, or contains a directory
        component.
    """
    global lastFilename

    requested = request.args.get('type')
    # The name comes straight from the query string: refuse anything that is
    # not a bare file name so the lookup cannot escape UPLOAD_FOLDER, and
    # avoid the TypeError os.path.splitext(None) would raise.
    if not requested or os.path.basename(requested) != requested:
        return "Missing or invalid 'type' parameter", 400

    ext = os.path.splitext(requested)[1]
    filenamenew = requested
    lastFilename = filenamenew
    status = 1  # written to the row's status column; meaning is not visible here
    path = UPLOAD_FOLDER + "/" + filenamenew

    if ext.lower() == ".pdf":
        with open(path, 'rb') as pdf_file:
            reader = PyPDF2.PdfFileReader(pdf_file)
            # PyPDF2 pages are 0-indexed: start at 0 so the first page (and
            # the only page of a one-page document) is included.
            text = "".join(
                reader.getPage(page_no).extractText()
                for page_no in range(reader.numPages)
            )
    else:
        # Read-only access is sufficient; the file is never written here.
        with open(path, "r") as text_file:
            text = text_file.read()

    # Auto tagging
    tagger = AutoTagify()
    tagger.text = text
    # dict.fromkeys drops duplicate tags while keeping first-seen order.
    e_words = list(dict.fromkeys(tagger.tag_list()))

    # Summarization
    summary = generate_summary(path, 5)

    conn = sqlite3.connect('TAGS.db')
    try:
        conn.execute(
            '''INSERT INTO Tag (Filename,Auto_tag,Manual_tag,Summary,status) VALUES (?,?,?,?,?)''',
            (filenamenew, str(e_words), str([]), str(summary), status),
        )
        conn.commit()
    finally:
        # Release the connection even if the insert or commit fails.
        conn.close()

    return render_template('window.html', F=filenamenew, L=e_words)
def testTextNotEmptyStrict(self):
    """Check tag extraction and link markup for a short sentence.

    With the text 'This is a test' and the css class 'taggable',
    tag_list() must return only 'test', and generate() called with its
    default arguments must wrap that word in an anchor.
    """
    tagger = AutoTagify()
    tagger.text = 'This is a test'
    tagger.css = 'taggable'

    expected_tags = ['test']
    expected_markup = 'This is a <a href="/test" class="taggable">test</a> '

    self.assertEqual(tagger.tag_list(), expected_tags)
    self.assertEqual(tagger.generate(), expected_markup)
def testTextNotEmptyNotStrict(self):
    """Check tag extraction and non-strict link markup for a sentence.

    With the text 'These are my tests' and the css class 'taggable',
    tag_list() must return 'are' and 'test', and generate(strict=False)
    must wrap 'are' and 'tests' in anchors.
    """
    tagger = AutoTagify()
    tagger.text = 'These are my tests'
    tagger.css = 'taggable'

    expected_tags = ['are', 'test']
    expected_markup = 'These <a href="/are" class="taggable">are</a> my <a href="/tests" class="taggable">tests</a> '

    self.assertEqual(tagger.tag_list(), expected_tags)
    self.assertEqual(tagger.generate(strict=False), expected_markup)
# Feed ingestion script: for every registered feed, fetch its RSS entries,
# store each previously unseen entry's sanitized text, and update per-word
# counters and text lists.
#
# NOTE(review): the original line structure was lost; the nesting below is
# reconstructed from the statements' data dependencies — confirm against the
# upstream file before relying on it.
# NOTE(review): `r` is presumably a Redis client and `tag` an AutoTagify
# instance created elsewhere in the file — confirm.
# NOTE(review): urllib.quote / urllib.unquote are the Python 2 locations of
# these functions; under Python 3 they live in urllib.parse.

# All feed ids from the "global:feeds" key, sorted in descending order.
feeds = r.sort("global:feeds",desc=True)
# Matches the bullet character, either literally or as its UTF-8 byte sequence.
circle_sym = re.compile('(•)|(\xe2\x80\xa2)')
# Matches opening and closing paragraph tags.
p_tags = re.compile('(<p>)|(</p>)')
for feed_id in feeds:
    feed = r.get("fid:" + str(feed_id) + ":url")
    rss = feedparser.parse(feed)
    for entry in rss.entries:
        # An entry whose guid key already exists is skipped.
        if not r.exists("guid:" + str(entry.guid) + ":fid"):
            clean_text = BeautifulSoup(entry.summary)
            # Flag every tag that is not whitelisted in VALID_TAGS as hidden
            # (presumably so only its contents are rendered — confirm).
            for t in clean_text.findAll(True):
                if t.name not in VALID_TAGS:
                    t.hidden = True
            # Paragraph tags are replaced by a single space before tagging.
            tag.text = p_tags.sub(' ',clean_text.renderContents())
            sanitized_text = tag.generate()
            # Allocate a new text id and record the text, the guid -> feed
            # mapping, a timestamp, and the owning uid copied from the feed.
            text_id = r.incr("global:nextTextId")
            r.set("text:" + str(text_id), sanitized_text)
            r.set("guid:" + str(entry.guid) + ":fid", feed_id)
            r.set("text:" + str(text_id) + ":timestamp",str(time()))
            r.set("text:" + str(text_id) + ":uid", r.get("fid:" + str(feed_id) + ":uid"))
            # set() ensures each distinct tag is processed once per entry.
            for tag_word in set(tag.tag_list()):
                tag_word = circle_sym.sub('',str(tag_word))
                # Only words longer than two characters (after URL-unquoting)
                # are recorded.
                if len(urllib.unquote(tag_word)) > 2:
                    tag_word = urllib.quote(str(tag_word))
                    # Register the word first if it has no tag id yet.
                    if not r.exists("word:" + tag_word + ":tid"):
                        tapechat_tag.add(tag_word,feed_id)
                    # Bump the per-word, per-tag-id and per-user counters,
                    # then append this text id to the per-user and global
                    # lists for the word.
                    r.incr("word:" + tag_word + ":count")
                    r.incr("tid:" + str(r.get("word:" + tag_word + ":tid")) + ":count")
                    r.incr("uid:" + str(r.get("fid:" + str(feed_id) + ":uid")) + ":" + tag_word + ":count")
                    r.push("uid:" + str(r.get("fid:" + str(feed_id) + ":uid")) + ":" + tag_word + ":texts",text_id)
                    r.push("word:" + tag_word + ":texts",text_id)
# Called once after all feeds are processed (placement inferred — see note above).
r.save()
def testTagsNotEmpty(self):
    """Check that tag_list() returns the expected tags for a sentence."""
    tagger = AutoTagify()
    tagger.text = 'This is a test with other valid tags'

    expected_tags = ['test', 'other', 'valid', 'tag']

    self.assertEqual(tagger.tag_list(), expected_tags)