# -*- coding: utf-8 -*-
'''
Created on Aug 21, 2015

@author: siddban
'''
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize

PROJECT_DIR = os.path.dirname(__file__) + "/../"
# Converted from a Python 2 print statement: a sibling version of this module
# imports http.server (Python 3 only), so the codebase targets Python 3, where
# the statement form is a SyntaxError.
print("Project dir", PROJECT_DIR)

RESOURCES_DIR = "resources/"
stopwords = wg.load_stopwords(RESOURCES_DIR + "stopwords.en.dat")
rankingModes = {"C": "Centroid", "TR": "textrank", "CW": "contentWeighing"}


def sentenceCapitalize(sent):
    """Capitalize the first character of each '. '-separated sentence.

    :param sent: input string; sentences are assumed to be separated by ". ".
    :returns: the same text with each sentence's first character upper-cased.

    Empty segments (produced by an empty input or a trailing ". ") are kept
    as-is instead of raising IndexError on ``segment[0]``.
    """
    sentences = sent.split(". ")
    sentences2 = [s[0].capitalize() + s[1:] if s else s for s in sentences]
    return '. '.join(sentences2)


def tweetCleaner(sentences):
    """Remove URLs from tweet sentences.

    NOTE(review): the body of this function is truncated in this chunk of the
    file; only the first regex is visible. The closing ``re.DOTALL)`` is taken
    from the identical code in the sibling versions of this module.
    """
    # Fixed 'http?' -> 'https?': the original made only the final "p" optional
    # ("htt" / "http"), so https:// URLs were never matched.
    p = re.compile(r'https?:\/\/.*[\s\r\n]*', re.DOTALL)  # strip URLs
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize
from flask import Flask, render_template, flash, request
from wtforms import Form, TextField, TextAreaField, validators, StringField, SubmitField
from http.server import BaseHTTPRequestHandler, HTTPServer
import json

# NOTE(review): "./" appended to dirname(__file__) looks suspicious — sibling
# versions of this module use "/../" (the parent directory). Preserved as-is
# to avoid changing runtime behavior; TODO confirm the intended project root.
PROJECT_DIR = os.path.dirname(__file__) + "./"
print("Project dir", PROJECT_DIR)

RESOURCES_DIR = PROJECT_DIR + "resources/"
# Bug fix: the original ignored the RESOURCES_DIR it had just computed and
# hard-coded the relative path "resources/stopwords.en.dat", which resolves
# only when the process CWD happens to be the project root. Use the computed
# path so loading works regardless of CWD.
stopwords = wg.load_stopwords(RESOURCES_DIR + "stopwords.en.dat")
rankingModes = {"C": "Centroid", "TR": "textrank", "CW": "contentWeighing"}


def sentenceCapitalize(sent):
    """Capitalize the first character of each '. '-separated sentence.

    :param sent: input string; sentences are assumed to be separated by ". ".
    :returns: the same text with each sentence's first character upper-cased.

    Empty segments (produced by an empty input or a trailing ". ") are kept
    as-is instead of raising IndexError on ``segment[0]``.
    """
    sentences = sent.split(". ")
    sentences2 = [s[0].capitalize() + s[1:] if s else s for s in sentences]
    return '. '.join(sentences2)


def tweetCleaner(sentences):
    """Remove URLs, hashtags, and @-mentions from tweet sentences.

    NOTE(review): this function is truncated in this chunk of the file — the
    filtering loop and return statement are not visible, so only the visible
    prefix is reproduced here.
    """
    # Fixed 'http?' -> 'https?': the original made only the final "p" optional
    # ("htt" / "http"), so https:// URLs were never matched.
    p = re.compile(r'https?:\/\/.*[\s\r\n]*', re.DOTALL)  # URLs
    p2 = re.compile(r'(^|\s)#.+?\s', re.DOTALL)           # hashtags
    p3 = re.compile(r'(^|\s)@.+?(\s|$)', re.DOTALL)       # @-mentions
    print("Initial sentences=>", len(sentences))
    final_sentences = []
# -*- coding: utf-8 -*-
'''
Created on Aug 21, 2015

@author: siddban
'''
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize

PROJECT_DIR = os.path.dirname(__file__) + "/../"
# Converted from a Python 2 print statement: a sibling version of this module
# imports http.server (Python 3 only), so the codebase targets Python 3, where
# the statement form is a SyntaxError.
print("Project dir", PROJECT_DIR)

RESOURCES_DIR = "resources/"
stopwords = wg.load_stopwords(RESOURCES_DIR + "stopwords.en.dat")
rankingModes = {"C": "Centroid", "TR": "textrank", "CW": "contentWeighing"}


def sentenceCapitalize(sent):
    """Capitalize the first character of each '. '-separated sentence.

    :param sent: input string; sentences are assumed to be separated by ". ".
    :returns: the same text with each sentence's first character upper-cased.

    Empty segments (produced by an empty input or a trailing ". ") are kept
    as-is instead of raising IndexError on ``segment[0]``.
    """
    sentences = sent.split(". ")
    sentences2 = [s[0].capitalize() + s[1:] if s else s for s in sentences]
    return '. '.join(sentences2)


def tweetCleaner(sentences):
    """Remove URLs, hashtags, and @-mentions from tweet sentences.

    NOTE(review): this function is truncated in this chunk of the file — the
    filtering loop and return statement are not visible, so only the visible
    prefix is reproduced here.
    """
    # Fixed 'http?' -> 'https?': the original made only the final "p" optional
    # ("htt" / "http"), so https:// URLs were never matched.
    p = re.compile(r'https?:\/\/.*[\s\r\n]*', re.DOTALL)  # URLs
    p2 = re.compile(r'(^|\s)#.+?\s', re.DOTALL)           # hashtags
    p3 = re.compile(r'(^|\s)@.+?(\s|$)', re.DOTALL)       # @-mentions
    print("Initial sentences=>", len(sentences))
    final_sentences = []