Esempio n. 1
0
# -*- coding: utf-8 -*-
'''
Created on Aug 21, 2015

@author: siddban
'''
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize

PROJECT_DIR = os.path.dirname(__file__) + "/../"
print "Project dir", PROJECT_DIR
RESOURCES_DIR = "resources/"
stopwords = wg.load_stopwords(RESOURCES_DIR + "stopwords.en.dat")

rankingModes = {"C": "Centroid", "TR": "textrank", "CW": "contentWeighing"}


def sentenceCapitalize(sent):
    sentences = sent.split(". ")
    sentences2 = [
        sentence[0].capitalize() + sentence[1:] for sentence in sentences
    ]
    string2 = '. '.join(sentences2)
    return string2


def tweetCleaner(sentences):
    p = re.compile(r'http?:\/\/.*[\s\r\n]*',
Esempio n. 2
0
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize

from flask import Flask, render_template, flash, request
from wtforms import Form, TextField, TextAreaField, validators, StringField, SubmitField

from http.server import BaseHTTPRequestHandler, HTTPServer
import json

PROJECT_DIR=os.path.dirname(__file__)+"./"
print ("Project dir", PROJECT_DIR)
RESOURCES_DIR=PROJECT_DIR+"resources/"
stopwords=wg.load_stopwords("resources/stopwords.en.dat")  

rankingModes={"C":"Centroid","TR":"textrank", "CW":"contentWeighing"}

def sentenceCapitalize(sent):
    sentences = sent.split(". ")
    sentences2 = [sentence[0].capitalize() + sentence[1:] for sentence in sentences]
    string2 = '. '.join(sentences2)
    return string2

def tweetCleaner(sentences):
    p=re.compile(r'http?:\/\/.*[\s\r\n]*', re.DOTALL) #Regex to remove http from sentences
    p2=re.compile(r'(^|\s)#.+?\s', re.DOTALL) #Regex
    p3=re.compile(r'(^|\s)@.+?(\s|$)', re.DOTALL) 
    print ("Initial sentences=>", len(sentences))
    final_sentences=[]
Esempio n. 3
0
# -*- coding: utf-8 -*-
'''
Created on Aug 21, 2015

@author: siddban
'''
import absummarizer.WGGraph as wg
import os
import re
import nltk
from absummarizer.summarizer import segmentize

PROJECT_DIR=os.path.dirname(__file__)+"/../"
print "Project dir", PROJECT_DIR
RESOURCES_DIR="resources/"
stopwords=wg.load_stopwords(RESOURCES_DIR+"stopwords.en.dat")  

rankingModes={"C":"Centroid","TR":"textrank", "CW":"contentWeighing"}

def sentenceCapitalize(sent):
    sentences = sent.split(". ")
    sentences2 = [sentence[0].capitalize() + sentence[1:] for sentence in sentences]
    string2 = '. '.join(sentences2)
    return string2

def tweetCleaner(sentences):
    p=re.compile(r'http?:\/\/.*[\s\r\n]*', re.DOTALL) #Regex to remove http from sentences
    p2=re.compile(r'(^|\s)#.+?\s', re.DOTALL) #Regex
    p3=re.compile(r'(^|\s)@.+?(\s|$)', re.DOTALL) 
    print "Initial sentences=>", len(sentences)
    final_sentences=[]