def run_article_analysis():
    """Analyze every stored article, then print per-issue aggregate stats.

    Pass 1 runs DocumentAnalysis over each Articles row (side effects only;
    note: unlike run_press_release_analysis, nothing is committed back to the
    database here -- NOTE(review): confirm that is intentional).
    Pass 2 aggregates the module-level `issues` / `summary` structures into
    word-count, frequency, readability and sentiment statistics per issue key.
    """
    # Connecting to the database
    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()

    # Pass 1: run the analysis step over every article.
    for article in session.query(Articles):
        articleAnalysis = DocumentAnalysis(article, False)
        articleAnalysis.analysis()

    # Pass 2: aggregate statistics per issue key.
    results = {}
    for issue in issues:
        key = issue.get('key')
        if key in summary['issues']:
            content = summary['issues'][key].get('content')
            examples = summary['issues'][key].get('examples')
            # Guard: an issue with no examples would divide by zero below.
            if not examples:
                continue
            readability = 0
            sentiment = 0
            for example in examples:
                readability += example.get('readability')
                # BUG FIX: was `sentiment += sentiment`, which left the
                # accumulator permanently at 0; sum the example's value,
                # mirroring the readability accumulation above.
                sentiment += example.get('sentiment')
            content_words = get_lowercase(get_words(content))
            word_count = len(content_words)
            unique_word_count = len(set(content_words))
            content_words_without_stopwords = remove_stop_words(content_words)
            # Top-20 most frequent non-stopword tokens.
            word_frequency = nltk.FreqDist(content_words_without_stopwords).most_common(20)
            results[key] = {
                'word_count': word_count,
                'unique_word_count': unique_word_count,
                '20_most_frequent': word_frequency,
                'instances': len(examples),
                'readability': readability/len(examples),
                'sentiment': sentiment,
            }
    print(results)
def run_press_release_analysis():
    """Analyze every stored press release and persist the results.

    Each PressReleases row is run through DocumentAnalysis (press-release
    mode) and the analyzed document is committed back; a failed commit is
    rolled back and the loop moves on (best-effort per row).
    """
    # Connecting to the database
    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()
    for press_release in session.query(PressReleases):
        press_release_analysis = DocumentAnalysis(press_release, True)
        press_release_analysis.analysis()
        try:
            session.add(press_release_analysis.document)
            session.commit()
        # BUG FIX: was a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit; narrow to Exception while keeping
        # the deliberate best-effort rollback-and-continue behavior.
        except Exception:
            session.rollback()
def __init__(self):
    """Set up the database session factory and an empty stats store.

    Connects to the database, ensures the articles table exists, and binds
    a sessionmaker for later use.
    """
    db_engine = db_connect()
    create_articles_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
    self.stats = {}