def main():
    args = arg_parse()
    infile = args.file
    links = args.links
    outdir = args.output_dir
    outfile = scrape_to_json(infile, outdir, links, scraper)
    analyze(outfile)
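# arg_parse(), scrape_to_json(), and analyze() are not defined in this snippet.
# A minimal sketch of what arg_parse() might look like, assuming it uses argparse and
# that the option names simply mirror the attributes main() reads (file, links, output_dir):
import argparse

def arg_parse():
    parser = argparse.ArgumentParser(description="Scrape pages to JSON, then run sentiment analysis.")
    parser.add_argument("file", help="input file listing the pages to scrape")
    parser.add_argument("--links", action="store_true", help="also follow links found on each page")
    parser.add_argument("--output-dir", dest="output_dir", default=".", help="directory for the JSON output")
    return parser.parse_args()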
def main_page():
    if request.method == 'POST':
        hashtag = '#' + request.form['user_search']
        city = request.form['city'].title()
        radius = request.form['radius']
        tweets = search(hashtag, city, radius)
        if len(tweets) == 0:
            return render_template('no_results.html',
                                   city=city,
                                   topic=request.form['user_search'])
        sentiments = []
        for tweet in tweets:
            sentiments.append(analyze(tweet))
        total_score = 0
        total_magnitude = 0
        for score, magnitude in sentiments:
            total_score += score
            total_magnitude += magnitude
        avg_score = total_score / len(sentiments)
        avg_magnitude = total_magnitude / len(sentiments)
        sentiment = overall_sentiment(avg_score, avg_magnitude)
        return render_template('results.html',
                               city=city,
                               sentiment=sentiment,
                               topic=request.form['user_search'],
                               tweets=tweets)
    else:
        return render_template('search.html')
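# overall_sentiment() is referenced above but not defined in this snippet. A minimal
# sketch, assuming Google-style (score, magnitude) values where the score's sign gives
# polarity and the magnitude gives strength; the thresholds below are illustrative guesses:
def overall_sentiment(avg_score, avg_magnitude):
    if avg_score > 0.25:
        return "positive"
    if avg_score < -0.25:
        return "negative"
    # near-zero score with high magnitude suggests mixed feelings rather than no feelings
    return "mixed" if avg_magnitude > 1.0 else "neutral"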
def initialLoadDB(history):
    chromeData = history
    h = html2text.HTML2Text()
    # Ignore converting links from HTML
    h.ignore_links = True
    h.ignore_images = True
    h.ignore_anchors = True
    h.skip_internal_links = True
    website_text = []
    for link in chromeData:
        try:
            if "google.com" in link['url'] or "slack.com" in link['url']:
                continue
            query = db.webtext.find({"parent_url": link['url']})
            if query.count() == 0:
                child_links = []
                if link['url'][:4] == 'http':
                    html = requests.get(link['url'])
                    html = html.text
                    parent_html = str(html)
                    plain_text = h.handle(html).strip()
                else:
                    continue
                soup = BeautifulSoup(html, "html.parser")
                children = set()
                for childLink in soup.findAll('a'):
                    if childLink.get('href') is not None and childLink.get('href')[:4] == 'http':
                        if childLink.get('href').count("/") > 4 or ".html" in childLink.get('href'):
                            children.add(childLink.get('href'))
                update_query = {'parent_url': link['url']}
                class_data = classifyForDB([link['url'], plain_text])
                sent, magnitude = sentiment.analyze(plain_text)
                post_data = {
                    'parent_url': link['url'],
                    'parent_text': plain_text,
                    'parent_html': parent_html,
                    'classify_data': class_data,
                    # store the sentiment result alongside the page data, as getUrls() does
                    'sentiment': sent,
                    'magnitude': magnitude,
                    'child_links': list(children),
                    'depth': 0,
                    'searched': False
                }
                result = db.webtext.update(update_query, post_data, upsert=True)
                print(result)
            else:
                continue
        except Exception as e:
            print(e)
def home(request):
    response = {}
    text = ''
    if request.method == 'POST':
        text = request.POST['text']
        response = sentiment.analyze(text)
    return render(request, 'app/index.html', response)
def analyze(request):
    response = sentiment.analyze(request.body)
    # return HttpResponse(unescape(json.dumps(response)), content_type='application/json')
    return render(request, 'app/indexpartial-vert.html', response, content_type='text/html')
def dashboard():
    if 'email' not in session:
        return redirect(url_for('login'))  # requires an account to access this page
    resume = True
    photo = True
    emotions = False
    photo_feedback = False
    resume_feedback = False
    score = False
    error = False
    if request.method == 'POST':
        user_info = users.find_one({'email': session['email']})
        if 'resume' in request.form:
            if user_info['resume'] != '':
                resume_data = user_info['resume']
                fh = open("sentiment_results.txt", 'w')
                parsed_output = json.loads(resume_data)
                fh.write(json.dumps(parsed_output["DocumentElement"]["Resume"]["Experience"],
                                    indent=4,
                                    sort_keys=True))
                fh.close()
                results = sentiment.analyze("sentiment_results.txt")
                score = results[0]
                resume_feedback = analyzeResume(score)
            else:
                resume = False
        if 'photo' in request.form:
            if fs.exists(user_info['image']):
                image_data = fs.get(user_info['image'])
                f = open(UPLOAD_FOLDER + user_photo, 'wb')
                f.write(image_data.read())
                f.close()
                emotions = detect_face.detect_faces(UPLOAD_FOLDER + user_photo)
                photo_feedback = analyzeRelease(
                    emotions["anger"], emotions["joy"], emotions["surprise"],
                    emotions["blurred"], emotions["headwear"]) if emotions else False
                error = False if photo_feedback else True
            else:
                photo = False
    return render_template("dashboard.html",
                           emotions=emotions,
                           photo=photo,
                           score=score,
                           resume=resume,
                           photo_feedback=photo_feedback,
                           resume_feedback=resume_feedback,
                           error=error)
def compare(phrase, expected_bool, file_desc, expected_result=None):
    if file_desc == "path planning":
        is_command = path.isLocCommand(phrase)
        if is_command == expected_bool:
            if is_command:
                result = path.process_loc(phrase)
                return check_result(phrase, result, expected_result)
            else:
                return 1
        else:
            print("The phrase is: " + str(phrase) +
                  ", correct output should be: " + str(expected_bool) +
                  ", instead we have: " + str(is_command))
            return 0
    elif file_desc == "is question":
        result = nlp_util.is_question(phrase)
        return check_result(phrase, result, (expected_bool, expected_result))
    elif file_desc == "sentiment":
        label, result = sentiment.analyze(phrase)
        return check_result(phrase, label, expected_bool)
    else:
        print("invalid file_desc")
        return 0
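# Hypothetical usage of compare() as a lightweight test harness; the phrases and
# expected values below are illustrative only and are not part of the original code:
compare("go to the kitchen", True, "path planning", expected_result="kitchen")
compare("what time is it", True, "is question")
compare("I love this robot", "positive", "sentiment")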
def getUrls(urls):
    h = html2text.HTML2Text()
    # Ignore converting links from HTML
    h.ignore_links = True
    h.ignore_images = True
    h.ignore_anchors = True
    h.skip_internal_links = True
    website_text = []
    for url in urls:
        # try:
        db.webtext.delete_one({"parent_url": url})
        query = db.webtext.find({"parent_url": url})
        if query.count() == 0:
            child_links = []
            if url[:4] == 'http':
                html = requests.get(url)
                html = html.text
                parent_html = str(html)
                plain_text = h.handle(html).strip()
            else:
                continue
            soup = BeautifulSoup(html, "html.parser")
            children = set()
            for childLink in soup.findAll('a'):
                if childLink.get('href') is not None and childLink.get('href')[:4] == 'http':
                    if childLink.get('href').count("/") > 4 or ".html" in childLink.get('href'):
                        children.add(childLink.get('href'))
            update_query = {'parent_url': url}
            class_data = classifyForDB([url, plain_text])
            sent, magnitude = sentiment.analyze(plain_text)
            if sent is None or magnitude is None:
                raise RuntimeError("Google analysis down")
            post_data = {
                'parent_url': url,
                'parent_text': plain_text,
                'parent_html': parent_html,
                'classify_data': class_data,
                'sentiment': sent,
                'magnitude': magnitude,
                'child_links': list(children),
                'depth': 0,
                'searched': False
            }
            result = db.webtext.update(update_query, post_data, upsert=True)
            result = {
                k: post_data[k]
                for k in ["classify_data", "sentiment", "magnitude"]
            }
            website_text.append(result)
        else:
            data = query.next()
            result = {"classify_data": data["classify_data"]}
            if "sentiment" in data:
                result["sentiment"] = data["sentiment"]
                result["magnitude"] = data["magnitude"]
            website_text.append(result)
        # except Exception as e:
        #     print(e)
    return website_text
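# initialLoadDB() and getUrls() unpack sentiment.analyze() into (score, magnitude), and
# getUrls() mentions "Google analysis" when it fails, which suggests the wrapper calls the
# Google Cloud Natural Language API. A minimal sketch under that assumption:
from google.cloud import language_v1

def analyze(text):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    result = client.analyze_sentiment(request={"document": document})
    doc_sentiment = result.document_sentiment
    return doc_sentiment.score, doc_sentiment.magnitude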
from urllib.request import urlopen
import json

import sentiment

data = urlopen('https://api.github.com/events').read().decode('utf8')
response = json.loads(data)
for event in response:
    if event['type'] == 'PushEvent':
        commits = event['payload']['commits']
        if len(commits) > 0:
            for commit in commits:
                msg = commit['message']
                if sentiment.analyze(msg)['score'] < 0:
                    print('========================================================')
                    print(msg)
                    print('========================================================')
import requests
import sentiment
import sys
import os
import datetime
import re

while True:
    response = requests.post('http://queue:8080/memq/server/queues/bitcoin2/dequeue')
    if response.status_code == 200:
        message_str = response.json()['body']
        print(message_str)
        filestr = re.escape(message_str)
        command_str = 'gsutil cp gs://isye-7406/' + filestr + '.csv .'
        os.system(command_str)
        sentiment.analyze(message_str)
        command_str = ('gsutil cp ./' + filestr + '_nlp.csv gs://isye-7406/nlp/' +
                       filestr + '_nlp.csv')
        print(command_str)
        os.system(command_str)
    else:
        break

sys.exit(0)
def get_sentiment_data_stream():
    # amt = int(request.args.get('split'))
    amt = 3
    name = request.args.get('name')
    data = request.get_data(cache=False)
    ress = speechToText.speechToText(data)
    res = ress.split()
    splits = [
        " ".join(res[0:i])
        for i in range(len(res) // amt, len(res) + len(res) // amt, len(res) // amt)
    ]
    data = sentiment.analyze(nlu, ress)
    keys = [data["keywords"][i]["text"] for i in range(len(data["keywords"]))]
    for i in range(len(splits)):
        splits[i] = sentiment.analyze(nlu, splits[i])["emotion"]["document"]["emotion"]
    items = []
    for emotion in ("anger", "sadness", "joy", "fear", "disgust"):
        items.append({
            "id": emotion,
            "data": [{"x": i, "y": splits[i][emotion]} for i in range(len(splits))]
        })
    ret = {"items": items, "keywords": keys, "name": name, "text": ress}
    cache[name] = ret
    return ret
def get_sentiment_data():
    data = request.get_data(cache=False)
    res = speechToText.speechToText(data)
    fin_res = sentiment.analyze(nlu, res)
    return fin_res["emotion"]["document"]["emotion"]
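# The two handlers above pass an `nlu` client into sentiment.analyze() and read
# result["emotion"]["document"]["emotion"] and result["keywords"], which matches the
# response shape of IBM Watson Natural Language Understanding. A minimal sketch of the
# wrapper under that assumption:
from ibm_watson.natural_language_understanding_v1 import (Features, EmotionOptions,
                                                          KeywordsOptions)

def analyze(nlu, text):
    # nlu is assumed to be an already-authenticated NaturalLanguageUnderstandingV1 client
    response = nlu.analyze(text=text,
                           features=Features(emotion=EmotionOptions(),
                                             keywords=KeywordsOptions(limit=10)))
    return response.get_result()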
print("--- Opened a CSV file to store the results of your sentiment analysis... \n")

# tidy up the Tweets and send each to the AYLIEN Text API
for c, result in enumerate(results, start=1):
    tweet = result.text
    tidy_tweet = tweet.strip()
    tweet_time = result.created_at
    tweet_id = result.id

    if not tweet:
        print('Empty Tweet')
        continue

    response = analyze(tidy_tweet)
    csv_writer.writerow({
        "Tweet_ID": tweet_id,
        "Time": tweet_time,
        'Tweet': tidy_tweet,
        'Sentiment': response['compound'],
    })
    print("Analyzed Tweet {}".format(c))

positive = 0
negative = 0
neutral = 0

# count the data in the Sentiment column of the CSV file
with open(file_name, 'r') as data:
    counter = Counter()
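# The loop above reads response['compound'], which matches the output of NLTK's VADER
# sentiment analyzer rather than the AYLIEN API named in the comment. A minimal sketch of
# an analyze() helper under that assumption (requires nltk.download('vader_lexicon')):
from nltk.sentiment.vader import SentimentIntensityAnalyzer

_analyzer = SentimentIntensityAnalyzer()

def analyze(text):
    # returns a dict with 'neg', 'neu', 'pos', and 'compound' scores
    return _analyzer.polarity_scores(text)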