def takeTwitter(idApps, data):
    for tweet in data:
        tweetContent = db_mysql.escapeString(tweet.text.decode('utf-8'))
        tweetDate = tweet.date
        tweetId = tweet.id
        tweetRetweetCount = tweet.retweets
        tweetFavoriteCount = tweet.favorites
        tweetHashtags = tweet.hashtags
        tweetMentions = tweet.mentions
        tweetUser = tweet.username
        tweetUserLocation = tweet.geo
        # checkExist() returns True when the tweet is NOT yet stored
        if (checkExist(tweetId, "twitter")):
            print tweetContent
            dataProcess = mainProcess(tweetContent, idApps)
            sentiment = dataProcess['sentiment']
            # keep the per-word counts used by the impression rules
            recordCounting = preprocessing.recordCounting(dataProcess)
            arr_word = recordCounting['arr_word']
            arr_counted = recordCounting['arr_counted']
            arr_score = recordCounting['arr_score']
            role_group = recordCounting['role_group']
            print tweetUser
            print "::" + str(sentiment)
            sql = '''INSERT INTO apps_opinion(id_apps, sentiment, content, time, sosmed,
                     tweet_id, tweet_retweet_count, tweet_favorite_count, tweet_hashtags,
                     tweet_mentions, user, tweet_user_location, arr_word, arr_counted,
                     arr_score, role_group)
                     VALUES ('%d', '%s', '%s', '%s', '%d', '%s', '%d', '%d', '%s', '%s',
                     '%s', '%s', '%s', '%s', '%s', '%s')''' \
                  % (idApps, sentiment, str(tweetContent), tweetDate, 1, tweetId,
                     tweetRetweetCount, tweetFavoriteCount, tweetHashtags, tweetMentions,
                     str(tweetUser), tweetUserLocation, arr_word, arr_counted, arr_score,
                     role_group)
            db_mysql.executeSql(sql)
            db_mysql.commit()
def saveComment(data_save):
    content = data_save['text_comment']
    id_apps = data_save['id_apps']
    time = data_save['date']
    sosmed = data_save['sosmed']
    user = data_save['name']
    id_url = data_save['id_url']
    keyword = preprocessing.do(content)
    dataProcess = impress.play(keyword, id_apps)
    sentiment = dataProcess['sentiment']
    # keep the per-word counts used by the impression rules
    recordCounting = preprocessing.recordCounting(dataProcess)
    arr_word = recordCounting['arr_word']
    arr_counted = recordCounting['arr_counted']
    arr_score = recordCounting['arr_score']
    role_group = recordCounting['role_group']
    # checkExists() returns True when the comment is NOT yet stored
    if (checkExists(id_apps, id_url, time)):
        print content
        print "::" + str(sentiment)
        print "==============================="
        try:
            sql = '''INSERT INTO apps_opinion(id_apps, sentiment, content, time, sosmed,
                     user, arr_word, arr_counted, arr_score, role_group, id_url)
                     VALUES ('%d', '%s', '%s', '%s', '%d', '%s', '%s', '%s', '%s', '%s', '%s')''' \
                  % (id_apps, sentiment, str(content), time, sosmed, user, arr_word,
                     arr_counted, arr_score, role_group, id_url)
            db_mysql.executeSql(sql)
            db_mysql.commit()
        except Exception as e:
            # do not let one bad row stop the crawl, but report the failure
            print "insert failed: " + str(e)
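# The two INSERTs above interpolate values with % string formatting and rely on
# db_mysql.escapeString() for quoting. Below is a minimal sketch of the same kind
# of insert using DB-API placeholder parameters, so the driver does the escaping.
# It assumes a DB-API connection (e.g. from pymysql) is available; the db_mysql
# wrapper used in this project may not expose one, and saveOpinionParameterized()
# is a hypothetical helper for illustration, not part of this codebase.
def saveOpinionParameterized(conn, row):
    # 'row' is a hypothetical dict carrying the column values
    sql = ('INSERT INTO apps_opinion (id_apps, sentiment, content, time, sosmed, user) '
           'VALUES (%s, %s, %s, %s, %s, %s)')
    with conn.cursor() as cur:
        cur.execute(sql, (row['id_apps'], row['sentiment'], row['content'],
                          row['time'], row['sosmed'], row['user']))
    conn.commit()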
def checkExist(id, sosmed):
    # returns True when the tweet has NOT been stored yet
    if (sosmed == "twitter"):
        sql = 'SELECT * FROM apps_opinion WHERE tweet_id="' + str(id) + '"'
        db_mysql.executeSql(sql)
        data = db_mysql.fetch('one')
        if (data == None):
            return True
    return False
def checkExists(id_apps, id_url, time):
    # returns True when the comment has NOT been stored yet
    sql = 'SELECT * FROM apps_opinion WHERE id_apps=' + str(id_apps) + \
          ' AND id_url="' + str(id_url) + '" AND time="' + str(time) + '"'
    db_mysql.executeSql(sql)
    data = db_mysql.fetch('one')
    if (data == None):
        return True
    return False
def play():
    process_scrapy = CrawlerProcess({'USER_AGENT': settings.USER_AGENT})
    sql = 'SELECT * FROM apps_run WHERE status=1'
    db_mysql.executeSql(sql)
    appsRun = db_mysql.fetch('all')
    for app in appsRun:
        idApps = app[1]
        idSosmed = app[2]
        sql = 'SELECT * FROM apps WHERE id=' + str(idApps)
        db_mysql.executeSql(sql)
        apps = db_mysql.fetch('one')
        query = {}
        query['idApps'] = apps[0]
        query['qTwitter'] = apps[2]
        query['since'] = apps[6]
        query['until'] = apps[7]
        query['tags'] = apps[8].split(', ')
        query['q'] = apps[9]
        query['tags'].append(query['q'])
        print idSosmed
        if (idSosmed == 1):
            # Twitter
            dataTwitter = cr_twitter.play(query['qTwitter'], query['since'], query['until'])
            takeTwitter(idApps, dataTwitter)
        elif (idSosmed == settings.online_news['liputan6']):
            urls = []
            if (query['until'] == None):
                query['until'] = datetime.today().strftime('%Y-%m-%d')
            for tag in query['tags']:
                tag = tag.replace(" ", "-")
                urls.append('http://m.liputan6.com/tag/' + tag)
            process_scrapy.crawl(Liputan6, urls=urls, query=query)  # Liputan6
        elif (idSosmed == settings.online_news['detikcom']):
            urls = []
            q = query['q'].replace(" ", "+")
            if (query['until'] == None):
                query['until'] = datetime.today().strftime('%Y-%m-%d')
            since = datetime.strptime(query['since'], "%Y-%m-%d").strftime("%d/%m/%Y")
            until = datetime.strptime(query['until'], "%Y-%m-%d").strftime("%d/%m/%Y")
            urls.append('https://www.detik.com/search/searchall?query=' + q +
                        '&sortby=time&fromdatex=' + since + '&todatex=' + until)
            process_scrapy.crawl(DetikCom, urls=urls, query=query)  # DetikCom
    process_scrapy.start()  # the script will block here until the crawling is finished
def probably(word, senti):
    # P(word | class) * P(class), estimated from the labelled eval_opinion rows;
    # cast to float so Python 2 integer division does not truncate the ratios to 0
    global totalNaive
    sql = 'SELECT count(*) FROM eval_opinion WHERE training = ' + str(senti) + \
          ' AND content LIKE "%' + word + '%"'
    db_mysql.executeSql(sql)
    inWord = float(db_mysql.fetch('all')[0][0])
    if (senti == 1):
        return (inWord / totalNaive['positif']) * (float(totalNaive['positif']) / totalNaive['data'])
    elif (senti == -1):
        return (inWord / totalNaive['negatif']) * (float(totalNaive['negatif']) / totalNaive['data'])
    elif (senti == 0):
        return (inWord / totalNaive['netral']) * (float(totalNaive['netral']) / totalNaive['data'])
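# probably() scores a single word per class. A minimal sketch of how a Naive Bayes
# style decision could combine those per-word scores over a token list is shown
# below; classifyNaive() is a hypothetical helper for illustration only, not the
# project's impress.play(), and note that multiplying probably() per word re-applies
# the class prior for every word rather than once, as textbook Naive Bayes would.
def classifyNaive(words):
    best_senti = 0
    best_score = -1.0
    for senti in (1, -1, 0):  # positif, negatif, netral
        score = 1.0
        for word in words:
            score *= probably(word, senti)
        if score > best_score:
            best_senti = senti
            best_score = score
    return best_senti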
def analyzeSentiment():
    sql = '''SELECT * FROM data_test'''
    db_mysql.executeSql(sql)
    result = db_mysql.fetch('all')
    for data in result:
        kalimat = data[2]
        keyword = preprocessing.do(kalimat)
        sentiment = impress.play(keyword)['sentiment']
        print "==========================\n\nID: " + str(data[0]) + "\n" + kalimat + \
              "\n--Sentiment: " + str(sentiment) + "\n"
        sql = '''UPDATE data_test SET sentiment=%d WHERE id=%d''' % (sentiment, data[0])
        db_mysql.executeSql(sql)
        db_mysql.commit()
def initial():
    # cache the per-class training counts used by probably() as the Naive Bayes priors
    global totalNaive
    sql = '''SELECT count(*) FROM eval_opinion WHERE training = 1'''
    db_mysql.executeSql(sql)
    result = db_mysql.fetch('all')
    totalNaive['positif'] = result[0][0]

    sql = '''SELECT count(*) FROM eval_opinion WHERE training = -1'''
    db_mysql.executeSql(sql)
    result = db_mysql.fetch('all')
    totalNaive['negatif'] = result[0][0]

    sql = '''SELECT count(*) FROM eval_opinion WHERE training = 0'''
    db_mysql.executeSql(sql)
    result = db_mysql.fetch('all')
    totalNaive['netral'] = result[0][0]

    totalNaive['data'] = totalNaive['positif'] + totalNaive['negatif'] + totalNaive['netral']
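# initial() must run before probably() is called: it fills the module-level totalNaive
# dict that probably() divides by. A minimal usage sketch, assuming totalNaive is
# declared at module level (e.g. totalNaive = {}) and "bagus" is just an example word:
#
#     initial()                        # load class counts from eval_opinion
#     p_pos = probably("bagus", 1)     # ~ P("bagus" | positif) * P(positif)
#     p_neg = probably("bagus", -1)
#     p_net = probably("bagus", 0)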