def play():
    """Main crawl dispatcher.

    Reads every active row from apps_run (status=1), loads the matching
    apps row, builds a query dict from its columns, and dispatches by
    social-media id: 1 -> twitter (synchronous), otherwise a scrapy
    spider is *registered* on the shared CrawlerProcess. All registered
    spiders run together in the single blocking start() call at the end.
    """
    process_scrapy = CrawlerProcess({'USER_AGENT': settings.USER_AGENT})
    sql = 'SELECT * FROM apps_run WHERE status=1'
    db_mysql.executeSql(sql)
    appsRun = db_mysql.fetch('all')
    for app in appsRun:
        # apps_run columns: [1] -> apps.id, [2] -> social-media id
        # (assumed from usage below -- TODO confirm against schema).
        idApps = app[1]
        idSosmed = app[2]
        # NOTE(review): id comes from our own DB (numeric), but this is
        # still string-built SQL -- prefer parameterized queries.
        sql = 'SELECT * FROM apps WHERE id=' + str(idApps)
        db_mysql.executeSql(sql)
        apps = db_mysql.fetch('one')
        # Column positions in apps (inferred from usage -- verify):
        # 0=id, 2=twitter query, 6=since, 7=until, 8=comma-joined tags,
        # 9=main keyword.
        query = {}
        query['idApps'] = apps[0]
        query['qTwitter'] = apps[2]
        query['since'] = apps[6]
        query['until'] = apps[7]
        query['tags'] = apps[8].split(', ')
        query['q'] = apps[9]
        # The main keyword is searched as one of the tags as well.
        query['tags'].append(query['q'])
        print idSosmed
        if (idSosmed == 1):
            # Twitter is crawled immediately (not via scrapy) and stored.
            dataTwitter = cr_twitter.play(query['qTwitter'], query['since'],
                                          query['until'])
            takeTwitter(idApps, dataTwitter)
        elif (idSosmed == settings.online_news['liputan6']):
            urls = []
            # Default the end of the window to today when unset.
            if (query['until'] == None):
                query['until'] = datetime.today().strftime('%Y-%m-%d')
            # One tag page per tag; liputan6 tag slugs use dashes.
            for tag in query['tags']:
                tag = tag.replace(" ", "-")
                urls.append('http://m.liputan6.com/tag/' + tag)
            process_scrapy.crawl(Liputan6, urls=urls, query=query)  # Liputan6
        elif (idSosmed == settings.online_news['detikcom']):
            urls = []
            # detik.com search expects '+'-joined keywords and
            # DD/MM/YYYY dates.
            q = query['q'].replace(" ", "+")
            if (query['until'] == None):
                query['until'] = datetime.today().strftime('%Y-%m-%d')
            since = datetime.strptime(query['since'],
                                      "%Y-%m-%d").strftime("%d/%m/%Y")
            until = datetime.strptime(query['until'],
                                      "%Y-%m-%d").strftime("%d/%m/%Y")
            urls.append('https://www.detik.com/search/searchall?query=' + q +
                        '&sortby=time&fromdatex=' + since + '&todatex=' +
                        until)
            process_scrapy.crawl(DetikCom, urls=urls, query=query)
    # print "=================================================process================================";
    process_scrapy.start(
    )  # the script will block here until the crawling is finished
def checkExist(id, sosmed):
    """Return True when no apps_opinion row stores this tweet id yet.

    Only the "twitter" source is recognised; any other source value
    yields False. (Despite the name, True means "does NOT exist".)
    """
    if sosmed != "twitter":
        return False
    # NOTE(review): id is concatenated into SQL unescaped -- acceptable
    # only because tweet ids are numeric; prefer parameterized queries.
    db_mysql.executeSql(
        'SELECT * FROM apps_opinion WHERE tweet_id="' + str(id) + '"')
    return db_mysql.fetch('one') is None
def checkExists(id_apps, id_url, time):
    """Return True when no apps_opinion row matches (id_apps, id_url, time).

    (Despite the name, True means the row does NOT exist yet.)
    """
    # NOTE(review): values are interpolated into SQL unescaped -- prefer
    # parameterized queries if the db_mysql wrapper supports them.
    query = ('SELECT * FROM apps_opinion WHERE id_apps=%s'
             ' AND id_url="%s" AND time="%s"') % (id_apps, id_url, time)
    db_mysql.executeSql(query)
    return db_mysql.fetch('one') is None
def initial():
    """Populate the global Naive Bayes class counts from eval_opinion.

    Fills totalNaive['positif'/'negatif'/'netral'] with the number of
    training rows labelled 1 / -1 / 0, and totalNaive['data'] with their
    sum. Replaces three copy-pasted query stanzas with one loop.
    """
    global totalNaive
    # (totalNaive key, training label) pairs -- same order as before.
    for key, label in (('positif', 1), ('negatif', -1), ('netral', 0)):
        sql = ('SELECT count(*) FROM eval_opinion WHERE training = %d'
               % label)
        db_mysql.executeSql(sql)
        totalNaive[key] = db_mysql.fetch('all')[0][0]
    totalNaive['data'] = (totalNaive['positif'] + totalNaive['negatif'] +
                          totalNaive['netral'])
def probably(word, senti):
    """Return the Naive Bayes term P(word|class) * P(class) for one class.

    senti is the class label: 1 positive, -1 negative, 0 neutral; any
    other value returns None. Counts training rows of that class whose
    content contains word.

    Bug fix: under Python 2, '/' between the integer counts performed
    integer division, truncating almost every probability to 0 and
    degenerating the classifier. Coerce to float before dividing.
    """
    global totalNaive
    # NOTE(review): word is spliced into the LIKE pattern unescaped --
    # quotes or '%'/'_' in word will break or distort the match.
    sql = ('SELECT count(*) FROM eval_opinion WHERE training = '
           + str(senti) + ' AND content LIKE "%' + word + '%"')
    db_mysql.executeSql(sql)
    inWord = db_mysql.fetch('all')[0][0]
    labels = {1: 'positif', -1: 'negatif', 0: 'netral'}
    if senti not in labels:
        return None  # original fell through and returned None implicitly
    total = totalNaive[labels[senti]]
    # float() is the fix: keeps true division on Python 2 as well.
    return (float(inWord) / total) * (float(total) / totalNaive['data'])
def analyzeSentiment(): sql = '''SELECT * FROM data_test''' db_mysql.executeSql(sql) result = db_mysql.fetch('all') for data in result: kalimat = data[2] keyword = preprocessing.do(kalimat) sentiment = impress.play(keyword)['sentiment'] print "==========================\n\nID: " + str( data[0]) + "\n" + kalimat + "\n--Sentiment: " + str( sentiment) + "\n" sql = '''UPDATE data_test SET sentiment=%d WHERE id=%d ''' % (sentiment, data[0]) db_mysql.executeSql(sql) db_mysql.commit()