from library.eBsolr import eBsolr
import MySQLdb


def get_user():
    # Load the user list; each line is an "id|gender|tweet" record.
    with open('data_user_polri.txt', 'r') as f:
        users = f.readlines()
    users = [x.replace('\n', '').lower() for x in users]

    conn = MySQLdb.connect('192.168.150.158', 'backend', 'rahasia123', 'ipa_main')
    cursor = conn.cursor(MySQLdb.cursors.DictCursor)
    solr = eBsolr('http://192.168.150.105:8983/solr/twitter_tweet', 'config.conf')

    for user in users:
        _id, gender, tweet = user.split('|')
        sql = 'SELECT * FROM twitter_account WHERE id={}'.format(_id)
        cursor.execute(sql)
        row = cursor.fetchone()
        if row:
            # Facet on sentiment over this user's 2019 "polri" tweets.
            query = 'polri AND user_id:{} AND pub_year:2019'.format(_id)
            response = solr.getDocsFacets(query, 'sentiment', rows=1)
            # Missing facet buckets default to 0 (replaces the bare try/excepts).
            facets = response.get('facets', {}).get('sentiment', {})
            pos = facets.get('1', 0)
            neg = facets.get('-1', 0)
            neu = facets.get('0', 0)
            print '{}|{}|{}|{}|{}|{}|{}'.format(
                _id, row['screen_name'], row['name'], gender, neg, neu, pos)

    cursor.close()
    conn.close()
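# Hypothetical entry point (not part of the original script): run the export
# directly from the command line.
if __name__ == '__main__':
    get_user()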
import json

from library.eBsolr import eBsolr


def save_data():
    solr = eBsolr('http://192.168.150.101:8983/solr/instagram', 'config.conf')
    query = 'polri AND pub_year:2019'
    run = True
    data = []
    i = 0
    # Page through Solr 10,000 docs at a time; a short page means we are done.
    while run:
        print i
        response = solr.getDocs(query, 'user_id, sentiment',
                                start=i * 10000, rows=10000)
        i += 1
        data += response['docs']
        if len(response['docs']) < 10000:
            run = False
    with open('data_solr_instagram.txt', 'w') as f:
        f.write(json.dumps(data))
    print 'end'
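# Hypothetical entry point (not part of the original script): dump the
# dataset to data_solr_instagram.txt in one run.
if __name__ == '__main__':
    save_data()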
from library.eBsolr import eBsolr
import MySQLdb

conn = MySQLdb.connect('192.168.150.154', 'backend', 'rahasia123', 'mm_prod_news')
cursor = conn.cursor()

_path = '/home/aditya/ISA/'
_file = '201902b.csv'
with open('{}{}'.format(_path, _file), 'r') as f:
    data = f.readlines()

solr = eBsolr('http://192.168.150.126:8983/solr/news', 'config.conf')
for d in data:
    _solr_status = 'not ok'
    _sql_status = 'not ok'
    isa_id, media, title = d.replace('\n', '').split('|')
    # Check that the article is indexed in Solr (exact title, media name).
    query = 'title:"{}" AND media:{}'.format(title, media.replace(' ', '+'))
    response = solr.getDocs(query, rows=1, fields='id')
    if response['count'] > 0:
        _solr_status = 'ok'
        _id = response['docs'][0]['id']
        # Cross-check that the same document also exists in MySQL.
        sql = 'SELECT n_id FROM news WHERE n_id="{}"'.format(_id)
        cursor.execute(sql)
        row = cursor.fetchone()
        if row:
            _sql_status = 'ok'
    print '{}|{}|{}|{}'.format(media, title, _solr_status, _sql_status)

cursor.close()
conn.close()
from library.eBsolr import eBsolr

solr = eBsolr('http://192.168.150.101:8983/solr/fb_post', '')
query = '(kpk OR "komisi pemberantasan korupsi") AND pub_day:[20180622 TO 20190621]'

print 'request solr start'
response = solr.getDocs(query, 'user_id', rows=300000)
print 'request solr done'
# print response['docs']

# Count posts per user_id; the dict size is the number of distinct posters.
ids = {}
for doc in response['docs']:
    if 'user_id' in doc:
        ids[doc['user_id']] = ids.get(doc['user_id'], 0) + 1

print len(ids)
import psycopg2
import psycopg2.extras
from DBUtils.PersistentDB import PersistentDB

from library.eBsolr import eBsolr

# dbuser and dbname are not present in the original snippet; the values below
# are placeholders assumed so the script can run.
dbuser = 'backend'
dbname = 'dbname'
dbpwd = 'rahasia'
dbhost = '192.168.150.155'
dbport = 5432

pool_pg = PersistentDB(psycopg2, host=dbhost, user=dbuser,
                       password=dbpwd, database=dbname)
conn = pool_pg.connection()
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

solr_host = 'http://192.168.150.132:8983/solr/twitter_tweet'
solr = eBsolr(solr_host, 'config.conf')

sql = "SELECT * FROM topic_tweet_mapping WHERE t_id=6965 AND country <> '' AND pub_day BETWEEN '20190801' AND '20190831';"
cursor.execute(sql)
rows = cursor.fetchall()

for row in rows:
    query = 'id:{}'.format(row['tweet_id'])
    response = solr.getDocs(query, 'text')
    try:
        text = response['docs'][0]['text'].encode('utf-8')
    except Exception:
        # Fall back to the raw value if the text cannot be UTF-8 encoded.
        text = response['docs'][0]['text']
    try:
        province = row['province'].split('|')[-1]
    except Exception:
        province = ''  # fallback assumed; the original snippet is cut off here
import json

import MySQLdb
import redis

from library.eBsolr import eBsolr

solr_facebook = eBsolr('http://10.11.12.88:8983/solr/facebook', 'config')


def pusher(_date):
    redis_pointer = redis.StrictRedis(host='127.0.0.1', port=6379, db=1)
    conn = MySQLdb.connect('10.11.12.35', 'backend', 'rahasia123', 'fb_master')
    query = 'pub_day:{}'.format(_date)
    cursor = conn.cursor(MySQLdb.cursors.DictCursor)

    run = True
    start = 0
    while run:
        response = solr_facebook.getDocs(query, start=start, rows=10000,
                                         fields='id')
        for doc in response['docs']:
            sql = "SELECT * FROM fb_master_posts WHERE id='{}'".format(doc['id'])
            cursor.execute(sql)
            row = cursor.fetchone()
            if row:
                # datetime objects are not JSON-serializable; stringify first.
                row['insert_date'] = row['insert_date'].strftime(
                    '%Y-%m-%d %H:%M:%S')
                redis_pointer.rpush('facebook_analyze', json.dumps(row))
                print row['id']
        start += 10000
        # The original loop never cleared `run`; stop on a short page, matching
        # the pagination pattern used by the other scripts here.
        if len(response['docs']) < 10000:
            run = False
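# Hypothetical usage (the call site is not in the original snippet): _date
# takes the YYYYMMDD pub_day format seen in the Solr queries above.
if __name__ == '__main__':
    pusher('20190801')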