Example #1
0
def get_user():
    """Cross-reference users listed in data_user_polri.txt against the
    MySQL twitter_account table and print 2019 'polri' sentiment counts
    pulled from Solr for each matched user.

    Input file format (one user per line): id|gender|tweet
    Output (one line per matched user):
        id|screen_name|name|gender|neg|neu|pos
    """
    with open('data_user_polri.txt', 'r') as f:
        users = [line.replace('\n', '').lower() for line in f]

    conn = MySQLdb.connect('192.168.150.158', 'backend', 'rahasia123', 'ipa_main')
    cursor = conn.cursor(MySQLdb.cursors.DictCursor)
    solr = eBsolr('http://192.168.150.105:8983/solr/twitter_tweet', 'config.conf')
    try:
        for user in users:
            _id, gender, tweet = user.split('|')
            # Parameterized query: the id comes from an external file, so
            # never interpolate it into the SQL string directly.
            cursor.execute('SELECT * FROM twitter_account WHERE id=%s', (_id,))
            row = cursor.fetchone()
            if not row:
                continue
            query = 'polri AND user_id:{} AND pub_year:2019'.format(_id)
            response = solr.getDocsFacets(query, 'sentiment', rows=1)
            # Missing facet buckets simply mean a count of 0; chained .get
            # replaces the original bare try/except blocks.
            facets = response.get('facets', {}).get('sentiment', {})
            pos = facets.get('1', 0)
            neg = facets.get('-1', 0)
            neu = facets.get('0', 0)
            print('{}|{}|{}|{}|{}|{}|{}'.format(_id, row['screen_name'],
                                                row['name'], gender,
                                                neg, neu, pos))
    finally:
        # Close DB resources even if Solr or MySQL raises mid-loop.
        cursor.close()
        conn.close()
Example #2
0
def save_data():
    """Page through every 2019 'polri' Instagram document in Solr and dump
    the user_id/sentiment fields to data_solr_instagram.txt as one JSON
    array.
    """
    solr = eBsolr('http://192.168.150.101:8983/solr/instagram', 'config.conf')
    query = 'polri AND pub_year:2019'

    page_size = 10000
    data = []
    page = 0
    while True:
        print(page)
        response = solr.getDocs(query, 'user_id, sentiment',
                                start=page * page_size, rows=page_size)
        page += 1
        data += response['docs']
        # A short page means the result set is exhausted.
        if len(response['docs']) < page_size:
            break

    # Context manager guarantees the file handle is closed even if the
    # JSON serialization or write raises.
    with open('data_solr_instagram.txt', 'w') as f:
        f.write(json.dumps(data))
    print('end')
Example #3
0
from library.eBsolr import eBsolr
import MySQLdb
# Verify each ISA record (isa_id|media|title) is present both in the Solr
# news core and in the MySQL news table; print per-record status lines:
#     media|title|<solr status>|<sql status>
conn = MySQLdb.connect('192.168.150.154', 'backend', 'rahasia123', 'mm_prod_news')
cursor = conn.cursor()

_path = '/home/aditya/ISA/'
_file = '201902b.csv'

with open('{}{}'.format(_path, _file), 'r') as f:
    data = f.readlines()

solr = eBsolr('http://192.168.150.126:8983/solr/news', 'config.conf')

for d in data:
    _solr_status = 'not ok'
    _sql_status = 'not ok'
    isa_id, media, title = d.replace('\n', '').split('|')
    # NOTE(review): title is interpolated unescaped into the Solr query;
    # titles containing '"' or Solr operators will break or alter the query.
    query = 'title:"{}" AND media:{}'.format(title, media.replace(' ', '+'))
    response = solr.getDocs(query, rows=1, fields='id')
    if response['count'] > 0:
        _solr_status = 'ok'
        _id = response['docs'][0]['id']
        # Parameterized query instead of string-formatting the id into SQL.
        cursor.execute('SELECT n_id FROM news WHERE n_id=%s', (_id,))
        row = cursor.fetchone()
        if row:
            _sql_status = 'ok'
    print('{}|{}|{}|{}'.format(media, title, _solr_status, _sql_status))

cursor.close()
conn.close()
Example #4
0
from library.eBsolr import eBsolr

solr = eBsolr('http://192.168.150.101:8983/solr/fb_post', '')

# All KPK-related Facebook posts in the one-year window 20180622..20190621.
query = '(kpk OR "komisi pemberantasan korupsi") AND pub_day:[20180622 TO 20190621]'

print('request solr start')
response = solr.getDocs(query, 'user_id', rows=300000)
print('request solr done')

# Count posts per user_id; docs lacking a user_id field are skipped.
# dict.get replaces the original bare try/except used as control flow.
ids = {}
for doc in response['docs']:
    if 'user_id' in doc:
        ids[doc['user_id']] = ids.get(doc['user_id'], 0) + 1

print(len(ids))
Example #5
0
# PostgreSQL connection settings.
dbpwd = 'rahasia'
dbhost = '192.168.150.155'
dbport = 5432  # NOTE(review): defined but not passed to PersistentDB below

# Pooled persistent connection; dbuser and dbname are presumably defined
# earlier in the file (not visible in this excerpt).
pool_pg = PersistentDB(psycopg2,
                       host=dbhost,
                       user=dbuser,
                       password=dbpwd,
                       database=dbname)

conn = pool_pg.connection()
# DictCursor lets rows be addressed by column name (e.g. row['tweet_id']).
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

solr_host = 'http://192.168.150.132:8983/solr/twitter_tweet'

solr = eBsolr(solr_host, 'config.conf')

# Tweets mapped to topic 6965 that have a country set, August 2019 only.
sql = "SELECT * FROM topic_tweet_mapping WHERE t_id=6965 AND country <> ''  AND pub_day BETWEEN '20190801' AND '20190831';"

cursor.execute(sql)
rows = cursor.fetchall()
for row in rows:
    query = 'id:{}'.format(row['tweet_id'])
    response = solr.getDocs(query, 'text')
    try:
        text = response['docs'][0]['text'].encode('utf-8')
    except:
        text = response['docs'][0]['text']
    try:
        province = row['province'].split('|')[-1]
    except:
Example #6
0
import MySQLdb
from library.eBsolr import eBsolr
import redis
import json

# Module-level Solr handle for the Facebook core; reused by pusher() below.
solr_facebook = eBsolr('http://10.11.12.88:8983/solr/facebook', 'config')


def pusher(_date):
    """Push the MySQL row of every Facebook post indexed in Solr on *_date*
    onto the Redis list 'facebook_analyze' as JSON.

    _date: value matched against Solr's pub_day field (format looks like
    YYYYMMDD -- TODO confirm against the indexer).
    """
    redis_pointer = redis.StrictRedis(host='127.0.0.1', port=6379, db=1)
    conn = MySQLdb.connect('10.11.12.35', 'backend', 'rahasia123', 'fb_master')
    query = 'pub_day:{}'.format(_date)
    cursor = conn.cursor(MySQLdb.cursors.DictCursor)
    run = True
    start = 0
    # Page through Solr 10k docs at a time.
    # NOTE(review): nothing in the visible lines ever sets run = False; the
    # loop-termination logic presumably follows below this excerpt -- verify.
    while run:
        response = solr_facebook.getDocs(query,
                                         start=start,
                                         rows=10000,
                                         fields='id')
        for doc in response['docs']:
            # NOTE(review): doc id is string-formatted into SQL; safe only if
            # Solr ids are trusted -- a parameterized query would be safer.
            sql = "SELECT * FROM fb_master_posts WHERE id='{}'".format(
                doc['id'])
            cursor.execute(sql)
            row = cursor.fetchone()
            if row:
                # datetime is not JSON-serializable; stringify before dumping.
                row['insert_date'] = row['insert_date'].strftime(
                    '%Y-%m-%d %H:%M:%S')
                redis_pointer.rpush('facebook_analyze', json.dumps(row))
                print row['id']
        start += 10000