예제 #1
0
def main():
    """Fill in ``related_content`` for tweets of the users picked by ``user_query``.

    Loads every tweet of those users whose ``related_content`` is still NULL
    and issues one UPDATE per tweet: the prepared content when ``get_content``
    yields something for the row, an empty string otherwise.
    """
    sql = MySqlUtils('Twitter')
    users = sql.get_data(user_query)
    handles = [row['user_handle'] for row in users]
    print('Query count {}'.format(len(users)))

    quoted_handles = ','.join(["'{}'".format(handle) for handle in handles])
    query = ('SELECT tweet_id, urls_contained FROM Twitter.tweet where  user_handle IN ('
             + quoted_handles + ")" + " and related_content IS NULL;")
    print(query)

    data = sql.get_data(query)
    print("no. of tweet id tweet_id", len(data))

    update_template = 'UPDATE tweet SET related_content = "{}" WHERE tweet_id={}'
    # NOTE(review): iteration starts at the eleventh row (data[10:]) while the
    # "remains" figure below is still computed from len(data) - confirm that
    # skipping the first ten rows is intended.
    for count, url in enumerate(data[10:], start=1):
        related = ''
        # The urls_contained field may hold more than one URL, separated by ','.
        if url['urls_contained']:
            content = get_content(url)
            if content:
                related = prep_content(content)
        query = update_template.format(related, url['tweet_id'])

        print("tweet_id remains", len(data) - count)
        sql.cursor.execute(query)
예제 #2
0
def get_user_descriptions(length=500000):
    """Build a mapping of user handle to cleaned profile description.

    Runs ``user_query`` to pick the users, keeps at most ``length`` of them,
    reads their ``description`` from the ``user`` table and cleans every
    non-empty description with ``clean_tweet``.

    :param length: maximum number of user handles to query, to avoid memory
        explosion
    :return: dict mapping ``user_handle`` to the space-joined tokens returned
        by ``clean_tweet``; rows with an empty or ``None`` description are
        left out
    """
    sql = MySqlUtils()
    users = sql.get_data(user_query)
    users_list = [user['user_handle'] for user in users[:length]]
    # Reports the size of the full user result, not of the truncated list.
    print('Query count {}'.format(len(users)))
    quoted_handles = ','.join("'{}'".format(user) for user in users_list)
    query = 'SELECT description, user_handle FROM user where user_handle IN (' + quoted_handles + ')'

    descriptions = sql.get_data(query)
    print("Descriptions", len(descriptions))

    corpus = {}
    for row in descriptions:
        text = row['description']
        # Truthiness instead of len(): a None description would make len()
        # raise TypeError, while '' is skipped exactly as before.
        if text:
            tokens = clean_tweet(text, stem=False, lemmatize=False, as_string=False)
            corpus[row['user_handle']] = ' '.join(tokens)
    return corpus
예제 #3
0
def get_users_score(users_retweets_count, all_retweet_count):
    """Score each user from followers, likes, verified flag and retweets.

    For every handle in ``users_retweets_count`` the ``followers_count``,
    ``likes_count`` and ``verified`` columns are read from the ``user`` table.
    A user's score is the sum of each of those three values divided by its
    mean over the queried users, plus the user's retweet count divided by
    ``all_retweet_count``.

    :param users_retweets_count: mapping of user handle to retweet count
    :param all_retweet_count: total used to normalise each user's retweet count
    :return: dict mapping user handle to score; empty when no users are given
    :raises IndexError: presumably, when ``get_data`` returns no row for a
        handle (``details[0]`` is read unconditionally) - confirm against
        ``MySqlUtils.get_data``
    """

    def _ratio(value, denominator):
        # A zero denominator (no users, nobody verified, no retweets at all)
        # contributes nothing instead of raising ZeroDivisionError.
        return value / denominator if denominator else 0

    sql = MySqlUtils()
    users_details = dict()
    fol_count = 0
    lik_count = 0
    ver_count = 0
    for user_handle, count in users_retweets_count.items():
        query = "SELECT followers_count, likes_count, verified from user WHERE user_handle='{}'".format(user_handle)
        row = sql.get_data(query)[0]
        followers, likes, verified = row['followers_count'], row['likes_count'], row['verified']
        users_details[user_handle] = [followers, likes, verified, count]
        fol_count = fol_count + followers
        lik_count = lik_count + likes
        ver_count = ver_count + verified

    # Nothing to score; also avoids dividing by len(users_details) == 0.
    if not users_details:
        return {}

    user_total = len(users_details)
    fol_mean = fol_count / user_total
    lik_mean = lik_count / user_total
    ver_mean = ver_count / user_total

    users_score = {}
    for user_handle, (followers, likes, verified, retweets) in users_details.items():
        users_score[user_handle] = (_ratio(followers, fol_mean)
                                    + _ratio(likes, lik_mean)
                                    + _ratio(verified, ver_mean)
                                    + _ratio(retweets, all_retweet_count))

    return users_score
def get_data(length=500000):
    """Fetch the tweets of the first ``length`` users selected by ``user_query``.

    :param length: maximum number of user handles to put in the IN clause
    :return: the result of ``sql.get_data`` for the tweet query
    """
    db = MySqlUtils()
    user_rows = db.get_data(user_query)
    handles = [row['user_handle'] for row in user_rows[:length]]
    # Reports the size of the full user result, not of the truncated list.
    print('Query count {}'.format(len(user_rows)))

    in_clause = ','.join(["'{}'".format(handle) for handle in handles])
    tweet_query = ('SELECT text, user_handle, retweets, retweets_permalink FROM tweet where user_handle IN ('
                   + in_clause + ')')

    tweet_rows = db.get_data(tweet_query)
    print("Tweets", len(tweet_rows))
    return tweet_rows
예제 #5
0
 def run_store_profile_info(self, names, query_id):
     """Request the profiles of ``names`` and store each one in the ``user`` table.

     :param names: list of user handles handed to ``yield_profile_requests``
     :param query_id: passed through to ``get_profile_dictionary``
     """
     db = MySqlUtils()
     for batch in self.yield_profile_requests(names):
         for response in batch:
             # Skip missing responses and anything other than HTTP 200.
             if not response or response.status_code != 200:
                 continue
             profile = self.get_profile_dictionary(response, query_id)
             try:
                 db.dict_to_sql(profile, 'user')
             except Exception as e:
                 # Best effort: report the failing record and carry on.
                 print(e)
                 print(profile)
예제 #6
0
def get_data(length=500000, offset=0):
    """Build a per-user text corpus plus retweet statistics.

    Runs ``user_query``, takes the users in ``[offset, offset + length)``,
    loads their tweets and groups them by ``user_handle``. A tweet's
    ``retweets`` value is counted as 0 when its ``retweets_permalink`` is
    truthy.

    :param length: maximum number of user handles to query, to avoid memory
        explosion
    :param offset: index of the first user (within the ``user_query`` result)
        to include
    :return: tuple ``(corpus, users_retweets_count, all_retweet_count)`` where
        ``corpus`` maps each handle to its cleaned, space-joined tweet text,
        ``users_retweets_count`` maps each handle to ``np.sum`` of its counted
        retweets, and ``all_retweet_count`` is ``np.sum`` over all tweets
    """
    sql = MySqlUtils()
    users = sql.get_data(user_query)
    users_list = [user['user_handle'] for user in users[offset:(offset + length)]]
    print('Query count {}'.format(len(users)))
    query = 'SELECT text, user_handle, retweets, retweets_permalink FROM tweet where user_handle IN (' + ','.join(
        ("'{}'".format(user) for user in users_list)) + ')'

    tweets = sql.get_data(query)
    print("Tweets", len(tweets))

    # Accumulate per handle instead of tracking "the current user": the query
    # has no ORDER BY, so a user's rows are not guaranteed to be contiguous,
    # and an empty result must not create an entry for an empty handle.
    texts_by_user = {}
    retweets_by_user = {}
    all_retweet_count = []
    for tweet in tweets:
        handle = tweet['user_handle']
        counted = 0 if tweet['retweets_permalink'] else tweet['retweets']
        texts_by_user.setdefault(handle, []).append(tweet['text'])
        retweets_by_user.setdefault(handle, []).append(counted)
        all_retweet_count.append(counted)

    users_retweets_count = {}
    for handle, counts in retweets_by_user.items():
        users_retweets_count[handle] = np.sum(counts)

    corpus = {}
    for handle, parts in texts_by_user.items():
        # TODO: Too Slow. Speed this up
        # Tweets of one user are joined with '. ' before cleaning.
        tokens = clean_tweet('. '.join(parts), stem=False, lemmatize=False, as_string=False)
        corpus[handle] = ' '.join(tokens)
    return corpus, users_retweets_count, np.sum(all_retweet_count)
예제 #7
0
def get_top_followers():
    """Dump the ``user`` rows of the 100 highest-scoring users.

    Loads the handle-to-score mapping from ``time_22_4_2018_users_score.pkl``,
    looks up each of the top 100 handles in the ``user`` table, adds the score
    to the fetched row under ``'score'`` and writes the list of rows to
    ``time_22_4_2018_results.pkl``.
    """
    sql = MySqlUtils()
    users_score = joblib.load('time_22_4_2018_users_score.pkl')
    ranked = sorted(users_score.items(), key=operator.itemgetter(1), reverse=True)

    results = []
    for handle, score in ranked[:100]:
        query = """SELECT * from user where user_handle='%s'""" % (handle)
        # Only the first returned row is kept for each handle.
        row = sql.get_data(query)[0]
        row['score'] = score
        results.append(row)

    joblib.dump(results, 'time_22_4_2018_results.pkl')
예제 #8
0
def got_query(query, start_date, end_date, max_num):
    """Wrapper for get old tweets: fetch tweets for a search and store them.

    :param query: search string; also attached to every tweet as ``tweet.query``
    :param start_date: passed to ``setSince``
    :param end_date: passed to ``setUntil``
    :param max_num: passed to ``setMaxTweets``
    :return: None
    """
    print("{} {} {} {}".format(query, start_date, end_date, max_num))

    criteria = gotmanager.TweetCriteria().setQuerySearch(query)
    criteria = criteria.setSince(start_date).setUntil(end_date)
    criteria = criteria.setMaxTweets(max_num)
    fetched = gotmanager.TweetManager.getTweets(criteria)

    db = MySqlUtils()
    for tweet in fetched:
        # Tag the tweet with the search that produced it before storing.
        tweet.query = query
        db.dict_to_sql(get_cleaned_tweet_dictionary(tweet), 'tweet')
예제 #9
0
def got_username(user_handle=None,
                 start_date=None,
                 end_date=None,
                 max_num=None,
                 include_retweets=None,
                 include_replies=False):
    """Fetch the tweets of each given handle and store them in the ``tweet`` table.

    :param user_handle: iterable of handles, with or without a leading ``@``;
        the default ``None`` is not iterable, so a value must be supplied
    :param start_date: passed to ``setSince``; defaults to ``"2017-01-01"``
    :param end_date: passed to ``setUntil``; defaults to ``"2018-01-01"``
    :param max_num: passed to ``setMaxTweets``
    :param include_retweets: copied onto the criteria object
    :param include_replies: copied onto the criteria object
    :raises UnicodeEncodeError: re-raised from ``dict_to_sql`` after printing
        the offending tweet dictionary
    """
    start_date = "2017-01-01" if start_date is None else start_date
    end_date = "2018-01-01" if end_date is None else end_date
    print(start_date)

    for name in user_handle:
        print(name)
        print('----------------------------------------------------------------')

        tweetCriteria = gotmanager.TweetCriteria()
        # The setters are chained and the result discarded, so this relies on
        # them configuring tweetCriteria in place.
        tweetCriteria.setUsername(
            '@' + name.strip('@')
        ).setSince(start_date).setUntil(end_date).setMaxTweets(max_num)
        tweetCriteria.include_retweets = include_retweets
        tweetCriteria.include_replies = include_replies

        fetched = gotmanager.TweetManager.getTweets(tweetCriteria)
        print(len(fetched))

        sql = MySqlUtils()
        tweet_dicts = []
        for tweet in fetched:
            print(tweet.__dict__)
            tweet_dict = get_cleaned_tweet_dictionary(tweet)
            tweet_dicts.append(tweet_dict)
            try:
                sql.dict_to_sql(tweet_dict, 'tweet')
            except UnicodeEncodeError as e:
                print(tweet_dict)
                raise e
        # Not used in the visible part of this function.
        data = pd.DataFrame(tweet_dicts)
obj    :-> scrape Twitter user profiles and store them in the database.

input  :-> domain
output :-> user profiles stored in the database and user_status stored in the queue table

"""

import os, getopt
import sys

# Make the directory two levels above this file importable so that the
# ``src`` package imports below resolve.
home = os.path.abspath(os.path.dirname(__file__))
sys.path.append(home + '/../../')
from src.scraping.twitter_loader import TwitterLoader
from src.mysql_utils import MySqlUtils

# Module-level database helper, created at import time.
sql = MySqlUtils()


def main(argv):
    """Parse the long options ``--entity``, ``--source_type`` and ``--domain``.

    :param argv: command-line arguments without the program name
    :raises SystemExit: with status 2, after printing the usage via
        ``print_error``, when the options cannot be parsed
    """
    domain = ''
    source_type = ''
    entity = ''

    options = ("entity=", "source_type=", "domain=")
    try:
        opts, args = getopt.getopt(argv, "", options)
    except getopt.GetoptError:
        print("Incorrect Parameters \nUsage:")
        print_error()
        # A bare sys.exit() reports success (status 0); use a non-zero status
        # so shells and schedulers can detect the bad invocation.
        sys.exit(2)
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer
from ekphrasis.classes.preprocessor import TextPreProcessor
from ekphrasis.classes.tokenizer import SocialTokenizer
from ekphrasis.dicts.emoticons import emoticons
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
import numpy as np
# Make the directory two levels above this file importable so that the
# ``src`` package imports below resolve.
home = os.path.abspath(os.path.dirname(__file__))
sys.path.append(home + '/../../')
from src.mysql_utils import MySqlUtils
from src.NLP.preprocessing import clean_tweet, stopwords
import joblib

# Module-level database helper, created at import time.
sql=MySqlUtils()



final_query = '"india" "nicobar" "north and middle andaman" "south andaman" "anantapur" "chittoor" "east godavari" "guntur" "krishna" "kurnool" "prakasam" "srikakulam" "sri potti sriramulu nellore" "visakhapatnam" "vizianagaram" "west godavari" "ysr district, kadapa  cuddapah" "anjaw" "changlang" "dibang valley" "east kameng" "east siang" "kra daadi" "kurung kumey" "lohit" "longding" "lower dibang valley" "lower siang" "lower subansiri" "namsai" "papum pare" "siang" "tawang" "tirap" "upper siang" "upper subansiri" "west kameng" "west siang" "baksa" "barpeta" "biswanath" "bongaigaon" "cachar" "charaideo" "chirang" "darrang" "dhemaji" "dhubri" "dibrugarh" "dima hasao  north cachar hills" "goalpara" "golaghat" "hailakandi" "hojai" "jorhat" "kamrup" "kamrup metropolitan" "karbi anglong" "karimganj" "kokrajhar" "lakhimpur" "majuli" "morigaon" "nagaon" "nalbari" "sivasagar" "sonitpur" "south salamara-mankachar" "tinsukia" "udalguri" "west karbi anglong" "araria" "arwal" "aurangabad" "banka" "begusarai" "bhagalpur" "bhojpur" "buxar" "darbhanga" "east champaran  motihari" "gaya" "gopalganj" "jamui" "jehanabad" "kaimur  bhabua" "katihar" "khagaria" "kishanganj" "lakhisarai" "madhepura" "madhubani" "munger  monghyr" "muzaffarpur" "nalanda" "nawada" "patna" "purnia  purnea" "rohtas" "saharsa" "samastipur" "saran" "sheikhpura" "sheohar" "sitamarhi" "siwan" "supaul" "vaishali" "west champaran" "chandigarh" "balod" "baloda bazar" "balrampur" "bastar" "bemetara" "bijapur" "bilaspur" "dantewada  south bastar" "dhamtari" "durg" "gariyaband" "janjgir-champa" "jashpur" "kabirdham  kawardha" "kanker  north bastar" "kondagaon" "korba" "korea  koriya" "mahasamund" "mungeli" "narayanpur" "raigarh" "raipur" "rajnandgaon" "sukma" "surajpur  " "surguja" "dadra & nagar haveli" "daman" "diu" "central delhi" "east delhi" "new delhi" "north delhi" "north east  delhi" "north west  delhi" "shahdara" "south delhi" "south east delhi" "south west  delhi" "west delhi" "north goa" "south goa" 
"ahmedabad" "amreli" "anand" "aravalli" "banaskantha  palanpur" "bharuch" "bhavnagar" "botad" "chhota udepur" "dahod" "dangs  ahwa" "devbhoomi dwarka" "gandhinagar" "gir somnath" "jamnagar" "junagadh" "kachchh" "kheda  nadiad" "mahisagar" "mehsana" "morbi" "narmada  rajpipla" "navsari" "panchmahal  godhra" "patan" "porbandar" "rajkot" "sabarkantha  himmatnagar" "surat" "surendranagar" "tapi  vyara" "vadodara" "valsad" "ambala" "bhiwani" "charkhi dadri" "faridabad" "fatehabad" "gurgaon" "hisar" "jhajjar" "jind" "kaithal" "karnal" "kurukshetra" "mahendragarh" "mewat" "palwal" "panchkula" "panipat" "rewari" "rohtak" "sirsa" "sonipat" "yamunanagar" "bilaspur" "chamba" "hamirpur" "kangra" "kinnaur" "kullu" "lahaul & spiti" "mandi" "shimla" "sirmaur  sirmour" "solan" "una" "anantnag" "bandipore" "baramulla" "budgam" "doda" "ganderbal" "jammu" "kargil" "kathua" "kishtwar" "kulgam" "kupwara" "leh" "poonch" "pulwama" "rajouri" "ramban" "reasi" "samba" "shopian" "srinagar" "udhampur" "bokaro" "chatra" "deoghar" "dhanbad" "dumka" "east singhbhum" "garhwa" "giridih" "godda" "gumla" "hazaribag" "jamtara" "khunti" "koderma" "latehar" "lohardaga" "pakur" "palamu" "ramgarh" "ranchi" "sahibganj" "seraikela-kharsawan" "simdega" "west singhbhum" "bagalkot" "ballari  bellary" "belagavi  belgaum" "bengaluru  bangalore rural" "bengaluru  bangalore urban" "bidar" "chamarajanagar" "chikballapur" "chikkamagaluru  chikmagalur" "chitradurga" "dakshina kannada" "davangere" "dharwad" "gadag" "hassan" "haveri" "kalaburagi  gulbarga" "kodagu" "kolar" "koppal" "mandya" "mysuru  mysore" "raichur" "ramanagara" "shivamogga  shimoga" "tumakuru  tumkur" "udupi" "uttara kannada  karwar" "vijayapura  bijapur" "yadgir" "alappuzha" "ernakulam" "idukki" "kannur" "kasaragod" "kollam" "kottayam" "kozhikode" "malappuram" "palakkad" "pathanamthitta" "thiruvananthapuram" "thrissur" "wayanad" "lakshadweep" "agar malwa" "alirajpur" "anuppur" "ashoknagar" "balaghat" "barwani" "betul" "bhind" "bhopal" "burhanpur" 
"chhatarpur" "chhindwara" "damoh" "datia" "dewas" "dhar" "dindori" "guna" "gwalior" "harda" "hoshangabad" "indore" "jabalpur" "jhabua" "katni" "khandwa" "khargone" "mandla" "mandsaur" "morena" "narsinghpur" "neemuch" "panna" "raisen" "rajgarh" "ratlam" "rewa" "sagar" "satna" "sehore" "seoni" "shahdol" "shajapur" "sheopur" "shivpuri" "sidhi" "singrauli" "tikamgarh" "ujjain" "umaria" "vidisha" "ahmednagar" "akola" "amravati" "aurangabad" "beed" "bhandara" "buldhana" "chandrapur" "dhule" "gadchiroli" "gondia" "hingoli" "jalgaon" "jalna" "kolhapur" "latur" "mumbai city" "mumbai suburban" "nagpur" "nanded" "nandurbar" "nashik" "osmanabad" "palghar" "parbhani" "pune" "raigad" "ratnagiri" "sangli" "satara" "sindhudurg" "solapur" "thane" "wardha" "washim" "yavatmal" "bishnupur" "chandel" "churachandpur" "imphal east" "imphal west" "jiribam" "kakching" "kamjong" "kangpokpi" "noney" "pherzawl" "senapati" "tamenglong" "tengnoupal" "thoubal" "ukhrul" "east garo hills" "east jaintia hills" "east khasi hills" "north garo hills" "ri bhoi" "south garo hills" "south west garo hills " "south west khasi hills" "west garo hills" "west jaintia hills" "west khasi hills" "aizawl" "champhai" "kolasib" "lawngtlai" "lunglei" "mamit" "saiha" "serchhip" "dimapur" "kiphire" "kohima" "longleng" "mokokchung" "mon" "peren" "phek" "tuensang" "wokha" "zunheboto" "angul" "balangir" "balasore" "bargarh" "bhadrak" "boudh" "cuttack" "deogarh" "dhenkanal" "gajapati" "ganjam" "jagatsinghapur" "jajpur" "jharsuguda" "kalahandi" "kandhamal" "kendrapara" "kendujhar  keonjhar" "khordha" "koraput" "malkangiri" "mayurbhanj" "nabarangpur" "nayagarh" "nuapada" "puri" "rayagada" "sambalpur" "sonepur" "sundargarh" "karaikal" "mahe" "pondicherry" "yanam" "amritsar" "barnala" "bathinda" "faridkot" "fatehgarh sahib" "fazilka" "ferozepur" "gurdaspur" "hoshiarpur" "jalandhar" "kapurthala" "ludhiana" "mansa" "moga" "muktsar" "nawanshahr  shahid bhagat singh nagar" "pathankot" "patiala" "rupnagar" "sahibzada ajit singh 
nagar  mohali" "sangrur" "tarn taran" "ajmer" "alwar" "banswara" "baran" "barmer" "bharatpur" "bhilwara" "bikaner" "bundi" "chittorgarh" "churu" "dausa" "dholpur" "dungarpur" "hanumangarh" "jaipur" "jaisalmer" "jalore" "jhalawar" "jhunjhunu" "jodhpur" "karauli" "kota" "nagaur" "pali" "pratapgarh" "rajsamand" "sawai madhopur" "sikar" "sirohi" "sri ganganagar" "tonk" "udaipur" "east sikkim" "north sikkim" "south sikkim" "west sikkim" "ariyalur" "chennai" "coimbatore" "cuddalore" "dharmapuri" "dindigul" "erode" "kanchipuram" "kanyakumari" "karur" "krishnagiri" "madurai" "nagapattinam" "namakkal" "nilgiris" "perambalur" "pudukkottai" "ramanathapuram" "salem" "sivaganga" "thanjavur" "theni" "thoothukudi  tuticorin" "tiruchirappalli" "tirunelveli" "tiruppur" "tiruvallur" "tiruvannamalai" "tiruvarur" "vellore" "viluppuram" "virudhunagar" "adilabad" "bhadradri kothagudem" "hyderabad" "jagtial" "jangaon" "jayashankar bhoopalpally" "jogulamba gadwal" "kamareddy" "karimnagar" "khammam" "komaram bheem asifabad" "mahabubabad" "mahabubnagar" "mancherial" "medak" "medchal" "nagarkurnool" "nalgonda" "nirmal" "nizamabad" "peddapalli" "rajanna sircilla" "rangareddy" "sangareddy" "siddipet" "suryapet" "vikarabad" "wanaparthy" "warangal  rural" "warangal  urban" "yadadri bhuvanagiri" "dhalai" "gomati" "khowai" "north tripura" "sepahijala" "south tripura" "unakoti" "west tripura" "almora" "bageshwar" "chamoli" "champawat" "dehradun" "haridwar" "nainital" "pauri garhwal" "pithoragarh" "rudraprayag" "tehri garhwal" "udham singh nagar" "uttarkashi" "agra" "aligarh" "allahabad" "ambedkar nagar" "amethi  chatrapati sahuji mahraj nagar" "amroha  j.p. 
nagar" "auraiya" "azamgarh" "baghpat" "bahraich" "ballia" "balrampur" "banda" "barabanki" "bareilly" "basti" "bhadohi" "bijnor" "budaun" "bulandshahr" "chandauli" "chitrakoot" "deoria" "etah" "etawah" "faizabad" "farrukhabad" "fatehpur" "firozabad" "gautam buddha nagar" "ghaziabad" "ghazipur" "gonda" "gorakhpur" "hamirpur" "hapur  panchsheel nagar" "hardoi" "hathras" "jalaun" "jaunpur" "jhansi" "kannauj" "kanpur dehat" "kanpur nagar" "kanshiram nagar  kasganj" "kaushambi" "kushinagar  padrauna" "lakhimpur - kheri" "lalitpur" "lucknow" "maharajganj" "mahoba" "mainpuri" "mathura" "mau" "meerut" "mirzapur" "moradabad" "muzaffarnagar" "pilibhit" "pratapgarh" "raebareli" "rampur" "saharanpur" "sambhal  bhim nagar" "sant kabir nagar" "shahjahanpur" "shamali  prabuddh nagar" "shravasti" "siddharth nagar" "sitapur" "sonbhadra" "sultanpur" "unnao" "varanasi" "alipurduar" "bankura" "birbhum" "burdwan  bardhaman" "cooch behar" "dakshin dinajpur  south dinajpur" "darjeeling" "hooghly" "howrah" "jalpaiguri" "kalimpong" "kolkata" "malda" "murshidabad" "nadia" "north 24 parganas" "paschim medinipur  west medinipur" "purba medinipur  east medinipur" "purulia" "south 24 parganas" "uttar dinajpur  north dinajpur" "andaman and nicobar island  ut" "andhra pradesh" "arunachal pradesh" "assam" "bihar" "chandigarh  ut" "chhattisgarh" "dadra and nagar haveli  ut" "daman and diu  ut" "delhi  nct" "goa" "gujarat" "haryana" "himachal pradesh" "jammu and kashmir" "jharkhand" "karnataka" "kerala" "lakshadweep  ut" "madhya pradesh" "maharashtra" "manipur" "meghalaya" "mizoram" "nagaland" "odisha" "puducherry  ut" "punjab" "rajasthan" "sikkim" "tamil nadu" "telangana" "tripura" "uttarakhand" "uttar pradesh" "west bengal"'
# Selects the handles of users present in both ``queue`` and ``user`` where
# queue.tweet_status=1, user.lang='en', user.total_tweet_count > 25 and
# queue.query_id=1, and whose location / time_zone / description match the
# quoted phrases of ``final_query`` in a boolean-mode full-text search.
user_query = """SELECT queue.user_handle FROM
                  queue JOIN user
                  ON queue.user_handle = user.user_handle
                  WHERE
                  queue.tweet_status=1 AND user.lang='en' AND
                  user.total_tweet_count > 25 AND queue.query_id=1 AND
                      MATCH (user.location, user.time_zone, user.description) AGAINST ('{}' IN BOOLEAN MODE);""".format(final_query)

# Echo the final SQL for debugging.
print(user_query)

# NOTE: the query is executed at module level, i.e. whenever this file is run
# or imported.
users=sql.get_data(user_query)
"""
todo: use grequests instead of requests
"""

import requests
import os, sys
import mimetypes
import tqdm
from urllib.parse import urlparse
from os.path import splitext

home = os.path.abspath(os.path.dirname(__file__))
sys.path.append(home + '/../../')
from src.mysql_utils import MySqlUtils

# Module-level database helper, constructed with the argument 'Twitter'.
sql = MySqlUtils('Twitter')
# Absolute directory path for images; how it is used is not visible in this
# part of the file.
image_path = '/home/analytics/data_partition/images/images'

# Lower-case file extensions (with leading dot) treated as image types -
# presumably matched against URL suffixes; confirm against the code using it.
image_ext = [
    '.ani', '.bmp', '.cal', '.fax', '.gif', '.img', '.jbg', '.jpe', '.jpeg',
    '.jpg', '.mac', '.pbm', '.pcd', '.pcx', '.pct', '.pgm', '.png', '.ppm',
    '.psd', '.ras', '.tga', '.tiff', '.wmf'
]

final_query = '"india" "nicobar" "north and middle andaman" "south andaman" "anantapur" "chittoor" "east godavari" "guntur" "krishna" "kurnool" "prakasam" "srikakulam" "sri potti sriramulu nellore" "visakhapatnam" "vizianagaram" "west godavari" "ysr district, kadapa  cuddapah" "anjaw" "changlang" "dibang valley" "east kameng" "east siang" "kra daadi" "kurung kumey" "lohit" "longding" "lower dibang valley" "lower siang" "lower subansiri" "namsai" "papum pare" "siang" "tawang" "tirap" "upper siang" "upper subansiri" "west kameng" "west siang" "baksa" "barpeta" "biswanath" "bongaigaon" "cachar" "charaideo" "chirang" "darrang" "dhemaji" "dhubri" "dibrugarh" "dima hasao  north cachar hills" "goalpara" "golaghat" "hailakandi" "hojai" "jorhat" "kamrup" "kamrup metropolitan" "karbi anglong" "karimganj" "kokrajhar" "lakhimpur" "majuli" "morigaon" "nagaon" "nalbari" "sivasagar" "sonitpur" "south salamara-mankachar" "tinsukia" "udalguri" "west karbi anglong" "araria" "arwal" "aurangabad" "banka" "begusarai" "bhagalpur" "bhojpur" "buxar" "darbhanga" "east champaran  motihari" "gaya" "gopalganj" "jamui" "jehanabad" "kaimur  bhabua" "katihar" "khagaria" "kishanganj" "lakhisarai" "madhepura" "madhubani" "munger  monghyr" "muzaffarpur" "nalanda" "nawada" "patna" "purnia  purnea" "rohtas" "saharsa" "samastipur" "saran" "sheikhpura" "sheohar" "sitamarhi" "siwan" "supaul" "vaishali" "west champaran" "chandigarh" "balod" "baloda bazar" "balrampur" "bastar" "bemetara" "bijapur" "bilaspur" "dantewada  south bastar" "dhamtari" "durg" "gariyaband" "janjgir-champa" "jashpur" "kabirdham  kawardha" "kanker  north bastar" "kondagaon" "korba" "korea  koriya" "mahasamund" "mungeli" "narayanpur" "raigarh" "raipur" "rajnandgaon" "sukma" "surajpur  " "surguja" "dadra & nagar haveli" "daman" "diu" "central delhi" "east delhi" "new delhi" "north delhi" "north east  delhi" "north west  delhi" "shahdara" "south delhi" "south east delhi" "south west  delhi" "west delhi" "north goa" "south goa" 
"ahmedabad" "amreli" "anand" "aravalli" "banaskantha  palanpur" "bharuch" "bhavnagar" "botad" "chhota udepur" "dahod" "dangs  ahwa" "devbhoomi dwarka" "gandhinagar" "gir somnath" "jamnagar" "junagadh" "kachchh" "kheda  nadiad" "mahisagar" "mehsana" "morbi" "narmada  rajpipla" "navsari" "panchmahal  godhra" "patan" "porbandar" "rajkot" "sabarkantha  himmatnagar" "surat" "surendranagar" "tapi  vyara" "vadodara" "valsad" "ambala" "bhiwani" "charkhi dadri" "faridabad" "fatehabad" "gurgaon" "hisar" "jhajjar" "jind" "kaithal" "karnal" "kurukshetra" "mahendragarh" "mewat" "palwal" "panchkula" "panipat" "rewari" "rohtak" "sirsa" "sonipat" "yamunanagar" "bilaspur" "chamba" "hamirpur" "kangra" "kinnaur" "kullu" "lahaul & spiti" "mandi" "shimla" "sirmaur  sirmour" "solan" "una" "anantnag" "bandipore" "baramulla" "budgam" "doda" "ganderbal" "jammu" "kargil" "kathua" "kishtwar" "kulgam" "kupwara" "leh" "poonch" "pulwama" "rajouri" "ramban" "reasi" "samba" "shopian" "srinagar" "udhampur" "bokaro" "chatra" "deoghar" "dhanbad" "dumka" "east singhbhum" "garhwa" "giridih" "godda" "gumla" "hazaribag" "jamtara" "khunti" "koderma" "latehar" "lohardaga" "pakur" "palamu" "ramgarh" "ranchi" "sahibganj" "seraikela-kharsawan" "simdega" "west singhbhum" "bagalkot" "ballari  bellary" "belagavi  belgaum" "bengaluru  bangalore rural" "bengaluru  bangalore urban" "bidar" "chamarajanagar" "chikballapur" "chikkamagaluru  chikmagalur" "chitradurga" "dakshina kannada" "davangere" "dharwad" "gadag" "hassan" "haveri" "kalaburagi  gulbarga" "kodagu" "kolar" "koppal" "mandya" "mysuru  mysore" "raichur" "ramanagara" "shivamogga  shimoga" "tumakuru  tumkur" "udupi" "uttara kannada  karwar" "vijayapura  bijapur" "yadgir" "alappuzha" "ernakulam" "idukki" "kannur" "kasaragod" "kollam" "kottayam" "kozhikode" "malappuram" "palakkad" "pathanamthitta" "thiruvananthapuram" "thrissur" "wayanad" "lakshadweep" "agar malwa" "alirajpur" "anuppur" "ashoknagar" "balaghat" "barwani" "betul" "bhind" "bhopal" "burhanpur" 
"chhatarpur" "chhindwara" "damoh" "datia" "dewas" "dhar" "dindori" "guna" "gwalior" "harda" "hoshangabad" "indore" "jabalpur" "jhabua" "katni" "khandwa" "khargone" "mandla" "mandsaur" "morena" "narsinghpur" "neemuch" "panna" "raisen" "rajgarh" "ratlam" "rewa" "sagar" "satna" "sehore" "seoni" "shahdol" "shajapur" "sheopur" "shivpuri" "sidhi" "singrauli" "tikamgarh" "ujjain" "umaria" "vidisha" "ahmednagar" "akola" "amravati" "aurangabad" "beed" "bhandara" "buldhana" "chandrapur" "dhule" "gadchiroli" "gondia" "hingoli" "jalgaon" "jalna" "kolhapur" "latur" "mumbai city" "mumbai suburban" "nagpur" "nanded" "nandurbar" "nashik" "osmanabad" "palghar" "parbhani" "pune" "raigad" "ratnagiri" "sangli" "satara" "sindhudurg" "solapur" "thane" "wardha" "washim" "yavatmal" "bishnupur" "chandel" "churachandpur" "imphal east" "imphal west" "jiribam" "kakching" "kamjong" "kangpokpi" "noney" "pherzawl" "senapati" "tamenglong" "tengnoupal" "thoubal" "ukhrul" "east garo hills" "east jaintia hills" "east khasi hills" "north garo hills" "ri bhoi" "south garo hills" "south west garo hills " "south west khasi hills" "west garo hills" "west jaintia hills" "west khasi hills" "aizawl" "champhai" "kolasib" "lawngtlai" "lunglei" "mamit" "saiha" "serchhip" "dimapur" "kiphire" "kohima" "longleng" "mokokchung" "mon" "peren" "phek" "tuensang" "wokha" "zunheboto" "angul" "balangir" "balasore" "bargarh" "bhadrak" "boudh" "cuttack" "deogarh" "dhenkanal" "gajapati" "ganjam" "jagatsinghapur" "jajpur" "jharsuguda" "kalahandi" "kandhamal" "kendrapara" "kendujhar  keonjhar" "khordha" "koraput" "malkangiri" "mayurbhanj" "nabarangpur" "nayagarh" "nuapada" "puri" "rayagada" "sambalpur" "sonepur" "sundargarh" "karaikal" "mahe" "pondicherry" "yanam" "amritsar" "barnala" "bathinda" "faridkot" "fatehgarh sahib" "fazilka" "ferozepur" "gurdaspur" "hoshiarpur" "jalandhar" "kapurthala" "ludhiana" "mansa" "moga" "muktsar" "nawanshahr  shahid bhagat singh nagar" "pathankot" "patiala" "rupnagar" "sahibzada ajit singh 
nagar  mohali" "sangrur" "tarn taran" "ajmer" "alwar" "banswara" "baran" "barmer" "bharatpur" "bhilwara" "bikaner" "bundi" "chittorgarh" "churu" "dausa" "dholpur" "dungarpur" "hanumangarh" "jaipur" "jaisalmer" "jalore" "jhalawar" "jhunjhunu" "jodhpur" "karauli" "kota" "nagaur" "pali" "pratapgarh" "rajsamand" "sawai madhopur" "sikar" "sirohi" "sri ganganagar" "tonk" "udaipur" "east sikkim" "north sikkim" "south sikkim" "west sikkim" "ariyalur" "chennai" "coimbatore" "cuddalore" "dharmapuri" "dindigul" "erode" "kanchipuram" "kanyakumari" "karur" "krishnagiri" "madurai" "nagapattinam" "namakkal" "nilgiris" "perambalur" "pudukkottai" "ramanathapuram" "salem" "sivaganga" "thanjavur" "theni" "thoothukudi  tuticorin" "tiruchirappalli" "tirunelveli" "tiruppur" "tiruvallur" "tiruvannamalai" "tiruvarur" "vellore" "viluppuram" "virudhunagar" "adilabad" "bhadradri kothagudem" "hyderabad" "jagtial" "jangaon" "jayashankar bhoopalpally" "jogulamba gadwal" "kamareddy" "karimnagar" "khammam" "komaram bheem asifabad" "mahabubabad" "mahabubnagar" "mancherial" "medak" "medchal" "nagarkurnool" "nalgonda" "nirmal" "nizamabad" "peddapalli" "rajanna sircilla" "rangareddy" "sangareddy" "siddipet" "suryapet" "vikarabad" "wanaparthy" "warangal  rural" "warangal  urban" "yadadri bhuvanagiri" "dhalai" "gomati" "khowai" "north tripura" "sepahijala" "south tripura" "unakoti" "west tripura" "almora" "bageshwar" "chamoli" "champawat" "dehradun" "haridwar" "nainital" "pauri garhwal" "pithoragarh" "rudraprayag" "tehri garhwal" "udham singh nagar" "uttarkashi" "agra" "aligarh" "allahabad" "ambedkar nagar" "amethi  chatrapati sahuji mahraj nagar" "amroha  j.p. 
nagar" "auraiya" "azamgarh" "baghpat" "bahraich" "ballia" "balrampur" "banda" "barabanki" "bareilly" "basti" "bhadohi" "bijnor" "budaun" "bulandshahr" "chandauli" "chitrakoot" "deoria" "etah" "etawah" "faizabad" "farrukhabad" "fatehpur" "firozabad" "gautam buddha nagar" "ghaziabad" "ghazipur" "gonda" "gorakhpur" "hamirpur" "hapur  panchsheel nagar" "hardoi" "hathras" "jalaun" "jaunpur" "jhansi" "kannauj" "kanpur dehat" "kanpur nagar" "kanshiram nagar  kasganj" "kaushambi" "kushinagar  padrauna" "lakhimpur - kheri" "lalitpur" "lucknow" "maharajganj" "mahoba" "mainpuri" "mathura" "mau" "meerut" "mirzapur" "moradabad" "muzaffarnagar" "pilibhit" "pratapgarh" "raebareli" "rampur" "saharanpur" "sambhal  bhim nagar" "sant kabir nagar" "shahjahanpur" "shamali  prabuddh nagar" "shravasti" "siddharth nagar" "sitapur" "sonbhadra" "sultanpur" "unnao" "varanasi" "alipurduar" "bankura" "birbhum" "burdwan  bardhaman" "cooch behar" "dakshin dinajpur  south dinajpur" "darjeeling" "hooghly" "howrah" "jalpaiguri" "kalimpong" "kolkata" "malda" "murshidabad" "nadia" "north 24 parganas" "paschim medinipur  west medinipur" "purba medinipur  east medinipur" "purulia" "south 24 parganas" "uttar dinajpur  north dinajpur" "andaman and nicobar island  ut" "andhra pradesh" "arunachal pradesh" "assam" "bihar" "chandigarh  ut" "chhattisgarh" "dadra and nagar haveli  ut" "daman and diu  ut" "delhi  nct" "goa" "gujarat" "haryana" "himachal pradesh" "jammu and kashmir" "jharkhand" "karnataka" "kerala" "lakshadweep  ut" "madhya pradesh" "maharashtra" "manipur" "meghalaya" "mizoram" "nagaland" "odisha" "puducherry  ut" "punjab" "rajasthan" "sikkim" "tamil nadu" "telangana" "tripura" "uttarakhand" "uttar pradesh" "west bengal"'
user_query = """SELECT queue.user_handle FROM
                  queue JOIN user
                  ON queue.user_handle = user.user_handle
                  WHERE
                  queue.tweet_status=1 AND user.lang='en' AND
                  user.total_tweet_count > 25 AND queue.query_id=1 AND