def _mentioned_names(text):
    """Return the lower-cased handle that follows each "@" in *text*, in order.

    A handle is the run of characters after the "@" that are all members of
    the module-level ``valid_characters``.  An "@" that is the last character
    of the text (or is followed by an invalid character) yields an empty
    handle, which can never match a real screen name.
    """
    names = []
    at = text.find("@")
    while at != -1:
        end = at + 1
        # Bounds check instead of relying on IndexError: the original left the
        # scan character undefined/stale when "@" was the final character.
        while end < len(text) and text[end] in valid_characters:
            end += 1
        names.append(text[at + 1:end].lower())
        at = text.find("@", at + 1)
    return names


def dbconversation(tablename):
    """Build the directed mention graph of one tweet table and write it as GML.

    Reads non-retweet rows with ``ConvertedTime`` greater than a fixed cut-off
    from table *tablename* of ``tweetsdb.db``.  For every tweet that contains
    "@" but not "rt:", an edge author -> mentioned user is added (or its
    ``weight`` incremented) for each mentioned handle that is itself an author
    in the same selection and is not the tweet's own author.  The graph is
    written to ``"newconv" + tablename + "nortdir.gml"``.

    Parameters:
        tablename: name of the table to read.  It is concatenated into the SQL
            text (identifiers cannot be bound as parameters), so it must come
            from a trusted source.
    """
    mintime = 1358090418
    graph = nx.DiGraph()
    # NOTE(review): never used below; kept in case constructing the getter has
    # side effects that callers rely on -- confirm and remove if not.
    mygetter = DBTweetGetter(None, None)

    con = lite.connect("tweetsdb.db")
    try:
        cur = con.cursor()
        where = " WHERE ConvertedTime>? AND IsRetweet=0"
        cur.execute("SELECT DISTINCT ScreenName FROM " + tablename + where,
                    (mintime,))
        # A set gives O(1) membership tests in the per-mention check below.
        users = set(row[0].lower() for row in cur.fetchall())
        cur.execute("SELECT Tweet, ScreenName FROM " + tablename + where,
                    (mintime,))
        tweets = cur.fetchall()
    finally:
        # Everything needed is in memory now; release the database handle.
        con.close()

    for text, screen_name in tweets:
        if "@" not in text or "rt:" in text.lower():
            continue
        author = screen_name.lower()
        for mentioned in _mentioned_names(text):
            if mentioned == author or mentioned not in users:
                continue
            if graph.has_edge(author, mentioned):
                graph[author][mentioned]['weight'] += 1
            else:
                graph.add_edge(author, mentioned, weight=1)

    print("Built graph")
    nx.write_gml(graph, "newconv" + tablename + "nortdir.gml")
    print("Wrote graph")
# Example #2
# 0
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader

import datetime
# Hashtag names (without the leading "#"); each one is also passed as the
# second argument when constructing its DBTweetGetter on IPCCdb.db.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

# Run one query2 call per tag.
for name in tags:
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2("#%s" % name, 1600000, False)
    # Disabled log-file step, kept for reference:
    # with open("log"+name+".txt", "a") as myfile:
    #     now = datetime.datetime.now()
    #     timestr=now.strftime("%d_%m_%H%M")
    #     myfile.write(timestr+"\n")
    #     readTweet=DBTweetReader("IPCCdb.db", tablename)
    #     print "Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))
    #     myfile.write("Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))+"\n")
    #     udict=readTweet.getUserDict(tablename)
    #     print "Total number of users: " + str(len(udict.keys()))
    #     myfile.write("Total number of users: " + str(len(udict.keys()))+"\n")
    #     for item in udict.items():
    #         if item[1]<3:
    #             del udict[item[0]]
    #     print "Total number of users with 3 tweets or more: " + str(len(udict.keys()))
    # myfile.write("Total number of users with 3 tweets or more: " + str(len(udict.keys()))+"\n"
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

# Getter constructed with both arguments set to None.
mytweetgetter = DBTweetGetter(None, None)

# One connection/cursor pair per database file.
con = lite.connect("tweetsdb.db")
cur = con.cursor()
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

# Tweet tables whose authors are collected below.
tables = [
    'htclimatechange',
    'htclimate',
    'htglobalwarming',
    'ClimateChange',
    'GlobalWarming',
]

# Distinct screen names of each table, appended table by table; a name that
# occurs in several tables therefore appears several times.
names = []
for table in tables:
    cur.execute("SELECT DISTINCT ScreenName FROM " + table)
    names.extend(row[0] for row in cur.fetchall())

# Screen names that already have a row in usermap.
ucur.execute("SELECT ScreenName FROM usermap")
already = [row[0] for row in ucur.fetchall()]

deleted = []
i = 0
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime
tablename = "htclimate"
queryname = '#climate'
fname = "#climate"

# Run the hashtag query against the table (presumably this fetches and stores
# new tweets -- confirm in DBTweetGetter.query).
myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 160000, False)

# Write log file: append a timestamp and three summary counts, echoing each
# count to stdout as well.
with open("dblog" + fname + ".txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Build each line once so the console and the log cannot disagree (the
    # tweet count used to be queried twice).
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict(tablename)
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Drop entries whose value is below 3 (per the log message, the values are
    # per-user tweet counts).  Iterate over a snapshot: deleting from a dict
    # while iterating its live items view raises RuntimeError on Python 3.
    for item in list(udict.items()):
        if item[1] < 3:
            del udict[item[0]]
    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")
import sqlite3 as lite
import sys
import numpy as np
from classtweetgetter import DBTweetGetter
from time import sleep


# Getter constructed with both arguments set to None.
mygetter = DBTweetGetter(None, None)

# Connection and cursor for the tweet database.
con = lite.connect("tweetsdb.db")
cur = con.cursor()

# Connection and cursor for the user database.
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

# Module-level accumulators, all initially empty.
crawlers, chainlengths, nsame = [], [], []
noded = {}

# Output files, opened for writing (any existing contents are truncated).
chainlfile = open("chaindata.txt", "w")
nsamefile = open("nsamedata.txt", "w")
class chainCrawler(object):
    """Walker that starts at a node and calls step() repeatedly until stopped.

    NOTE(review): only __init__ and walk() are fully visible in this excerpt;
    the body of step() is cut off, so what a single step does is not
    documented here.
    """
    #note this method will repeat chains if there is V structure, should be minimal effect
    def __init__(self, node, n):
        """Store the start *node* and the value *n*; the walk is not stopped yet."""
        self.n=n
        self.node=node
        # Flag checked by walk(); presumably set by step() -- body not visible.
        self.stopwalk=False

    def walk(self):
        """Call step() for as long as ``self.stopwalk == False``, then return 0."""
        while self.stopwalk==False:
            self.step()
        return 0
    # NOTE(review): the body of step() is missing from this excerpt.
    def step(self):
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime

# Run the hashtag query against the table (presumably this fetches and stores
# new tweets -- confirm in DBTweetGetter.query).
myTweets = DBTweetGetter("tweetsdb.db", "htclimatechange")
myTweets.query('#climatechange', 160000, False)

# Write log file: append a timestamp and three summary counts, echoing each
# count to stdout as well.
with open("dblog#climatechange.txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", "htclimatechange")

    # Build each line once so the console and the log cannot disagree (the
    # tweet count used to be queried twice).
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets("htclimatechange"))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict("htclimatechange")
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Drop entries whose value is below 3 (per the log message, the values are
    # per-user tweet counts).  Iterate over a snapshot: deleting from a dict
    # while iterating its live items view raises RuntimeError on Python 3.
    for item in list(udict.items()):
        if item[1] < 3:
            del udict[item[0]]
    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
#     mytext=myfile.read()

# st=mytext.index("myTweets=TweetGetter")+22
# end=mytext.index('")', st)
def dbplotffnetwork():
    """Build the friend/follower graph among frequent tweeters and save it as GML.

    Screen names are read from the ``htglobalwarming`` table of ``tweetsdb.db``
    for rows with ``mintime < ConvertedTime < maxtime``; only names occurring
    more than seven times in that selection are kept, minus a small hard-coded
    block list.  For every kept user the friend and follower id lists are read
    from ``userdb.db`` or, when no rows are cached there, downloaded through
    ``DBTweetGetter`` and cached.  For each pair (user, other) an edge
    ``other -> user`` is added when other's id is in user's follower list and
    an edge ``user -> other`` when it is in user's friend list.  The graph is
    written to ``newfriendfollowerhtccgt29.gml``.

    Users whose friend list, follower list or user id cannot be obtained (the
    getter returns ``"FAIL"``) are excluded from all further processing.
    """
    # Local import: the file's import block is not guaranteed to provide it.
    from collections import Counter

    mintime = 1358090418
    maxtime = 1363963163
    #TODO Formalise this
    # Filtering against a set removes every listed name; the former chain of
    # remove() calls inside one try stopped at the first name that was absent.
    blocked = frozenset(["undercoverzen", "jivelad", "anabananazavala"])

    graph = nx.DiGraph()
    mygetter = DBTweetGetter(None, None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur = con.cursor()
        ucur = ucon.cursor()

        def load_ids(table, column, user, download, label):
            """Return (set of ids, ok) for *user* from the cache or the web."""
            # table/column are fixed identifiers chosen below; the screen name
            # is bound as a parameter rather than spliced into the SQL.
            ucur.execute("SELECT " + column + " FROM " + table +
                         " WHERE ScreenName=? COLLATE NOCASE", (user,))
            rows = ucur.fetchall()
            if rows:
                return set(row[0] for row in rows), True
            print("Downloading " + label + " for " + user)
            result = download(user, [], -1)
            ok = result != "FAIL"
            if ok:
                # int() keeps the stored value numeric, as the former unquoted
                # SQL literal did.
                ucur.executemany("INSERT INTO " + table + " VALUES(?, ?)",
                                 [(user, int(x)) for x in result])
            sleep(10)  # pause after every download attempt, successful or not
            return (set(result) if ok else set()), ok

        def lookup_id(name):
            """Return (user id, ok) for *name* from usermap or the web."""
            ucur.execute(
                "SELECT UserId FROM usermap WHERE ScreenName=? COLLATE NOCASE",
                (name,))
            rows = ucur.fetchall()
            if rows:
                return rows[0][0], True
            #get ID from web
            print("Downloading userid for " + name)
            x = mygetter.getIDfromUser(name)
            ok = x != "FAIL"
            if ok:
                ucur.execute("INSERT INTO usermap VALUES(?, ?)",
                             (name, int(x)))
                ucon.commit()
            sleep(10)
            # NOTE(review): as before, a freshly downloaded id is returned in
            # the getter's own type (it used to be string-concatenated), while
            # cached ids come back in the database's type -- confirm they
            # compare equal to the friend/follower ids.
            return x, ok

        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > ?"
            " AND ConvertedTime < ? COLLATE NOCASE", (mintime, maxtime))
        tusers = [row[0].lower() for row in cur.fetchall()]

        # Count once (O(n)) instead of calling list.count per row (O(n^2));
        # first-appearance order of the kept names is preserved.
        tally = Counter(tusers)
        users = []
        seen = set()
        for name in tusers:
            if name not in seen:
                seen.add(name)
                if tally[name] > 7:
                    users.append(name)

        print(len(users))

        #aim for 380
        users = [name for name in users if name not in blocked]

        # Failed users are recorded here instead of being removed from
        # ``users`` mid-iteration, which silently skipped the next element.
        excluded = set()
        for i, user in enumerate(users):
            if user in excluded:
                continue
            print("User " + str(i) + "/" + str(len(users)))
            #For each user check which other users are in friends, followers
            frl, ok = load_ids("friends", "FriendId", user,
                               mygetter.getFriends, "friends")
            if ok:
                # A friends failure is no longer overwritten by the follower
                # lookup (``skip`` used to be reset to False in between).
                fol, ok = load_ids("followers", "FollowerId", user,
                                   mygetter.getFollowers, "followers")
            ucon.commit()
            if not ok:
                excluded.add(user)
                continue

            graph.add_node(user)
            for other in users:
                if other in excluded:
                    continue
                sid, found = lookup_id(other)
                if not found:
                    excluded.add(other)
                    continue
                if sid in fol:
                    graph.add_edge(other, user)
                if sid in frl:
                    graph.add_edge(user, other)

        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        # Close both handles even when a query or download raises.
        con.close()
        ucon.close()
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime
tablename = "htclimaterealists"
queryname = '#climaterealists'
fname = "#climaterealists"

# Run the hashtag query against the table (presumably this fetches and stores
# new tweets -- confirm in DBTweetGetter.query).
myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 3160000, False)

# Write log file: append a timestamp and three summary counts, echoing each
# count to stdout as well.
with open("dblog" + fname + ".txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Build each line once so the console and the log cannot disagree (the
    # tweet count used to be queried twice).
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict(tablename)
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Drop entries whose value is below 3 (per the log message, the values are
    # per-user tweet counts).  Iterate over a snapshot: deleting from a dict
    # while iterating its live items view raises RuntimeError on Python 3.
    for item in list(udict.items()):
        if item[1] < 3:
            del udict[item[0]]
    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
#     mytext=myfile.read()
        s = s.replace(r"'", r"\'")

    s = s.replace(r"\'", r"''")
    s = "'" + s + "'"
    return s


# Connection and cursor for the tweet database.
tcon = lite.connect("tweetsdb.db")
tcur = tcon.cursor()
mintime = 1358090418

# Output database: create its two tables (CREATE TABLE raises if a table of
# that name already exists in the file).
con = lite.connect("diffusersetcdb.db")
cur = con.cursor()
for ddl in (
        "CREATE TABLE tweets(ScreenName TEXT, Tweet TEXT)",
        "CREATE TABLE descriptions(ScreenName TEXT, Description TEXT)",
):
    cur.execute(ddl)

# Getter constructed with both arguments set to None.
mytweetgetter = DBTweetGetter(None, None)

userlist = [
    'bottonT', 'bethanyjayne_o9', 'james12h', 'marclanders', 'ss0alexander',
    'jholuvu', 'lucid_serenity', 'aashishmusic', 'lachecard', 'ombuscool',
    'nurdan_dirik', 'orlandovips', 'cochran_sarah', 'erdemturgan',
    'wowpitbull', 'sonymusicmexico', 'juan20da', 'mountainspop', 'ldesherl',
    'hayleysellick', 'viewpointmelb', 'peped6', '0hastronaut', 'kateescorey',
    'tiamauli', 'ridwaancn', 'obsessionwill', '7daniel_ronaldo',
    'gatewaypundit', 'keylacabanillas', 's_cebi_o', 'tarndeep_virdi',
    'pauldoogood', 'aem4444', 'larindaguedes', 'mbleez', 'june_stoyer',
    'eifever', 'anshumminhas', 'joseaparicio90', 'just2opine', 'motiffmusic',
    'juanmaganmusic', 'drsarahsviews', 'annedinning', 'omilynn', 'c_harris82',
    'kencaldeira', 'sensato', 'acminaj95', 'weez100_', 'yungrugga_',
    'climate_sceptic', 'serega_markov', 'pitbull', 'ladyseastar',
    'robcarrollmusic', 'pecaito1', 'fanspitbull', 'yoloswag_1d', 'juufaria',
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime

# Run the hashtag query against the table (presumably this fetches and stores
# new tweets -- confirm in DBTweetGetter.query).
myTweets = DBTweetGetter("tweetsdb.db", "htclimatechange")
myTweets.query('#climatechange', 160000, False)

# Write log file: append a timestamp and three summary counts, echoing each
# count to stdout as well.
with open("dblog#climatechange.txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", "htclimatechange")

    # Build each line once so the console and the log cannot disagree (the
    # tweet count used to be queried twice).
    summary = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets("htclimatechange"))
    print(summary)
    myfile.write(summary + "\n")

    udict = readTweet.getUserDict("htclimatechange")
    summary = "Total number of users: " + str(len(udict))
    print(summary)
    myfile.write(summary + "\n")

    # Drop entries whose value is below 3 (per the log message, the values are
    # per-user tweet counts).  Iterate over a snapshot: deleting from a dict
    # while iterating its live items view raises RuntimeError on Python 3.
    for item in list(udict.items()):
        if item[1] < 3:
            del udict[item[0]]
    summary = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(summary)
    myfile.write(summary + "\n")

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
#     mytext=myfile.read()
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader

import datetime
# Hashtag names (without the leading "#"); each one is also passed as the
# second argument when constructing its DBTweetGetter on IPCCdb.db.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

# Run one query2 call per tag.
for name in tags:
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2("#%s" % name, 1600000, False)
    # Disabled log-file step, kept for reference:
    # with open("log"+name+".txt", "a") as myfile:
    #     now = datetime.datetime.now()
    #     timestr=now.strftime("%d_%m_%H%M")
    #     myfile.write(timestr+"\n")
    #     readTweet=DBTweetReader("IPCCdb.db", tablename)
    #     print "Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))
    #     myfile.write("Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))+"\n")
    #     udict=readTweet.getUserDict(tablename)
    #     print "Total number of users: " + str(len(udict.keys()))
    #     myfile.write("Total number of users: " + str(len(udict.keys()))+"\n")
    #     for item in udict.items():
    #         if item[1]<3:
    #             del udict[item[0]]
    #     print "Total number of users with 3 tweets or more: " + str(len(udict.keys()))
    # myfile.write("Total number of users with 3 tweets or more: " + str(len(udict.keys()))+"\n"
def dbplotffnetwork():
    """Build the friend/follower graph among frequent tweeters and save it as GML.

    Screen names are read from the ``htglobalwarming`` table of ``tweetsdb.db``
    for rows with ``mintime < ConvertedTime < maxtime``; only names occurring
    more than seven times in that selection are kept, minus a small hard-coded
    block list.  For every kept user the friend and follower id lists are read
    from ``userdb.db`` or, when no rows are cached there, downloaded through
    ``DBTweetGetter`` and cached.  For each pair (user, other) an edge
    ``other -> user`` is added when other's id is in user's follower list and
    an edge ``user -> other`` when it is in user's friend list.  The graph is
    written to ``newfriendfollowerhtccgt29.gml``.

    Users whose friend list, follower list or user id cannot be obtained (the
    getter returns ``"FAIL"``) are excluded from all further processing.
    """
    # Local import: the file's import block is not guaranteed to provide it.
    from collections import Counter

    mintime = 1358090418
    maxtime = 1363963163
    #TODO Formalise this
    # Filtering against a set removes every listed name; the former chain of
    # remove() calls inside one try stopped at the first name that was absent.
    blocked = frozenset(["undercoverzen", "jivelad", "anabananazavala"])

    graph = nx.DiGraph()
    mygetter = DBTweetGetter(None, None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur = con.cursor()
        ucur = ucon.cursor()

        def load_ids(table, column, user, download, label):
            """Return (set of ids, ok) for *user* from the cache or the web."""
            # table/column are fixed identifiers chosen below; the screen name
            # is bound as a parameter rather than spliced into the SQL.
            ucur.execute("SELECT " + column + " FROM " + table +
                         " WHERE ScreenName=? COLLATE NOCASE", (user,))
            rows = ucur.fetchall()
            if rows:
                return set(row[0] for row in rows), True
            print("Downloading " + label + " for " + user)
            result = download(user, [], -1)
            ok = result != "FAIL"
            if ok:
                # int() keeps the stored value numeric, as the former unquoted
                # SQL literal did.
                ucur.executemany("INSERT INTO " + table + " VALUES(?, ?)",
                                 [(user, int(x)) for x in result])
            sleep(10)  # pause after every download attempt, successful or not
            return (set(result) if ok else set()), ok

        def lookup_id(name):
            """Return (user id, ok) for *name* from usermap or the web."""
            ucur.execute(
                "SELECT UserId FROM usermap WHERE ScreenName=? COLLATE NOCASE",
                (name,))
            rows = ucur.fetchall()
            if rows:
                return rows[0][0], True
            #get ID from web
            print("Downloading userid for " + name)
            x = mygetter.getIDfromUser(name)
            ok = x != "FAIL"
            if ok:
                ucur.execute("INSERT INTO usermap VALUES(?, ?)",
                             (name, int(x)))
                ucon.commit()
            sleep(10)
            # NOTE(review): as before, a freshly downloaded id is returned in
            # the getter's own type (it used to be string-concatenated), while
            # cached ids come back in the database's type -- confirm they
            # compare equal to the friend/follower ids.
            return x, ok

        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > ?"
            " AND ConvertedTime < ? COLLATE NOCASE", (mintime, maxtime))
        tusers = [row[0].lower() for row in cur.fetchall()]

        # Count once (O(n)) instead of calling list.count per row (O(n^2));
        # first-appearance order of the kept names is preserved.
        tally = Counter(tusers)
        users = []
        seen = set()
        for name in tusers:
            if name not in seen:
                seen.add(name)
                if tally[name] > 7:
                    users.append(name)

        print(len(users))

        #aim for 380
        users = [name for name in users if name not in blocked]

        # Failed users are recorded here instead of being removed from
        # ``users`` mid-iteration, which silently skipped the next element.
        excluded = set()
        for i, user in enumerate(users):
            if user in excluded:
                continue
            print("User " + str(i) + "/" + str(len(users)))
            #For each user check which other users are in friends, followers
            frl, ok = load_ids("friends", "FriendId", user,
                               mygetter.getFriends, "friends")
            if ok:
                # A friends failure is no longer overwritten by the follower
                # lookup (``skip`` used to be reset to False in between).
                fol, ok = load_ids("followers", "FollowerId", user,
                                   mygetter.getFollowers, "followers")
            ucon.commit()
            if not ok:
                excluded.add(user)
                continue

            graph.add_node(user)
            for other in users:
                if other in excluded:
                    continue
                sid, found = lookup_id(other)
                if not found:
                    excluded.add(other)
                    continue
                if sid in fol:
                    graph.add_edge(other, user)
                if sid in frl:
                    graph.add_edge(user, other)

        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        # Close both handles even when a query or download raises.
        con.close()
        ucon.close()
    print("Wrote graph")
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

# Getter constructed with both arguments set to None.
mytweetgetter = DBTweetGetter(None, None)

# One connection/cursor pair per database file.
con = lite.connect("tweetsdb.db")
cur = con.cursor()
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

# Tweet tables whose authors are collected below.
tables = [
    'htclimatechange',
    'htclimate',
    'htglobalwarming',
    'ClimateChange',
    'GlobalWarming',
]

# Distinct screen names of each table, appended table by table; a name that
# occurs in several tables therefore appears several times.
names = []
for table in tables:
    cur.execute("SELECT DISTINCT ScreenName FROM " + table)
    names.extend(row[0] for row in cur.fetchall())

# Screen names that already have a row in usermap.
ucur.execute("SELECT ScreenName FROM usermap")
already = [row[0] for row in ucur.fetchall()]

deleted = []
i = 0
l = len(names)