def _extract_mentions(tweet, valid_chars):
    """Return the lower-cased handle that follows each "@" in *tweet*.

    A handle is the longest run of characters taken from *valid_chars* that
    starts immediately after an "@".  An "@" that is not followed by a valid
    character (including an "@" at the very end of the text) yields nothing.

    Args:
        tweet: The tweet text, in its original case.
        valid_chars: Container of the characters allowed in a handle.

    Returns:
        A list of lower-cased handles in order of appearance (duplicates kept).
    """
    mentions = []
    at = tweet.find("@")
    while at != -1:
        end = at + 1
        # Bounds-checked scan: a trailing "@" simply produces an empty handle
        # instead of indexing past the end of the string.
        while end < len(tweet) and tweet[end] in valid_chars:
            end += 1
        if end > at + 1:
            mentions.append(tweet[at + 1:end].lower())
        at = tweet.find("@", at + 1)
    return mentions


def dbconversation(tablename):
    """Build the weighted mention graph for one tweet table and write it as GML.

    Reads the non-retweet rows of *tablename* in tweetsdb.db whose
    ConvertedTime is later than the hard-coded ``mintime``.  For every tweet
    that does not contain "rt:", an edge author -> mentioned user is added
    (or its ``weight`` incremented) for each "@handle" that is itself one of
    the authors in the selection and is not the tweet's own author.  The
    graph is written to ``"newconv" + tablename + "nortdir.gml"``.

    Args:
        tablename: Name of the tweet table.  It is concatenated into the SQL
            text because SQLite cannot bind identifiers, so it must come from
            trusted code, never from user input.
    """
    mintime = 1358090418
    # NOTE(review): this getter is never used below; it is kept only in case
    # constructing it has side effects -- confirm and remove.
    mygetter = DBTweetGetter(None, None)

    con = lite.connect("tweetsdb.db")
    try:
        cur = con.cursor()
        where = " WHERE ConvertedTime>? AND IsRetweet=0"
        cur.execute("SELECT DISTINCT ScreenName FROM " + tablename + where,
                    (mintime,))
        # A set gives O(1) membership tests in the per-mention check below.
        users = set(row[0].lower() for row in cur.fetchall())
        cur.execute("SELECT Tweet, ScreenName FROM " + tablename + where,
                    (mintime,))
        tweets = cur.fetchall()
    finally:
        con.close()

    graph = nx.DiGraph()
    for text, screen_name in tweets:
        if "rt:" in text.lower():
            continue
        author = screen_name.lower()
        for name in _extract_mentions(text, valid_characters):
            if name == author or name not in users:
                continue
            if graph.has_edge(author, name):
                graph[author][name]['weight'] += 1
            else:
                graph.add_edge(author, name, weight=1)
    print("Built graph")
    nx.write_gml(graph, "newconv" + tablename + "nortdir.gml")
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader
import datetime

# Hashtags (without the leading "#") to harvest into IPCCdb.db.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

# One getter per tag: the bare tag is the getter's second constructor
# argument and "#<tag>" is the search query.
for name in tags:
    hashtag = "#" + name
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2(hashtag, 1600000, False)

# Per-tag log writing is currently disabled; the old code is kept below.
#Write log file
# with open("log"+name+".txt", "a") as myfile:
# now = datetime.datetime.now()
# timestr=now.strftime("%d_%m_%H%M")
# myfile.write(timestr+"\n")
# readTweet=DBTweetReader("IPCCdb.db", tablename)
# print "Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))
# myfile.write("Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))+"\n")
# udict=readTweet.getUserDict(tablename)
# print "Total number of users: " + str(len(udict.keys()))
# myfile.write("Total number of users: " + str(len(udict.keys()))+"\n")
# for item in udict.items():
# if item[1]<3:
# del udict[item[0]]
# print "Total number of users with 3 tweets or more: " + str(len(udict.keys()))
# myfile.write("Total number of users with 3 tweets or more: " + str(len(udict.keys()))+"\n"
# Preparation for a usermap pass: gather the screen names found in the tweet
# tables and the screen names already present in the usermap table.
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

mytweetgetter = DBTweetGetter(None, None)
# Tweet database (read below) and user database (usermap table).
con = lite.connect("tweetsdb.db")
cur = con.cursor()
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()
tables = [
    'htclimatechange', 'htclimate', 'htglobalwarming', 'ClimateChange',
    'GlobalWarming'
]
# Screen names from every tweet table.  DISTINCT is applied per table only,
# so a name that occurs in several tables is appended once for each of them.
names = []
for item in tables:
    # Table names come from the fixed list above, so concatenation is safe.
    cur.execute("SELECT DISTINCT ScreenName FROM " + item)
    a = cur.fetchall()
    for item2 in a:
        names.append(item2[0])
# Screen names that already have a row in usermap.
already = []
ucur.execute("SELECT ScreenName FROM usermap")
b = ucur.fetchall()
for item in b:
    already.append(item[0])
# Initialised here; presumably filled in by the rest of the script, which is
# not visible in this fragment.
deleted = []
i = 0
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

# Fetch tweets for one hashtag into tweetsdb.db, then append a short summary
# (timestamp, tweet total, user totals) to a per-hashtag log file.
tablename = "htclimate"
queryname = '#climate'
fname = "#climate"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 160000, False)

#Write log file
with open("dblog" + fname + ".txt", "a") as myfile:

    def _report(line):
        """Echo one summary line to stdout and append it to the log file."""
        print(line)
        myfile.write(line + "\n")

    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)
    # Each figure is computed once and reused for both stdout and the log,
    # instead of being queried separately for each destination.
    _report("Total number of tweets: " +
            str(readTweet.getNumberOfTweets(tablename)))

    # NOTE(review): values are compared against 3 below, so udict presumably
    # maps a user to that user's tweet count -- confirm in DBTweetReader.
    udict = readTweet.getUserDict(tablename)
    _report("Total number of users: " + str(len(udict)))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # items() view raises RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]
    _report("Total number of users with 3 tweets or more: " + str(len(udict)))
import sqlite3 as lite import sys import numpy as np from classtweetgetter import DBTweetGetter from time import sleep mygetter=DBTweetGetter(None,None) con=lite.connect("tweetsdb.db") cur=con.cursor() ucon=lite.connect("userdb.db") ucur=ucon.cursor() crawlers=[] chainlengths=[] nsame=[] noded={} chainlfile=open("chaindata.txt","w") nsamefile=open("nsamedata.txt","w") class chainCrawler(object): #note this method will repeat chains if there is V structure, should be minimal effect def __init__(self, node, n): self.n=n self.node=node self.stopwalk=False def walk(self): while self.stopwalk==False: self.step() return 0 def step(self):
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

# Fetch #climatechange tweets into tweetsdb.db, then append a short summary
# (timestamp, tweet total, user totals) to dblog#climatechange.txt.
tablename = "htclimatechange"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query('#climatechange', 160000, False)

#Write log file
with open("dblog#climatechange.txt", "a") as myfile:

    def _report(line):
        """Echo one summary line to stdout and append it to the log file."""
        print(line)
        myfile.write(line + "\n")

    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)
    # Each figure is computed once and reused for both stdout and the log,
    # instead of being queried separately for each destination.
    _report("Total number of tweets: " +
            str(readTweet.getNumberOfTweets(tablename)))

    # NOTE(review): values are compared against 3 below, so udict presumably
    # maps a user to that user's tweet count -- confirm in DBTweetReader.
    udict = readTweet.getUserDict(tablename)
    _report("Total number of users: " + str(len(udict)))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # items() view raises RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]
    _report("Total number of users with 3 tweets or more: " + str(len(udict)))

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
# mytext=myfile.read()
# st=mytext.index("myTweets=TweetGetter")+22
# end=mytext.index('")', st)
def _frequent_users(screen_names, min_count):
    """Return the names occurring more than *min_count* times, first-seen order."""
    from collections import Counter

    counts = Counter(screen_names)
    seen = set()
    frequent = []
    for name in screen_names:
        if counts[name] > min_count and name not in seen:
            seen.add(name)
            frequent.append(name)
    return frequent


def _cached_ids(ucur, table, column, download, user, label):
    """Return the ids stored for *user* in *table*, downloading them on a miss.

    Args:
        ucur: Cursor on the user database.
        table: Cache table name ("friends" or "followers"); internal constant.
        column: Id column of that table; internal constant.
        download: Callable invoked as ``download(user, [], -1)`` on a cache
            miss; it returns an iterable of ids or the string "FAIL".
        user: Lower-cased screen name.
        label: Word used in the progress message.

    Returns:
        A set of integer ids, or None when the download failed (after the
        same 10-second pause the caller used to apply).
    """
    ucur.execute("SELECT " + column + " FROM " + table +
                 " WHERE ScreenName=? COLLATE NOCASE", (user,))
    rows = ucur.fetchall()
    if rows:
        return set(int(row[0]) for row in rows)
    print("Downloading " + label + " for " + user)
    ids = download(user, [], -1)
    if ids == "FAIL":
        sleep(10)
        return None
    # Ids are normalised to int so cached and freshly downloaded values
    # compare equal in the membership tests of the caller.
    ids = [int(one_id) for one_id in ids]
    ucur.executemany("INSERT INTO " + table + " VALUES(?,?)",
                     [(user, one_id) for one_id in ids])
    return set(ids)


def _lookup_user_id(ucur, ucon, getter, name):
    """Return the numeric id of *name* from usermap, downloading it on a miss.

    Returns None when the download fails (after a 10-second pause).
    """
    ucur.execute("SELECT UserId FROM usermap WHERE ScreenName=? COLLATE NOCASE",
                 (name,))
    rows = ucur.fetchall()
    if rows:
        return int(rows[0][0])
    print("Downloading userid for " + name)
    uid = getter.getIDfromUser(name)
    if uid == "FAIL":
        sleep(10)
        return None
    uid = int(uid)
    ucur.execute("INSERT INTO usermap VALUES(?,?)", (name, uid))
    ucon.commit()
    return uid


def dbplotffnetwork():
    """Build the friend/follower graph of frequent tweeters and write it as GML.

    Users are the screen names with more than 7 rows in htglobalwarming
    between the hard-coded ``mintime`` and ``maxtime``, minus a fixed
    exclusion list.  For each user the friend and follower id lists are read
    from userdb.db (downloaded and cached on a miss); an edge other -> user is
    added when other's id is among the user's followers, and user -> other
    when it is among the user's friends.  A user whose friends, followers or
    id cannot be downloaded is left out for the rest of the run.

    NOTE(review): the output name "newfriendfollowerhtccgt29.gml" does not
    match the table (htglobalwarming) or the threshold (> 7) used here.
    """
    mintime = 1358090418
    maxtime = 1363963163
    # TODO Formalise this
    excluded = ("undercoverzen", "jivelad", "anabananazavala")

    mygetter = DBTweetGetter(None, None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur = con.cursor()
        ucur = ucon.cursor()
        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > ?"
            " AND ConvertedTime < ? COLLATE NOCASE", (mintime, maxtime))
        tusers = [row[0].lower() for row in cur.fetchall()]
        users = _frequent_users(tusers, 7)
        print(len(users))
        # Each excluded name is dropped independently; previously a missing
        # first name aborted the removal of the remaining ones.
        users = [name for name in users if name not in excluded]

        graph = nx.DiGraph()
        dropped = set()   # users whose data could not be downloaded
        user_ids = {}     # screen name -> numeric id, resolved once per run
        total = len(users)
        # Failed users are recorded in `dropped` instead of being removed from
        # `users` during iteration, which used to skip the following entry.
        for i, user in enumerate(users):
            if user in dropped:
                continue
            print("User " + str(i) + "/" + str(total))
            #For each user check which other users are in friends, followers
            frl = _cached_ids(ucur, "friends", "FriendId",
                              mygetter.getFriends, user, "friends")
            fol = _cached_ids(ucur, "followers", "FollowerId",
                              mygetter.getFollowers, user, "followers")
            ucon.commit()
            # A failure of either download skips the user; the friends failure
            # used to be overwritten before the followers lookup.
            if frl is None or fol is None:
                dropped.add(user)
                continue
            graph.add_node(user)
            for other in users:
                if other in dropped:
                    continue
                if other not in user_ids:
                    uid = _lookup_user_id(ucur, ucon, mygetter, other)
                    if uid is None:
                        dropped.add(other)
                        continue
                    user_ids[other] = uid
                sid = user_ids[other]
                if sid in fol:
                    graph.add_edge(other, user)
                if sid in frl:
                    graph.add_edge(user, other)
        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        con.close()
        ucon.close()
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

# Fetch tweets for one hashtag into tweetsdb.db, then append a short summary
# (timestamp, tweet total, user totals) to a per-hashtag log file.
tablename = "htclimaterealists"
queryname = '#climaterealists'
fname = "#climaterealists"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 3160000, False)

#Write log file
with open("dblog" + fname + ".txt", "a") as myfile:

    def _report(line):
        """Echo one summary line to stdout and append it to the log file."""
        print(line)
        myfile.write(line + "\n")

    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)
    # Each figure is computed once and reused for both stdout and the log,
    # instead of being queried separately for each destination.
    _report("Total number of tweets: " +
            str(readTweet.getNumberOfTweets(tablename)))

    # NOTE(review): values are compared against 3 below, so udict presumably
    # maps a user to that user's tweet count -- confirm in DBTweetReader.
    udict = readTweet.getUserDict(tablename)
    _report("Total number of users: " + str(len(udict)))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # items() view raises RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]
    _report("Total number of users with 3 tweets or more: " + str(len(udict)))

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
# mytext=myfile.read()
s = s.replace(r"'", r"\'") s = s.replace(r"\'", r"''") s = "'" + s + "'" return s tcon = lite.connect("tweetsdb.db") tcur = tcon.cursor() mintime = 1358090418 con = lite.connect("diffusersetcdb.db") cur = con.cursor() cur.execute("CREATE TABLE tweets(ScreenName TEXT, Tweet TEXT)") cur.execute("CREATE TABLE descriptions(ScreenName TEXT, Description TEXT)") mytweetgetter = DBTweetGetter(None, None) userlist = [ 'bottonT', 'bethanyjayne_o9', 'james12h', 'marclanders', 'ss0alexander', 'jholuvu', 'lucid_serenity', 'aashishmusic', 'lachecard', 'ombuscool', 'nurdan_dirik', 'orlandovips', 'cochran_sarah', 'erdemturgan', 'wowpitbull', 'sonymusicmexico', 'juan20da', 'mountainspop', 'ldesherl', 'hayleysellick', 'viewpointmelb', 'peped6', '0hastronaut', 'kateescorey', 'tiamauli', 'ridwaancn', 'obsessionwill', '7daniel_ronaldo', 'gatewaypundit', 'keylacabanillas', 's_cebi_o', 'tarndeep_virdi', 'pauldoogood', 'aem4444', 'larindaguedes', 'mbleez', 'june_stoyer', 'eifever', 'anshumminhas', 'joseaparicio90', 'just2opine', 'motiffmusic', 'juanmaganmusic', 'drsarahsviews', 'annedinning', 'omilynn', 'c_harris82', 'kencaldeira', 'sensato', 'acminaj95', 'weez100_', 'yungrugga_', 'climate_sceptic', 'serega_markov', 'pitbull', 'ladyseastar', 'robcarrollmusic', 'pecaito1', 'fanspitbull', 'yoloswag_1d', 'juufaria',
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader
import datetime

# Fetch #climatechange tweets into tweetsdb.db, then append a short summary
# (timestamp, tweet total, user totals) to dblog#climatechange.txt.
tablename = "htclimatechange"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query('#climatechange', 160000, False)

#Write log file
with open("dblog#climatechange.txt", "a") as myfile:

    def _report(line):
        """Echo one summary line to stdout and append it to the log file."""
        print(line)
        myfile.write(line + "\n")

    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")

    readTweet = DBTweetReader("tweetsdb.db", tablename)
    # Each figure is computed once and reused for both stdout and the log,
    # instead of being queried separately for each destination.
    _report("Total number of tweets: " +
            str(readTweet.getNumberOfTweets(tablename)))

    # NOTE(review): values are compared against 3 below, so udict presumably
    # maps a user to that user's tweet count -- confirm in DBTweetReader.
    udict = readTweet.getUserDict(tablename)
    _report("Total number of users: " + str(len(udict)))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # items() view raises RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]
    _report("Total number of users with 3 tweets or more: " + str(len(udict)))

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
# mytext=myfile.read()
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader
import datetime

# Hashtags (without the leading "#") to harvest into IPCCdb.db.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

# One getter per tag: the bare tag is the getter's second constructor
# argument and "#<tag>" is the search query.
for name in tags:
    hashtag = "#" + name
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2(hashtag, 1600000, False)

# Per-tag log writing is currently disabled; the old code is kept below.
#Write log file
# with open("log"+name+".txt", "a") as myfile:
# now = datetime.datetime.now()
# timestr=now.strftime("%d_%m_%H%M")
# myfile.write(timestr+"\n")
# readTweet=DBTweetReader("IPCCdb.db", tablename)
# print "Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))
# myfile.write("Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))+"\n")
# udict=readTweet.getUserDict(tablename)
# print "Total number of users: " + str(len(udict.keys()))
# myfile.write("Total number of users: " + str(len(udict.keys()))+"\n")
# for item in udict.items():
# if item[1]<3:
# del udict[item[0]]
# print "Total number of users with 3 tweets or more: " + str(len(udict.keys()))
# myfile.write("Total number of users with 3 tweets or more: " + str(len(udict.keys()))+"\n"
def _frequent_users(screen_names, min_count):
    """Return the names occurring more than *min_count* times, first-seen order."""
    from collections import Counter

    counts = Counter(screen_names)
    seen = set()
    frequent = []
    for name in screen_names:
        if counts[name] > min_count and name not in seen:
            seen.add(name)
            frequent.append(name)
    return frequent


def _cached_ids(ucur, table, column, download, user, label):
    """Return the ids stored for *user* in *table*, downloading them on a miss.

    Args:
        ucur: Cursor on the user database.
        table: Cache table name ("friends" or "followers"); internal constant.
        column: Id column of that table; internal constant.
        download: Callable invoked as ``download(user, [], -1)`` on a cache
            miss; it returns an iterable of ids or the string "FAIL".
        user: Lower-cased screen name.
        label: Word used in the progress message.

    Returns:
        A set of integer ids, or None when the download failed (after the
        same 10-second pause the caller used to apply).
    """
    ucur.execute("SELECT " + column + " FROM " + table +
                 " WHERE ScreenName=? COLLATE NOCASE", (user,))
    rows = ucur.fetchall()
    if rows:
        return set(int(row[0]) for row in rows)
    print("Downloading " + label + " for " + user)
    ids = download(user, [], -1)
    if ids == "FAIL":
        sleep(10)
        return None
    # Ids are normalised to int so cached and freshly downloaded values
    # compare equal in the membership tests of the caller.
    ids = [int(one_id) for one_id in ids]
    ucur.executemany("INSERT INTO " + table + " VALUES(?,?)",
                     [(user, one_id) for one_id in ids])
    return set(ids)


def _lookup_user_id(ucur, ucon, getter, name):
    """Return the numeric id of *name* from usermap, downloading it on a miss.

    Returns None when the download fails (after a 10-second pause).
    """
    ucur.execute("SELECT UserId FROM usermap WHERE ScreenName=? COLLATE NOCASE",
                 (name,))
    rows = ucur.fetchall()
    if rows:
        return int(rows[0][0])
    print("Downloading userid for " + name)
    uid = getter.getIDfromUser(name)
    if uid == "FAIL":
        sleep(10)
        return None
    uid = int(uid)
    ucur.execute("INSERT INTO usermap VALUES(?,?)", (name, uid))
    ucon.commit()
    return uid


def dbplotffnetwork():
    """Build the friend/follower graph of frequent tweeters and write it as GML.

    Users are the screen names with more than 7 rows in htglobalwarming
    between the hard-coded ``mintime`` and ``maxtime``, minus a fixed
    exclusion list.  For each user the friend and follower id lists are read
    from userdb.db (downloaded and cached on a miss); an edge other -> user is
    added when other's id is among the user's followers, and user -> other
    when it is among the user's friends.  A user whose friends, followers or
    id cannot be downloaded is left out for the rest of the run.

    NOTE(review): the output name "newfriendfollowerhtccgt29.gml" does not
    match the table (htglobalwarming) or the threshold (> 7) used here.
    """
    mintime=1358090418
    maxtime=1363963163
    # TODO Formalise this
    excluded=("undercoverzen", "jivelad", "anabananazavala")

    mygetter=DBTweetGetter(None,None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur=con.cursor()
        ucur=ucon.cursor()
        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > ?"
            " AND ConvertedTime < ? COLLATE NOCASE", (mintime, maxtime))
        tusers=[row[0].lower() for row in cur.fetchall()]
        users=_frequent_users(tusers, 7)
        print(len(users))
        # Each excluded name is dropped independently; previously a missing
        # first name aborted the removal of the remaining ones.
        users=[name for name in users if name not in excluded]

        graph=nx.DiGraph()
        dropped=set()   # users whose data could not be downloaded
        user_ids={}     # screen name -> numeric id, resolved once per run
        total=len(users)
        # Failed users are recorded in `dropped` instead of being removed from
        # `users` during iteration, which used to skip the following entry.
        for i, user in enumerate(users):
            if user in dropped:
                continue
            print("User " + str(i)+"/"+str(total))
            #For each user check which other users are in friends, followers
            frl=_cached_ids(ucur, "friends", "FriendId",
                            mygetter.getFriends, user, "friends")
            fol=_cached_ids(ucur, "followers", "FollowerId",
                            mygetter.getFollowers, user, "followers")
            ucon.commit()
            # A failure of either download skips the user; the friends failure
            # used to be overwritten before the followers lookup.
            if frl is None or fol is None:
                dropped.add(user)
                continue
            graph.add_node(user)
            for other in users:
                if other in dropped:
                    continue
                if other not in user_ids:
                    uid=_lookup_user_id(ucur, ucon, mygetter, other)
                    if uid is None:
                        dropped.add(other)
                        continue
                    user_ids[other]=uid
                sid=user_ids[other]
                if sid in fol:
                    graph.add_edge(other, user)
                if sid in frl:
                    graph.add_edge(user, other)
        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        con.close()
        ucon.close()
    print("Wrote graph")
# Preparation for a usermap pass: gather the screen names found in the tweet
# tables and the screen names already present in the usermap table.
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

mytweetgetter=DBTweetGetter(None, None)
# Tweet database (read below) and user database (usermap table).
con=lite.connect("tweetsdb.db")
cur=con.cursor()
ucon=lite.connect("userdb.db")
ucur=ucon.cursor()
tables=['htclimatechange','htclimate','htglobalwarming','ClimateChange','GlobalWarming']
# Screen names from every tweet table.  DISTINCT is applied per table only,
# so a name that occurs in several tables is appended once for each of them.
names=[]
for item in tables:
    # Table names come from the fixed list above, so concatenation is safe.
    cur.execute("SELECT DISTINCT ScreenName FROM " + item)
    a=cur.fetchall()
    for item2 in a:
        names.append(item2[0])
# Screen names that already have a row in usermap.
already=[]
ucur.execute("SELECT ScreenName FROM usermap")
b=ucur.fetchall()
for item in b:
    already.append(item[0])
# Initialised here; presumably used by the rest of the script, which is not
# visible in this fragment.
deleted=[]
i=0
# Number of collected names, duplicates included.
l=len(names)