def _mentioned_handles(tweet, handle_chars):
    """Return the lower-cased text following each "@" in *tweet*.

    For every "@" the handle is the longest run of characters that are all
    members of *handle_chars*, starting right after the "@".  An "@" that is
    followed by no such character (including one at the very end of the
    tweet) yields an empty string, so the result has one entry per "@".
    """
    handles = []
    at = tweet.find("@")
    while at != -1:
        begin = at + 1
        end = begin
        # Bounds-checked scan: stops at the end of the tweet instead of
        # relying on an exception plus a sentinel character.
        while end < len(tweet) and tweet[end] in handle_chars:
            end += 1
        handles.append(tweet[begin:end].lower())
        at = tweet.find("@", begin)
    return handles


def dbconversation(tablename, mintime=1358090418):
    """Build a weighted directed mention graph for one table and write it as GML.

    Reads non-retweet rows (IsRetweet=0) with ConvertedTime greater than
    *mintime* from table *tablename* in "tweetsdb.db".  For every such tweet
    that contains "@" and does not contain "rt:", an edge author -> mentioned
    user is added for each mentioned handle that is itself an author in the
    selected rows and is not the tweet's own author.  Repeated mentions
    increment the edge's 'weight' attribute.  The graph is written to
    "newconv<tablename>nortdir.gml".

    Args:
        tablename: name of the tweet table.  It is concatenated into the SQL
            text (table names cannot be bound as parameters), so it must come
            from trusted code.
        mintime: lower bound (exclusive) for ConvertedTime; defaults to the
            value that used to be hard-coded.
    """
    graph = nx.DiGraph()
    # Not used below; kept because the constructor may have side effects
    # that callers rely on -- TODO confirm and remove.
    mygetter = DBTweetGetter(None, None)

    con = lite.connect("tweetsdb.db")
    try:
        cur = con.cursor()
        where = " WHERE ConvertedTime>? AND IsRetweet=0"
        cur.execute("SELECT DISTINCT ScreenName FROM " + tablename + where,
                    (mintime,))
        # A set makes the per-mention membership test O(1).
        users = set(row[0].lower() for row in cur.fetchall())
        cur.execute("SELECT Tweet, ScreenName FROM " + tablename + where,
                    (mintime,))
        rows = cur.fetchall()
    finally:
        con.close()

    for tweet, screen_name in rows:
        if "@" not in tweet or "rt:" in tweet.lower():
            continue
        author = screen_name.lower()
        # valid_characters is the module-level set of characters allowed in
        # a handle (defined elsewhere in this file).
        for name in _mentioned_handles(tweet, valid_characters):
            if name == author or name not in users:
                continue
            if graph.has_edge(author, name):
                graph[author][name]['weight'] += 1
            else:
                graph.add_edge(author, name, weight=1)

    print("Built graph")
    nx.write_gml(graph, "newconv" + tablename + "nortdir.gml")
    print("Wrote graph")
# Example #2
# 0
from classtweetgetter import DBTweetGetter
#from classtweetreader import DBTweetReader

import datetime
# Hashtag names without the leading "#".  Each one is passed as the second
# argument to DBTweetGetter and, prefixed with "#", as the query2 search term.
tags = [
    "IPCC",
    "UNFCCC",
    "AR5",
    "WGII",
    "WGIII",
    "LTFchat",
    "Pages2k",
    "Pages",
    "HadCRUT",
    "GISS",
]

for name in tags:
    myTweets = DBTweetGetter("IPCCdb.db", name)
    myTweets.query2("#{0}".format(name), 1600000, False)
    #Write log file
    # with open("log"+name+".txt", "a") as myfile:
    #     now = datetime.datetime.now()
    #     timestr=now.strftime("%d_%m_%H%M")
    #     myfile.write(timestr+"\n")
    #     readTweet=DBTweetReader("IPCCdb.db", tablename)
    #     print "Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))
    #     myfile.write("Total number of tweets: " + str(readTweet.getNumberOfTweets(tablename))+"\n")
    #     udict=readTweet.getUserDict(tablename)
    #     print "Total number of users: " + str(len(udict.keys()))
    #     myfile.write("Total number of users: " + str(len(udict.keys()))+"\n")
    #     for item in udict.items():
    #         if item[1]<3:
    #             del udict[item[0]]
    #     print "Total number of users with 3 tweets or more: " + str(len(udict.keys()))
    # myfile.write("Total number of users with 3 tweets or more: " + str(len(udict.keys()))+"\n"
import sqlite3 as lite
import sys
from classtweetgetter import DBTweetGetter

mytweetgetter = DBTweetGetter(None, None)

# One connection/cursor pair per database file.
con = lite.connect("tweetsdb.db")
cur = con.cursor()
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

tables = [
    'htclimatechange',
    'htclimate',
    'htglobalwarming',
    'ClimateChange',
    'GlobalWarming',
]

# Distinct screen names per table, concatenated in table order (a name that
# occurs in several tables therefore appears several times).
names = []
for table in tables:
    cur.execute("SELECT DISTINCT ScreenName FROM " + table)
    names.extend(row[0] for row in cur.fetchall())

# Screen names that already have a row in usermap.
ucur.execute("SELECT ScreenName FROM usermap")
already = [row[0] for row in ucur.fetchall()]

deleted = []
i = 0
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime
# Harvest tweets for one hashtag into its table, then append a timestamped
# summary (tweet count, user count, users with >= 3 tweets) to a log file.
tablename = "htclimate"
queryname = '#climate'
fname = "#climate"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query(queryname, 160000, False)
#Write log file
with open("dblog" + fname + ".txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Ask for the tweet count once and reuse the line for console and log,
    # instead of issuing the same query twice.
    tweets_line = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(tweets_line)
    myfile.write(tweets_line + "\n")

    udict = readTweet.getUserDict(tablename)
    users_line = "Total number of users: " + str(len(udict))
    print(users_line)
    myfile.write(users_line + "\n")

    # Drop users with fewer than 3 tweets.  Iterate over a snapshot: deleting
    # from a dict while looping over its live items() view raises
    # RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]

    active_line = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(active_line)
    myfile.write(active_line + "\n")
import sqlite3 as lite
import sys
import numpy as np
from classtweetgetter import DBTweetGetter
from time import sleep


mygetter = DBTweetGetter(None, None)

# Tweet database handle.
con = lite.connect("tweetsdb.db")
cur = con.cursor()

# User database handle.
ucon = lite.connect("userdb.db")
ucur = ucon.cursor()

# Module-level accumulators, all starting out empty.
crawlers = []
chainlengths = []
nsame = []
noded = {}

# Output files; mode "w" truncates any existing content on open.
chainlfile = open("chaindata.txt", "w")
nsamefile = open("nsamedata.txt", "w")
class chainCrawler(object):
    #note this method will repeat chains if there is V structure, should be minimal effect
    def __init__(self, node, n):
        self.n=n
        self.node=node
        self.stopwalk=False

    def walk(self):
        while self.stopwalk==False:
            self.step()
        return 0
    def step(self):
def _fetch_relation_ids(ucur, user, table, column, download, label):
    """Return the ids stored for *user* in *table*, downloading them if absent.

    Looks up *column* in *table* (userdb) for *user*.  When no rows exist,
    calls ``download(user, [], -1)``, caches every returned id in *table*,
    then sleeps 10 seconds.  Returns the list of ids, or None when the
    download returned "FAIL".
    """
    ucur.execute("SELECT " + column + " FROM " + table +
                 " WHERE ScreenName=? COLLATE NOCASE", (user,))
    rows = ucur.fetchall()
    if rows:
        return [row[0] for row in rows]

    print("Downloading " + label + " for " + user)
    ids = download(user, [], -1)
    failed = ids == "FAIL"
    if not failed:
        for one_id in ids:
            # The id stays an inline numeric literal (as before) so its
            # storage class in the database does not change; only the
            # screen name is bound as a parameter.
            ucur.execute("INSERT INTO " + table + " VALUES(?," +
                         str(one_id) + ")", (user,))
    sleep(10)
    return None if failed else ids


def _lookup_user_id(ucur, ucon, getter, name):
    """Return the user id for screen name *name*, or None if it is unavailable.

    Uses the usermap table first; otherwise asks ``getter.getIDfromUser``,
    caches a successful answer in usermap (committing immediately) and
    sleeps 10 seconds.
    """
    ucur.execute("SELECT UserId FROM usermap WHERE ScreenName=? "
                 "COLLATE NOCASE", (name,))
    rows = ucur.fetchall()
    if rows:
        return rows[0][0]

    #get ID from web
    print("Downloading userid for " + name)
    x = getter.getIDfromUser(name)
    failed = x == "FAIL"
    if not failed:
        ucur.execute("INSERT INTO usermap VALUES(?," + x + ")", (name,))
        ucon.commit()
    sleep(10)
    # NOTE(review): a freshly downloaded id is returned exactly as the getter
    # produced it (it is concatenated as a string above), whereas cached ids
    # come back from SQLite.  If those types differ, membership tests against
    # the friend/follower lists will not match until the next run -- confirm.
    return None if failed else x


def dbplotffnetwork():
    """Build the friend/follower graph of frequent tweeters and write it as GML.

    Selects screen names from htglobalwarming (tweetsdb.db) whose
    ConvertedTime lies strictly between two fixed timestamps and keeps those
    that occur more than 7 times.  For each kept user the friend and
    follower id lists are read from userdb.db, or downloaded and cached
    there when missing.  A directed edge user -> other is added when other's
    id is among user's friends, and other -> user when it is among user's
    followers.  The graph is written to "newfriendfollowerhtccgt29.gml".

    Users whose friends, followers or id cannot be downloaded are excluded
    from the remainder of the run.
    """
    graph = nx.DiGraph()
    #Plot following/follower network
    mintime = 1358090418
    maxtime = 1363963163
    mygetter = DBTweetGetter(None, None)
    con = lite.connect("tweetsdb.db")
    ucon = lite.connect("userdb.db")
    try:
        cur = con.cursor()
        ucur = ucon.cursor()

        cur.execute(
            "SELECT ScreenName FROM htglobalwarming WHERE ConvertedTime > " +
            str(mintime) + " AND ConvertedTime < " + str(maxtime) +
            " COLLATE NOCASE")
        tusers = [row[0].lower() for row in cur.fetchall()]

        # Count every name once instead of calling list.count() per row.
        tweet_counts = {}
        for name in tusers:
            tweet_counts[name] = tweet_counts.get(name, 0) + 1

        # Keep names with more than 7 rows, in order of first appearance.
        users = []
        listed = set()
        for name in tusers:
            if name not in listed and tweet_counts[name] > 7:
                listed.add(name)
                users.append(name)

        print(len(users))

        # Earlier, now disabled, variants also added users from
        # htclimatechange (more than 29 rows) and htagw (more than 2 rows).
        #aim for 380

        #TODO Formalise this
        # Each name is filtered independently, so a missing one no longer
        # prevents the others from being removed.
        excluded = ("undercoverzen", "jivelad", "anabananazavala")
        users = [name for name in users if name not in excluded]

        # Users that turned out to be unusable during this run.  They are
        # tracked here rather than removed from `users`, because removing
        # from a list that is being iterated silently skips elements.
        failed = set()

        for i, user in enumerate(users):
            print("User " + str(i) + "/" + str(len(users)))
            if user in failed:
                continue

            #For each user check which other users are in friends, followers
            frl = _fetch_relation_ids(ucur, user, "friends", "FriendId",
                                      mygetter.getFriends, "friends")
            if frl is None:
                # A failed friends download excludes the user outright; the
                # flag is no longer reset by the followers phase.
                failed.add(user)
                continue

            fol = _fetch_relation_ids(ucur, user, "followers", "FollowerId",
                                      mygetter.getFollowers, "followers")
            ucon.commit()
            if fol is None:
                failed.add(user)
                continue

            friend_ids = set(frl)
            follower_ids = set(fol)
            graph.add_node(user)
            for other in users:
                if other in failed:
                    continue
                sid = _lookup_user_id(ucur, ucon, mygetter, other)
                if sid is None:
                    failed.add(other)
                    continue
                if sid in follower_ids:
                    graph.add_edge(other, user)
                if sid in friend_ids:
                    graph.add_edge(user, other)

        print("Built graph")
        nx.write_gml(graph, "newfriendfollowerhtccgt29.gml")
        ucon.commit()
    finally:
        # Release both database handles even if a download or query fails.
        con.close()
        ucon.close()
    print("Wrote graph")
from classtweetgetter import DBTweetGetter
from classtweetreader import DBTweetReader

import datetime

# Harvest #climatechange tweets into the htclimatechange table, then append
# a timestamped summary (tweet count, user count, users with >= 3 tweets)
# to the log file.
tablename = "htclimatechange"

myTweets = DBTweetGetter("tweetsdb.db", tablename)
myTweets.query('#climatechange', 160000, False)
#Write log file
with open("dblog#climatechange.txt", "a") as myfile:
    now = datetime.datetime.now()
    timestr = now.strftime("%d_%m_%H%M")
    myfile.write(timestr + "\n")
    readTweet = DBTweetReader("tweetsdb.db", tablename)

    # Ask for the tweet count once and reuse the line for console and log,
    # instead of issuing the same query twice.
    tweets_line = "Total number of tweets: " + str(
        readTweet.getNumberOfTweets(tablename))
    print(tweets_line)
    myfile.write(tweets_line + "\n")

    udict = readTweet.getUserDict(tablename)
    users_line = "Total number of users: " + str(len(udict))
    print(users_line)
    myfile.write(users_line + "\n")

    # Drop users with fewer than 3 tweets.  Iterate over a snapshot: deleting
    # from a dict while looping over its live items() view raises
    # RuntimeError on Python 3.
    for user, count in list(udict.items()):
        if count < 3:
            del udict[user]

    active_line = "Total number of users with 3 tweets or more: " + str(
        len(udict))
    print(active_line)
    myfile.write(active_line + "\n")

#modify this script
# with open("gettweetsClimateChange.py", "r") as myfile:
#     mytext=myfile.read()