Example #1
0
def sen_run_over_db(cursor):
    """Fill in the "sen" column for recent, not yet scored tweets.

    For every user table the "sen" column is added when it is missing. Each
    tweet that is younger than two years and whose "sen" value is still the
    string "None" is passed to sentiment() and the result is written back as
    a string. Changes are committed once per user.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    all_users = db_get_all_users(cursor)
    now = time.time()
    max_age_days = 365 * 2

    for index, user in enumerate(all_users):
        print(user, index, len(all_users))

        if db_is_col(cursor, user, "sen") == False:
            db_add_col(cursor, user, "sen")
            print("adding")

        rows = db_get_cols_from_table(
            cursor, user, ["tweet_id", "tweet", "sen", "date"])

        for row in rows:
            # Whole days between the tweet's timestamp and the start of the run.
            age_days = int((now - int(row[3])) / 60 / 60 / 24)
            if not age_days < max_age_days:
                continue
            if not row[2] == "None":
                # Already has a stored value; leave it alone.
                continue

            cleaned = row[1].replace("\\\\", "")
            score = sentiment(cleaned)
            # One dot per scored tweet as a progress indicator.
            print(".", end='', flush=True)
            db_update_record(cursor, user, "tweet_id", row[0], "sen",
                             str(score))

        print("")
        db_commit()
Example #2
0
def retweet_anal_to_flat_files(cursor):
    """Dump retweets of known users into one flat file per retweeted user.

    The "retweet_anal" directory is created when missing and one empty file
    per user is (re)created. Then every tweet of every user that starts with
    "RT @" is inspected: the account name is the text between "RT @" and the
    first ":". If that account is itself in the user list, the line
    "<date>:<first 100 characters of the tweet>" is appended to the file
    named after that account.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)
    out_dir = "retweet_anal"

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(out_dir, exist_ok=True)

    # Truncate every per-user file so a rerun does not append to old data.
    for name in users:
        with open(os.path.join(out_dir, name), 'w'):
            pass

    # Set lookup instead of scanning the user list for every retweet.
    known_users = set(users)

    for u in users:
        print(u)
        rows = db_get_cols_from_table(cursor, u, ["date", "tweet"])
        for row in rows:
            text = row[1]
            stamp = int(row[0])

            if not text.startswith("RT @"):
                continue

            retweeted = text.split(":")[0][4:]
            if retweeted not in known_users:
                continue

            # The context manager closes the handle even if the write fails.
            with open(os.path.join(out_dir, retweeted), 'a') as f:
                f.write(str(stamp) + ":" + text[:100] + "\n")
Example #3
0
def short_to_long_over_db(cursor):
    """Resolve the first URL in recent tweets and store it in the "url" column.

    For every user table the "url" column (length 1000) is added when it is
    missing. Tweets younger than 365 days whose "url" value is still the
    string "None" are searched for the first http(s) URL. Found URLs are
    collected and handed to fetch_parallel() in batches (once more than 40
    are pending, or at the last tweet of the user). Each returned entry is
    written back to the tweet's "url" column with the scheme prefix removed
    and the value cut to 1000 characters; an entry whose value is "error" is
    stored unchanged. A commit is issued after every record.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    max_days = 365
    # Raw string: "\s" inside a normal literal is an invalid escape sequence.
    # Compiled once instead of being looked up for every tweet.
    url_pattern = re.compile(r"(?P<url>https?://[^\s]+)")

    users = db_get_all_users(cursor)

    for u in users:
        print(u)
        if db_is_col(cursor, u, "url") == False:
            db_add_col(cursor, u, "url", length="1000")
            print("adding url")

        cur_time = time.time()
        tweets = db_get_cols_from_table(cursor, u,
                                        ["date", "tweet", "url", "tweet_id"])
        last_index = len(tweets) - 1
        pending = []  # [url, tweet_id] pairs waiting to be fetched

        for ii, row in enumerate(tweets):
            date = int(row[0])
            text = row[1]
            stored_url = row[2]
            tweet_id = row[3]

            if stored_url == "None":
                if ((cur_time - date) / 60 / 60 / 24) < max_days:
                    match = url_pattern.search(text)
                    if match is not None:
                        found = match.group("url")
                        # Skip URLs containing the literal text backslash-u2026
                        # (presumably an escaped ellipsis marking a truncated
                        # link -- confirm against the stored tweet format).
                        if "\\u2026" not in found:
                            pending.append([found, tweet_id])

            # Flush when the batch is large enough or at the user's last tweet.
            if len(pending) > 40 or (len(pending) > 0 and ii == last_index):
                resolved = fetch_parallel(pending)

                for item in resolved:
                    if item[0] != "error":
                        if item[0].startswith('https://'):
                            item[0] = item[0][8:]
                        if item[0].startswith('http://'):
                            item[0] = item[0][7:]

                        if len(item[0]) > 1000:
                            item[0] = item[0][:1000]

                    db_update_record(cursor, u, "tweet_id", item[1],
                                     "url", item[0])
                    db_commit()

                pending = []
def time_on_twitter(cursor):
    """Print running totals of original tweets and retweets of the last year.

    Tweets younger than 365 days are counted as retweets when they start
    with "RT " and as originals otherwise. The counters are NOT reset between
    users, so the line printed after each user shows the totals so far
    together with the number of users and the value
    (originals * 30 + retweets * 10) / 60 / 60.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)

    retweet_total = 0
    original_total = 0

    for user in users:
        print(user)
        now = time.time()
        rows = db_get_cols_from_table(cursor, user, ["date", "tweet"])

        for row in rows:
            text = row[1]
            age_days = (now - int(row[0])) / 60 / 60 / 24
            if not age_days < 365:
                continue
            if text.startswith("RT "):
                retweet_total += 1
            else:
                original_total += 1

        # Weights 30.0 and 10.0 look like seconds per tweet, giving hours
        # after the two divisions -- TODO confirm the intended units.
        weighted = (original_total * 30.0 + retweet_total * 10.0) / 60 / 60
        print(original_total, retweet_total, len(users), weighted)
Example #5
0
def at_usage_graph(cursor):
    """Draw a horizontal bar chart of the first 20 "@" usage entries.

    The word store is cleared, every tweet returned by
    db_get_tweets_in_last_time() for every user is fed to
    word_add_array_at(), and after word_clean() the first 20 names/values
    from words_ret_hist() are plotted in reversed order. The figure is saved
    to /var/www/html/graphs/at_usage.png.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    print("making @ graph")
    words_delete_all()

    for user in db_get_all_users(cursor):
        for tweet in db_get_tweets_in_last_time(cursor, user):
            word_add_array_at(tweet)

    word_clean()
    all_names, all_values = words_ret_hist()

    # Keep the first 20 entries and flip them so the first entry ends up as
    # the top bar of the horizontal chart.
    top_names = all_names[:20]
    top_values = all_values[:20]
    top_names.reverse()
    top_values.reverse()

    positions = np.arange(len(top_names))

    plt.figure()
    plt.barh(positions, top_values, align='center')

    plt.yticks(positions, top_names)
    plt.xlabel('Usage')
    plt.xticks(rotation='vertical')
    plt.savefig('/var/www/html/graphs/at_usage.png', bbox_inches='tight')
Example #6
0
def do_retweet_anal():
    """Compute retweet delays in hours and write them to retweets.txt.

    For every user the file retweet_anal/<user> is read. Each line is split
    on ":"; field 0 is the integer timestamp and field 2 (minus its first
    character) is used as the tweet text. Entries are sorted by text, then
    timestamp. For each distinct text get_tweet_time() is called once and the
    difference between every entry's timestamp and that time, in whole
    hours, is appended to a list shared across all users. The list is
    printed after each user and finally written one value per line.

    Raises:
        IndexError: if a line has fewer than three ":"-separated fields.
        ValueError: if the first field of a line is not an integer.
    """
    cursor = db_get_mariadb_cursor()
    users = db_get_all_users(cursor)
    data = []

    for u in users:
        print(u)
        # The context manager closes the file; the original leaked the handle.
        with open(os.path.join("retweet_anal", u)) as f:
            lines = f.read().splitlines()

        entries = []
        for line in lines:
            parts = line.split(":")
            entries.append((parts[2][1:], int(parts[0])))

        # Sorting groups identical texts together so the lookup below runs
        # once per distinct text.
        entries.sort()

        # None (not "") as the sentinel: an empty first text must still
        # trigger the lookup, otherwise start_time would be unbound.
        current_text = None
        start_time = None
        for text, stamp in entries:
            if text != current_text:
                current_text = text
                start_time = get_tweet_time(cursor, u, text)

            data.append(int((stamp - start_time) / 60 / 60))

        print(data)

    with open("retweets.txt", 'w') as f:
        f.writelines(str(value) + "\n" for value in data)
Example #7
0
    def __init__(self):
        """Build the main window and its widgets.

        The central widget stacks, top to bottom: the user combo box, the
        combo box filled from class_get_files(), the text display, the row of
        Back/Next/Save buttons, the status label and the progress bar.
        """
        super(main, self).__init__()
        self.setMinimumSize(800, 400)
        # The window is shown before the widgets below are created.
        self.show()
        self.tweet_pos = 0

        self.all_user_tweets = []

        # `cursor` is neither a parameter nor an attribute; it is read from
        # an enclosing (presumably module-level) scope.
        self.users = db_get_all_users(cursor)

        vbox = QVBoxLayout()

        self.status = QLabel("hello")

        # Button row: each button is wired to its callback on this object.
        hbox = QHBoxLayout()
        self.back = QPushButton("Back")
        self.back.clicked.connect(self.callback_back)
        self.next = QPushButton("Next")
        self.next.clicked.connect(self.callback_next)
        self.save = QPushButton("Save")
        self.save.clicked.connect(self.callback_save)

        self.prog = QProgressBar()

        hbox.addWidget(self.back)
        hbox.addWidget(self.next)
        hbox.addWidget(self.save)

        self.cb = QComboBox()
        self.cb.setStyleSheet("font: 24pt ;")

        self.cb_users = QComboBox()

        # One entry per item returned by class_get_files().
        for file in class_get_files():
            self.cb.addItem(file)

        for u in self.users:
            self.cb_users.addItem(u)

        # Connected only after the items were added above.
        # NOTE(review): presumably so that filling the box does not fire
        # callback_user_changed -- confirm before reordering.
        self.cb_users.currentIndexChanged.connect(self.callback_user_changed)

        # A layout cannot be added as a widget directly, so the button row
        # is wrapped in a plain QWidget.
        h_button_widget = QWidget()
        h_button_widget.setLayout(hbox)

        self.display = QTextEdit()
        self.display.setStyleSheet("font: 24pt ;")
        self.display.cursorPositionChanged.connect(self.callback_cursor_move)

        vbox.addWidget(self.cb_users)
        vbox.addWidget(self.cb)
        vbox.addWidget(self.display)
        vbox.addWidget(h_button_widget)
        vbox.addWidget(self.status)
        vbox.addWidget(self.prog)

        wvbox = QWidget()
        wvbox.setLayout(vbox)

        self.setCentralWidget(wvbox)
def time_domain(cursor, words=None):
    """Plot tweets per day for the given words and post the image.

    A per-day counter list of length `days` (a name defined outside this
    function) is passed to user_make_hist() for every user; that helper's
    return value is ignored, so it is expected to update the list in place.
    The list is drawn as a bar chart against the day index, saved to
    /var/www/html/graphs/time_domain.png and handed to tweet_image().

    Args:
        cursor: database cursor handed through to the db_* helpers.
        words: list of words to look for; None (the default) means an empty
            list. A fresh list is created per call so that no mutable
            default is shared between calls.
    """
    if words is None:
        words = []

    print("Time domain", words)
    users = db_get_all_users(cursor)

    store = [0] * days
    past = list(range(len(store)))

    for user in users:
        user_make_hist(cursor, store, user, words=words)

    title = ",".join(words)
    plt.figure(figsize=(7.5, 4.0))
    plt.title("Tweets containing:" + title, fontsize=20)

    # Bar colour derived from the title. Note that str hashes are randomised
    # per interpreter run unless PYTHONHASHSEED is fixed, so the colour is
    # only stable within one process.
    colors = tuple(
        float(hash(title + channel) % 256) / 256 for channel in "rgb")

    plt.bar(past, store, color=colors, edgecolor="none")
    plt.ylabel('Tweets/day', fontsize=20)
    plt.xlabel('Time (days in past)', fontsize=25)

    save_file = "/var/www/html/graphs/time_domain.png"
    plt.savefig(save_file, bbox_inches='tight')

    tweet_image(save_file,
                title="The number of times MPs used the words '" + title +
                "' during the last year.")
Example #9
0
def clas_stats(cursor):
    """Store a per-user summary of "clas" values in the user_names table.

    For every user the word store is cleared, every value of the user's
    "clas" column is added with word_add(), and the histogram returned by
    words_ret_hist() is serialised as "name=count;name=count;..." (no
    trailing ";"). The string is written to the "clas" column of the
    user's row in "user_names", committed and printed.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)
    for u in users:
        print(u)
        words_delete_all()
        for row in db_get_cols_from_table(cursor, u, ["clas"]):
            word_add(row[0])

        names, counts = words_ret_hist()
        summary = ";".join(
            name + "=" + str(count) for name, count in zip(names, counts))

        db_update_record(cursor, "user_names", "user_id", u, "clas", summary)
        db_commit()
        print(summary)
    # A stray bare name ("adas") used to follow the loop and raised NameError
    # once all users had been processed; it has been removed so the function
    # returns normally.
def hashtag_get_most_used(cursor,delta=172800/2):
    """Collect hashtag usage from recent tweets and list the names in a file.

    The word store is cleared, every tweet returned by
    db_get_tweets_in_last_time() for every user is fed to
    word_add_array_hashtag(), and after word_clean() the histogram from
    words_ret_hist() is fetched. The names are written one per line to
    word_usage.txt in the current directory.

    Args:
        cursor: database cursor handed through to the db_* helpers.
        delta: passed on as the `delta` keyword of
            db_get_tweets_in_last_time(); the default is 172800/2.

    Returns:
        The (names, values) pair returned by words_ret_hist().
    """
    words_delete_all()
    for u in db_get_all_users(cursor):
        for tweet in db_get_tweets_in_last_time(cursor, u, delta=delta):
            word_add_array_hashtag(tweet)

    word_clean()
    names, values = words_ret_hist()

    # The context manager closes the file even if a write fails.
    with open("word_usage.txt", "w") as out:
        out.writelines(name + "\n" for name in names)

    return names, values
Example #11
0
def re_tweet(cursor, delta=172800 / 2):
    """Write a table of the most retweeted accounts to a stats file.

    For every tweet that starts with "RT @", the text between "RT " and the
    first ":" (which keeps the leading "@") is fed to word_add_array_at().
    The resulting histogram (at most 400 entries) is written to
    /var/www/html/stats/retweets.txt, one line per entry:
    "<name> <count> <times the name without its first character occurs in
    the user list> <party>", where party comes from db_user_get_job1() and
    is "notmp" when that returns None.

    Args:
        cursor: database cursor handed through to the db_* helpers.
        delta: currently unused -- tweets are always fetched with
            delta=1e10. Kept so existing callers keep working.
    """
    words_delete_all()

    users = db_get_all_users(cursor)
    for u in users:
        tweets = db_get_tweets_in_last_time(cursor, u, delta=1e10)
        for tweet in tweets:
            if tweet.startswith("RT @"):
                word_add_array_at(tweet.split(":")[0][3:])

        print(u)

    names, values = words_ret_hist(max_len=400)

    # The context manager closes the file even if a lookup or write fails.
    with open('/var/www/html/stats/retweets.txt', 'w') as f:
        print(names)
        for name, value in zip(names, values):
            handle = name[1:]  # drop the first character (the "@")
            ismp = users.count(handle)
            party = db_user_get_job1(cursor, handle)
            if party is None:
                party = "notmp"
            f.write(name + " " + str(value) + " " + str(ismp) + " " + party +
                    "\n")
    # A stray bare name ("aasdsad") used to follow the write loop and raised
    # NameError after the file had been written; it has been removed.
def cal_retweets(cursor):
    """Plot the distribution of per-user retweet percentages.

    When the local `update` switch is True, the percentage of tweets from
    the last 100 days that start with "RT " is recomputed for every user and
    stored in the "retweets" column of "user_names". The stored values are
    then read back together with "job_type1", converted with int(), grouped
    by party prefix (con/lab/lib/snp) and collected into an overall list
    that leaves out zero values. Depending on the local `for_web` switch
    either a wide multi-party histogram is saved to
    /var/www/html/graphs/retweets.png or a single overall histogram is saved
    to retweet_dist.png.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)

    # Manual switch: set to True to recompute the stored percentages.
    update = False
    if update:
        for u in users:
            print(u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["date", "tweet"])
            rt = 0
            original = 0
            for row in tweets:
                age_days = (cur_time - int(row[0])) / 60 / 60 / 24
                if age_days < 100.0:
                    if row[1].startswith("RT "):
                        rt += 1
                    else:
                        original += 1

            total = rt + original
            frac = 100.0 * rt / total if total != 0 else 0.0
            db_update_record(cursor, "user_names", "user_id", u, "retweets",
                             str(frac))

            db_commit()

    rows = db_get_cols_from_table(cursor, "user_names",
                                  ["retweets", "job_type1"])

    con = []
    lab = []
    lib = []
    snp = []
    by_prefix = (("con", con), ("lab", lab), ("lib", lib), ("snp", snp))
    v = []

    for row in rows:
        value = int(row[0])
        party = row[1]
        for prefix, bucket in by_prefix:
            if party.startswith(prefix):
                bucket.append(value)

        if value != 0:
            v.append(value)

    # Bin edges 0.0, 1.0, ..., 99.0.
    xbins = [float(edge) for edge in range(100)]

    # The first entry of con/lab/snp is overwritten with 0.0 (the purpose is
    # not evident from this function). An empty list used to raise
    # IndexError here; it is now skipped.
    for bucket in (con, lab, snp):
        if bucket:
            bucket[0] = 0.0

    # Manual switch between the wide web graphic and the square figure.
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Re-tweets from MPs as % over last 100 days", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)

        plt.hist(v, bins=xbins, alpha=0.5, color='green')
        plt.hist(con, bins=xbins, alpha=0.8, color='blue')
        plt.hist(lab, bins=xbins, alpha=0.8, color='red')
        plt.hist(snp, bins=xbins, alpha=0.8, color='purple')
        plt.hist(lib, bins=xbins, alpha=0.8, color='yellow')

        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage of tweets that are retweets', fontsize=25)

        plt.savefig("/var/www/html/graphs/retweets.png", bbox_inches='tight')
    else:
        matplotlib.rcParams['font.family'] = 'Open Sans'

        plt.figure(figsize=(6.0, 6.0), dpi=300)
        ax = plt.subplot(111)
        plt.gcf().subplots_adjust(bottom=0.15)

        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        plt.hist(v, bins=xbins, alpha=1.0, color='#36845b')

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage retweets', fontsize=25)

        plt.savefig("retweet_dist.png", bbox_inches='tight')
Example #13
0
def clas(cursor, delta=172800 / 2):
    """Classify the tweets of a fixed slice of users and store the result.

    For users[271] .. users[561] the "clas" column is added when missing and
    every tweet's text is passed to clas_clas_text(); the first element of
    its result is written to the tweet's "clas" column. One commit is issued
    per user.

    The code after the user loop operates on whatever the last loop
    iteration left behind and writes a per-user "interests" file; see the
    NOTE(review) comments there -- it does not look finished.

    Args:
        cursor: database cursor handed through to the db_* helpers.
        delta: not used anywhere in this function.
    """
    users = db_get_all_users(cursor)

    tweets = []
    #users=["JoJohnsonUK"]
    done = 0
    # NOTE(review): hard-coded index window; users[i] raises IndexError when
    # there are fewer than 562 users. Looks like a manual batch/resume range.
    for i in range(271, 562):
        u = users[i]
        print(u, i, len(users))
        #ada
        if db_is_col(cursor, u, "clas") == False:
            db_add_col(cursor, u, "clas")
            print("adding")

        tweets = db_get_cols_from_table(cursor, u, ["tweet_id", "tweet"])

        for t in tweets:
            # The local name `clas` shadows this function's own name from
            # here on (harmless inside the function).
            clas, tot, res = clas_clas_text(t[1])
            db_update_record(cursor, u, "tweet_id", t[0], "clas", clas)
            #print(clas)
            #print(t)

        db_commit()

        #adas
    # Everything below runs once, after the loop: `u`, `tweets`, `clas`,
    # `tot` and `res` still hold the values of the last user / last tweet.
    # NOTE(review): if no tweet was processed at all, `tot` is unbound here.
    types = None
    if tot != 0:
        m = len(tweets)
        if m > 500:
            m = 500

        for i in range(0, m):

            #print(tweets[i])
            # NOTE(review): passes the whole row here, whereas the loop above
            # passes only the tweet text (t[1]) -- confirm which is intended.
            out = clas_clas_text(tweets[i])
            if types == None:
                # `types` becomes the same object as `res`; `res` is not
                # reassigned in this loop, so the else-branch below adds each
                # entry to itself.
                types = res
            else:
                #print(types)
                for ii in range(0, len(res)):
                    types[ii][1] = types[ii][1] + res[ii][1]
            if out != "unknown":
                # NOTE(review): `clas` was last assigned from
                # clas_clas_text() and written to the DB as the class value;
                # it does not look like a counter -- confirm its type.
                clas = clas + 1

        path = "/var/www/html/interests/" + u + ".txt"
        #print(types)

        #asadasds

        if clas != 0:
            # Largest score first.
            types = sorted(types, key=itemgetter(1), reverse=True)

            sum_ids = 0
            for i in range(0, len(types)):
                sum_ids = sum_ids + types[i][1]

            clas_perent = 100.0 * (clas / tot)
            print(u, types, clas_perent)
            # One line per type: "<name> <integer share of sum_ids in %>".
            f = open(path, 'w')
            for i in range(0, len(types)):
                f.write(types[i][0] + " " +
                        str(int((types[i][1] / sum_ids) * 100.0)) + "\n")

            #f.write("clas "+str(int(clas_perent))+"\n")

            f.close()
Example #14
0
def cal_tweets_per_day(cursor):
    """Plot histograms of tweets per day, overall and per party.

    When the local `update` switch is True, the number of tweets from the
    last 100 days divided by 100 is stored for every user in the
    "tweets_per_day" column of "user_names". The stored values are then read
    back together with "job_type1", converted with int() and grouped by
    party prefix (con/lab/lib/snp). Depending on the local `for_web` switch
    either one combined histogram is saved under /var/www/html/graphs/ or
    five separate figures (all, con, lab, snp, lib) are saved under
    ./tweets_per_day/.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)

    # Manual switch: set to True to recompute the stored values.
    update = False
    if update:
        for user in users:
            print(user)
            now = time.time()
            rows = db_get_cols_from_table(cursor, user, ["date"])
            recent = 0
            for row in rows:
                age_days = (now - int(row[0])) / 60 / 60 / 24
                if age_days < 100.0:
                    recent += 1

            db_update_record(cursor, "user_names", "user_id", user,
                             "tweets_per_day", str(recent / 100.0))

            db_commit()

    stored = db_get_cols_from_table(cursor, "user_names",
                                    ["tweets_per_day", "job_type1"])

    con = []
    lab = []
    lib = []
    snp = []
    everyone = []
    buckets = (("con", con), ("lab", lab), ("lib", lib), ("snp", snp))

    for record in stored:
        party = record[1]
        for prefix, bucket in buckets:
            if party.startswith(prefix):
                bucket.append(int(record[0]))

        everyone.append(int(record[0]))

    # Bin edges 0.0, 1.0, ..., 59.0.
    xbins = [float(edge) for edge in range(60)]

    # Manual switch between the combined web graphic and separate figures.
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Tweets from MPs per day", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)

        layers = (
            (everyone, 0.5, 'green'),
            (con, 0.8, 'blue'),
            (lab, 0.8, 'red'),
            (snp, 0.8, 'purple'),
            (lib, 0.8, 'yellow'),
        )
        for values, opacity, colour in layers:
            plt.hist(values, bins=xbins, alpha=opacity, color=colour)

        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Number of tweets/day', fontsize=25)

        plt.savefig("/var/www/html/graphs/tweets_per_day.png",
                    bbox_inches='tight')
        return

    matplotlib.rcParams['font.family'] = 'Open Sans'

    # --- all users ---
    plt.clf()
    plt.figure(figsize=(6.0, 6.0), dpi=300)
    ax = plt.subplot(111)

    plt.gcf().subplots_adjust(bottom=0.15)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    ax.hist(everyone, bins=xbins, alpha=1.0, color='#36845b')

    plt.ylabel('Number of MPs', fontsize=20)
    plt.xlabel('Number of tweets/day', fontsize=20)

    plt.savefig("./tweets_per_day/tweets_per_day_all.png",
                bbox_inches='tight')

    # --- one figure per party ---
    # (values, colour, fix the y range to 0..40?, output file); only the
    # "lib" figure keeps the automatic y range.
    party_figures = (
        (con, "#00539f", True,
         "./tweets_per_day/tweets_per_day_all_con.png"),
        (lab, "#d50000", True,
         "./tweets_per_day/tweets_per_day_all_lab.png"),
        (snp, "#fff685", True,
         "./tweets_per_day/tweets_per_day_all_snp.png"),
        (lib, '#faa01a', False,
         "./tweets_per_day/tweets_per_day_all_lib.png"),
    )
    for values, colour, fixed_y, target in party_figures:
        setup_graph()
        if fixed_y:
            plt.gca().set_ylim([0, 40])
        plt.hist(values, bins=xbins, alpha=0.8, color=colour)
        plt.savefig(target, bbox_inches='tight')
Example #15
0
def http_retweets(cursor):
    """Plot the distribution of the share of tweets that contain "http".

    When the local `update` switch is True, the number of tweets from the
    last 100 days containing "http" is divided by the number of ALL fetched
    tweets of the user, and that percentage is stored in the "http_tweets"
    column of "user_names". The stored values are then read back together
    with "job_type1", printed, converted with int(), grouped by party prefix
    (con/lab/lib/snp) and drawn as overlaid histograms saved to
    /var/www/html/graphs/http_tweets.png.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)

    # Manual switch: set to True to recompute the stored percentages.
    update = False
    if update:
        for user in users:
            print(user)
            now = time.time()
            rows = db_get_cols_from_table(cursor, user, ["date", "tweet"])
            with_link = 0
            for row in rows:
                text = row[1]
                age_days = (now - int(row[0])) / 60 / 60 / 24
                if age_days < 100.0 and "http" in text:
                    with_link += 1

            if with_link != 0:
                frac = 100.0 * with_link / (len(rows))
            else:
                frac = 0.0

            db_update_record(cursor, "user_names", "user_id", user,
                             "http_tweets", str(frac))

            db_commit()

    stored = db_get_cols_from_table(cursor, "user_names",
                                    ["http_tweets", "job_type1"])

    print(stored)

    con = []
    lab = []
    lib = []
    snp = []
    everyone = []
    buckets = (("con", con), ("lab", lab), ("lib", lib), ("snp", snp))

    for record in stored:
        party = record[1]
        for prefix, bucket in buckets:
            if party.startswith(prefix):
                bucket.append(int(record[0]))

        everyone.append(int(record[0]))

    # Bin edges 0.0, 1.0, ..., 99.0.
    xbins = [float(edge) for edge in range(100)]

    plt.figure(figsize=(25.0, 6.0), dpi=300)
    plt.title("Percentage of tweets which contain web links", fontsize=30)
    plt.gcf().subplots_adjust(bottom=0.15)

    layers = (
        (everyone, 0.5, 'green'),
        (con, 0.8, 'blue'),
        (lab, 0.8, 'red'),
        (snp, 0.8, 'purple'),
        (lib, 0.8, 'yellow'),
    )
    for values, opacity, colour in layers:
        plt.hist(values, bins=xbins, alpha=opacity, color=colour)

    plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

    plt.ylabel('Number of MPs', fontsize=25)
    plt.xlabel('Percent', fontsize=25)

    plt.savefig("/var/www/html/graphs/http_tweets.png", bbox_inches='tight')
def hashtag_flow(cursor):
    """Render an animated GIF of hashtag usage over the last 48 hours.

    The working directory is changed to /var/www/html/graphs/ and old
    hashtag_flow*.png frames are deleted. For every quarter-hour offset from
    0 to 47.75 hours, hashtags from tweets in a window (width=4) at that
    offset are counted; when at least 10 names are available, the first 10
    are drawn as a horizontal bar chart and saved as a numbered PNG frame.
    The frames are combined with the external `convert` command, the GIF is
    copied into the thumbs directory under an MD5-of-current-time file name,
    and a tweet linking to it is posted.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    print("making hashtag flow graph")
    path = "/var/www/html/graphs/"
    thumbs = "/var/www/html/thumbs"
    if not os.path.isdir(thumbs):
        os.mkdir(thumbs)

    os.chdir("/var/www/html/graphs/")
    for stale_frame in glob.glob("hashtag_flow*.png"):
        os.remove(stale_frame)

    frame_count = 0
    frame_files = ""
    pop_hash_tags = ""
    plt.figure(figsize=(8.0, 4.0))

    # 192 quarter-hour steps: ago = 0.0, 0.25, ..., 47.75 (exact in binary).
    for step in range(192):
        ago = step * 0.25
        print("ago=", ago)

        words_delete_all()
        for user in db_get_all_users(cursor):
            window = db_get_tweets_in_time_frame(cursor, user, width=4,
                                                 time_ago=ago)
            for tweet in window:
                word_add_array_hashtag(tweet)

        word_clean()
        names, values = words_ret_hist()
        if len(names) < 10:
            # Not enough hashtags for a frame at this offset.
            continue

        names = names[:10]
        values = values[:10]
        names.reverse()
        values.reverse()

        # The tags of the very first offset go into the tweet text; taken
        # before the names are normalised below.
        if step == 0:
            pop_hash_tags = " ".join(names)

        names = [name.strip().lower() for name in names]
        # One RGBA colour per tag, derived from the tag's hash.
        bar_colors = [
            [float(hash(name + channel) % 256) / 256 for channel in "rgb"]
            + [1.0]
            for name in names
        ]

        positions = np.arange(len(names))
        plt.cla()
        plt.barh(positions, values, align='center', color=bar_colors)
        plt.yticks(positions, names)
        ago_to_2dp = "%.2f" % ago
        plt.title("Number of tweets " + str(ago_to_2dp) +
                  " hours ago from MPs")
        plt.xlabel('Tweets')
        plt.xlim([0, 40])
        plt.xticks(rotation='vertical')
        plt.subplots_adjust(left=0.35, right=0.95, top=0.9, bottom=0.2)

        frame_name = "hashtag_flow" + str(frame_count) + '.png'
        plt.savefig('/var/www/html/graphs/' + frame_name)
        frame_files = frame_files + " " + frame_name
        frame_count += 1

    os.system("convert -delay 30 -loop 0 -quality 50% " + frame_files +
              " hashtag_flow.gif")

    # File name derived from the current time so each run gets a new one.
    digest = hashlib.md5()
    digest.update(str(time.time()).encode('utf-8'))
    random_file = digest.hexdigest() + ".gif"
    shutil.copyfile(os.path.join(path, "hashtag_flow.gif"),
                    os.path.join(thumbs, random_file))
    my_twitter_tweet("Top hashtags used by MPs in last 48 hours: http://mpstweets.com/flow.php?fname="+random_file+" "+pop_hash_tags)
Example #17
0
def noun_anal(cursor):
    """Plot the leading entries of the word histogram in noun_hist.dat.

    When the local `update` switch is True, every tweet from the last 100
    days is fed to word_add_array() and the histogram from words_ret_hist()
    is written to noun_hist.dat as "<name> <count>" lines. The first 17
    lines of noun_hist.dat are then read and drawn as a bar chart: a
    vertical chart saved to /var/www/html/graphs/nouns.png when the local
    `for_web` switch is True, otherwise a horizontal chart saved to
    nouns.png.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    words_delete_all()

    users = db_get_all_users(cursor)

    # Manual switch: set to True to rebuild noun_hist.dat from the database.
    update = False
    if update:
        print(len(users))
        for i, u in enumerate(users):
            print(i, u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["tweet", "date"])

            for row in tweets:
                date = int(row[1])
                if ((cur_time - date) / 60 / 60 / 24) < 100.0:
                    word_add_array(row[0])

        names, values = words_ret_hist()
        with open('noun_hist.dat', 'w') as f:
            for name, value in zip(names, values):
                f.write(name + " " + str(value) + "\n")

    # The context manager closes the file; the original leaked the handle.
    with open('noun_hist.dat') as f:
        lines = f.read().splitlines()

    labels = []
    counts = []
    # The original loop stopped after index 16, i.e. at most 17 entries.
    for line in lines[:17]:
        fields = line.split()
        labels.append(fields[0])
        counts.append(int(fields[1]))

    labels.reverse()
    counts.reverse()

    y_pos = np.arange(len(labels))

    # Manual switch between the web graphic and the square figure.
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 16.0), dpi=300)
        plt.bar(y_pos, counts, color="blue")

        plt.xticks(y_pos, labels, fontsize=35)
        plt.legend(loc='best', fontsize=30)
        plt.ylabel('Usage (Tweets)', fontsize=30)
        plt.yticks(fontsize=30)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.tight_layout()
        plt.savefig('/var/www/html/graphs/nouns.png')
    else:
        plt.figure(figsize=(10.0, 10.0), dpi=300)

        ax = plt.subplot(111)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        plt.barh(y_pos, counts, color="#36845b")
        # Tick.label was deprecated in Matplotlib 3.1 and later removed;
        # tick_params sets the x tick label size without touching it.
        ax.tick_params(axis='x', labelsize=25)

        plt.yticks(y_pos, labels, fontsize=25)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.legend(loc='best', fontsize=25)
        plt.xlabel('Usage (Tweets)', fontsize=25)
        plt.tight_layout()
        plt.savefig('nouns.png')
Example #18
0
def topics(cursor):
    """Plot daily tweet counts for selected topic classes.

    For every user the "date" and "clas" columns are read. Each tweet is
    assigned to a day index counted back from a reference time of seven days
    before now; tweets whose class is health, transport, education,
    sci_env/science or party are counted per day for the 200 days before the
    reference time. The five series are drawn on a logarithmic y axis and
    saved to topics.png.

    Tweets newer than the reference time are skipped. Previously their
    negative day index wrapped around and was counted at the far end of the
    series.

    Args:
        cursor: database cursor handed through to the db_* helpers.
    """
    users = db_get_all_users(cursor)

    days_in_past = 200
    # Reference point: one week before now.
    cur_time = time.time() - 7 * 24 * 60 * 60

    # Plotted series, in legend order.
    series = ("health", "transport", "education", "sci_env", "party")
    counts = {name: [0] * days_in_past for name in series}
    # "science" is counted together with "sci_env".
    aliases = {"science": "sci_env"}

    for u in users:
        print(u)

        for row in db_get_cols_from_table(cursor, u, ["date", "clas"]):
            age_seconds = cur_time - int(row[0])
            if age_seconds < 0:
                # Newer than the reference time: a negative index would
                # silently address the list from its end.
                continue

            day = int(age_seconds / 60 / 60 / 24)
            if day >= days_in_past:
                continue

            topic = aliases.get(row[1], row[1])
            if topic in counts:
                counts[topic][day] += 1

    # x positions 0.0, 1.0, ..., days_in_past - 1.
    xbins = [float(day) for day in range(days_in_past)]

    matplotlib.rcParams['font.family'] = 'Open Sans'

    plt.figure(figsize=(25.0, 6.0), dpi=300)

    ax = plt.subplot(111)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.gcf().subplots_adjust(bottom=0.15)

    line_colors = ("#36845b", "#a3d9bc", "#808080", "#305496", "#ffc000")
    for name, colour in zip(series, line_colors):
        plt.semilogy(xbins, counts[name], alpha=1.0, color=colour)

    plt.tick_params(axis='y', labelsize=25)
    plt.tick_params(axis='x', labelsize=25)

    plt.legend(
        ('Health', "Transport", "Education", "Science/Env", "Political"),
        fontsize=25)
    plt.xlim((0, 225))
    plt.ylabel('Number of tweets', fontsize=25)
    plt.xlabel('Days in past', fontsize=25)

    plt.savefig("topics.png", bbox_inches='tight')