def make_url_hist(cursor, party="", max_users=10):
    """Build a histogram of the link hosts tweeted by a party's users.

    Resets the shared word counter, then walks the leading rows of the
    ``user_names`` table.  For every user whose ``job_type1`` starts with
    *party*, the user's ``date``/``url`` columns are read and the host part
    of each URL dated within the last 365 days is added to the counter.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        party: Prefix matched against ``job_type1``; the default ``""``
            matches every user.
        max_users: How many leading ``user_names`` rows to examine.  The
            default of 10 keeps the historical limit; ``None`` examines
            every row.

    Returns:
        The result of ``words_ret_hist()`` after ``word_clean()`` has run.
    """
    max_days = 365
    words_delete_all()
    users = db_get_cols_from_table(cursor, "user_names",
                                   ["user_id", "job_type1"])

    # Slicing instead of indexing 0..9 tolerates a table with fewer rows
    # than the limit (the index loop raised IndexError in that case).
    for row in users[:max_users]:
        u = row[0]
        p = row[1]
        print(u, p)
        if not p.startswith(party):
            continue

        cur_time = time.time()
        for tweet in db_get_cols_from_table(cursor, u, ["date", "url"]):
            date = int(tweet[0])
            url = tweet[1]

            # "None" and "error" are marker values, not URLs; skip them.
            if url == "None" or url == "error":
                continue
            if ((cur_time - date) / 60 / 60 / 24) >= max_days:
                continue

            # Everything before the first "/" is counted as the host.
            address = url.split("/")[0]
            if address.startswith("www."):
                address = address[4:]
            word_add(address)

    word_clean()
    return words_ret_hist()
Exemple #2
0
def sen_run_over_db(cursor):
    """Score recent, not-yet-scored tweets of every user and store the result.

    For each user table a ``sen`` column is added when ``db_is_col`` reports
    it missing.  Every tweet less than two years old whose ``sen`` value is
    the string ``"None"`` is passed (with double backslashes stripped) to
    ``sentiment`` and the stringified score is written back.  Changes are
    committed once per user.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    window_days = 365 * 2
    all_users = db_get_all_users(cursor)
    now = time.time()

    for idx, user in enumerate(all_users):
        print(user, idx, len(all_users))

        if db_is_col(cursor, user, "sen") == False:
            db_add_col(cursor, user, "sen")
            print("adding")

        rows = db_get_cols_from_table(cursor, user,
                                      ["tweet_id", "tweet", "sen", "date"])

        for row in rows:
            age_days = int((now - int(row[3])) / 60 / 60 / 24)
            # Only recent tweets that have no score yet are processed.
            if age_days >= window_days or row[2] != "None":
                continue

            cleaned = row[1].replace("\\\\", "")
            score = sentiment(cleaned)
            # One dot per scored tweet as a progress indicator.
            print(".", end='', flush=True)
            db_update_record(cursor, user, "tweet_id", row[0], "sen",
                             str(score))

        print("")
        db_commit()
Exemple #3
0
def short_to_long_over_db(cursor):
    """Resolve links found in recent tweets and store them in ``url``.

    For every user table: add a ``url`` column when ``db_is_col`` reports it
    missing, then look at tweets from the last 365 days whose ``url`` is
    still the string ``"None"``.  The first ``http(s)://`` link in the tweet
    text is queued and the queue is resolved in batches through
    ``fetch_parallel``.  Each result is written back with its scheme prefix
    removed and cut to 1000 characters; ``"error"`` results are stored
    unchanged.  Every written record is committed immediately.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    max_days = 365
    flush_above = 40
    max_url_len = 1000
    # Raw string: "\s" inside a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    url_pattern = re.compile(r"(?P<url>https?://[^\s]+)")

    users = db_get_all_users(cursor)

    for u in users:
        print(u)
        if db_is_col(cursor, u, "url") == False:
            db_add_col(cursor, u, "url", length="1000")
            print("adding url")

        cur_time = time.time()
        tweets = db_get_cols_from_table(cursor, u,
                                        ["date", "tweet", "url", "tweet_id"])
        last_index = len(tweets) - 1
        pending = []

        for ii, row in enumerate(tweets):
            date = int(row[0])
            text = row[1]
            url = row[2]
            tweet_id = row[3]

            if url == "None" and ((cur_time - date) / 60 / 60 / 24) < max_days:
                found = url_pattern.search(text)
                if found is not None:
                    link = found.group("url")
                    # Links containing the literal text "\u2026" are
                    # skipped (presumably a truncated link - TODO confirm).
                    if "\\u2026" not in link:
                        pending.append([link, tweet_id])

            # Flush once the batch is large enough, or on the user's last
            # tweet so nothing stays queued.
            if len(pending) > flush_above or (pending and ii == last_index):
                for result in fetch_parallel(pending):
                    long_url = result[0]
                    if long_url != "error":
                        if long_url.startswith('https://'):
                            long_url = long_url[8:]
                        if long_url.startswith('http://'):
                            long_url = long_url[7:]
                        long_url = long_url[:max_url_len]

                    db_update_record(cursor, u, "tweet_id", result[1],
                                     "url", long_url)
                    db_commit()

                pending = []
def time_on_twitter(cursor):
    """Print a running estimate of the time spent tweeting in the last year.

    Tweets less than 365 days old are split into retweets (text starting
    with ``"RT "``) and everything else.  After each user the cumulative
    totals are printed together with the user count and a weighted figure
    of 30 units per original tweet and 10 per retweet divided by 3600
    (presumably seconds converted to hours - TODO confirm).

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    users = db_get_all_users(cursor)
    retweets = 0
    originals = 0

    for user in users:
        print(user)
        now = time.time()

        for row in db_get_cols_from_table(cursor, user, ["date", "tweet"]):
            text = row[1]
            age_days = (now - int(row[0])) / 60 / 60 / 24
            if age_days >= 365:
                continue
            if text.startswith("RT "):
                retweets += 1
            else:
                originals += 1

        # Totals are cumulative over all users seen so far.
        estimate = (originals * 30.0 + retweets * 10.0) / 60 / 60
        print(originals, retweets, len(users), estimate)
Exemple #5
0
def retweet_anal_to_flat_files(cursor):
    """Write one flat file per user listing retweets of that user.

    Creates the ``retweet_anal`` directory if needed and truncates one file
    per known user.  Then every tweet starting with ``"RT @"`` is inspected:
    the text between ``"RT @"`` and the first ``":"`` is taken as the
    retweeted user, and if that user is in the user list a line
    ``<date>:<first 100 characters of the tweet>`` is appended to that
    user's file.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    out_dir = "retweet_anal"
    users = db_get_all_users(cursor)
    # Set lookup keeps the per-retweet membership test constant-time.
    known_users = set(users)

    os.makedirs(out_dir, exist_ok=True)

    # Start every user with an empty file so old results never linger.
    for name in users:
        with open(os.path.join(out_dir, name), 'w'):
            pass

    for u in users:
        print(u)
        for row in db_get_cols_from_table(cursor, u, ["date", "tweet"]):
            text = row[1]
            stamp = int(row[0])

            if not text.startswith("RT @"):
                continue

            # "RT @name: ..." -> "name"
            author = text.split(":")[0][4:]
            if author not in known_users:
                continue

            # The context manager closes the handle even if write() fails.
            with open(os.path.join(out_dir, author), 'a') as f:
                f.write(str(stamp) + ":" + text[:100] + "\n")
Exemple #6
0
def get_tweet_time(cursor, u, st):
    """Return the date of the first tweet of *u* whose text starts with *st*.

    Rows are examined in the order ``db_get_cols_from_table`` returns them.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        u: Name of the user table to search.
        st: Prefix the tweet text must start with.

    Returns:
        The matching tweet's ``date`` column as an ``int``, or ``-1`` when
        no tweet matches.
    """
    for row in db_get_cols_from_table(cursor, u, ["date", "tweet"]):
        text = row[1]
        stamp = int(row[0])
        if text.startswith(st):
            return stamp

    return -1
Exemple #7
0
def clas_stats(cursor):
    """Summarise each user's tweet classes into ``user_names.clas``.

    For every user the ``clas`` column of the user's table is fed into the
    shared word counter, and the resulting histogram is stored in the
    ``clas`` field of that user's ``user_names`` row as
    ``name=count;name=count;...``.  The summary is committed and printed
    per user.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    for u in db_get_all_users(cursor):
        print(u)
        words_delete_all()
        for row in db_get_cols_from_table(cursor, u, ["clas"]):
            word_add(row[0])

        hist = words_ret_hist()
        # hist[0] holds the labels and hist[1] the matching counts.
        summary = ";".join(
            name + "=" + str(count) for name, count in zip(hist[0], hist[1]))
        db_update_record(cursor, "user_names", "user_id", u, "clas", summary)
        db_commit()
        print(summary)
    # A stray bare name used to follow the loop and raised NameError once
    # every user had been processed; it has been removed.
Exemple #8
0
def noun_anal(cursor, update=False, for_web=False):
    """Plot the leading entries of ``noun_hist.dat`` as a bar chart.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        update: When true, first rebuild ``noun_hist.dat`` by feeding every
            user's tweets from the last 100 days to ``word_add_array`` and
            writing the resulting histogram, one ``name count`` pair per
            line.  Defaults to ``False``, the value that used to be
            hard-coded.
        for_web: When true, draw a vertical bar chart and save it to
            ``/var/www/html/graphs/nouns.png``; otherwise draw a horizontal
            chart and save it to ``nouns.png``.  Defaults to ``False``, the
            value that used to be hard-coded.
    """
    words_delete_all()

    if update:
        users = db_get_all_users(cursor)
        print(len(users))
        for i, u in enumerate(users):
            print(i, u)
            cur_time = time.time()
            for row in db_get_cols_from_table(cursor, u, ["tweet", "date"]):
                text = row[0]
                date = int(row[1])
                if ((cur_time - date) / 60 / 60 / 24) < 100.0:
                    word_add_array(text)

        names, values = words_ret_hist()
        with open('noun_hist.dat', 'w') as f:
            for name, value in zip(names, values):
                f.write(name + " " + str(value) + "\n")

    # Context manager so the read handle is closed deterministically.
    with open('noun_hist.dat') as f:
        lines = f.read().splitlines()

    labels = []
    counts = []
    # Only the first 17 lines of the file are plotted.
    for line in lines[:17]:
        fields = line.split()
        labels.append(fields[0])
        counts.append(int(fields[1]))

    labels.reverse()
    counts.reverse()

    y_pos = np.arange(len(labels))

    if for_web:
        plt.figure(figsize=(25.0, 16.0), dpi=300)
        plt.bar(y_pos, counts, color="blue")

        plt.xticks(y_pos, labels, fontsize=35)
        plt.legend(loc='best', fontsize=30)
        plt.ylabel('Usage (Tweets)', fontsize=30)
        plt.yticks(fontsize=30)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.tight_layout()
        plt.savefig('/var/www/html/graphs/nouns.png')
    else:
        plt.figure(figsize=(10.0, 10.0), dpi=300)

        ax = plt.subplot(111)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        plt.barh(y_pos, counts, color="#36845b")
        # tick_params replaces the per-tick ``tick.label`` access, an alias
        # that newer Matplotlib releases no longer provide.
        ax.tick_params(axis='x', labelsize=25)

        plt.yticks(y_pos, labels, fontsize=25)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.legend(loc='best', fontsize=25)
        plt.xlabel('Usage (Tweets)', fontsize=25)
        plt.tight_layout()
        plt.savefig('nouns.png')
Exemple #9
0
def sen(cursor):
    """Write per-day mean tweet sentiment to ``sen_730_days.txt``.

    Reads ``user_id``/``job_type1`` from ``user_names`` and, for each user,
    the ``date``/``tweet``/``sen`` columns of the user's table.  Scores of
    tweets from the last 730 days are accumulated per day (index = whole
    days in the past) for three groups: every user, users whose
    ``job_type1`` starts with ``"con"`` and users whose ``job_type1`` starts
    with ``"lab"``.  Tweets whose ``sen`` is the string ``"None"`` are
    ignored.

    The output file has one line per day, ``<all> <con> <lab>``, and is
    rewritten after every user.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    days_in_past = 365 * 2
    out_path = "sen_" + str(days_in_past) + "_days.txt"
    groups = ("all", "con", "lab")

    users_party = db_get_cols_from_table(cursor, "user_names",
                                         ["user_id", "job_type1"])

    sums = {g: [0] * days_in_past for g in groups}
    counts = {g: [0] * days_in_past for g in groups}
    means = {g: [0] * days_in_past for g in groups}

    total_users = len(users_party)
    for i, row in enumerate(users_party):
        u = row[0]
        p = row[1]
        print(u, p, i, total_users)
        cur_time = time.time()
        tweets = db_get_cols_from_table(cursor, u, ["date", "tweet", "sen"])

        for tweet in tweets:
            delta = int((cur_time - int(tweet[0])) / 60 / 60 / 24)
            # A future-dated tweet yields a negative delta, which would
            # silently index the day lists from the end; skip it.
            if not 0 <= delta < days_in_past:
                continue

            string_s = tweet[2]
            if string_s == "None":
                continue
            s = float(string_s)

            hit = ["all"]
            if p.startswith("con"):
                hit.append("con")
            if p.startswith("lab"):
                hit.append("lab")
            for g in hit:
                sums[g][delta] += s
                counts[g][delta] += 1

        # NOTE(review): averages and the file are refreshed after every
        # user (kept as in the original - presumably a progress
        # checkpoint; confirm before hoisting out of the loop).
        for g in groups:
            for day in range(days_in_past):
                if counts[g][day] != 0:
                    means[g][day] = sums[g][day] / counts[g][day]

        with open(out_path, 'w') as f:
            for day in range(days_in_past):
                f.write(str(means["all"][day]) + " " +
                        str(means["con"][day]) + " " +
                        str(means["lab"][day]) + "\n")
def cal_retweets(cursor, update=False, for_web=False):
    """Plot a histogram of each user's retweet percentage.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        update: When true, first recompute every user's retweet percentage
            over the last 100 days (tweets starting with ``"RT "`` versus
            all others) and store it in ``user_names.retweets``.  Defaults
            to ``False``, the value that used to be hard-coded.
        for_web: When true, plot the overall and per-party histograms to
            ``/var/www/html/graphs/retweets.png``; otherwise plot only the
            overall histogram to ``retweet_dist.png``.  Defaults to
            ``False``, the value that used to be hard-coded.
    """
    if update:
        for u in db_get_all_users(cursor):
            print(u)
            cur_time = time.time()
            rt = 0
            original = 0
            for row in db_get_cols_from_table(cursor, u, ["date", "tweet"]):
                text = row[1]
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0:
                    if text.startswith("RT "):
                        rt += 1
                    else:
                        original += 1

            total = rt + original
            frac = 100.0 * rt / total if total != 0 else 0.0
            db_update_record(cursor, "user_names", "user_id", u, "retweets",
                             str(frac))

            db_commit()

    rows = db_get_cols_from_table(cursor, "user_names",
                                  ["retweets", "job_type1"])

    by_party = {"con": [], "lab": [], "lib": [], "snp": []}
    v = []

    for row in rows:
        # The update branch stores str(float) (e.g. "33.3"), which a plain
        # int() rejects; go through float() and truncate as before.
        percent = int(float(row[0]))
        party = row[1]
        for prefix, bucket in by_party.items():
            if party.startswith(prefix):
                bucket.append(percent)

        # Users at exactly 0 are left out of the overall histogram.
        if percent != 0:
            v.append(percent)

    # Bin edges 0.0, 1.0, ..., 99.0
    xbins = [float(edge) for edge in range(100)]

    # NOTE(review): the first con/lab/snp sample has always been overwritten
    # with 0.0 (reason not visible here).  Kept, but guarded so a party
    # without members no longer raises IndexError.
    for key in ("con", "lab", "snp"):
        if by_party[key]:
            by_party[key][0] = 0.0

    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Re-tweets from MPs as % over last 100 days", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)

        plt.hist(v, bins=xbins, alpha=0.5, color='green')
        plt.hist(by_party["con"], bins=xbins, alpha=0.8, color='blue')
        plt.hist(by_party["lab"], bins=xbins, alpha=0.8, color='red')
        plt.hist(by_party["snp"], bins=xbins, alpha=0.8, color='purple')
        plt.hist(by_party["lib"], bins=xbins, alpha=0.8, color='yellow')

        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage of tweets that are retweets', fontsize=25)

        plt.savefig("/var/www/html/graphs/retweets.png", bbox_inches='tight')
    else:
        matplotlib.rcParams['font.family'] = 'Open Sans'

        plt.figure(figsize=(6.0, 6.0), dpi=300)
        ax = plt.subplot(111)
        plt.gcf().subplots_adjust(bottom=0.15)

        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        plt.hist(v, bins=xbins, alpha=1.0, color='#36845b')

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage retweets', fontsize=25)

        plt.savefig("retweet_dist.png", bbox_inches='tight')
Exemple #11
0
def clas(cursor, delta=172800 / 2):
    """Classify tweets for a fixed slice of users and store the class.

    For users at indices 271..561 of ``db_get_all_users``: add a ``clas``
    column when ``db_is_col`` reports it missing, run ``clas_clas_text`` on
    every tweet text, write the first returned value to the tweet's
    ``clas`` column and commit once per user.

    After that loop a second section runs once, using whatever ``u``,
    ``tweets``, ``tot``, ``res`` and ``clas`` were left over from the last
    iteration, and may write a summary to
    ``/var/www/html/interests/<u>.txt``.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        delta: Not referenced anywhere in the body.
    """
    users = db_get_all_users(cursor)

    tweets = []
    #users=["JoJohnsonUK"]
    done = 0
    # NOTE(review): the user range is hard-coded; users[i] raises IndexError
    # when fewer than 562 users exist.
    for i in range(271, 562):
        u = users[i]
        print(u, i, len(users))
        #ada
        if db_is_col(cursor, u, "clas") == False:
            db_add_col(cursor, u, "clas")
            print("adding")

        tweets = db_get_cols_from_table(cursor, u, ["tweet_id", "tweet"])

        for t in tweets:
            # The local name ``clas`` shadows this function inside its body.
            clas, tot, res = clas_clas_text(t[1])
            db_update_record(cursor, u, "tweet_id", t[0], "clas", clas)
            #print(clas)
            #print(t)

        db_commit()

        #adas
    # NOTE(review): everything below runs once, after the loop.  ``tot``,
    # ``res`` and ``clas`` are only bound if at least one tweet was
    # processed above; otherwise the next line fails on an unbound local.
    types = None
    if tot != 0:
        m = len(tweets)
        if m > 500:
            m = 500

        for i in range(0, m):

            #print(tweets[i])
            # NOTE(review): the whole row is passed here, whereas the loop
            # above passes only the tweet text (t[1]); ``out`` is the full
            # return value and ``res`` is not refreshed in this loop.
            out = clas_clas_text(tweets[i])
            if types == None:
                # ``types`` becomes the same object as ``res``.
                types = res
            else:
                #print(types)
                for ii in range(0, len(res)):
                    types[ii][1] = types[ii][1] + res[ii][1]
            if out != "unknown":
                # NOTE(review): ``clas`` still holds the value written to
                # the "clas" column above; if that is a string this
                # addition raises TypeError - TODO confirm.
                clas = clas + 1

        path = "/var/www/html/interests/" + u + ".txt"
        #print(types)

        #asadasds

        if clas != 0:
            # Largest totals first.
            types = sorted(types, key=itemgetter(1), reverse=True)

            sum_ids = 0
            for i in range(0, len(types)):
                sum_ids = sum_ids + types[i][1]

            clas_perent = 100.0 * (clas / tot)
            print(u, types, clas_perent)
            # One "<name> <integer percent of sum_ids>" line per entry.
            f = open(path, 'w')
            for i in range(0, len(types)):
                f.write(types[i][0] + " " +
                        str(int((types[i][1] / sum_ids) * 100.0)) + "\n")

            #f.write("clas "+str(int(clas_perent))+"\n")

            f.close()
Exemple #12
0
def cal_tweets_per_day(cursor, update=False, for_web=False):
    """Plot histograms of tweets per day, overall and per party.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        update: When true, first recount every user's tweets of the last
            100 days and store ``count / 100.0`` in
            ``user_names.tweets_per_day``.  Defaults to ``False``, the value
            that used to be hard-coded.
        for_web: When true, draw all histograms into one figure saved as
            ``/var/www/html/graphs/tweets_per_day.png``; otherwise save one
            figure for all users and one per party under
            ``./tweets_per_day/``.  Defaults to ``False``, the value that
            used to be hard-coded.
    """
    if update:
        for u in db_get_all_users(cursor):
            print(u)
            cur_time = time.time()
            count = 0
            for row in db_get_cols_from_table(cursor, u, ["date"]):
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0:
                    count += 1

            db_update_record(cursor, "user_names", "user_id", u,
                             "tweets_per_day", str(count / 100.0))

            db_commit()

    rows = db_get_cols_from_table(cursor, "user_names",
                                  ["tweets_per_day", "job_type1"])

    by_party = {"con": [], "lab": [], "lib": [], "snp": []}
    v = []

    for row in rows:
        # The update branch stores str(float) (e.g. "0.37"), which a plain
        # int() rejects; go through float() and truncate as before.
        per_day = int(float(row[0]))
        party = row[1]
        for prefix, bucket in by_party.items():
            if party.startswith(prefix):
                bucket.append(per_day)
        v.append(per_day)

    # Bin edges 0.0, 1.0, ..., 59.0
    xbins = [float(edge) for edge in range(60)]

    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Tweets from MPs per day", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)

        plt.hist(v, bins=xbins, alpha=0.5, color='green')
        plt.hist(by_party["con"], bins=xbins, alpha=0.8, color='blue')
        plt.hist(by_party["lab"], bins=xbins, alpha=0.8, color='red')
        plt.hist(by_party["snp"], bins=xbins, alpha=0.8, color='purple')
        plt.hist(by_party["lib"], bins=xbins, alpha=0.8, color='yellow')

        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Number of tweets/day', fontsize=25)

        plt.savefig("/var/www/html/graphs/tweets_per_day.png",
                    bbox_inches='tight')
        return

    matplotlib.rcParams['font.family'] = 'Open Sans'

    # Figure for all users.
    plt.clf()
    plt.figure(figsize=(6.0, 6.0), dpi=300)
    ax = plt.subplot(111)
    plt.gcf().subplots_adjust(bottom=0.15)

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    ax.hist(v, bins=xbins, alpha=1.0, color='#36845b')

    plt.ylabel('Number of MPs', fontsize=20)
    plt.xlabel('Number of tweets/day', fontsize=20)

    plt.savefig("./tweets_per_day/tweets_per_day_all.png",
                bbox_inches='tight')

    # One figure per party: (party key, bar colour, fix y axis to 0..40).
    # The "lib" panel keeps the automatic y range, as before.
    panels = (
        ("con", "#00539f", True),
        ("lab", "#d50000", True),
        ("snp", "#fff685", True),
        ("lib", "#faa01a", False),
    )
    for key, colour, fixed_ylim in panels:
        setup_graph()
        if fixed_ylim:
            plt.gca().set_ylim([0, 40])
        plt.hist(by_party[key], bins=xbins, alpha=0.8, color=colour)
        plt.savefig("./tweets_per_day/tweets_per_day_all_" + key + ".png",
                    bbox_inches='tight')
Exemple #13
0
def topics(cursor):
    """Plot per-day tweet counts for selected topic classes.

    Reads the ``date``/``clas`` columns of every user table and counts
    tweets per whole day of age, measured back from a reference time one
    week before now, for the 200 days before that reference.  The classes
    ``health``, ``transport``, ``education``, ``sci_env`` (also
    ``science``) and ``party`` are plotted on a log scale to
    ``topics.png``; ``brexit`` is tallied but not plotted.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
    """
    days_in_past = 200
    # Reference time: one week before now.
    cur_time = time.time() - 7 * 24 * 60 * 60

    tracked = ("health", "transport", "education", "sci_env", "party",
               "brexit")
    counts = {name: [0] * days_in_past for name in tracked}
    aliases = {"science": "sci_env"}

    for u in db_get_all_users(cursor):
        print(u)

        for row in db_get_cols_from_table(cursor, u, ["date", "clas"]):
            c = row[1]
            age_days = (cur_time - int(row[0])) / 60 / 60 / 24
            # Tweets newer than the reference time have a negative age.
            # Truncating that to an int and indexing with it used to add
            # them to the *end* of the lists (days ~193-199); skip them.
            if age_days < 0:
                continue
            delta = int(age_days)
            if delta >= days_in_past:
                continue

            series = counts.get(aliases.get(c, c))
            if series is not None:
                series[delta] += 1

    # x values 0.0, 1.0, ..., days_in_past - 1
    xbins = [float(day) for day in range(days_in_past)]

    matplotlib.rcParams['font.family'] = 'Open Sans'

    plt.figure(figsize=(25.0, 6.0), dpi=300)

    ax = plt.subplot(111)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.gcf().subplots_adjust(bottom=0.15)

    plt.semilogy(xbins, counts["health"], alpha=1.0, color="#36845b")
    plt.semilogy(xbins, counts["transport"], alpha=1.0, color="#a3d9bc")
    plt.semilogy(xbins, counts["education"], alpha=1.0, color="#808080")
    plt.semilogy(xbins, counts["sci_env"], alpha=1.0, color="#305496")
    plt.semilogy(xbins, counts["party"], alpha=1.0, color="#ffc000")
    plt.tick_params(axis='y', labelsize=25)
    plt.tick_params(axis='x', labelsize=25)

    plt.legend(
        ('Health', "Transport", "Education", "Science/Env", "Political"),
        fontsize=25)
    plt.xlim((0, 225))
    plt.ylabel('Number of tweets', fontsize=25)
    plt.xlabel('Days in past', fontsize=25)

    plt.savefig("topics.png", bbox_inches='tight')
def http_retweets(cursor, update=False):
    """Plot a histogram of the share of tweets that contain a web link.

    Args:
        cursor: Database cursor handed to the ``db_*`` helpers.
        update: When true, first recompute for every user the percentage of
            tweets from the last 100 days whose text contains ``"http"`` and
            store it in ``user_names.http_tweets``.  Defaults to ``False``,
            the value that used to be hard-coded.

    The overall and per-party histograms are saved to
    ``/var/www/html/graphs/http_tweets.png``.
    """
    if update:
        for u in db_get_all_users(cursor):
            print(u)
            cur_time = time.time()
            recent = 0
            http = 0
            for row in db_get_cols_from_table(cursor, u, ["date", "tweet"]):
                text = row[1]
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0:
                    recent += 1
                    if "http" in text:
                        http += 1

            # Divide by the tweets inside the same 100-day window.  The
            # previous denominator was the user's all-time tweet count,
            # which deflated the percentage for long-lived accounts.
            frac = 100.0 * http / recent if http != 0 else 0.0

            db_update_record(cursor, "user_names", "user_id", u, "http_tweets",
                             str(frac))

            db_commit()

    rows = db_get_cols_from_table(cursor, "user_names",
                                  ["http_tweets", "job_type1"])

    print(rows)

    by_party = {"con": [], "lab": [], "lib": [], "snp": []}
    v = []

    for row in rows:
        # The update branch stores str(float) (e.g. "41.7"), which a plain
        # int() rejects; go through float() and truncate as before.
        percent = int(float(row[0]))
        party = row[1]
        for prefix, bucket in by_party.items():
            if party.startswith(prefix):
                bucket.append(percent)
        v.append(percent)

    # Bin edges 0.0, 1.0, ..., 99.0
    xbins = [float(edge) for edge in range(100)]

    plt.figure(figsize=(25.0, 6.0), dpi=300)
    plt.title("Percentage of tweets which contain web links", fontsize=30)
    plt.gcf().subplots_adjust(bottom=0.15)

    plt.hist(v, bins=xbins, alpha=0.5, color='green')
    plt.hist(by_party["con"], bins=xbins, alpha=0.8, color='blue')
    plt.hist(by_party["lab"], bins=xbins, alpha=0.8, color='red')
    plt.hist(by_party["snp"], bins=xbins, alpha=0.8, color='purple')
    plt.hist(by_party["lib"], bins=xbins, alpha=0.8, color='yellow')

    plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)

    plt.ylabel('Number of MPs', fontsize=25)
    plt.xlabel('Percent', fontsize=25)

    plt.savefig("/var/www/html/graphs/http_tweets.png", bbox_inches='tight')