timeInterval = ["", ""] fashion = [] awards = [] with con: cur = con.cursor() for q in range(len(sql_get_begEnd)): cur.execute(sql_get_begEnd[q]) row = cur.fetchone() timeInterval[q] = time.strptime( re.search(r'\s([\w:]+)', str(row[0])).group(1).encode('ascii', 'ignore'), "%H:%M:%S") eventTimeline = timeline(timeInterval[0], timeInterval[1]) # commence querying for award cats/winners for query in sql_awards: cur.execute(query) rows2 = cur.fetchall() for row in rows2: # RT from @eonline or @goldenglobes result1 = re.search( r'RT @\w+: ([Bb]est [\w\s,]*)[:-] (Drama -|Comedy or Musical -|Musical or Comedy)?([A-Za-z\s\"-]*)(http)?', row[1]) if result1: # get timestamp tStamp = time.strptime( re.search(r'\s([\w:]+)',
# Interactive entry point: prints the access status returned by api_1(), shows
# a two-item menu, and dispatches to timeline() or search() based on the
# number the user types.
from textblob import TextBlob
import re
import tweepy
import csv
from access_grant import *
from polarity import *
from timeline import *
from search import *

# api_1(), timeline() and search() are supplied by the wildcard imports above.
# NOTE(review): presumably api_1 lives in access_grant — confirm.
access, api = api_1()
print(access)

print("1.Enter 1 if you want to analyze the Timeline of any User")
print(
    "2.Enter 2 if you want analyze Public Actions on Twitter towards the User")

# A non-numeric answer used to raise ValueError and abort with a traceback.
# Treat it like any other unrecognised choice so the user gets the
# "Wrong Option" message instead.
try:
    options = int(input('Options:'))
except ValueError:
    options = None

if options == 1:
    # Kept as in the original: the result is bound to the name `timeline`,
    # which rebinds (shadows) the imported function from this point on.
    timeline = timeline()
elif options == 2:
    search()
else:
    print("Wrong Option")
else: print("Usage: " + sys.argv[0] + " percentdone <item_num> [PERCENT]") elif (action == "due"): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "due") addExtension(int(args[0]), "due", formatDate(" ".join(args[1:]))) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "due") else: print("Usage: " + sys.argv[0] + " due <item_num> [DATE]") elif (action in ["starts","start"]): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "start") addExtension(int(args[0]), "start", formatDate(" ".join(args[1:]))) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "start") else: print("Usage: " + sys.argv[0] + " starts <item_num> [DATE]") elif (action == "wait"): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "wait") addExtension(int(args[0]), "wait", " ".join(args[1:])) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "wait") else: print("Usage: " + sys.argv[0] + " wait <item_num> [for what]") elif (action == "timeline" or action == "t"): timeline() else: help()
con = lite.connect("gg_tweets.sqlite3") timeInterval = ["",""] fashion = [] awards = [] with con: cur = con.cursor() for q in range(len(sql_get_begEnd)): cur.execute(sql_get_begEnd[q]) row = cur.fetchone() timeInterval[q] = time.strptime(re.search(r'\s([\w:]+)', str(row[0])).group(1).encode('ascii', 'ignore'), "%H:%M:%S") eventTimeline = timeline(timeInterval[0],timeInterval[1]) # commence querying for award cats/winners for query in sql_awards: cur.execute(query) rows2 = cur.fetchall() for row in rows2: # RT from @eonline or @goldenglobes result1 = re.search(r'RT @\w+: ([Bb]est [\w\s,]*)[:-] (Drama -|Comedy or Musical -|Musical or Comedy)?([A-Za-z\s\"-]*)(http)?', row[1]) if result1: # get timestamp tStamp = time.strptime(re.search(r'\s([\w:]+)', str(row[0])).group(1).encode('ascii', 'ignore'), "%H:%M:%S") # get raw data rawCat = result1.group(1).encode('ascii', 'ignore').strip("- ")
else: print "Usage: " + sys.argv[0] + " percentdone <item_num> [PERCENT]" elif (action == "due"): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "due") addExtension(int(args[0]), "due", formatDate(" ".join(args[1:]))) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "due") else: print "Usage: " + sys.argv[0] + " due <item_num> [DATE]" elif (action in ["starts","start"]): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "start") addExtension(int(args[0]), "start", formatDate(" ".join(args[1:]))) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "start") else: print "Usage: " + sys.argv[0] + " starts <item_num> [DATE]" elif (action == "wait"): if (len(args) > 1 and args[0].isdigit()): rmExtension(int(args[0]), "wait") addExtension(int(args[0]), "wait", " ".join(args[1:])) elif (len(args) > 0 and args[0].isdigit()): rmExtension(int(args[0]), "wait") else: print "Usage: " + sys.argv[0] + " wait <item_num> [for what]" elif (action == "timeline" or action == "t"): timeline() else: help()
from timeline import * from sklearn.cluster import KMeans from sklearn.cluster import AffinityPropagation as AP from sklearn.preprocessing import MinMaxScaler import numpy as np import matplotlib.pyplot as plt api = login() T, tweets = timeline(api) #buscamos datos del timeline T = cleanse_data( T ) #limpiamos los datos, es decir, preparar la data para normalizacion. En este caso asumimos que los booleanos se expresan como -1 (None), 0 (False) y 1 (True) T = np.array( [T[0], T[-1]] ) #en este caso podemos utilizar dos rasgos, utilizaremos el numero de retweets y si el tweet fue retwitteado o no ya que estos valores son numericos T[0] = MinMaxScaler().fit_transform( np.float64(T[0]) ) #como los rasgos de retwitteado o no ya fueron normalizados, procedemos a normalizar la cantidad de retweets para que tenga valores oscilando entre 0 y 1 T = make_2d_mat(T) #tenemos que convertir los vectores a matrices def kmeans(T, metodo): model = KMeans( n_clusters=2, init=metodo, precompute_distances=True ) #el numero de clusters tiene que ser mayor o igual al numero de rasgos que utilizamos, inicializamos con el metodo elegido (k-means++ y random) y establecemos que el algoritmo debe computar las distancias model.fit(