def load_tweets(DF, creds, debug=False, batch_size=10000):
    """Insert the tweets of a DataFrame into the ``tweets`` table in batches.

    Every row is rendered as a SQL ``VALUES`` tuple. Items 1 and 2 of the
    ``get_sentiment`` result for the tweet text are appended to the row and
    stored in the ``polarity`` and ``subjectivity`` columns. Rows are sent
    with ``INSERT ... ON CONFLICT (id) DO NOTHING``.

    A batch is flushed whenever the row index is a positive multiple of
    ``batch_size`` and again after the last row. If a flush fails the error
    is logged and the pending rows are kept, so they are sent again together
    with the next batch.

    Args:
        DF (DataFrame): Tweets to upload. Must contain the columns ``id``,
            ``user_id``, ``date``, ``timezone``, ``location``, ``username``,
            ``tweet``, ``hashtags``, ``link``, ``retweet``, ``user_rt`` and
            ``mentions``. It is not modified.
        creds (dict): Database credentials, handed to ``db_connection``.
        debug (bool): When true, every generated query is logged.
        batch_size (int): Row-index interval that triggers a flush.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If ``DF`` lacks one of the required columns.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    logging.info('*** Cargando tweets ***')
    column_order = ['id', 'user_id', 'date', 'timezone', 'location',
                    'username', 'tweet', 'hashtags', 'link', 'retweet',
                    'user_rt', 'mentions']
    df = DF[column_order].copy()
    # NOTE(review): Series.replace matches whole cell values, so this only
    # blanks cells that are exactly '[' or ']'; it does not strip brackets
    # inside longer strings. Kept as-is to preserve the stored format;
    # confirm whether substring removal was intended.
    df['hashtags'] = df['hashtags'].replace('[', '').replace(']', '')
    lista_tweets = df.values.tolist()
    last_index = len(lista_tweets) - 1

    # NOTE(review): values are interpolated into the SQL text (single quotes
    # are stripped from every field below). Only the (conn, query) call form
    # of download_data is visible here, so the query is not parameterized.
    insert_template = (
        "INSERT INTO tweets (id, user_id , date , timezone , location , "
        "username , tweet , hashtags , link , retweet , user_rt , mentions, "
        "polarity , subjectivity ) VALUES {} ON CONFLICT (id) DO NOTHING;"
    )

    cuenta = ''       # username of the most recently processed row
    pending = []      # rendered "(...)" tuples not yet stored
    batch_start = 0   # index of the first row held in `pending`
    for i, tweet in enumerate(lista_tweets):
        cuenta = tweet[5]                    # 'username' column
        sentiment = get_sentiment(tweet[6])  # 'tweet' column (raw text)

        # Single quotes are removed so the repr() of the list below always
        # yields single-quoted SQL string literals.
        fields = [str(element).replace("'", '') for element in tweet]
        fields.append(sentiment[1])
        fields.append(sentiment[2])
        pending.append("(" + str(fields)[1:-1] + ")")

        at_boundary = i > 0 and i % batch_size == 0
        is_last = i == last_index
        if not (at_boundary or is_last):
            continue

        # Joining the tuples (instead of patching ")(" in one big string)
        # keeps tweet text that itself contains ")(" intact.
        query = insert_template.format(", ".join(pending))
        try:
            if debug:
                # Logged at ERROR level, as before.
                logging.error('query: %s', query)
            conn = db_connection(creds)
            download_data(conn, query)
        except Exception as error:
            # Best effort: keep `pending` so these rows go out with the
            # next flush.
            logging.error("Error al tratar de insertar: %s", error)
            continue

        if at_boundary:
            logging.info("Se guardaron tweets ({}-{}) de la cuenta {}".format(
                batch_start, i, cuenta))
        else:
            logging.info("Se guardaron los últimos tweets ({}-{})".format(
                batch_start, i))
        pending = []
        batch_start = i + 1

    logging.info('Se terminan de guardar todos los tweets de {}'.format(cuenta))
def create_new_stock_table(table_name, creds):
    """Create the price table for a ticker if it does not exist yet.

    The table gets a serial ``id`` primary key, a unique non-null ``date``
    column and the float columns ``high``, ``low``, ``open``, ``close``,
    ``volume`` and ``adj_close``.

    Args:
        table_name (str): Ticker used as the table name. Hyphens are
            replaced by underscores, matching the normalisation that
            ``update_in_db`` applies before inserting.
        creds (dict): Database credentials, handed to ``db_connection``.
    """
    # A hyphen is not valid in an unquoted SQL identifier; without this the
    # statement fails for tickers such as "BRK-B" while update_in_db would
    # target "BRK_B".
    # NOTE(review): the name is interpolated into the SQL text, so only
    # trusted ticker names should reach this function.
    table_name = table_name.replace('-', '_')
    query = (
        "CREATE TABLE IF NOT EXISTS {} (id SERIAL, date date NOT NULL UNIQUE, "
        "high float, low float, open float, close float, volume float, "
        "adj_close float, PRIMARY KEY (id));"
    ).format(table_name)
    conn = db_connection(creds)
    download_data(conn, query)
def update_in_db(df, table_name, creds):
    """Insert the price rows of ``df`` into a ticker's table.

    All rows are sent in a single ``INSERT ... ON CONFLICT ON CONSTRAINT
    <table>_date_key DO NOTHING`` statement. Failures are logged, not raised.
    Nothing is sent when ``df`` is empty.

    Args:
        df (DataFrame): Price data. The index is rendered as the ``date``
            value and the first six columns are written, in order, to
            ``high``, ``low``, ``open``, ``close``, ``volume`` and
            ``adj_close``. NOTE(review): assumes the index holds dates and
            the columns come in exactly that order; confirm with callers.
        table_name (str): Ticker used as the table name; hyphens are
            replaced by underscores.
        creds (dict): Database credentials, handed to ``db_connection``.
    """
    table_name = table_name.replace('-', '_')

    if df.empty:
        # An INSERT with an empty VALUES list is invalid SQL; skip it.
        logging.info("No hay datos nuevos para guardar en: {}".format(table_name))
        return

    rows = []
    # to_records() yields one record per row with the index as field 0.
    for record in df.to_records():
        conv_date = pd.to_datetime(record[0])
        date = "('{}-{}-{}')::date".format(
            conv_date.year, conv_date.month, conv_date.day)
        values = [str(record[k]) for k in range(1, 7)]
        rows.append("({}, {}, {}, {}, {}, {},{})".format(date, *values))
    logging.debug("Filas a insertar en %s: %s", table_name, rows)

    # NOTE(review): values and table name are interpolated into the SQL
    # text; only the (conn, query) call form of download_data is visible.
    query = (
        "INSERT INTO {} (date, high, low, open, close, volume, adj_close) "
        "VALUES {} ON CONFLICT ON CONSTRAINT {}_date_key DO NOTHING;"
    ).format(table_name.upper(), ", ".join(rows), table_name.lower())

    try:
        conn = db_connection(creds)
        download_data(conn, query)
        logging.info("Se guardó: {}".format(table_name))
    except Exception as error:
        logging.error("Error al tratar de insertar %s: %s", table_name, error)
def get_last_date(table_name, creds):
    """Return the most recent date stored in a ticker's price table.

    Args:
        table_name (str): Ticker used as the table name; hyphens are
            replaced by underscores for the query, matching the
            normalisation ``update_in_db`` applies.
        creds (dict): Database credentials, handed to ``db_connection``.

    Returns:
        str: The result of ``SELECT MAX(date)`` as text, or ``'2010-01-01'``
        when the query fails or the table holds no dates.
    """
    default_date = '2010-01-01'
    try:
        query = "SELECT MAX(date) FROM {}".format(table_name.replace('-', '_'))
        conn = db_connection(creds)
        result = download_data(conn, query)
        value = result[0][0]
        # MAX(date) is NULL for an empty table. Presumably that arrives as
        # None or NaT depending on download_data; `value != value` is true
        # only for NaN/NaT-like values.
        if value is None or value != value:
            latest_date = default_date
        else:
            latest_date = str(value)
    except Exception as e:
        logging.error('Error sacando la última fecha de %s: %s', table_name, e)
        latest_date = default_date
    return latest_date