import sys

import pandas as pd

import get_functions as gf

# parameters (together with the names in namesferia_[fecha].txt)
dia = "2018-11-03"
timezone = "America/Buenos_Aires"
hora_inicio = "05:00:00"
hora_fin = "22:00:00"
# gob, propuesta, pamelaps and nodomardelplata
omit_accounts = ['1.2.150830', '1.2.151476', '1.2.667678', '1.2.1095159']

# %%
# read the input names
with open('data/input/namesferia_' + dia + '.txt', 'r') as f:
    names_feria = pd.Series(f.read().splitlines())

# account records for all Par accounts
accounts = gf.get_accounts(prefix='moneda-par')

# STOP if a feria name is not found among the par accounts
wrong_names = names_feria[~names_feria.isin(accounts.name)]
if len(wrong_names) > 0:
    sys.exit('NON-EXISTENT ACCOUNT NAMES: ' +
             ', '.join(str(x) for x in list(wrong_names)))

# accounts of the participants
accounts_feria = accounts.loc[accounts.name.isin(names_feria)]

# %% history of each user (reads at most 80 txs per user - 4*20)
history_user = [
    gf.get_user_history(user_id=x, max_page_num=4)
    for x in list(accounts_feria.id_user)
]

# %% txs dataframe
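# A minimal sketch of the truncated "txs dataframe" step, assuming the same
# gf.get_user_txs_fromhistory helper and MONEDAPAR token id ('1.3.1236') used
# in the mdq script of this repo; not the confirmed original continuation.
txs_feria = pd.concat(
    [gf.get_user_txs_fromhistory(json_account_history=h, token_id='1.3.1236')
     for h in history_user]
).drop_duplicates().sort_values('datetime', ascending=True)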
import pickle

import numpy as np
import pandas as pd

import get_functions as gf

tk_id = '1.3.1236'            # MONEDAPAR token id
names_prefix = r'moneda-par'  # prefix of the par accounts

with open('data/input/names_mdq.txt', 'r') as f:
    names_mdq = pd.Series(f.read().splitlines())  # mdq names

# account ids of gob, propuesta and pamelaps
omit_accounts = ['1.2.150830', '1.2.151476', '1.2.667678']
dia = '2018-09-11'

accounts = gf.get_accounts(prefix=names_prefix)
accounts_mdq = accounts.loc[accounts.name.isin(names_mdq)]

accounts_history = pickle.load(open("data/accounts_history_" + dia + ".p", "rb"))
accounts_history_mdq = accounts_history[accounts_mdq.id_user]

txs_accounts_mdq = [
    gf.get_user_txs_fromhistory(json_account_history=i, token_id=tk_id)
    for i in accounts_history_mdq
]
txs_mdq = (pd.concat(txs_accounts_mdq)
           .drop_duplicates()
           .sort_values('datetime', ascending=True))

# merge with account names
txs = pd.merge(txs_mdq, accounts, how='left',
               left_on='sender_id', right_on='id_user').drop('id_user', axis=1)
txs = txs.rename(columns={'name': 'sender_name'})
txs = pd.merge(txs, accounts, how='left',
               left_on='recipient_id', right_on='id_user').drop('id_user', axis=1)
txs = txs.rename(columns={'name': 'recipient_name'})

# omit txs in which omit_accounts take part
txs = txs.loc[~(txs.sender_id.isin(omit_accounts) |
                txs.recipient_id.isin(omit_accounts)), :]

# regenerate the index
txs.index = range(len(txs.index))
# data['time'] = pd.to_datetime(data['time'])
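# A hedged completion of the commented-out conversion above: assuming the
# 'datetime' column arrives as naive UTC timestamps from the node API (an
# assumption, not confirmed by this script), parse it and convert to the
# local timezone used elsewhere in the repo.
txs['datetime'] = (pd.to_datetime(txs['datetime'], utc=True)
                   .dt.tz_convert('America/Buenos_Aires'))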
import pickle

import numpy as np
import pandas as pd

import get_functions as gf

#%% parameters
dia = '2018-10-24'

# transaction history
txs_full = pickle.load(open("output/raw/accounts_txs_history_" + dia + ".p", "rb"))

#%% filters
# MONEDAPAR only
txs_par = txs_full.loc[txs_full.asset_name == "MONEDAPAR"]
# omit txs in which omit accounts take part (see gf.filter_usersintx)
txs_parf = gf.filter_usersintx(txs_par)
# omit special cases
txs_parf = gf.filter_specialtx(txs_parf)
# merge with other data and fix the datetime
txs_parf = gf.merge_txs_data(txs_parf,
                             accounts_df=gf.get_accounts(),
                             tokens_df=gf.token_data())

#%% year-month and year-month-day variables
ym = txs_parf.datetime.dt.to_period(freq='m')
ymd = txs_parf.datetime.dt.to_period(freq='d')
txs_parf.loc[:, 'ym'] = ym.values
txs_parf.loc[:, 'ymd'] = ymd.values

# ym and ymd over the whole period (in case any is missing)
# (newer pandas replaces PeriodIndex(start=..., end=...) with pd.period_range)
all_ym = pd.PeriodIndex(start=txs_parf.datetime.min(), end=dia, freq='m')
all_ym_df = pd.Series(np.full_like(all_ym, 0), index=all_ym, dtype='int64')
all_ymd = pd.PeriodIndex(start=txs_parf.datetime.min(), end=dia, freq='d')
all_ymd_df = pd.Series(np.full_like(all_ymd, 0), index=all_ymd, dtype='int64')

#%% number of txs (ym and ymd)
# ym
n_ym = txs_parf.groupby('ym').count().amount.add(all_ym_df, fill_value=0)
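# ymd -- a minimal sketch mirroring the ym count above; the script truncates
# here, so this daily-count line is an assumed continuation, not the original.
n_ymd = txs_parf.groupby('ymd').count().amount.add(all_ymd_df, fill_value=0)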
import pickle

import pandas as pd

import get_functions as gf

#%% parameters
dia = '2018-11-01'

# transaction history
txs_full = pickle.load(
    open("output/raw/accounts_txs_history_" + dia + ".p", "rb"))

#%% filters
# MONEDAPAR only (obsolete)
# txs_par = txs_full.loc[txs_full.asset_name=="MONEDAPAR"]

# omit txs in which omit accounts take part (see gf.filter_usersintx)
txs_parf = gf.filter_usersintx(txs_full)
# omit special cases
txs_parf = gf.filter_specialtx(txs_parf)
# merge with other data and fix the datetime
txs_parf = gf.merge_txs_data(txs_parf,
                             accounts_df=gf.get_accounts(),
                             tokens_df=gf.token_data())

#%% year-month and year-month-day variables
ym = txs_parf.datetime.dt.to_period(freq='m')
ymd = txs_parf.datetime.dt.to_period(freq='d')
txs_parf.loc[:, 'ym'] = ym.values
txs_parf.loc[:, 'ymd'] = ymd.values

# ym and ymd over the whole period (in case any is missing)
all_ym = pd.PeriodIndex(start=txs_parf.datetime.min(), end=dia, freq='m')
all_ymd = pd.PeriodIndex(start=txs_parf.datetime.min(), end=dia, freq='d')

#%% active users by ym and ymd
# for each ym and ymd, compute the unique senders+recipients
# done with a loop because I don't know how to do it with sql :(
# for ym
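# A minimal sketch of the truncated loop, assuming "active users" in a period
# means the unique union of sender_id and recipient_id among that period's
# txs; the original script cuts off at "for ym", so this is an assumed
# continuation.
active_ym = pd.Series(
    [pd.concat([txs_parf.loc[txs_parf.ym == p, 'sender_id'],
                txs_parf.loc[txs_parf.ym == p, 'recipient_id']]).nunique()
     for p in all_ym],
    index=all_ym, dtype='int64')
# the same pattern applies to ymd, iterating over all_ymd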