def deal_feature(file_in, file_out, file_data, ponzi=None):
    """Merge incoming and outgoing transaction sequences into one feature CSV.

    file_in:   CSV of incoming transactions (read with pandas).
    file_out:  CSV of outgoing transactions.
    file_data: destination path for the combined frame.
    ponzi:     kept for backward compatibility; it was only referenced by a
               large commented-out branch, which has been deleted here.
    """
    color.pInfo('Dealing with features')
    ins = pd.read_csv(file_in, encoding='utf-8')
    outs = pd.read_csv(file_out, encoding='utf-8')
    # sequence() splits a raw transaction frame into per-row address / value /
    # timestamp collections (project helper; exact container type not visible here).
    addr_in, val_ins, time_ins = sequence(ins)
    addr_out, val_outs, time_outs = sequence(outs)
    # Index-based pairing is kept (rather than zip) because the dead code below
    # this line originally treated val_ins/val_outs as int-keyed mappings —
    # TODO confirm what sequence() actually returns before switching to zip.
    ins = [[addr_in[i], val_ins[i], time_ins[i]] for i in range(len(addr_in))]
    outs = [[addr_out[i], val_outs[i], time_outs[i]] for i in range(len(addr_out))]
    df_in = pd.DataFrame(ins, columns=['address', 'val_in', 'time_in'])
    df_out = pd.DataFrame(outs, columns=['address', 'val_out', 'time_out'])
    # Column-wise outer concat: rows are aligned by position, not by address.
    df = pd.concat([df_in, df_out], join='outer', axis=1)
    df.to_csv(file_data, index=False)
    color.pDone('Have generated ' + file_data + '.')
def deal_out(addr_file, in_file, to_file):
    """Parse a psql output dump of (timestamp, value) rows into a CSV.

    addr_file: file of contract addresses, loaded via readAddr().
    in_file:   raw psql '\\o' output; an EOF sentinel is appended so the
               read loop can terminate.
    to_file:   destination CSV, written with columns `names_transaction`.

    Rows starting with '2' are data rows (timestamps begin with the year);
    rows starting with '(' are psql "(N rows)" footers and advance the
    address index to the next contract.
    """
    color.pInfo('Dealing with ' + in_file)
    os.system('echo \"EOF\" >> ' + in_file)
    address = readAddr(addr_file)
    transactions = []
    with open(in_file, 'r', encoding='utf-8') as fh:
        cursor = 0
        line = fh.readline().strip()
        while line != 'EOF':
            if not line:
                line = fh.readline().strip()
                continue
            first = line[0]
            if first == '(':
                cursor += 1
            if first == '2':
                fields = [piece.strip() for piece in line.split('|')]
                transactions.append([address[cursor], fields[0], fields[1]])
            line = fh.readline().strip()
    color.pInfo('collected ' + str(len(transactions)) + ' transactions.')
    pd.DataFrame(data=transactions, columns=names_transaction).to_csv(
        to_file, index=False)
    color.pDone('Done')
def collectAddr(p, n=N, timeout=120):
    """Fetch the next batch of n contract addresses through an interactive psql session.

    p:       pexpect-style child running psql (sendline/expect interface).
    n:       batch size. FIX: the body previously used the global N everywhere,
             silently ignoring this parameter; it now honors it (default is
             still N, so existing callers are unaffected).
    timeout: seconds to wait for the psql prompt before pexpect raises TIMEOUT.

    Side effects: redirects psql output to result/addr_<offset>.out and
    advances the offset stored in log/collect.log by n.
    """
    log_file = os.path.join('log', 'collect.log')
    examLog(log_file)
    last = fetchLog(log_file)   # offset reached by previous runs (string)
    new = int(last) + n         # was `+ N`: honor the parameter
    out_file = os.path.join('result', 'addr_' + last + '.out')
    p.sendline('\o ' + out_file)  # \o — redirect psql query output to file
    p.expect('#')
    query = ('SELECT address FROM code WHERE address IN '
             '(SELECT to_address from external_transaction WHERE value!=\'0\' '
             'ORDER BY number DESC limit ' + str(n) + ' OFFSET ' + str(last) +
             ') ORDER BY number DESC;')
    p.sendline(query)
    color.pInfo('Excuting query \'' + query + '\', raising TimeOut exception in '
                + str(timeout) + ' sec.')
    p.expect('#', timeout=timeout)
    color.pDone('Done query.')
    with open(out_file) as f:
        out = f.readlines()
    try:
        # Last data line sits just above psql's "(N rows)" footer.
        out = out[-2]
    except IndexError:  # was a bare except; only indexing an empty dump can fail here
        color.pError('Failed to write the results')
        p.close()
        sys.exit(1)
    color.pDone('Collected address ' + out + '\nWritten in ' + out_file + ' .')
    writeLog(log_file, new)
def collectTxnIn(p, addr, timeout=200):
    """Collect incoming external transactions for the addresses in `addr`.

    Pipeline: query (block_hash, value) pairs per address, parse them into a
    CSV (deal_sql.deal_in), then query each block's timestamp and merge it in
    (deal_sql.deal_in_timestamp). Returns the path of the resulting CSV.

    timeout is currently unused in this body — presumably intended for the
    p.expect calls; TODO confirm and thread it through.

    NOTE(review): a second, richer `collectTxnIn` appears later in this file;
    if both live in the same module the later definition shadows this one.
    """
    import sql_query as sq
    color.pInfo('Collecting transactions into contract')
    # Two halves of the SQL; sq.val_sql splices each address between them.
    query_in = [
        'select block_hash,value from external_transaction where to_address=\'',
        '\' and value!=\'0\';'
    ]
    name = os.path.basename(addr).split('.')[0]
    # Redirect psql output, then run the value/hash query.
    out_file = os.path.join('result', name + '_in.out')
    color.pInfo('Sending incoming transaction query to psql server')
    p.sendline('\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_in, p)
    color.pDone('Have generated ' + out_file + '.')
    # Parse the dump; deal_in returns the block hashes needed for timestamps.
    txn_file = os.path.join('result', name + '_in.csv')
    time_file = os.path.join('result', name + '_time.out')
    block_hash = deal_sql.deal_in(addr, out_file, txn_file)
    color.pInfo('Sending incoming timestamp query to psql server')
    p.sendline('\o ' + time_file)
    p.expect('#')
    sq.timestamp_sql(block_hash, p)
    color.pDone('Have generated ' + time_file + '.')
    # FIX: txn_file was recomputed here with an identical value — removed.
    deal_sql.deal_in_timestamp(txn_file, time_file)
    return txn_file
def deal_in(addr_file, in_file, to_file):
    """Parse a psql dump of (block_hash, value) rows into a transactions CSV.

    Because the external_transaction table has no timestamp column, the block
    hashes are collected and returned so the caller can query each block's
    timestamp separately (see deal_in_timestamp).

    addr_file: file of contract addresses, loaded via readAddr().
    in_file:   raw psql '\\o' output; an EOF sentinel is appended first.
    to_file:   destination CSV (columns `names_transaction`); the timestamp
               field is left empty here and filled in later.
    Returns:   list of block hashes, in row order.
    """
    color.pInfo('Dealing with ' + in_file)
    os.system('echo \"EOF\" >> ' + in_file)  # sentinel so the loop terminates
    address = readAddr(addr_file)
    transactions = []
    block_hash = []
    with open(in_file, 'r', encoding='utf-8') as f:
        index = 0
        line = f.readline().strip()
        while line != 'EOF':
            if line == '':
                line = f.readline().strip()
                continue
            if line[0] == '(':
                # "(N rows)" footer: following rows belong to the next address.
                index = index + 1
            data = []
            if line[0] == '\\':
                # Data rows: psql renders bytea block hashes as \x... .
                attributes = [a.strip() for a in line.split('|')]
                try:
                    data = [address[index], '', attributes[1]]
                    block_hash.append(attributes[0])
                    transactions.append(data)
                except IndexError:  # was a bare except; message says 'out of index'
                    color.pError('out of index')
                    print('index', index)
                    print('attribute', attributes)
                    break
            line = f.readline().strip()
    df = pd.DataFrame(data=transactions, columns=names_transaction)
    df.to_csv(to_file, index=False)
    color.pDone('Done')
    return block_hash
def extract(database):
    """Compute per-contract aggregate features from a raw transaction CSV.

    Reads `database`, fills NaNs with 0, and for every row with a non-empty
    val_in or val_out builds a feature vector via tl.basic_features. The
    result is written to feature/<first>_<second>_feature.csv, where the name
    parts come from the input file's basename split on '_'.

    NOTE(review): after fillna(0) the `!= ''` tests compare against the empty
    string, not 0 — preserved as-is; confirm the intended filter.
    """
    color.pInfo("Dealing with transaction data data")
    raw_data = pd.read_csv(database)
    raw_data = raw_data.fillna(0)
    feature_rows = []
    feature_columns = [
        'address',
        'nbr_tx_in',
        'nbr_tx_out',
        'Tot_in',
        'Tot_out',
        'mean_in',
        'mean_out',
        'sdev_in',
        'sdev_out',
        'gini_in',
        'gini_out',
        'avg_time_btw_tx',
        'lifetime',
    ]
    for row_idx in range(raw_data.shape[0]):
        record = raw_data.iloc[row_idx]
        address = record['address']
        time_in = record['time_in']
        time_out = record['time_out']
        val_in = record['val_in']
        val_out = record['val_out']
        if val_in != '' or val_out != '':
            feature_rows.append(
                tl.basic_features(None, address, time_in, time_out, val_in, val_out))
    tl.compute_time(t0)
    df_features = pd.DataFrame(feature_rows, columns=feature_columns)
    base = os.path.basename(database).split('.')[0]
    parts = base.split('_')
    f_file = os.path.join('feature', parts[0] + '_' + parts[1] + '_feature.csv')
    df_features.to_csv(f_file, index=None)
    color.pDone('Have written feature file ' + f_file + '.')
def collectTxnOut(p, addr, timeout=200):
    """Collect outgoing internal transactions for the addresses in `addr`.

    Redirects psql output to result/<name>_out.out, runs the value query for
    each address (sq.val_sql), parses the dump into result/<name>_out.csv via
    deal_sql.deal_out, and returns that CSV path.
    """
    import sql_query as sq
    color.pInfo('Collecting transactions out of contract')
    # SQL halves; the contract address is inserted between them by sq.val_sql.
    query_out = [
        'select timestamp, value from internal_transaction where from_address=\'',
        '\' and value!=\'0\' limit 1000;\r',
    ]
    base_name = os.path.basename(addr).split('.')[0]
    raw_file = os.path.join('result', base_name + '_out.out')
    color.pInfo('Sending outcoming transaction query to psql server')
    p.sendline('\o ' + raw_file)
    p.expect('#')
    sq.val_sql(addr, query_out, p)
    color.pDone('Have generated ' + raw_file + '.')
    # Turn the raw dump into the transaction-feature CSV.
    csv_file = os.path.join('result', base_name + '_out.csv')
    deal_sql.deal_out(addr, raw_file, csv_file)
    return csv_file
def deal_in_timestamp(txn_file, time_file):
    """Attach block timestamps from time_file to the transactions CSV, in place.

    txn_file:  CSV previously written by deal_in; rewritten here with a
               'timestamp' column filled in.
    time_file: raw psql dump of timestamps, one per transaction row; an EOF
               sentinel is appended so the read loop can terminate.

    NOTE(review): alignment is purely positional — the j-th collected
    timestamp is assigned to the j-th row with a non-empty address. If
    time_file has fewer timestamps than there are such rows, timestamps[j]
    raises IndexError — confirm upstream guarantees.
    """
    color.pInfo('Dealing with ' + time_file)
    os.system('echo \"EOF\" >> ' + time_file)  # sentinel terminates the loop below
    transactions = pd.read_csv(txn_file, low_memory=False)
    timestamps = []
    num = 0
    with open(time_file, 'r', encoding='utf-8') as f:
        line = f.readline().strip()
        while (line != 'EOF'):
            if line == '':
                line = f.readline().strip()
                continue
            # Data rows start with the year digit ('2...'); psql headers,
            # separators and "(N rows)" footers are skipped implicitly.
            if line[0] == '2':
                timestamps.append(line)
                num = num + 1
                # Progress report every million timestamps.
                if num % 1000000 == 0:
                    color.pDone('dealed ' + str(num) + ' timestamps')
            line = f.readline().strip()
    color.pInfo('adding timestamps to transaction')
    j = 0  # index into timestamps; advances once per non-empty address row
    last = transactions['address'][0]
    for i in range(transactions.shape[0]):
        if transactions['address'][i]:  # not empty
            transactions.loc[i, 'timestamp'] = timestamps[j]
            # Log once whenever the contract address changes.
            if transactions['address'][i] != last:
                color.pInfo(transactions['address'][i] + ' transaction:' + str(i))
                last = transactions['address'][i]
            j = j + 1
    color.pInfo('writing to ' + txn_file + ' .')
    transactions.to_csv(txn_file, index=False)
    color.pDone('Done')
def collectTxnIn(p, addr, timeout=200):
    """Collect incoming transactions (external + internal) for addresses in `addr`.

    Pipeline: (1) query external (block_hash, value) pairs and parse them;
    (2) query each block's timestamp and merge it into the CSV; (3) query
    internal (timestamp, value) pairs; (4) append the internal rows to the
    external ones, sort by address, and rewrite the CSV. Returns its path.

    Fixes in this revision:
      * DataFrame.append was removed in pandas 2.0 — replaced with pd.concat
        (same default ignore_index=False semantics).
      * sort_values' return value was discarded, so the written CSV was never
        actually sorted; the result is now assigned back.
    """
    import sql_query as sq
    color.pInfo('Collecting transactions into contract')
    # SQL halves; sq.val_sql splices each address between a pair of them.
    query_in = [
        'select block_hash,value from external_transaction where to_address=\'',
        '\' and value!=\'0\' limit 1000;',
        'select timestamp, value from internal_transaction where to_address=\'',
        '\' and value!=\'0\' limit 1000;'
    ]
    name = os.path.basename(addr).split('.')[0]
    # --- external transactions: (block_hash, value) ---
    out_file = os.path.join('result', name + '_in.out')
    color.pInfo('Sending incoming transaction query to psql server')
    p.sendline('\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_in[:2], p)
    color.pDone('Have generated ' + out_file + '.')
    # Parse dump; deal_in returns block hashes for the timestamp lookup.
    txn_file = os.path.join('result', name + '_in.csv')
    time_file = os.path.join('result', name + '_time.out')
    block_hash = deal_sql.deal_in(addr, out_file, txn_file)
    color.pInfo('Sending incoming timestamp query to psql server')
    p.sendline('\o ' + time_file)
    p.expect('#')
    sq.timestamp_sql(block_hash, p)
    color.pDone('Have generated ' + time_file + '.')
    deal_sql.deal_in_timestamp(txn_file, time_file)
    # --- internal transactions: (timestamp, value) ---
    out_inter_file = os.path.join('result', name + '_internal.out')
    color.pInfo('Sending incoming transaction in internal_trx to psql server')
    p.sendline('\o ' + out_inter_file)
    p.expect('#')
    sq.val_sql(addr, query_in[-2:], p)
    color.pDone('Have generated ' + out_inter_file + '.')
    # NOTE(review): filename lacks a '_' before 'inter' — kept for compatibility.
    txn_file_inter = os.path.join('result', name + 'inter_in.csv')
    deal_sql.deal_out(addr, out_inter_file, txn_file_inter)
    # Merge external + internal rows and rewrite the external CSV.
    df1 = pd.read_csv(txn_file)
    df2 = pd.read_csv(txn_file_inter)
    df = pd.concat([df1, df2])           # fix: .append() removed in pandas >= 2.0
    df = df.sort_values(by='address')    # fix: result was previously discarded
    df.to_csv(txn_file, index=None)
    color.pImportant('incoming txn shape' + str(df.shape))
    return txn_file
def combine(f1, f2, w_file):
    """Concatenate two CSV files row-wise and write the result to w_file."""
    frames = [pd.read_csv(path) for path in (f1, f2)]
    pd.concat(frames, axis=0).to_csv(w_file, index=False)
    color.pDone('Written ' + w_file + '.')