def deal_out(addr_file, in_file, to_file):
    """Parse a raw psql query dump (`in_file`) into a transaction CSV (`to_file`).

    Result sets in `in_file` are separated by "(N rows)" footer lines; the
    N-th data section is attributed to the N-th address from `addr_file`
    (via the module-level ``readAddr`` helper).  Data rows are '|'-separated
    and are recognized by a leading '2' (presumably a timestamp year —
    TODO confirm against the psql output format).

    NOTE: mutates `in_file` on disk by appending an 'EOF' sentinel line,
    which the parser loop below stops on (kept from the original behavior).
    """
    color.pInfo('Dealing with ' + in_file)
    # Append the sentinel directly instead of `os.system('echo "EOF" >> ...')`:
    # no shell involved, so paths with spaces/metacharacters are safe.
    with open(in_file, 'a', encoding='utf-8') as f:
        f.write('EOF\n')
    address = readAddr(addr_file)
    transactions = []
    with open(in_file, 'r', encoding='utf-8') as f:
        line = f.readline().strip()
        index = 0
        while line != 'EOF':
            if line == '':
                line = f.readline().strip()
                continue
            if line[0] == '(':
                # "(N rows)" footer: advance to the next address's result set.
                index = index + 1
            if line[0] == '2':
                # Data row: "timestamp | value" — keep first two columns.
                attributes = [attr.strip() for attr in line.split('|')]
                transactions.append([address[index], attributes[0], attributes[1]])
            line = f.readline().strip()
    color.pInfo('collected ' + str(len(transactions)) + ' transactions.')
    df = pd.DataFrame(data=transactions, columns=names_transaction)
    df.to_csv(to_file, index=False)
    color.pImportant(to_file + 'trx shape:' + str(df.shape))
    color.pDone('Done')
def collectTxnIn(p, addr, timeout=200):
    """Collect incoming (external + internal) transactions for the addresses
    listed in `addr` through a live psql session.

    Parameters
    ----------
    p : pexpect.spawn
        Connected psql session (see ``connectPSQL``); '#' is its prompt.
    addr : str
        Path to the address CSV; its basename names the files written
        under ``result/``.
    timeout : int
        Unused — kept for backward-compatible interface.  # NOTE(review)

    Returns
    -------
    str
        Path of the merged incoming-transaction CSV (``result/<name>_in.csv``).
    """
    import sql_query as sq
    color.pInfo('Collecting transactions into contract')
    query_in = [
        'select block_hash,value from external_transaction where to_address=\'',
        '\' and value!=\'0\' limit 1000;',
        'select timestamp, value from internal_transaction where to_address=\'',
        '\' and value!=\'0\' limit 1000;'
    ]
    name = os.path.basename(addr).split('.')[0]

    # --- external transactions: value + block hash ---------------------
    out_file = os.path.join('result', name + '_in.out')
    color.pInfo('Sending incoming transaction query to psql server')
    # '\\o' is psql's output-redirect meta-command; the double backslash
    # sends a literal backslash (the original '\o' relied on an invalid
    # escape sequence that Python only tolerates with a warning).
    p.sendline('\\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_in[:2], p)
    color.pDone('Have generated ' + out_file + '.')

    # --- timestamps for the collected block hashes ---------------------
    txn_file = os.path.join('result', name + '_in.csv')
    time_file = os.path.join('result', name + '_time.out')
    block_hash = deal_sql.deal_in(addr, out_file, txn_file)
    color.pInfo('Sending incoming timestamp query to psql server')
    p.sendline('\\o ' + time_file)
    p.expect('#')
    sq.timestamp_sql(block_hash, p)
    color.pDone('Have generated ' + time_file + '.')
    # Merge the timestamp query result into the txn features.
    deal_sql.deal_in_timestamp(txn_file, time_file)

    # --- internal transactions -----------------------------------------
    out_inter_file = os.path.join('result', name + '_internal.out')
    color.pInfo('Sending incoming transaction in internal_trx to psql server')
    p.sendline('\\o ' + out_inter_file)
    p.expect('#')
    sq.val_sql(addr, query_in[-2:], p)
    color.pDone('Have generated ' + out_inter_file + '.')
    # NOTE(review): filename lacks the '_' separator used elsewhere
    # ('<name>inter_in.csv'); kept as-is since other code may expect it.
    txn_file_inter = os.path.join('result', name + 'inter_in.csv')
    deal_sql.deal_out(addr, out_inter_file, txn_file_inter)

    # --- merge external + internal and sort ----------------------------
    df1 = pd.read_csv(txn_file)
    df2 = pd.read_csv(txn_file_inter)
    # pd.concat replaces DataFrame.append (removed in pandas 2.0);
    # result is identical for this use.
    df = pd.concat([df1, df2])
    # Bug fix: sort_values returns a new frame — the original discarded it.
    df = df.sort_values(by='address')
    df.to_csv(txn_file, index=None)
    color.pImportant('incoming txn shape' + str(df.shape))
    return txn_file
def connectPSQL(psql):
    """Spawn an interactive psql session and authenticate.

    Parameters
    ----------
    psql : str
        Full psql command line to spawn (host, user, database).

    Returns
    -------
    pexpect.spawn
        Authenticated session positioned at the '#' prompt.

    NOTE(review): the original line ``getpass.getpass('Password:'******'#')``
    was a SyntaxError — the password handling was destroyed by credential
    masking.  Reconstructed here as prompt -> sendline -> expect.
    """
    import getpass
    p = pexpect.spawn(psql)
    # Log the whole session; the file must stay open for the session's
    # lifetime, so it is deliberately not closed here.
    logfile = open(os.path.join('log', 'psql.log'), 'ab')
    p.logfile = logfile
    # psql prompts 'Password for user gby:' — match on the tail.
    p.expect('gby:')
    pwd = getpass.getpass('Password:')
    p.sendline(pwd)
    p.expect('#')
    color.pImportant('Successfully connected to PostgreSQL!')
    return p
# ---------------------------------------------------------------------------
# Driver script: connect to PostgreSQL and collect transaction features for
# each address file, then extract ML features from the merged CSVs.
# ---------------------------------------------------------------------------
psql = 'psql --host 192.168.1.2 -U gby ethereum'

addrs = ['116ponzi+116dapp_addr.csv']
# addrs = ['dapp1.csv','dapp2.csv','dapp3.csv','dapp4.csv']
# addrs = ['merged_'+str(i)+'.csv' for i in range(10)]

# collect val and time sequence from addresses
dirPath = 'address'
# addrs = os.listdir(dirPath)

p = connectPSQL(psql)
times = [time.time()]
# for addr in addrs:
for i in range(1):
    addr = 'new1.csv'
    color.pImportant('addr file: ' + addr)
    full_path = os.path.join(dirPath, addr)
    in_csv = collectTxnIn(p, full_path)
    out_csv = collectTxnOut(p, full_path)
    times.append(time.time())
    color.pImportant('collected all txns in ' + str(times[-1] - times[-2]))
    data_file = addr.split('.')[0] + '_database.csv'
    data_file = os.path.join('result', data_file)
    deal_sql.deal_feature(in_csv, out_csv, data_file)
    feature_file = feature.extract(data_file)
    times.append(time.time())
    color.pImportant('dealed all datas in ' + str(times[-1] - times[-2]))
    color.pImportant('')

# NOTE(review): the two alternative runs below were disabled in the original
# with unbalanced triple-quoted strings (an odd number of ''' delimiters —
# a latent SyntaxError).  They are preserved here as '#' comments so the
# file parses unambiguously; re-enable by uncommenting.
#
# # collect val and time sequence from addresses
# dirPath = 'test_addr'
# addrs = os.listdir(dirPath)
# p = connectPSQL(psql)
#
# for addr in addrs:
#     if addr[0] != 'd':
#         color.pImportant('addr file: ' + addr)
#         full_path = os.path.join(dirPath, addr)
#         data_file = 'test_' + addr.split('.')[0].split('_')[1] + '_database.csv'
#         data_file = os.path.join('result', data_file)
#         in_csv = collectTxnIn(p, full_path)
#         out_csv = collectTxnOut(p, full_path)
#         deal_sql.deal_feature(in_csv, out_csv, data_file)
#         feature.extract(data_file)
#         os.rename(full_path, os.path.join(dirPath, 'done-' + addr))

# Final single-file run against the 'add_ponzi_train' address list.
addr = 'add_ponzi_train.csv'
color.pImportant('addr file: ' + addr)
full_path = os.path.join(dirPath, addr)
data_file = 'test_' + addr.split('.')[0].split('_')[1] + '_database.csv'
data_file = os.path.join('result', data_file)
in_csv = collectTxnIn(p, full_path)
out_csv = collectTxnOut(p, full_path)
deal_sql.deal_feature(in_csv, out_csv, data_file)
feature.extract(data_file)
p.close()