Beispiel #1
0
def deal_feature(file_in, file_out, file_data, ponzi=None):
    """Write incoming and outgoing transaction sequences side by side.

    Both input CSV files are loaded and handed to ``sequence``; the three
    values it returns for each file are turned into one table per direction
    and the two tables are concatenated column-wise into ``file_data``.

    Args:
        file_in: Path of the CSV with incoming transactions.
        file_out: Path of the CSV with outgoing transactions.
        file_data: Path of the CSV to write.
        ponzi: Not used by this function; kept so existing callers that pass
            it keep working.
    """
    color.pInfo('Dealing with features')

    ins = pd.read_csv(file_in, encoding='utf-8')
    outs = pd.read_csv(file_out, encoding='utf-8')

    addr_in, val_ins, time_ins = sequence(ins)
    addr_out, val_outs, time_outs = sequence(outs)

    # Look values up by position instead of zipping: only ``[i]`` access is
    # required of the value/time containers returned by ``sequence``.
    rows_in = [[addr_in[i], val_ins[i], time_ins[i]]
               for i in range(len(addr_in))]
    rows_out = [[addr_out[i], val_outs[i], time_outs[i]]
                for i in range(len(addr_out))]

    df_in = pd.DataFrame(rows_in, columns=['address', 'val_in', 'time_in'])
    df_out = pd.DataFrame(rows_out, columns=['address', 'val_out', 'time_out'])

    # axis=1 places the two tables next to each other, matched on row index;
    # the result therefore carries two 'address' columns.
    df = pd.concat([df_in, df_out], join='outer', axis=1)
    df.to_csv(file_data, index=False)
    color.pDone('Have generated '+file_data+'.')
Beispiel #2
0
def deal_out(addr_file,in_file,to_file): 
    """Convert a raw query dump into a transaction CSV.

    The dump is read line by line. A line starting with ``(`` moves on to the
    next address (presumably the row-count footer printed after each query
    result -- confirm against the dump format). A line starting with ``2`` is
    split on ``|`` and its first two fields are stored together with the
    current address.

    The input file is no longer modified: earlier versions appended an
    ``EOF`` marker through a shell command, which broke on paths containing
    shell metacharacters and never terminated when the file did not end with
    a newline. A literal ``EOF`` line left behind by such a run still stops
    the scan.

    Args:
        addr_file: Path handed to ``readAddr`` to obtain the address sequence.
        in_file: Path of the raw dump to parse.
        to_file: Path of the CSV to write.

    Raises:
        FileNotFoundError: If ``in_file`` does not exist.
        IndexError: If the dump contains more result sets than addresses, or
            a data line has fewer than two ``|``-separated fields.
    """
    color.pInfo('Dealing with '+in_file)

    address = readAddr(addr_file)
    transactions = []

    with open(in_file, 'r', encoding='utf-8') as f:
        index = 0
        for raw_line in f:
            line = raw_line.strip()
            if line == 'EOF':
                # Sentinel written by older runs; nothing after it is data.
                break
            if not line:
                continue

            if line[0] == '(':
                index += 1
            elif line[0] == '2':
                attributes = [field.strip() for field in line.split('|')]
                transactions.append(
                    [address[index], attributes[0], attributes[1]])

    color.pInfo('collected '+str(len(transactions))+' transactions.')
    df = pd.DataFrame(data=transactions, columns=names_transaction)
    df.to_csv(to_file, index=False)
    color.pDone('Done')
Beispiel #3
0
def collectAddr(p, n=N, timeout=120):
    """Query the next batch of contract addresses and record the progress.

    The current offset is read from ``log/collect.log``; the query output is
    redirected to ``result/addr_<offset>.out`` and, on success, the offset
    plus ``n`` is written back to the log.

    Args:
        p: Interactive process handle exposing ``sendline``, ``expect`` and
            ``close`` (presumably a pexpect session attached to psql --
            confirm with the caller).
        n: Batch size, used as the SQL ``limit`` and as the amount by which
            the logged offset advances. Previously this argument was ignored
            and the module-level ``N`` was always used.
        timeout: Seconds to wait for the prompt after sending the query.

    The process exits with status 1 when the output file has fewer than two
    lines.
    """
    log_file = os.path.join('log', 'collect.log')
    examLog(log_file)
    last = fetchLog(log_file)
    new = int(last) + n

    out_file = os.path.join('result', 'addr_' + last + '.out')
    # '\\o' is the literal backslash-o meta command; the unescaped '\o' form
    # is an invalid escape sequence in Python string literals.
    p.sendline('\\o ' + out_file)
    p.expect('#')

    query = ('SELECT address FROM code WHERE address IN '
             '    (SELECT to_address from external_transaction WHERE value!=\'0\' ORDER BY number DESC limit '
             '    ' + str(n) + ' OFFSET ' + str(last) + ') ORDER BY number DESC;')
    p.sendline(query)
    color.pInfo('Excuting query \'' + query + '\', raising TimeOut '
                '        exception in ' + str(timeout) + ' sec.')
    p.expect('#', timeout=timeout)
    color.pDone('Done query.')

    with open(out_file) as f:
        lines = f.readlines()
    try:
        # The second-to-last line is reported as the summary of the result.
        out = lines[-2]
    except IndexError:
        color.pError('Failed to write the results')
        p.close()
        sys.exit(1)

    color.pDone('Collected address ' + out + '\nWritten in ' + out_file + ' .')
    writeLog(log_file, new)
Beispiel #4
0
def collectTxnIn(p, addr, timeout=200):
    """Collect the incoming external transactions for an address file.

    Runs the value/block-hash query, parses its output into
    ``result/<name>_in.csv``, then runs the timestamp query for the collected
    block hashes and merges those timestamps into the same CSV.

    Args:
        p: Interactive process handle exposing ``sendline`` and ``expect``
            (presumably a pexpect session attached to psql -- confirm with
            the caller).
        addr: Path of the address file; its base name without extension is
            used to name the files written under ``result``.
        timeout: Not used by this function; kept for interface
            compatibility.

    Returns:
        Path of the CSV holding the incoming transactions.
    """
    import sql_query as sq

    color.pInfo('Collecting transactions into contract')
    query_in = [
        "select block_hash,value from external_transaction where to_address='",
        "' and value!='0';"
    ]
    name = os.path.basename(addr).split('.')[0]
    out_file = os.path.join('result', name + '_in.out')
    txn_file = os.path.join('result', name + '_in.csv')
    time_file = os.path.join('result', name + '_time.out')

    # Redirect query output to out_file, then run the value query.
    color.pInfo('Sending incoming transaction query to psql server')
    # '\\o' is the literal backslash-o meta command; the unescaped '\o' form
    # is an invalid escape sequence in Python string literals.
    p.sendline('\\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_in, p)
    color.pDone('Have generated ' + out_file + '.')

    # Parse the dump; the block hashes are needed for the timestamp query.
    block_hash = deal_sql.deal_in(addr, out_file, txn_file)

    color.pInfo('Sending incoming timestamp query to psql server')
    p.sendline('\\o ' + time_file)
    p.expect('#')
    sq.timestamp_sql(block_hash, p)
    color.pDone('Have generated ' + time_file + '.')

    # Merge the timestamps into the transaction CSV.
    deal_sql.deal_in_timestamp(txn_file, time_file)

    return txn_file
Beispiel #5
0
def deal_in(addr_file, in_file, to_file): 
    """Convert a raw incoming-transaction dump into a CSV and return hashes.

    The external transaction table has no timestamp column, so the block hash
    of every transaction is collected here; the caller can then look up the
    block timestamps and use them as transaction timestamps. The timestamp
    field of each written row is left empty.

    The dump is read line by line. A line starting with ``(`` moves on to the
    next address (presumably the row-count footer printed after each query
    result -- confirm against the dump format). A line starting with a
    backslash is split on ``|``: the first field is recorded as block hash and
    the second is stored with the current address.

    The input file is no longer modified: earlier versions appended an
    ``EOF`` marker through a shell command, which broke on paths containing
    shell metacharacters and never terminated when the file did not end with
    a newline. A literal ``EOF`` line left behind by such a run still stops
    the scan.

    Args:
        addr_file: Path handed to ``readAddr`` to obtain the address sequence.
        in_file: Path of the raw dump to parse.
        to_file: Path of the CSV to write.

    Returns:
        List of block hashes, one per transaction row written.

    Raises:
        FileNotFoundError: If ``in_file`` does not exist.
    """
    color.pInfo('Dealing with '+in_file)

    address = readAddr(addr_file)
    transactions = []
    block_hash = []

    with open(in_file, 'r', encoding='utf-8') as f:
        index = 0
        for raw_line in f:
            line = raw_line.strip()
            if line == 'EOF':
                # Sentinel written by older runs; nothing after it is data.
                break
            if not line:
                continue

            if line[0] == '(':
                index += 1
            elif line[0] == '\\':
                attributes = [field.strip() for field in line.split('|')]
                try:
                    data = [address[index], '', attributes[1]]
                except (IndexError, KeyError):
                    # More result sets than addresses, or a malformed row:
                    # report it and keep what was parsed so far.
                    color.pError('out of index')
                    print('index', index)
                    print('attribute', attributes)
                    break
                block_hash.append(attributes[0])
                transactions.append(data)

    df = pd.DataFrame(data=transactions, columns=names_transaction)
    df.to_csv(to_file, index=False)
    color.pDone('Done')

    return block_hash
def extract(database):
    """Compute per-row transaction features and write them to a CSV.

    The table at ``database`` is loaded with missing values replaced by 0.
    Each row's address, times and values are passed to ``tl.basic_features``
    and the collected results are written to
    ``feature/<a>_<b>_feature.csv``, where ``<a>`` and ``<b>`` are the first
    two underscore-separated parts of the input file's base name.

    Args:
        database: Path of the CSV with ``address``, ``time_in``, ``time_out``,
            ``val_in`` and ``val_out`` columns.
    """
    color.pInfo("Dealing with transaction data data")

    table = pd.read_csv(database)
    table = table.fillna(0)

    # Column names of the output table, in the order of the feature values.
    f_names = [
        'address',
        'nbr_tx_in',
        'nbr_tx_out',
        'Tot_in',
        'Tot_out',
        'mean_in',
        'mean_out',
        'sdev_in',
        'sdev_out',
        'gini_in',
        'gini_out',
        'avg_time_btw_tx',
        'lifetime',
    ]

    tx_features = []
    row_count = table.shape[0]
    for position in range(row_count):
        record = table.iloc[position]
        address = record['address']
        time_in = record['time_in']
        time_out = record['time_out']
        val_in = record['val_in']
        val_out = record['val_out']
        if val_in != '' or val_out != '':
            # No label is passed for the first argument.
            tx_features.append(
                tl.basic_features(None, address, time_in, time_out, val_in,
                                  val_out))

    tl.compute_time(t0)

    df_features = pd.DataFrame(tx_features, columns=f_names)

    stem = os.path.basename(database).split('.')[0]
    stem_parts = stem.split('_')
    f_file = os.path.join(
        'feature', stem_parts[0] + '_' + stem_parts[1] + '_feature.csv')
    df_features.to_csv(f_file, index=None)
    color.pDone('Have written feature file ' + f_file + '.')
Beispiel #7
0
def collectTxnOut(p, addr, timeout=200):
    """Collect the outgoing internal transactions for an address file.

    Redirects the query output to ``result/<name>_out.out``, runs the
    timestamp/value query and parses the dump into ``result/<name>_out.csv``.

    Args:
        p: Interactive process handle exposing ``sendline`` and ``expect``
            (presumably a pexpect session attached to psql -- confirm with
            the caller).
        addr: Path of the address file; its base name without extension is
            used to name the files written under ``result``.
        timeout: Not used by this function; kept for interface
            compatibility.

    Returns:
        Path of the CSV holding the outgoing transactions.
    """
    import sql_query as sq

    color.pInfo('Collecting transactions out of contract')
    query_out = [
        "select timestamp, value from internal_transaction where from_address='",
        "' and value!='0' limit 1000;\r",
    ]
    name = os.path.basename(addr).split('.')[0]
    out_file = os.path.join('result', name + '_out.out')
    txn_file = os.path.join('result', name + '_out.csv')

    # Redirect query output to out_file, then run the query.
    color.pInfo('Sending outcoming transaction query to psql server')
    # '\\o' is the literal backslash-o meta command; the unescaped '\o' form
    # is an invalid escape sequence in Python string literals.
    p.sendline('\\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_out, p)
    color.pDone('Have generated ' + out_file + '.')

    # Parse the dump into the transaction CSV.
    deal_sql.deal_out(addr, out_file, txn_file)

    return txn_file
Beispiel #8
0
def deal_in_timestamp(txn_file, time_file):
    """Fill the ``timestamp`` column of a transaction CSV from a raw dump.

    Every line of ``time_file`` that starts with ``2`` is taken as one
    timestamp. The timestamps are then assigned, in order, to the rows of
    ``txn_file`` whose ``address`` value is truthy, and the table is written
    back to ``txn_file``.

    The dump is no longer modified: earlier versions appended an ``EOF``
    marker through a shell command, which broke on paths containing shell
    metacharacters and never terminated when the file did not end with a
    newline. A literal ``EOF`` line left behind by such a run still stops the
    scan. An empty transaction table is now written back unchanged instead of
    failing on the lookup of its first row.

    Args:
        txn_file: Path of the transaction CSV; read and overwritten.
        time_file: Path of the raw timestamp dump.

    Raises:
        FileNotFoundError: If ``time_file`` does not exist.
        IndexError: If the dump holds fewer timestamps than there are rows to
            fill.
    """
    color.pInfo('Dealing with ' + time_file)

    transactions = pd.read_csv(txn_file, low_memory=False)
    timestamps = []

    with open(time_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if line == 'EOF':
                # Sentinel written by older runs; nothing after it is data.
                break
            if line.startswith('2'):
                timestamps.append(line)
                if len(timestamps) % 1000000 == 0:
                    color.pDone('dealed ' + str(len(timestamps)) + ' timestamps')

    color.pInfo('adding timestamps to transaction')
    row_count = transactions.shape[0]
    if row_count:
        addresses = transactions['address']
        j = 0
        last = addresses[0]
        for i in range(row_count):
            current = addresses[i]
            if current:
                transactions.loc[i, 'timestamp'] = timestamps[j]
                if current != last:
                    # Progress note whenever a new address begins.
                    color.pInfo(current + ' transaction:' + str(i))

                last = current
                j += 1
    color.pInfo('writing to ' + txn_file + ' .')
    transactions.to_csv(txn_file, index=False)
    color.pDone('Done')
Beispiel #9
0
def collectTxnIn(p, addr, timeout=200):
    """Collect incoming external and internal transactions for an address file.

    Steps:
      1. Query value and block hash of external transactions and parse the
         dump into ``result/<name>_in.csv``.
      2. Query the timestamps of the collected block hashes and merge them
         into that CSV.
      3. Query timestamp and value of internal transactions and parse the
         dump into ``result/<name>inter_in.csv``.
      4. Append the internal rows to the external ones, sort by address and
         overwrite ``result/<name>_in.csv``.

    Args:
        p: Interactive process handle exposing ``sendline`` and ``expect``
            (presumably a pexpect session attached to psql -- confirm with
            the caller).
        addr: Path of the address file; its base name without extension is
            used to name the files written under ``result``.
        timeout: Not used by this function; kept for interface
            compatibility.

    Returns:
        Path of the CSV holding all incoming transactions.
    """
    import sql_query as sq

    color.pInfo('Collecting transactions into contract')
    query_in = [
        "select block_hash,value from external_transaction where to_address='",
        "' and value!='0' limit 1000;",
        "select timestamp, value from internal_transaction where to_address='",
        "' and value!='0' limit 1000;"
    ]
    name = os.path.basename(addr).split('.')[0]

    # External transactions: value and block hash.
    out_file = os.path.join('result', name + '_in.out')
    color.pInfo('Sending incoming transaction query to psql server')
    # '\\o' is the literal backslash-o meta command; the unescaped '\o' form
    # is an invalid escape sequence in Python string literals.
    p.sendline('\\o ' + out_file)
    p.expect('#')
    sq.val_sql(addr, query_in[:2], p)
    color.pDone('Have generated ' + out_file + '.')

    # Parse the dump; the block hashes are needed for the timestamp query.
    txn_file = os.path.join('result', name + '_in.csv')
    time_file = os.path.join('result', name + '_time.out')
    block_hash = deal_sql.deal_in(addr, out_file, txn_file)

    color.pInfo('Sending incoming timestamp query to psql server')
    p.sendline('\\o ' + time_file)
    p.expect('#')
    sq.timestamp_sql(block_hash, p)
    color.pDone('Have generated ' + time_file + '.')

    # Merge the timestamps into the external transaction CSV.
    deal_sql.deal_in_timestamp(txn_file, time_file)

    # Internal transactions: timestamp and value come from a single query.
    out_inter_file = os.path.join('result', name + '_internal.out')
    color.pInfo('Sending incoming transaction in internal_trx to psql server')
    p.sendline('\\o ' + out_inter_file)
    p.expect('#')
    sq.val_sql(addr, query_in[-2:], p)
    color.pDone('Have generated ' + out_inter_file + '.')

    txn_file_inter = os.path.join('result', name + 'inter_in.csv')
    deal_sql.deal_out(addr, out_inter_file, txn_file_inter)

    df1 = pd.read_csv(txn_file)
    df2 = pd.read_csv(txn_file_inter)
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    df = pd.concat([df1, df2])
    # sort_values returns a new frame, so the result has to be kept. A stable
    # sort keeps the existing order of the rows within each address.
    df = df.sort_values(by='address', kind='mergesort')
    df.to_csv(txn_file, index=None)
    color.pImportant('incoming txn shape' + str(df.shape))

    return txn_file
def combine(f1, f2, w_file):
    """Stack the rows of two CSV files and write the result to ``w_file``.

    Args:
        f1: Path of the first CSV; its rows come first.
        f2: Path of the second CSV.
        w_file: Path of the CSV to write (without the index column).
    """
    frames = [pd.read_csv(source) for source in (f1, f2)]
    stacked = pd.concat(frames, axis=0)
    stacked.to_csv(w_file, index=False)
    color.pDone('Written ' + w_file + '.')