#!/usr/bin/python
# borrowed from http://alexgorale.com/how-to-program-block-chain-explorers-with-python-part-1
import blocktools
import os
import fnmatch
import copy
import pdb
from hashlib import sha256
import struct
import binascii
import array
import pickle as pkl
import time
# define global variables

# Directory where the block files are stored in *.dat format.
# This is a relative path, so it is resolved against the current working
# directory; note that it ends with a trailing '/'.
BLKDIR = '../Library/Application Support/Bitcoin/blocks/'

# Hash of the last block in the all_blockheaders.pkl file.
# main() passes this value to get_mainchain_blocks().
BLKHASH_FINAL = '00000000000000000355f2dae74e2f6eaee07314702efee154873f2c461b7591'
def end_of_blkfile(f, magic_id):
    """Report whether the block file holds no further non-zero data.

    `magic_id` is the 4-byte string that was just read from `f`.  A short
    (or empty) read means the file is exhausted.  When the word is zero, the
    file is scanned forward four bytes at a time until a non-zero word turns
    up; if one is found, `f` is moved back to the position it had on entry
    so the caller can step over the zeros itself.

    Returns True when the end of the file was reached (the position of `f`
    is not restored in that case), False otherwise.
    """
    # fewer than 4 bytes (including none at all) means nothing is left
    if len(magic_id) < 4:
        return True

    skipped = 0  # bytes consumed here while looking past zero words
    word = magic_id
    while struct.unpack('I', word)[0] == 0:
        word = f.read(4)
        skipped += 4
        if len(word) < 4:
            # only zeros (or a partial word) remained before end of file
            return True

    # not at end of file: undo the look-ahead reads and report that
    f.seek(-skipped, 1)
    return False
def _hex_str(raw):
    """Return the bytes in `raw` as a lowercase hex string of the native str type."""
    hexed = binascii.hexlify(raw)
    # hexlify returns bytes on Python 3; decode so dictionary keys are
    # ordinary strings on both Python 2 and Python 3
    if not isinstance(hexed, str):
        hexed = hexed.decode('ascii')
    return hexed


def get_all_blockheaders(blkdir):
    """Index every block found in the blk*.dat files of a directory.

    Each file is scanned block by block: a 4-byte magic id, a 4-byte block
    size and the first 80 bytes of the block are read, then the scan jumps
    ahead by the block size.  Runs of zero words between blocks are skipped.

    Parameters:
        blkdir: directory containing the blk*.dat files (a trailing '/' is
            accepted but not required).

    Returns:
        A dictionary keyed by block hash (hex string).  Each value is a
        dictionary with the keys 'blkfile' (name of the file holding the
        block), 'byte_offset' (offset of the block's magic id inside that
        file) and 'previous_hash' (hex string).  Every block encountered is
        included, whether or not it is on the main chain.

    Raises:
        ValueError: if the data at a block boundary does not start with the
            expected magic id.
    """
    magic_expected = 0xd9b4bef9
    blocks = {}
    # sorted so files are processed (and reported) in a repeatable order
    blkfiles = sorted(fnmatch.filter(os.listdir(blkdir), 'blk*.dat'))
    for blkfile in blkfiles:
        start_time = time.time()
        # reset per file so state from a previous file is never reused
        block_size = None
        current_hash = None
        with open(os.path.join(blkdir, blkfile), 'rb') as f:
            n = 0  # start at zero bytes into file
            while True:
                f.seek(n, 0)
                # get magic id and make sure we are actually at the start of a block
                magic_id = f.read(4)
                if end_of_blkfile(f, magic_id):
                    break
                # explicit little-endian so the value does not depend on the host
                magic_id = struct.unpack('<I', magic_id)[0]
                # end_of_blkfile() has confirmed a non-zero word lies ahead,
                # so these reads cannot run off the end of the file
                while magic_id == 0:
                    magic_id = struct.unpack('<I', f.read(4))[0]
                    n += 4
                if magic_id != magic_expected:
                    if block_size is None:
                        # first block of the file: there is no previous block
                        # size to step back by
                        raise ValueError('MAGIC ID: not equal to expected value '
                                         + '0x%08x' % magic_id)
                    # hack for blk000328.dat: step back by the size of the
                    # previously indexed block and retry from there
                    n -= block_size
                    f.seek(n, 0)
                    magic_id = struct.unpack('<I', f.read(4))[0]
                    if magic_id != magic_expected:
                        raise ValueError('MAGIC ID: not equal to expected value '
                                         + '0x%08x' % magic_id)
                # get block size, hash, and previous hash
                size_bytes = f.read(4)
                block_prefix = f.read(80)
                if len(size_bytes) < 4 or len(block_prefix) < 80:
                    # the file ends in the middle of a block header; there is
                    # nothing complete left to index
                    break
                block_size = struct.unpack('<I', size_bytes)[0]
                # double SHA-256 of the 80-byte prefix, byte-reversed, as hex
                current_hash = _hex_str(sha256(sha256(block_prefix).digest()).digest()[::-1])
                previous_hash = _hex_str(block_prefix[4:36][::-1])
                # store block data in dictionary
                blocks[current_hash] = {'blkfile': blkfile, 'byte_offset': n, 'previous_hash': previous_hash}
                n += block_size + 8  # skip magic id, size field and block body
        print("finished " + blkfile + " in " + str(time.time() - start_time) + " seconds")
        if current_hash is not None:
            print("%s %s" % ("final block hash in file = ", current_hash))
    return blocks
def get_mainchain_blocks(all_blocks, blkhash_final):
    """Return the blocks on the chain that ends at `blkhash_final`.

    Starting from `blkhash_final`, the 'previous_hash' entry of each block is
    followed backwards through `all_blocks` until a hash is reached that has
    no record there.  Blocks that are not reachable this way are left out.

    NOTE(review): the original body was an unimplemented stub that always
    returned an empty dictionary; this walk is the behaviour implied by the
    function name and its arguments -- confirm it matches the intended design.

    Parameters:
        all_blocks: dictionary mapping block hash to a dictionary that has a
            'previous_hash' key (the structure built by get_all_blockheaders).
        blkhash_final: hash of the last block of the chain to extract.

    Returns:
        A dictionary with the same value objects as `all_blocks`, restricted
        to the blocks on the chain.  Empty when `all_blocks` is empty or does
        not contain `blkhash_final`.
    """
    blocks = {}
    if not all_blocks:
        return blocks
    blkhash = blkhash_final
    # stop at the first hash without a record (the parent of the earliest
    # known block); the second condition guards against a cyclic index
    while blkhash in all_blocks and blkhash not in blocks:
        entry = all_blocks[blkhash]
        blocks[blkhash] = entry
        blkhash = entry['previous_hash']
    return blocks
def sample_block_parser():
    """Print the leading fields of the first block stored in blk00000.dat.

    The fields are read one after another with the blocktools helpers and
    printed with a label each: the block framing and header, then the first
    input and first output of the first transaction, and its lock time.
    Nothing is returned.
    """
    blkfile = '../Library/Application Support/Bitcoin/blocks/blk00000.dat'
    with open(blkfile, 'rb') as blockfile:
        # every value is read immediately before it is printed, so the read
        # order below is the on-disk field order
        magic_number = blocktools.uint4(blockfile)
        print("Magic Number:\t %8x" % magic_number)
        blocksize = blocktools.uint4(blockfile)
        print("Blocksize:\t %u" % blocksize)

        # Block Header
        version = blocktools.uint4(blockfile)
        print("Version:\t %d" % version)
        prev_block = blocktools.hash32(blockfile)
        print("Previous Hash\t %s" % blocktools.hashStr(prev_block))
        merkle_root = blocktools.hash32(blockfile)
        print("Merkle Root\t %s" % blocktools.hashStr(merkle_root))
        timestamp = blocktools.time(blockfile)
        print("Time\t\t %s" % str(timestamp))
        difficulty = blocktools.uint4(blockfile)
        print("Difficulty\t %8x" % difficulty)
        nonce = blocktools.uint4(blockfile)
        print("Nonce\t\t %s" % nonce)

        # first transaction
        tx_count = blocktools.varint(blockfile)
        print("Tx Count\t %d" % tx_count)
        tx_version = blocktools.uint4(blockfile)
        print("Version Number\t %s" % tx_version)

        # first input
        input_count = blocktools.varint(blockfile)
        print("Inputs\t\t %s" % input_count)
        prev_tx = blocktools.hash32(blockfile)
        print("Previous Tx\t %s" % blocktools.hashStr(prev_tx))
        prev_index = blocktools.uint4(blockfile)
        print("Prev Index \t %d" % prev_index)
        script_len = blocktools.varint(blockfile)
        print("Script Length\t %d" % script_len)
        script_sig = blockfile.read(script_len)
        # shown twice: once as hex text, once decoded back from hex
        print("ScriptSig\t %s" % blocktools.hashStr(script_sig))
        print("ScriptSig\t %s" % blocktools.hashStr(script_sig).decode('hex'))
        sequence = blocktools.uint4(blockfile)
        print("Seq Num\t\t %8x" % sequence)

        # first output
        output_count = blocktools.varint(blockfile)
        print("Outputs\t\t %s" % output_count)
        raw_value = blocktools.uint8(blockfile)
        print("Value\t\t %s" % str((raw_value * 1.0) / 100000000.00))
        script_len = blocktools.varint(blockfile)
        print("Script Length\t %d" % script_len)
        script_pubkey = blockfile.read(script_len)
        print("Script Pub Key\t %s" % blocktools.hashStr(script_pubkey))

        lock_time = blocktools.uint4(blockfile)
        print("Lock Time %8x" % lock_time)
        print("")
def main():
    """Load the pickled block-header index and run the main-chain extraction.

    Reads '../cardinal-blocktools-data/all_blockheaders.pkl' and passes its
    contents, together with BLKHASH_FINAL, to get_mainchain_blocks().
    Returns nothing.
    """
    # To print the fields of a sample block instead:
    #   sample_block_parser()
    #
    # To rebuild the pickled index from the raw block files:
    #   blocks = get_all_blockheaders(BLKDIR)
    #   f = open('../cardinal-blocktools-data/all_blockheaders.pkl', 'wb')
    #   pkl.dump(blocks, f)
    #   f.close()
    # The final blockhash in the existing pkl is
    # '00000000000000000355f2dae74e2f6eaee07314702efee154873f2c461b7591'.

    # NOTE: unpickling can execute arbitrary code; only load a file that was
    # produced by this script.
    # The with-block closes the file even if loading fails.
    with open('../cardinal-blocktools-data/all_blockheaders.pkl', 'rb') as f:
        all_blocks = pkl.load(f)

    # the result is not used any further yet
    get_mainchain_blocks(all_blocks, BLKHASH_FINAL)


# run only when executed as a script, not when imported
if __name__ == "__main__":
    main()