def optimize(encoder):
    """Rewrite the loaded index as a compressed postings file plus a bucketed dictionary.

    Three files are written to the current working directory:

    * ``back_index.bin`` -- for every key (in ascending order) the output of
      ``encoder.compress`` for that key's values.
    * ``dict.bin`` -- the buckets, concatenated.  Each bucket is a sequence of
      ``(key, ptr, length)`` records, each packed as three native ``int``s.
    * ``dict_head.bin`` -- the bucket count followed by ``count + 1`` offsets
      into ``dict.bin`` (the start of every bucket, then the total size).

    Args:
        encoder: object exposing ``compress(values, fout)``.  Its return value
            is accumulated into the running offset ``ptr``, so it is presumably
            the number of bytes written to ``fout`` -- confirm against the
            encoder implementation.
    """
    # The second value returned by index.load() is not needed here.
    m, _ = index.load()

    # Floor division keeps the bucket count an integer even when true division
    # is in effect; the result is always at least 1.
    n_of_buckets = max(len(m) // 100 + 1, len(m))
    res = [bytearray() for _ in range(n_of_buckets)]

    with open('back_index.bin', 'wb') as fout:
        ptr = 0
        for key in sorted(m):
            # m[key] is split into 4-byte pieces, each decoded as an unsigned int.
            values = [struct.unpack('I', chunk)[0] for chunk in chunks(m[key], 4)]
            l = encoder.compress(values, fout)
            # One record per key: (key, offset of its data, length of its data).
            # 'iii' yields the same bytes as three consecutive 'i' packs.
            res[key % n_of_buckets].extend(struct.pack('iii', key, ptr, l))
            ptr += l

    # check compression/decompression function
    # (written for an older layout where res[key] was the tuple (ptr, l))
    # with open('back_index.bin', 'rb') as f:
    #     for key in m:
    #         rr = simple9.decompress(f, res[key][0], res[key][1])
    #         if not (set(sorted(m[key])) == rr):
    #             print('ERROR')
    #             print(sorted(m[key]))
    #             print(rr)
    #             print(res[key])
    #             raise Exception()

    # Context managers guarantee both files are closed even if a write fails.
    with open('dict.bin', 'wb') as dct, open('dict_head.bin', 'wb') as dct_head:
        it = 0
        dct_head.write(struct.pack('I', n_of_buckets))
        for bucket in res:
            # Offset at which this bucket starts inside dict.bin.
            dct_head.write(struct.pack('I', it))
            dct.write(bucket)
            it += len(bucket)
        # Trailing offset marks the end of the last bucket.
        dct_head.write(struct.pack('I', it))
def sync_index(from_file=None):
    """Synchronise the stored sources with an index file.

    The index is loaded and compared with ``compare_index``; every changed or
    new source is passed to ``sync_source`` and every deleted source is removed
    from ``db.sources`` in a single unordered bulk operation.

    Args:
        from_file: path of the index file to read.  When ``None`` the path
            returned by ``download_index()`` is used.
    """
    if from_file is None:
        from_file = download_index()

    # Keep the file open until the comparison is finished, in case
    # index.load() reads lazily, then close it deterministically.
    with open(from_file, 'r', encoding="Latin-1") as index_file:
        index_data = index.load(index_file)
        changed_or_new_sources, deleted_sources = compare_index(index_data)

    for s in changed_or_new_sources:
        sync_source(s)

    # index is saved in sync_sources;
    # no need to explicitly save here

    # delete removed sources
    # TODO backup also?
    if deleted_sources:
        bulkOp = db.sources.initialize_unordered_bulk_op()
        for deleted_source in deleted_sources:
            # The loop variable used to be `s` while the body read the
            # undefined name `deleted_source`, raising NameError.
            bulkOp.find({"index.code": deleted_source["code"]}).remove()
            #bulkOp.find({"_id": deleted_source["_id"]}).remove()
        bulk_result = bulkOp.execute()
        print(bulk_result)
import index
from random import randrange as rnd

index.load()

# Six rounds.  Round n picks 3+n random checkpoints from the 5+2n best-rated
# pictures, chains them into a path and exports the result as path<n>.html.
for n in range(6):
    pool_size = 5 + n * 2
    stars = sorted(index.pictures(),
                   key=lambda pic: pic.rating,
                   reverse=True)[:pool_size]

    query = []
    print('Checkpoints:')
    for number in range(1, 4 + n):
        # Draw without replacement from the remaining candidates.
        p = stars[rnd(len(stars))]
        query.append(p)
        stars.remove(p)
        print('{}. {}'.format(number, p.name))

    path = index.chain(query=query)
    print('\nPath:')
    names = [step.name for step in path]
    print(' > '.join(names))

    # Checkpoints that made it into the path, then a None separator, then the
    # path itself.
    reference = [chosen for chosen in query if chosen in path]
    index.export_html(reference + [None] + path, 'path{}.html'.format(n))
# handlers implemented by browser f = handlers.get(event.keycode) if f: f(browser, event.keycode) if browser.redraw: browser.update(event.keycode) # print time(), 'leave keyhandler' #if len(browser.img.relates.keys()) > 0: #browser.img = browser.img.relates.keys()[0] #print browser.img.location # Ok Go index.load(recover=True) # create tkinter window root = tk.Tk() root.title('tumblr img browser') # make the root window the size of the image root.geometry("%dx%d+%d+%d" % (1024, 740, 0, 0)) root.bind("<Key>", key) # instantiate browser class browser = Browser(root) # screen size: w = root.winfo_screenwidth() h = root.winfo_screenheight() print 'screen size {}x{}'.format(w,h) # start the event loop
import index
import source
import os
import urllib
# `import urllib` alone does not load the submodules used below.
import urllib.parse
import urllib.request

index_baseurl = "http://www.ibiblio.org/contradance/index/"

# Collect the distinct source URLs referenced by the index.  The file stays
# open while rows are consumed, in case index.load() reads lazily.
set_to_download = set()
with open("samples/index/Michael Dyck's Contradance Index_ Sources.html",
          encoding="Latin-1") as indexfile:
    indexdata = index.load(indexfile)
    for row in indexdata:
        code_url = urllib.parse.urljoin(index_baseurl, row['code_link'],
                                        allow_fragments=False)
        # Read but not used yet.
        first_entered = row['date first entered']
        last_revised = row['date last revised']
        set_to_download.add(code_url)

# Download every source, named after the last component of its URL path.
for url in set_to_download:
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    urllib.request.urlretrieve(url, "samples/source/" + filename)
# -*- coding: utf-8 -*- import index as ix ix.load() imgs=sorted(ix.picture.pictures(), key=lambda p:p.rating) blogs=sorted(ix.tumblr.blogs(), key=lambda t:t.avg_img_rating()) print 'distributing blog scores...' scores=ix.tumblr.dist_scores() print 'sorting blogs by score' hi=sorted(scores.items(), key=lambda t:t[1]) stars = {} for p in imgs[-40:]: t = p.origin if t: stars[t] = stars.get(t,0)+p.rating print ' '.join(['name','stars','imgs','local/blog', 'links','in/out']), print 'avg* - SCORE' print '_'*75 for t,s in sorted(stars.items(),key=lambda x:x[1]): print u'{} - {}* {}/{}imgs ⇶{}/{}⇶'.format( t.name,s,len(t.proper_imgs),len(t.images), len(t.linked),len(t.links)), print '{:.2f}* - score {:.2f}'.format(t.avg_img_rating(), scores.get(t)) print 'ok'
def __init__(self):
    """Load the index and graph and build the reverse of the index mapping."""
    loaded = index.load(conf.index_filename, conf.graph_filename)
    self.index, self.graph = loaded

    # Reverse lookup: the value stored in self.index -> its filename key.
    self.revindex = dict(
        (i, filename) for filename, i in self.index.items()
    )
    print("Done loading")
#!/usr/bin/python import index import config as conf (index,g) = index.load( conf.index_filename , conf.graph_filename ) print "Done loading:" , len(index) , len(g.export())