def GET(self, id1, id2, id3, id4, type):
    # image_url_regex = r'/([a-z0-9]{2})/([a-z0-9]{2})/([a-z0-9]{19,36})(-[sc]\d{2,4})?\.(gif|jpg|jpeg|png)$'
    id = '{0}{1}{2}'.format(id1, id2, id3)
    from store import Store
    store = Store()
    file = store.get(id)
    if file is None:
        store.close()
        return render.error("not found", '/')
    # path of the original (full-size) file; the id4 size suffix is
    # deliberately skipped ({3} is unused in the format string)
    org_path = '{0}/{1}/{2}.{4}'.format(id1, id2, id3, id4, type)
    org_file = '{0}/{1}'.format(THUMB_ROOT, org_path)
    if not os.path.exists(org_file):
        save_file(file, org_file)
    if id4 is None:
        dst_path = org_path
        dst_file = org_file
    else:
        dst_path = '{0}/{1}/{2}{3}.{4}'.format(id1, id2, id3, id4, type)
        dst_file = '{0}/{1}'.format(THUMB_ROOT, dst_path)
        size = int(id4[2:])  # strip the leading '-s' / '-c' marker
        if size not in SUPPORTED_SIZE:
            print('unsupported size: {0}'.format(size))
            store.close()
            return render.error("not found", '/')
        thumb_image(org_file, size, dst_file)
    server_soft = web.ctx.env['SERVER_SOFTWARE']
    if server_soft[:5] == 'nginx' and os.name != 'nt':
        # behind nginx: let it serve the file itself via an internal location
        store.close()
        web.header('X-Accel-Redirect', '{0}/{1}'.format(THUMB_PATH, dst_path))
        return
    # otherwise stream the file content ourselves
    web.header('Content-Type', str(file.type))
    web.header('Content-Length', '{0.length}'.format(file))
    web.header('Via', 'store')
    with open(dst_file, 'rb') as distfile:
        data = distfile.read()
    store.close()
    return data
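# NOTE: a plausible sketch of the save_file and thumb_image helpers the
# handler above calls; their real bodies aren't shown, so the PIL usage and
# the assumption that the stored object exposes read() are guesses.
import os
from PIL import Image

def save_file(file, org_file):
    # write the stored blob out to disk, creating shard directories as needed
    dirname = os.path.dirname(org_file)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(org_file, 'wb') as fp:
        fp.write(file.read())

def thumb_image(org_file, size, dst_file):
    # scale the original so it fits inside a size x size bounding box
    im = Image.open(org_file)
    im.thumbnail((size, size), Image.ANTIALIAS)
    im.save(dst_file)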
def GET(self, page_str):
    print page_str
    page = int(page_str)
    limit = 12
    if page < 1:
        page = 1
    start = limit * (page - 1)
    store = Store()
    gallery = store.browse(limit, start)
    if hasattr(store, 'close'):
        store.close()
    import datetime
    # serialize datetime objects as ISO 8601 strings
    dthandler = lambda obj: obj.isoformat() if isinstance(obj, datetime.datetime) else None
    result = json.dumps(gallery, default=dthandler)
    # JSONP: wrap the payload in the caller-supplied callback
    params = web.input()
    if hasattr(params, 'callback'):
        result = params['callback'] + '(' + result + ')'
    return result
def POST(self):
    file = web.input()['file']
    fileName = web.input()['filename']
    # web.debug(file['user_file'].filename)     # the file name
    # web.debug(file['user_file'].value)        # the file content
    # web.debug(file['user_file'].file.read())  # or use a file object
    from store import Store
    store = Store()
    id = store.store(file, name=fileName)
    store.close()
    params = web.input()
    if hasattr(params, 'callback'):
        # JSONP upload: redirect back to the caller with the new id
        location = params['callback'] + '?' + '(' + json.dumps(id) + ')'
        raise web.redirect(location)
    return '(' + json.dumps(id) + ')'
class TestStore(unittest2.TestCase):

    def setUp(self):
        self.store = Store(name="scratch")
        self.ns = Namespace('http://example.com/#')

    def tearDown(self):
        self.store.close()

    def testSize(self):
        """Tests the size of the repository."""
        self.assertEqual(len(self.store), 0)

    def testAdd(self):
        bob = self.ns['bob']
        name = self.ns['name']
        value = Literal('Bob Bilbins')
        self.store.add((bob, name, value))
        self.assertEqual(len(self.store), 1)

    def testRemove(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.assertEqual(len(self.store), 1)
        self.store.remove(triple)
        self.assertEqual(len(self.store), 0)

    def testTriples(self):
        """Tests the search by triple."""
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        for tri in self.store.triples((self.ns['alice'], None, None)):
            for i in range(3):
                self.assertEqual(tri[i], triple[i])

    def testSimpleSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        for tri in self.store.query("SELECT ?s ?p ?o WHERE {?s ?p ?o .}"):
            for i in range(3):
                self.assertEqual(tri[i], triple[i])

    def testNamespacedSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        for tri in self.store.query("SELECT ?p ?o WHERE { ex:alice ?p ?o .}",
                                    initNs={'ex': self.ns}):
            for i in range(1, 3):
                self.assertEqual(tri[i - 1], triple[i])

    def testBindedSparql(self):
        triple = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(triple)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        for tri in self.store.query("SELECT ?p ?o WHERE { ?s ?p ?o .}",
                                    initBindings={'s': self.ns['alice']}):
            for i in range(1, 3):
                self.assertEqual(tri[i - 1], triple[i])

    def testDataTypes(self):
        birth = Literal('2006-01-03', datatype=_XSD_NS.date)
        comp = Literal('2006-01-01', datatype=_XSD_NS.date)
        triple = (self.ns['alice'], self.ns['birthdate'], birth)
        self.store.add(triple)
        for s, p, o in self.store.query("SELECT ?s ?p ?o WHERE {?s ?p ?o .}"):
            # compare against the literal that actually came back
            self.assertLess(comp, o)
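# NOTE: a minimal rdflib-backed Store the tests above could run against.
# This is a sketch under the assumption that Store wraps an rdflib Graph;
# the real implementation (and its persistence layer) isn't shown here.
from rdflib import Graph

class Store(object):

    def __init__(self, name="scratch"):
        self.graph = Graph(identifier=name)

    def __len__(self):
        return len(self.graph)

    def add(self, triple):
        self.graph.add(triple)

    def remove(self, triple):
        self.graph.remove(triple)

    def triples(self, pattern):
        return self.graph.triples(pattern)

    def query(self, sparql, initNs=None, initBindings=None):
        return self.graph.query(sparql, initNs=initNs or {},
                                initBindings=initBindings or {})

    def close(self):
        self.graph.close()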
import json
import os
import shutil
import tarfile
import tempfile
import time


class DP:
    # list of authors that are remembered
    author_list = []
    # list of authors to skip crawling for various reasons
    skip_list = ['[deleted]']
    # internal sqlite3 store
    store = None

    def write_to_queue(self, data, prefix='tmp'):
        fh, filename = tempfile.mkstemp(dir=os.path.join(tmpdir, 'dp', 'queue'),
                                        prefix=prefix)
        os.close(fh)
        fp = open(filename, 'w')
        fp.write(data)
        fp.close()
        return os.path.split(filename)[1]

    def seed(self):
        self.store = Store('/collection/sharvey/reddit/')
        self.store.open()
        print 'Created seed queue'
        return self.write_to_queue('a,t3_1u4kuf', 'tmp_a_')

    def process_author(self, abspath, filename):
        fp = open(os.path.join(abspath, filename))
        blob = json.load(fp)
        fp.close()
        elements = parser.extract_listing_elements(blob)
        self.store.store_author(elements)
        return []

    def process_snapshot(self, abspath, filename):
        filetype = filename.split('_')
        fp = open(os.path.join(abspath, filename))
        blob = json.load(fp)
        fp.close()
        if filetype[0] == 'a':
            posts = blob['posts']
            nav = blob['nav']
            start_hit = False
            queue_file_list = []
            queue_list = []
            for sube in posts:
                utctime = int(sube['created_utc'])
                sttime = time.strftime('%Y%m%d', time.gmtime(utctime))
                if int(sttime) > int(dateend):
                    continue
                elif int(sttime) < int(datestart):
                    start_hit = True
                    break
                else:
                    queue_list.append('p,' + sube['id'])
            queue_file_list.append(
                self.write_to_queue('\n'.join(queue_list), 'tmp_p_'))
            # only follow the pagination cursor if we haven't walked past
            # the start of the crawl window
            if not start_hit:
                if nav['after'] is not None:
                    queue_file_list.append(
                        self.write_to_queue('a,' + nav['after'], 'tmp_a_'))
            return queue_file_list
        elif filetype[0] == 'p':
            post = blob['post']
            comments = blob['comments']
            self.store.store_snapshot(post, comments)
            if crawl_author:
                queue_file_list = []
                if post['author'] not in self.author_list and post['author'] not in self.skip_list:
                    queue_file_list.append(
                        self.write_to_queue('u,' + post['author'], 'tmp_u_'))
                    self.author_list.append(post['author'])
                for comment in comments:
                    if comment['author'] not in self.author_list and comment['author'] not in self.skip_list:
                        queue_file_list.append(
                            self.write_to_queue('u,' + comment['author'], 'tmp_u_'))
                        self.author_list.append(comment['author'])
                return queue_file_list
        return []

    def process_snapshots(self, abspath, filename_list):
        post_tuples = []
        for filename in filename_list:
            fp = open(os.path.join(abspath, filename))
            blob = json.load(fp)
            fp.close()
            post_tuples.append((blob['post'], blob['comments']))
        self.store.store_batch_snapshot(post_tuples)
        if crawl_author:
            queue_file_list = []  # TODO: author crawling for batches is unfinished

    def run(self):
        seedfile = self.seed()
        os.rename(os.path.join(tmpdir, 'dp', 'queue', seedfile),
                  os.path.join(tmpdir, 'server', 'queue', seedfile))
        sleepcount = 0
        while True:
            for filename in os.listdir(os.path.join(tmpdir, 'dp', 'staging')):
                sleepcount = 0
                self.store.open()
                prefix = filename.split('.')[0]
                absfilename = os.path.join(tmpdir, 'dp', 'staging', filename)
                abspath = os.path.join(tmpdir, 'dp', 'staging', prefix)
                os.mkdir(abspath)
                try:
                    tar = tarfile.open(absfilename)
                    tar.extractall(abspath)
                    tar.close()
                except Exception:
                    print 'Probably empty tar'
                    os.unlink(absfilename)
                    shutil.rmtree(abspath)
                    continue
                if debug:
                    os.rename(absfilename,
                              os.path.join(tmpdir, 'dp', 'archive', filename))
                else:
                    os.unlink(absfilename)
                post_snapshots = []
                print 'Server >> ' + str(os.listdir(abspath))
                for jsonfile in os.listdir(abspath):
                    filetype = jsonfile.split('_')
                    # format of request:
                    # | a | <pid>
                    # | p | <pid>
                    # | u | <username> | <after>
                    queue_list = []
                    if filetype[0] == 'a':
                        queue_list = self.process_snapshot(abspath, jsonfile)
                    elif filetype[0] == 'p':
                        post_snapshots.append(jsonfile)
                    elif filetype[0] == 'u':
                        queue_list = self.process_author(abspath, jsonfile)
                    for queue_file in queue_list:
                        print queue_file
                        print os.path.join(tmpdir, 'dp', 'queue', queue_file), \
                            os.path.join(tmpdir, 'server', 'queue', queue_file)
                        os.rename(os.path.join(tmpdir, 'dp', 'queue', queue_file),
                                  os.path.join(tmpdir, 'server', 'queue', queue_file))
                        print 'Server << ' + queue_file
                self.process_snapshots(abspath, post_snapshots)
                # cleanup dir
                shutil.rmtree(abspath)
            else:
                # runs after each scan of the staging dir (the loop has no
                # break): throttle polling, and close the store when idle
                time.sleep(0.2)
                if sleepcount < 10:
                    sleepcount += 1
                else:
                    self.store.close()
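# NOTE: a minimal sketch of how DP might be wired up. tmpdir, datestart,
# dateend, crawl_author and debug are module-level settings the class reads;
# the values below are placeholders, not taken from the original deployment.
tmpdir = '/tmp/reddit-crawl'
datestart = '20130101'  # YYYYMMDD window for posts to queue
dateend = '20140101'
crawl_author = True     # also queue author-listing crawls
debug = False           # keep processed tars in dp/archive instead of deleting

if __name__ == '__main__':
    DP().run()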
from pprint import pprint

from store import Store

s = Store('data')
db1 = s.database('db1')
t1 = db1.table('t1', a='int', b='str', c='float', primary_key=['a', 'c'])

with s.transaction():
    for i in range(10):
        for j in range(10):
            t1.insert(a=i, b='2', c=float(j))

a = t1.get(1, 3.0)
b = t1.get(2, 9.0)

q = t1.select('b', 'c')
q = q.where(t1.a >= 1, t1.a < 9, t1.c == 5.0)
r = q.all()

print a.get()
print b.get()
print r.get()

s.close()
def POST(self, id):
    from store import Store
    store = Store()
    data = json.dumps(store.delete(id))
    store.close()
    return data
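# NOTE: a hypothetical web.py bootstrap tying the handlers above together.
# The image regex comes from the comment in the image GET handler; the other
# URL patterns and the handler class names are assumptions for illustration.
import web

urls = (
    r'/([a-z0-9]{2})/([a-z0-9]{2})/([a-z0-9]{19,36})(-[sc]\d{2,4})?\.(gif|jpg|jpeg|png)', 'Image',
    r'/gallery/(\d+)', 'Gallery',
    r'/upload', 'Upload',
    r'/delete/([a-z0-9]+)', 'Delete',
)

if __name__ == '__main__':
    app = web.application(urls, globals())
    app.run()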