def thread_control(urls, parent):
    allthreads = []
    t_urls = []
    t_count = 32
    index = 0
    global no_of_exceptions
    while index < len(urls):
        threads = []
        while t_count > 0 and index < len(urls):
            shelfr = shelve.open('{}.db'.format(parent))
            label, url = tuple(urls[index])
            if url not in shelfr.keys():
                print index
                if 'pdf' not in url:
                    t = threading.Thread(target=get_url_data, args=(url,))
                    threads.append(t)
                    allthreads.append(t)
                    t.start()
                    t_count -= 1
                else:
                    print "skipped"
            index += 1
            shelfr.close()
        print threads
        for t1 in threads:
            t1.join(5.0)
            t_count += 1
            t_url, t_data = queue.get()
            if t_data:
                t_lists = get_label_url(t_data, parent)
            shelfw = shelve.open("{}.db".format(parent), 'c')
            shelfw[t_url] = 'present'
            shelfw.close()
    return
def get_catalog(module_path, mode='r'):
    """ Return a function catalog (shelve object) from the path module_path

        If module_path is a directory, the function catalog returned is
        from that directory.  If module_path is an actual module_name,
        then the function catalog returned is from its parent directory.
        mode uses the standard 'c' = create, 'n' = new, 'r' = read,
        'w' = write file open modes available for anydbm databases.

        Well... it should be.  Stuck with dumbdbm for now and the modes
        almost don't matter.  We do some checking for 'r' mode, but that
        is about it.

        See catalog_path() for more information on module_path.
    """
    if mode not in ['c', 'r', 'w', 'n']:
        msg = " mode must be 'c', 'n', 'r', or 'w'.  See anydbm for more info"
        raise ValueError(msg)
    catalog_file = catalog_path(module_path)
    if (catalog_file is not None) \
            and ((dumb and os.path.exists(catalog_file + '.dat'))
                 or os.path.exists(catalog_file)):
        sh = shelve.open(catalog_file, mode)
    else:
        if mode == 'r':
            sh = None
        else:
            sh = shelve.open(catalog_file, mode)
    return sh
import dbm
import os
import shelve
import sys
import traceback

# module-level registry of already-open shelves (restored from context)
_cache_shelves = {}

def _get_cache(cachepath):
    if cachepath in _cache_shelves:
        return _cache_shelves[cachepath]

    try:
        cache = shelve.open(cachepath, protocol=2)
    except dbm.error:
        # dbm error on open - delete and retry
        print('Error (%s) opening %s - will attempt to delete and re-open.'
              % (sys.exc_info()[1], cachepath))
        try:
            os.remove(cachepath)
            cache = shelve.open(cachepath, protocol=2)
        except Exception:
            print('Error on re-open: %s' % sys.exc_info()[1])
            cache = None
    except Exception:
        # unknown error
        print('Could not open cache file %s, maybe name collision. '
              'Error: %s' % (cachepath, traceback.format_exc()))
        cache = None

    # Don't fail on bad caches
    if cache is None:
        print('Using in-memory shelf for cache file %s' % cachepath)
        cache = shelve.Shelf(dict())

    _cache_shelves[cachepath] = cache
    return cache
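# Usage sketch for _get_cache above (the path is illustrative only):
# repeated calls with the same path are served from the _cache_shelves
# registry, and a corrupt dbm file silently degrades to an in-memory
# Shelf, so callers never need their own error handling.
cache = _get_cache('/tmp/demo_cache')
cache['answer'] = 42
assert _get_cache('/tmp/demo_cache') is cache  # second call hits the registry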
def __init__(self, prefix, nruns=None):
    self.prefix = prefix
    # get all cmonkey_run.db files
    self.files = np.sort(np.array(glob.glob(prefix + "???/cmonkey_run.db")))
    self.cms = shelve.open('egrin2_shelf.db', protocol=2, writeback=False)
    keys = self.cms.keys()
    for f in self.files:
        index = os.path.dirname(f).replace(self.prefix, '')
        if index in keys:
            continue
        try:
            print f
            b = cm2(f)
            self.cms[index] = b
        except:
            print 'ERROR: %s' % (f)
            e = sys.exc_info()[0]
            print e
        if nruns is not None and len(self.cms) >= nruns:
            break

    #self.tables = { tname: self.concat_tables(tname) for tname in self.cms['001'].tables.keys() }
    self.tables = shelve.open('egrin2_tables_shelf.db', protocol=2, writeback=False)
    keys = self.tables.keys()
    for tname in self.cms['001'].tables.keys():
        if tname in keys:
            continue
        tab = self.concat_tables(tname)
        self.tables[str(tname)] = tab
def harvest_program_pages(program_ids, from_cache=True):
    """
    Download the source of program pages efficiently.

    :param program_ids: A collection of UQ Program IDs.
    :return: A list of web page sources
    """
    if from_cache:
        with shelve.open('.cache/local_shelf') as db_read:
            sources = db_read.get('program_pages')
            if sources:
                return sources

    prog_url_template = 'https://www.uq.edu.au/study/program.html?acad_prog={}'
    prog_urls = (prog_url_template.format(prog_id) for prog_id in program_ids)

    print('About to download {} pages'.format(len(program_ids)))
    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        page_futures = [executor.submit(requests.get, prog_url)
                        for prog_url in prog_urls]
        print('Downloading...')
        # Wait to load all results
        results = futures.wait(page_futures)
        sources = [completed.result().content for completed in results.done]
    print('Downloaded {} pages'.format(len(sources)))

    with shelve.open('.cache/local_shelf') as db_write:
        db_write['program_pages'] = sources

    return sources
def main(argv):
    global query
    if len(argv) >= 1:
        query = argv[0]

    cached_items = shelve.open('episodes_cache')
    if not cached_items:
        cache_all(cached_items)
        cached_items.close()
        cached_items = shelve.open('episodes_cache')

    sorted_cached_items = sorted(cached_items, reverse=True)

    print('<?xml version="1.0" encoding="UTF-8"?>\n<items>')
    if cached_items:
        if must_update_cache():
            print(item_template.format("", "Cache updated",
                                       "Make your search again", "no"))
            cache_page_1(cached_items)
            sorted_cached_items = sorted(cached_items, reverse=True)
        for item in sorted_cached_items:
            search_in = str(cached_items[item][0]).lower() + " " + \
                        str(cached_items[item][1]).lower() + " " + \
                        str(cached_items[item][2]).lower()
            if query.lower() in search_in:
                print(item_template.format(cached_items[item][0],
                                           cached_items[item][1],
                                           cached_items[item][2], 'yes'))
    else:
        print(item_template.format(0, "Error", "Could not read cache file", 'no'))
    print('</items>')
    cached_items.close()
def _open_cache(cls):
    cls.lock.acquire()
    try:
        cls.cache = shelve.open(cls.cache_filename)
    except UnicodeError as e:
        cls.log.exception("Warning: Failed to open " + cls.cache_filename + ": " + unicode(e))
        # This can happen with unicode characters in the path, because the bdb
        # module on Windows converts the path internally to ascii, which fails.
        # shelve therefore cannot write to paths containing non-ascii characters
        # in general, which means we cannot store data persistently; fall back
        # to a throwaway in-memory shelve.
        cls.cache = DummyShelve()
    except Exception as e:
        # Two known causes for this:
        # Some python distributions used the bsddb module as the underlying
        # shelve. bsddb is deprecated since 2.6 and missing from some 2.7
        # distributions, so the open above raises an ImportError if the file
        # was created by a python with bsddb support and opened by one without.
        # Similarly, if an old database file was created with a deprecated dbm
        # library, opening it fails with an obscure exception. In both cases we
        # delete the stale file and simply retry once.
        cls.log.exception("Warning: You probably have an old cache file; deleting and retrying: " + unicode(e))
        if os.path.exists(cls.cache_filename):
            os.remove(cls.cache_filename)
        cls.cache = shelve.open(cls.cache_filename)
    cls.lock.release()
def thread_control(urls):
    t_urls = []
    n = 10
    print len(urls)
    # split urls into chunks of n
    for i in range(len(urls) / n):
        t_urls.append(urls[n * i:n * (i + 1)])
    t_urls.append(urls[(len(urls) / n) * n:len(urls)])
    print t_urls
    for a in t_urls:
        threads = []
        try:
            for url in a:
                print "this happens"
                shelfr = shelve.open("url.db")
                if url not in shelfr.keys():
                    print url
                    t = threading.Thread(target=get_url_data, args=(url,))
                    threads.append(t)
                    t.start()
            for t in threads:
                print "happens"
                t.join()
                # get results from thread
                t_url, t_data = queue.get()
                t_lists = get_links_from_url(t_data, t_url)
                response_list.append(t_lists)
                write_into_row_csv_reduhandler(t_lists)
                shelfw = shelve.open("url.db", 'c')
                shelfw[t_url] = t_url
                shelfw.close()
            shelfr.close()
        except:
            pass
def __init__(self, rememberer_name, config, saml_client, wayf, cache,
             sid_store=None, discovery="", idp_query_param="",
             sid_store_cert=None):
    self.rememberer_name = rememberer_name
    self.wayf = wayf
    self.saml_client = saml_client
    self.conf = config
    self.cache = cache
    self.discosrv = discovery
    self.idp_query_param = idp_query_param
    self.logout_endpoints = [
        urlparse(ep)[2] for ep in config.endpoint("single_logout_service")]
    try:
        self.metadata = self.conf.metadata
    except KeyError:
        self.metadata = None
    if sid_store:
        self.outstanding_queries = shelve.open(sid_store, writeback=True)
    else:
        self.outstanding_queries = {}
    if sid_store_cert:
        self.outstanding_certs = shelve.open(sid_store_cert, writeback=True)
    else:
        self.outstanding_certs = {}
    self.iam = platform.node()
def average_collapsed_loss(batch_param, dir_prefix="plots/", filename=None):
    if filename:
        master = shelve.open(filename)
    else:
        master = shelve.open(batch_param["path"] + "/run_master_corr.shelve")
    filelist = make_shelve_names(batch_param)

    n_modes = 26
    n_pairs = 6
    n_lags = 15
    accumulator = np.zeros((n_modes, n_pairs, n_lags))
    mean_accumulator = np.zeros((n_modes, n_lags))
    stdev_accumulator = np.zeros((n_modes, n_lags))
    for mode_number in range(0, n_modes):
        for pair in range(0, n_pairs):
            print mode_number, pair
            identifier = "loss" + repr(pair) + "_" + repr(mode_number)
            entry = master[identifier]
            accumulator[mode_number, pair, :] = entry["corr1D"]

        mean_accumulator[mode_number, :] = \
            np.mean(accumulator[mode_number, :, :], axis=0)
        stdev_accumulator[mode_number, :] = \
            np.std(accumulator[mode_number, :, :], axis=0, ddof=1)

        filename = dir_prefix + "modeloss_avg_" + repr(mode_number)
        title = "auto-power with " + repr(mode_number) + " modes removed"
        plot_collapsed(filename, entry["corr1D_lags"],
                       mean_accumulator[mode_number, :],
                       cross_power=False, title=title,
                       errors=stdev_accumulator[mode_number, :])

    return (mean_accumulator, stdev_accumulator)
def __init__(self, indexname, truncate=None):
    dict.__init__(self)
    try:
        if truncate:
            # In python 1.52 and before, dumbdbm (under shelve)
            # doesn't clear the old database.
            files = [indexname + '.dir',
                     indexname + '.dat',
                     indexname + '.bak']
            for file in files:
                if os.path.exists(file):
                    os.unlink(file)
            raise Exception("open a new shelf")
        self.data = shelve.open(indexname, flag='r')
    except:
        # No database exists.
        self.data = shelve.open(indexname, flag='n')
        self.data[self.__version_key] = self.__version
    else:
        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key, None)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))
def _buildIndexCacheFile(self):
    import shelve
    import os
    print "Building %s:" % (self.shelfname,),
    tempname = self.shelfname + ".temp"
    try:
        indexCache = shelve.open(tempname)
        self.rewind()
        count = 0
        while 1:
            offset, line = self.file.tell(), self.file.readline()
            if not line:
                break
            key = line[:string.find(line, ' ')]
            if (count % 1000) == 0:
                print "%s..." % (key,),
                import sys
                sys.stdout.flush()
            indexCache[key] = line
            count = count + 1
        indexCache.close()
        os.rename(tempname, self.shelfname)
    finally:
        try:
            os.remove(tempname)
        except:
            pass
    print "done."
    self.indexCache = shelve.open(self.shelfname, 'r')
def _cache(fn, *args, **kwargs):
    if cache.disabled:
        return fn(*args, **kwargs)

    # A bit obscure, but simplest way to generate unique key for
    # functions and methods in python 2 and 3:
    key = '{}.{}'.format(fn.__module__, repr(fn).split('at')[0])
    etag = '.'.join(_get_mtime(name) for name in depends_on)
    cache_dir = get_cache_dir()
    cache_path = Path(cache_dir).joinpath('thefuck').as_posix()

    try:
        with closing(shelve.open(cache_path)) as db:
            if db.get(key, {}).get('etag') == etag:
                return db[key]['value']
            else:
                value = fn(*args, **kwargs)
                db[key] = {'etag': etag, 'value': value}
                return value
    except (shelve_open_error, ImportError):
        # Caused when switching between Python versions
        warn("Removing possibly out-dated cache")
        os.remove(cache_path)
        with closing(shelve.open(cache_path)) as db:
            value = fn(*args, **kwargs)
            db[key] = {'etag': etag, 'value': value}
            return value
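# Hedged sketch of how _cache above is typically wired up: in the original
# module it is the body of a decorator factory, so `depends_on` and
# `cache.disabled` resolve from an enclosing scope. The names and nesting
# below are assumptions for illustration, not the library's confirmed API.
from functools import wraps

def cache(*depends_on):
    """Cache fn results in a shelf, invalidated when a listed file's mtime changes."""
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # in the real code _cache is nested here and closes over depends_on
            return _cache(fn, *args, **kwargs)
        return wrapper
    return decorator

cache.disabled = False  # module-level kill switch checked first by _cache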
def __init__(self, func, options):
    self.func = func
    self.options = options
    if 'input' not in self.options:
        self.options['input'] = []
    self.options['input'] = _list(self.options['input'])

    filename = inspect.getfile(sys._getframe(2))
    self.working_dir = os.path.dirname(os.path.realpath(filename))

    # The database of remembered results
    memory_dir = self.memory_dir()
    db_file = '{}/{}'.format(memory_dir, self.func.func_name)
    self.results_db = shelve.open(db_file, writeback=True)

    flag_file = '{}/{}.flag'.format(memory_dir, self.func.func_name)
    if self.should_run(flag_file):
        # Clear the database of existing results since these must be out of date
        if os.path.exists(flag_file):
            os.unlink(flag_file)
        self.results_db.clear()
        open(flag_file, 'w').close()  # touch the flag

    # Remember the byte code for the function so it gets run again if it changes
    current_code_string = marshal.dumps(func.func_code)
    if '-code-' not in self.results_db or current_code_string != self.results_db['-code-']:
        # nuke the old database and create a new one; the old was based on old code
        if os.path.exists(flag_file):
            os.unlink(flag_file)
        self.results_db.clear()
        self.results_db = shelve.open(db_file)
        self.results_db['-code-'] = current_code_string
        open(flag_file, 'w').close()  # touch the flag
def _cache(fn, *args, **kwargs):
    if cache.disabled:
        return fn(*args, **kwargs)

    # A bit obscure, but simplest way to generate unique key for
    # functions and methods in python 2 and 3:
    key = "{}.{}".format(fn.__module__, repr(fn).split("at")[0])
    etag = ".".join(_get_mtime(name) for name in depends_on)
    cache_path = _get_cache_path()

    try:
        with closing(shelve.open(cache_path)) as db:
            if db.get(key, {}).get("etag") == etag:
                return db[key]["value"]
            else:
                value = fn(*args, **kwargs)
                db[key] = {"etag": etag, "value": value}
                return value
    except shelve_open_errors:
        # Caused when going from Python 2 to Python 3 and vice-versa
        warn("Removing possibly out-dated cache")
        os.remove(cache_path)
        with closing(shelve.open(cache_path)) as db:
            value = fn(*args, **kwargs)
            db[key] = {"etag": etag, "value": value}
            return value
def plot_userfreq(bin_size):
    f = shelve.open("%s_%s.dat" % (fupbin, bin_size))
    t = shelve.open("%s_%s.dat" % (tupbin, bin_size))
    event_array = []
    event_plot = []

    def add(x, y):
        return x + y

    for uname in f.keys():
        if uname in t:
            feventcount = reduce(add, map(lambda bflist: bflist[1], f[uname]))
            teventcount = reduce(add, map(lambda bflist: bflist[1], t[uname]))
            eventcount = [feventcount, teventcount]
            event_array.append(eventcount)
            event_plot.append(uname)
        else:
            continue

    event_points = np.array(event_array)
    event_plot = np.array(event_plot)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    print event_points[:, 0]
    print event_points[:, 1]
    ax.plot(event_points[:, 0], event_points[:, 1], "b.")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel("# of Flickr Events {%s hr bins}" % (bin_size))
    ax.set_ylabel("# of Twitter Events {%s hr bins}" % (bin_size))
    ax.set_title("Flickr Events Versus Twitter Events {%s hr bins}" % (bin_size))
    # ax.set_xticklabels(users, rotation='vertical')
    fig.savefig("%s/ft-freq_%s.pdf" % (fig_save, bin_size))
    f.close()
    t.close()
def plot_hist_events(bin_size):
    f = shelve.open("%s_%s.dat" % (fupbin, bin_size))
    t = shelve.open("%s_%s.dat" % (tupbin, bin_size))
    fevents = []  # a list of events for every flickr user per bin
    tevents = []  # a list of events for every twitter user per bin

    def add(x, y):
        return x + y

    map(lambda uname: fevents.append(reduce(add, map(lambda y: y[1], f[uname]))),
        f.keys())
    map(lambda uname: tevents.append(reduce(add, map(lambda y: y[1], t[uname]))),
        t.keys())

    fig1 = plt.figure(figsize=(10, 14))
    fevents = np.array(fevents)
    tevents = np.array(tevents)

    ax1 = fig1.add_subplot(211)
    ax1.hist(fevents, np.unique(fevents), cumulative=-1)
    ax1.set_xscale("log")
    ax1.set_yscale("log")
    ax1.set_xlabel("# of flickr events {%s hr bins}" % (bin_size))
    ax1.set_ylabel("# of users {%s hr bins}" % (bin_size))
    ax1.set_title("flickr events frequencies {%s hr bins}" % (bin_size))

    ax2 = fig1.add_subplot(212)
    ax2.hist(tevents, np.unique(tevents), cumulative=-1)
    ax2.set_xscale("log")
    ax2.set_yscale("log")
    ax2.set_xlabel("# of twitter events {%s hr bins}" % (bin_size))
    ax2.set_ylabel("# of users {%s hr bins}" % (bin_size))
    ax2.set_title("Twitter events frequencies {%s hr bins}" % (bin_size))

    fig1.savefig("%s/flick-events-hist_%s.pdf" % (fig_save, bin_size))
    f.close()
    t.close()
def plot_hist_bins(bin_size):
    f = shelve.open("%s_%s.dat" % (fupbin, bin_size))
    t = shelve.open("%s_%s.dat" % (tupbin, bin_size))
    fbins = []
    tbins = []
    map(lambda uname: fbins.extend(map(lambda y: y[0], f[uname])), f.keys())
    map(lambda uname: tbins.extend(map(lambda y: y[0], t[uname])), t.keys())
    fbins = np.array(fbins)
    tbins = np.array(tbins)

    fig1 = plt.figure(figsize=(10, 14))
    ax1 = fig1.add_subplot(211)
    ax1.hist(fbins, bins=np.unique(fbins), cumulative=True)
    # ax1.set_xscale('log')
    ax1.set_yscale("log")
    ax1.set_xlabel("flickr bins {%s hr bins}" % (bin_size))
    ax1.set_ylabel("# users active in bin {%s hr bins}" % (bin_size))

    ax2 = fig1.add_subplot(212)
    ax2.hist(tbins, bins=np.unique(tbins), cumulative=True)
    # ax2.set_xscale('log')
    ax2.set_yscale("log")
    ax2.set_xlabel("twitter bins {%s hr bins}" % (bin_size))
    ax2.set_ylabel("# users active in bin {%s hr bins}" % (bin_size))
    ax2.set_title("flickr and twitter bins histograms {%s hr bins}" % (bin_size))

    fig1.savefig("%s/ft-bins-hist_%s.pdf" % (fig_save, bin_size))
    f.close()
    t.close()
def plot_userbins(bin_size):
    f = shelve.open("%s_%s.dat" % (fupbin, bin_size))
    t = shelve.open("%s_%s.dat" % (tupbin, bin_size))
    bin_array = []
    bin_key = []
    for uname in f.keys():
        if uname in t:
            fbincount = len(f[uname])
            tbincount = len(t[uname])
            bincount = [fbincount, tbincount]
            bin_array.append(bincount)
            bin_key.append(uname)
        else:
            continue

    bins_plot = np.array(bin_array)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    print bins_plot
    ax.plot(bins_plot[:, 0], bins_plot[:, 1], "b.")
    # ax.set_xscale('log')
    # ax.set_yscale('log')
    ax.set_xlabel("# of active Flickr bins {%s hr bins}" % (bin_size))
    ax.set_ylabel("# of active twitter bins {%s hr bins}" % (bin_size))
    # ax.set_xticklabels(users, rotation='vertical')
    ax.set_title("Flickr Bins versus Twitter Bins {%s hr bins}" % (bin_size))
    fig.savefig("%s/ft-bins_%s.pdf" % (fig_save, bin_size))
    f.close()
    t.close()
def _cache(fn, *args, **kwargs):
    if cache.disabled:
        return fn(*args, **kwargs)

    # A bit obscure, but simplest way to generate unique key for
    # functions and methods in python 2 and 3:
    key = '{}.{}'.format(fn.__module__, repr(fn).split('at')[0])
    etag = '.'.join(_get_mtime(name) for name in depends_on)
    cache_path = _get_cache_path()

    try:
        with closing(shelve.open(cache_path)) as db:
            if db.get(key, {}).get('etag') == etag:
                return db[key]['value']
            else:
                value = fn(*args, **kwargs)
                db[key] = {'etag': etag, 'value': value}
                return value
    except dbm.error:
        # Caused when going from Python 2 to Python 3
        warn("Removing possibly out-dated cache")
        os.remove(cache_path)
        with closing(shelve.open(cache_path)) as db:
            value = fn(*args, **kwargs)
            db[key] = {'etag': etag, 'value': value}
            return value
def get_namespace(namespace, version, _cache={}):
    key = str(namespace + "-" + version)
    if key not in _cache:
        if SHELVE_CACHE:
            try:
                if os.path.getsize(SHELVE_CACHE) == 0:
                    # created by tempfile, replace here
                    os.remove(SHELVE_CACHE)
            except OSError:
                pass
            d = shelve.open(SHELVE_CACHE, protocol=2)
            if key in d:
                _cache[key] = d[key]
                d.close()
            else:
                d.close()
                ns = Namespace(namespace, version)
                # make sure we save a fully populated instance
                for k, v in type(ns).__dict__.items():
                    if isinstance(v, util.cached_property):
                        getattr(ns, k)
                d = shelve.open(SHELVE_CACHE, protocol=2)
                d[key] = ns
                d.close()
                _cache[key] = ns
        else:
            _cache[key] = Namespace(namespace, version)
    return _cache[key]
def add_job(self, command, hour, minute, sec=0):
    logger.info("2. scheduler adding job command: %s at %s:%s:%s" %
                (command, hour, minute, sec))
    sched = Scheduler(standalone=True)
    # make a db file
    shelve.open(os.path.join(os.path.dirname(__file__), 'example.db'))
    sched.add_jobstore(ShelveJobStore('example.db'), 'shelve')
    exec_time = datetime(date.today().year, date.today().month,
                         date.today().day, int(hour), int(minute), int(sec))
    # test
    # exec_time = datetime.now() + timedelta(seconds=5)
    sched.add_date_job(job, exec_time, name='alarm',
                       jobstore='shelve', args=[command])
    sched.start()
def download(self, url):
    cached = False
    refresh = False
    cache = shelve.open(os.path.join(os.environ["pydna_data_dir"], "web"),
                        protocol=cPickle.HIGHEST_PROTOCOL, writeback=False)
    key = str(url)

    if os.environ["pydna_cache"] in ("compare", "cached"):
        try:
            cached = cache[key]
        except KeyError:
            if os.environ["pydna_cache"] == "compare":
                raise Exception("no result for this key!")
            else:
                refresh = True

    if refresh or os.environ["pydna_cache"] in ("compare", "refresh", "nocache"):
        response = urllib2.urlopen(url)
        result = response.read()
        if os.environ["pydna_cache"] == "compare":
            if result != cached:
                module_logger.warning('download error')

    if refresh or os.environ["pydna_cache"] == "refresh":
        cache = shelve.open(os.path.join(os.environ["pydna_data_dir"], "genbank"),
                            protocol=cPickle.HIGHEST_PROTOCOL, writeback=False)
        cache[key] = result
    elif cached and os.environ["pydna_cache"] not in ("nocache", "refresh"):
        result = cached

    cache.close()
    return result
def __init__(self, host, username, password, debug=False):
    """
    Server class __init__ which expects an IMAP host to connect to

    @param host: gmail's default server is fine: imap.gmail.com
    @param username: your gmail account (i.e. [email protected])
    @param password: we highly recommend you to use 2-factor auth here
    """
    if not host:
        raise Exception('Missing IMAP host parameter in your config')

    try:
        self._server = IMAPClient(host, use_uid=True, ssl=True)
    except:
        raise Exception('Could not successfully connect to the IMAP host')

    setattr(self._server, 'debug', debug)

    # mails index to avoid unnecessary redownloading
    index = '.index_%s' % (username)
    index = os.path.join(_app_folder(), index)
    self._index = shelve.open(index, writeback=True)

    # list of attachments hashes to avoid dupes
    hashes = '.hashes_%s' % (username)
    hashes = os.path.join(_app_folder(), hashes)
    self._hashes = shelve.open(hashes, writeback=True)

    self._username = username
    self._login(username, password)
def fpstore(servername, fullhash, headerhashes,
            fullfn='staticfull', headersfn='staticheaders'):
    import shelve, logging
    log = logging.getLogger("fpstore")
    fulldb = shelve.open(fullfn)
    headersdb = shelve.open(headersfn)
    if fulldb.has_key(fullhash):
        log.debug("fulldb has this key already defined")
        if servername not in fulldb[fullhash]:
            log.debug("server not already in, therefore appending")
            # reassign rather than append in place: without writeback=True,
            # mutating a value fetched from a shelf is not persisted
            servers = fulldb[fullhash]
            servers.append(servername)
            fulldb[fullhash] = servers
        else:
            log.debug("server known, therefore not appending")
    else:
        log.debug("key not defined, therefore creating")
        fulldb[fullhash] = [servername]
    for headerhash in headerhashes:
        if headersdb.has_key(headerhash):
            if servername not in headersdb[headerhash]:
                servers = headersdb[headerhash]
                servers.append(servername)
                headersdb[headerhash] = servers
        else:
            headersdb[headerhash] = [servername]
    fulldb.sync()
    fulldb.close()
    headersdb.sync()
    headersdb.close()
def __init__(self, parent):
    Frame.__init__(self, parent)
    self.scrolltext = ScrolledText(self, width=120,
                                   font=("", 14, "normal"), height=30)
    self.scrolltext.config(state="disable")
    f = Frame(self)
    self.entry = Entry(f, width=75, font=("", 15, "normal"))
    # self.entry.grab_set()
    self.entry.focus_set()
    self.entry.grid(row=0, column=0)
    self.entry.bind("<Return>",
                    lambda event, frame=self: frame._runCommand())
    self.button1 = Button(f, text="submit", font=("", 15, "normal"),
                          command=lambda frame=self: frame._runCommand())
    self.button1.grid(row=0, column=1, padx=4, pady=2)
    self.button2 = Button(f, text="clear", font=("", 15, "normal"),
                          command=lambda frame=self: frame._deleteText())
    self.button2.grid(row=0, column=2, padx=4, pady=2)
    f.grid(row=1, column=0)
    self.scrolltext.grid(row=0, column=0)

    with shelve.open("userconfigdb") as db:
        keys = tuple(db.keys())
    if not keys:
        configButtonCmd()
    with shelve.open("userconfigdb") as db:
        self.sshIP = db[tuple(db.keys())[0]].hostaddress
    self._configButtonCmd()
def extract_saturation(input_db_name, output_db_name):
    input_db = shelve.open(input_db_name)
    output_db = shelve.open(output_db_name, 'c')
    for key in input_db:
        try:
            url = input_db[key]['url']
            req = urllib.urlopen(url)
            arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
            im = cv2.imdecode(arr, -1)
            width, height, depth = im.shape
            imsize = width * height
            hsv_im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            sat = cv2.split(hsv_im)[1]
            val = float(np.sum(sat)) / float(imsize)
            output_db[key] = {'index': input_db[key]['index'],
                              'url': input_db[key]['url'],
                              'saturation': val}
        except:
            pass
    input_db.close()
    output_db.close()
def get_packet(self, session_id: int = 0, *struct_labels):
    # type checking
    if not isinstance(session_id, int):
        raise TypeError("session_id must be an int and not %s" % str(session_id))
    if len(struct_labels) < 1:
        raise ValueError("struct_labels must specify the path to a leaf.")

    # open log_index_file as read-only
    log_index_file = shelve.open(self.log_path, flag='r', protocol=3,
                                 writeback=False)

    # find the desired session dir
    if session_id > 0:
        if session_id > log_index_file[self.idx_num_session_key]:
            raise ValueError('session_id must be <= %d'
                             % log_index_file[self.idx_num_session_key])
        session_dir = log_index_file[str(session_id)]
    else:
        curr_session = int(log_index_file[self.idx_num_session_key])
        if curr_session + session_id < 1:
            raise ValueError('Current session is only %d' % curr_session)
        session_dir = log_index_file[str(curr_session + session_id)]

    session_path = os.path.join(os.path.dirname(self.log_path),
                                session_dir, session_dir)
    session_shelf = shelve.open(session_path, flag='r', protocol=3,
                                writeback=False)
    return session_shelf[self.encode_struct(*struct_labels)]
def get_matlab_versions(overwrite=False):
    '''Get MATLAB versions from Wikipedia.

    Args:
        overwrite (bool) : Overwrite existing data
    Returns:
        Dictionary of MATLAB versions
    '''
    # Get version file
    version_file = '%s/matlab-versions.shelf' % (trendpath.data_dir)

    # Use saved versions if version file exists and not overwrite
    if os.path.exists(version_file) and not overwrite:
        shelf = shelve.open(version_file)
        versions = shelf['versions']
        shelf.close()
        return versions

    # Open Wikipedia page
    req = requests.get('http://en.wikipedia.org/wiki/MATLAB')
    soup = BS(req.text)

    # Find Release History table
    histtxt = soup.find(text=re.compile('release history', re.I))
    histspn = histtxt.findParent('span')
    histtab = histspn.findNext('table', {'class': 'wikitable'})
    histrow = histtab.findAll('tr')

    # Initialize MATLAB versions
    versions = {}
    for row in histrow[1:]:
        # Get <td> elements
        tds = row.findAll('td')
        # Get version number
        vernum = tds[0].text
        vernum = re.sub(r'matlab\s+', '', vernum, flags=re.I)
        # Get version name
        vernam = tds[1].text
        vernam = re.sub('r', 'r?', vernam, flags=re.I)
        vernam = re.sub('sp', '%s(?:sp|service pack)%s' % delimrep(2),
                        vernam, flags=re.I)
        # Add to versions
        versions[vernum] = [vernum]
        if vernam:
            versions[vernum].append(vernam)

    # Save results to version file
    shelf = shelve.open(version_file)
    shelf['versions'] = versions
    shelf.close()

    # Return versions
    return versions
def GetTransitionModelFromShelf(self, yy, mm, dd, numDays, posNoise=None,
                                currentNoise=None, shelfDirectory='.'):
    """ Loads up Transition models from the shelf for a given number of days,
    starting from a particular day, and a given amount of noise in position
    and/or a given amount of noise in the current predictions. We assume these
    models have been created earlier using ProduceTransitionModels.

    Args:
        * yy (int): year
        * mm (int): month
        * dd (int): day
        * numDays (int): number of days the model is being built over
        * posNoise (float): Amount of std-deviation of the random noise used
          in picking the start location
        * currentNoise (float): Amount of prediction noise in the ocean model
        * shelfDirectory (str): Directory in which the Transition models are located.

    Updates:
        * self.gm.FinalLocs: Stores the final locations
    """
    self.posNoise = posNoise
    self.currentNoise = currentNoise
    #import pdb; pdb.set_trace()
    if posNoise is None and currentNoise is None:
        gmShelf = shelve.open('%s/gliderModel_%04d%02d%02d_%d.shelf'
                              % (shelfDirectory, yy, mm, dd, numDays),
                              writeback=False)
    if posNoise is not None:
        if currentNoise is not None:
            gmShelf = shelve.open('%s/gliderModel_%04d%02d%02d_%d_%.3f_RN_%.3f.shelf'
                                  % (shelfDirectory, yy, mm, dd, numDays,
                                     posNoise, currentNoise),
                                  writeback=False)
        else:
            gmShelf = shelve.open('%s/gliderModel_%04d%02d%02d_%d_%.3f.shelf'
                                  % (shelfDirectory, yy, mm, dd, numDays, posNoise),
                                  writeback=False)
    if posNoise is None and currentNoise is not None:
        gmShelf = shelve.open('%s/gliderModel_%04d%02d%02d_%d_RN_%.3f.shelf'
                              % (shelfDirectory, yy, mm, dd, numDays, currentNoise),
                              writeback=False)
    self.gm.TransModel = gmShelf['TransModel']
    #if gmShelf.has_key('FinalLocs'):
    self.gm.FinalLocs = gmShelf['FinalLocs']
    #if gmShelf.has_key('TracksInModel'):
    self.gm.TracksInModel = gmShelf['TracksInModel']
    gmShelf.close()
    # Now that we have loaded the new transition model, we better update our graph.
    self.ReInitializeMDP()
import hashlib
import os
import shelve

import dns.resolver

def checksum_md5(filename, blocksize=2**20):
    m = hashlib.md5()
    with open(filename, "rb") as f:
        while True:
            buf = f.read(blocksize)
            if not buf:
                break
            m.update(buf)
    return m.hexdigest()

# path = "../networkingresearch/sitespeed-result/"
# harfiles = os.listdir(path)

dns_queries = shelve.open('dnsqueries.dat', protocol=-1)
files_opened = shelve.open('filesopened.dat', protocol=-1, writeback=True)
if 'files' not in files_opened:
    files_opened['files'] = []

site_stats = {}
resolver = dns.resolver.Resolver()
resolver.timeout = 4
resolver.lifetime = 4

harfiles = []
for root, dirs, files in os.walk("/Users/enadel/Onedrive/Documents/CS 513/"):
    for file in files:
        if file.endswith(".har"):
            harfiles.append(os.path.join(root, file))
def clearShares(self, sharedfiles, bsharedfiles, sharedfilesstreams,
                bsharedfilesstreams, wordindex, bwordindex, fileindex,
                bfileindex, sharedmtimes, bsharedmtimes):
    try:
        if sharedfiles:
            sharedfiles.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'files.db'))
        except Exception:
            pass
        sharedfiles = shelve.open(os.path.join(self.data_dir, "files.db"),
                                  flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if bsharedfiles:
            bsharedfiles.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'buddyfiles.db'))
        except Exception:
            pass
        bsharedfiles = shelve.open(os.path.join(self.data_dir, "buddyfiles.db"),
                                   flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if sharedfilesstreams:
            sharedfilesstreams.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'streams.db'))
        except Exception:
            pass
        sharedfilesstreams = shelve.open(os.path.join(self.data_dir, "streams.db"),
                                         flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if bsharedfilesstreams:
            bsharedfilesstreams.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'buddystreams.db'))
        except Exception:
            pass
        bsharedfilesstreams = shelve.open(os.path.join(self.data_dir, "buddystreams.db"),
                                          flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if wordindex:
            wordindex.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'wordindex.db'))
        except Exception:
            pass
        wordindex = shelve.open(os.path.join(self.data_dir, "wordindex.db"),
                                flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if bwordindex:
            bwordindex.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'buddywordindex.db'))
        except Exception:
            pass
        bwordindex = shelve.open(os.path.join(self.data_dir, "buddywordindex.db"),
                                 flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if fileindex:
            fileindex.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'fileindex.db'))
        except Exception:
            pass
        fileindex = shelve.open(os.path.join(self.data_dir, "fileindex.db"),
                                flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if bfileindex:
            bfileindex.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'buddyfileindex.db'))
        except Exception:
            pass
        bfileindex = shelve.open(os.path.join(self.data_dir, "buddyfileindex.db"),
                                 flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if sharedmtimes:
            sharedmtimes.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'mtimes.db'))
        except Exception:
            pass
        sharedmtimes = shelve.open(os.path.join(self.data_dir, "mtimes.db"),
                                   flag='n', protocol=pickle.HIGHEST_PROTOCOL)

        if bsharedmtimes:
            bsharedmtimes.close()
        try:
            os.unlink(os.path.join(self.data_dir, 'buddymtimes.db'))
        except Exception:
            pass
        bsharedmtimes = shelve.open(os.path.join(self.data_dir, "buddymtimes.db"),
                                    flag='n', protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as error:
        log.addwarning(_("Error while writing database files: %s") % error)
        return None
    return (sharedfiles, bsharedfiles, sharedfilesstreams, bsharedfilesstreams,
            wordindex, bwordindex, fileindex, bfileindex,
            sharedmtimes, bsharedmtimes)
def readConfig(self):
    self.config_lock.acquire()

    self.sections['transfers']['downloads'] = []

    if exists(os.path.join(self.data_dir, 'transfers.pickle')):
        # <1.2.13 stored transfers inside the main config
        try:
            handle = open(os.path.join(self.data_dir, 'transfers.pickle'), 'rb')
        except IOError as inst:
            log.addwarning(
                _("Something went wrong while opening your transfer list: %(error)s")
                % {'error': str(inst)})
        else:
            try:
                self.sections['transfers']['downloads'] = \
                    RestrictedUnpickler(handle).load()
            except (IOError, EOFError, ValueError) as inst:
                log.addwarning(
                    _("Something went wrong while reading your transfer list: %(error)s")
                    % {'error': str(inst)})
            try:
                handle.close()
            except Exception:
                pass

    path, fn = os.path.split(self.filename)
    try:
        if not os.path.isdir(path):
            os.makedirs(path)
    except OSError as msg:
        log.addwarning("Can't create directory '%s', reported error: %s"
                       % (path, msg))

    try:
        if not os.path.isdir(self.data_dir):
            os.makedirs(self.data_dir)
    except OSError as msg:
        log.addwarning("Can't create directory '%s', reported error: %s"
                       % (self.data_dir, msg))

    # Transition from 1.2.16 -> 1.4.0
    # Do the cleanup early so we don't get the annoying
    # 'Unknown config option ...' message
    self.removeOldOption("transfers", "pmqueueddir")
    self.removeOldOption("server", "lastportstatuscheck")
    self.removeOldOption("server", "serverlist")
    self.removeOldOption("userinfo", "descrutf8")
    self.removeOldOption("ui", "enabletrans")
    self.removeOldOption("ui", "mozembed")
    self.removeOldOption("ui", "open_in_mozembed")
    self.removeOldOption("ui", "tooltips")
    self.removeOldOption("ui", "transalpha")
    self.removeOldOption("ui", "transfilter")
    self.removeOldOption("ui", "transtint")
    self.removeOldOption("language", "language")
    self.removeOldOption("language", "setlanguage")
    self.removeOldSection("language")

    # Transition from 1.4.1 -> 1.4.2
    self.removeOldOption("columns", "downloads")
    self.removeOldOption("columns", "uploads")

    # Remove old encoding settings (1.4.3)
    self.removeOldOption("server", "enc")
    self.removeOldOption("server", "fallbackencodings")
    self.removeOldOption("server", "roomencoding")
    self.removeOldOption("server", "userencoding")

    # Remove soundcommand config, replaced by GSound (1.4.3)
    self.removeOldOption("ui", "soundcommand")

    # Remove old column widths in preparation for "group by folder"-feature
    self.removeOldOption("columns", "search")
    self.removeOldOption("columns", "search_widths")
    self.removeOldOption("columns", "downloads_columns")
    self.removeOldOption("columns", "downloads_widths")
    self.removeOldOption("columns", "uploads_columns")
    self.removeOldOption("columns", "uploads_widths")

    # Remove auto-retry failed downloads-option, this is now default behavior
    self.removeOldOption("transfers", "autoretry_downloads")

    # Remove old notification/sound settings
    self.removeOldOption("transfers", "shownotification")
    self.removeOldOption("transfers", "shownotificationperfolder")
    self.removeOldOption("ui", "soundenabled")
    self.removeOldOption("ui", "soundtheme")
    self.removeOldOption("ui", "tab_colors")
    self.removeOldOption("ui", "tab_icons")

    # Remove dropped offline user text color in search results
    self.removeOldOption("ui", "searchoffline")

    # Check for unknown sections/options
    unknown1 = [
        'login', 'passw', 'enc', 'downloaddir', 'uploaddir', 'customban',
        'descr', 'pic', 'logsdir', 'roomlogsdir', 'privatelogsdir',
        'incompletedir', 'autoreply', 'afterfinish', 'downloadregexp',
        'afterfolder', 'default', 'chatfont', 'npothercommand', 'npplayer',
        'npformat', 'private_timestamp', 'rooms_timestamp', 'log_timestamp'
    ]

    unknown2 = {
        'ui': [
            "roomlistcollapsed", "tab_select_previous", "tabclosers",
            "tab_colors", "tab_reorderable", "buddylistinchatrooms",
            "trayicon", "showaway", "usernamehotspots", "exitdialog",
            "tab_icons", "spellcheck", "modes_order", "modes_visible",
            "chat_hidebuttons", "tab_status_icons", "notexists",
            "speechenabled", "enablefilters", "width", "height",
            "xposition", "yposition", "labelmain", "labelrooms",
            "labelprivate", "labelinfo", "labelbrowse", "labelsearch"
        ],
        'words': [
            "completion", "censorwords", "replacewords", "autoreplaced",
            "censored", "characters", "tab", "cycle", "dropdown",
            "roomnames", "buddies", "roomusers", "commands", "aliases",
            "onematch"
        ]
    }

    for i in self.parser.sections():
        for j in self.parser.options(i):
            val = self.parser.get(i, j, raw=1)
            if i not in self.sections:
                log.addwarning(_("Unknown config section '%s'") % i)
            elif j not in self.sections[i] and not (j == "filter" or i in ('plugins',)):
                log.addwarning(
                    _("Unknown config option '%(option)s' in section '%(section)s'")
                    % {'option': j, 'section': i})
            elif j in unknown1 or (i in unknown2 and j not in unknown2[i]):
                if val is not None and val != "None":
                    self.sections[i][j] = val
                else:
                    self.sections[i][j] = None
            else:
                try:
                    self.sections[i][j] = literal_eval(val)
                except Exception:
                    self.sections[i][j] = None
                    log.addwarning("CONFIG ERROR: Couldn't decode '%s' section '%s' value '%s'"
                                   % (str(j), str(i), str(val)))

    # Convert fs-based shared to virtual shared (pre 1.4.0)
    def _convert_to_virtual(x):
        if isinstance(x, tuple):
            return x
        virtual = x.replace('/', '_').replace('\\', '_').strip('_')
        log.addwarning(
            "Renaming shared folder '%s' to '%s'. A rescan of your share is required."
            % (x, virtual))
        return (virtual, x)

    self.sections["transfers"]["shared"] = [
        _convert_to_virtual(x) for x in self.sections["transfers"]["shared"]]
    self.sections["transfers"]["buddyshared"] = [
        _convert_to_virtual(x) for x in self.sections["transfers"]["buddyshared"]]

    sharedfiles = None
    bsharedfiles = None
    sharedfilesstreams = None
    bsharedfilesstreams = None
    wordindex = None
    bwordindex = None
    fileindex = None
    bfileindex = None
    sharedmtimes = None
    bsharedmtimes = None

    shelves = [
        os.path.join(self.data_dir, "files.db"),
        os.path.join(self.data_dir, "buddyfiles.db"),
        os.path.join(self.data_dir, "streams.db"),
        os.path.join(self.data_dir, "buddystreams.db"),
        os.path.join(self.data_dir, "wordindex.db"),
        os.path.join(self.data_dir, "buddywordindex.db"),
        os.path.join(self.data_dir, "fileindex.db"),
        os.path.join(self.data_dir, "buddyfileindex.db"),
        os.path.join(self.data_dir, "mtimes.db"),
        os.path.join(self.data_dir, "buddymtimes.db")
    ]

    _opened_shelves = []
    _errors = []
    for shelvefile in shelves:
        try:
            _opened_shelves.append(
                shelve.open(shelvefile, protocol=pickle.HIGHEST_PROTOCOL))
        except Exception:
            _errors.append(shelvefile)
            try:
                os.unlink(shelvefile)
                _opened_shelves.append(
                    shelve.open(shelvefile, flag='n',
                                protocol=pickle.HIGHEST_PROTOCOL))
            except Exception as ex:
                print(("Failed to unlink %s: %s" % (shelvefile, ex)))

    sharedfiles = _opened_shelves.pop(0)
    bsharedfiles = _opened_shelves.pop(0)
    sharedfilesstreams = _opened_shelves.pop(0)
    bsharedfilesstreams = _opened_shelves.pop(0)
    wordindex = _opened_shelves.pop(0)
    bwordindex = _opened_shelves.pop(0)
    fileindex = _opened_shelves.pop(0)
    bfileindex = _opened_shelves.pop(0)
    sharedmtimes = _opened_shelves.pop(0)
    bsharedmtimes = _opened_shelves.pop(0)

    if _errors:
        log.addwarning(
            _("Failed to process the following databases: %(names)s")
            % {'names': '\n'.join(_errors)})

        files = self.clearShares(
            sharedfiles, bsharedfiles, sharedfilesstreams, bsharedfilesstreams,
            wordindex, bwordindex, fileindex, bfileindex,
            sharedmtimes, bsharedmtimes)

        if files is not None:
            (sharedfiles, bsharedfiles, sharedfilesstreams, bsharedfilesstreams,
             wordindex, bwordindex, fileindex, bfileindex,
             sharedmtimes, bsharedmtimes) = files

        log.addwarning(
            _("Shared files database seems to be corrupted, rescan your shares"))

    self.sections["transfers"]["sharedfiles"] = sharedfiles
    self.sections["transfers"]["sharedfilesstreams"] = sharedfilesstreams
    self.sections["transfers"]["wordindex"] = wordindex
    self.sections["transfers"]["fileindex"] = fileindex
    self.sections["transfers"]["sharedmtimes"] = sharedmtimes

    self.sections["transfers"]["bsharedfiles"] = bsharedfiles
    self.sections["transfers"]["bsharedfilesstreams"] = bsharedfilesstreams
    self.sections["transfers"]["bwordindex"] = bwordindex
    self.sections["transfers"]["bfileindex"] = bfileindex
    self.sections["transfers"]["bsharedmtimes"] = bsharedmtimes

    # Set the port range in numerical order
    self.sections["server"]["portrange"] = (
        min(self.sections["server"]["portrange"]),
        max(self.sections["server"]["portrange"]))

    self.config_lock.release()
cookie = SimpleCookie()
http_cookie_header = environ.get('HTTP_COOKIE')
curdate = (str(datetime.now()).split('.'))[0]
message = ''
form_data = FieldStorage()
forum = escape(form_data.getfirst('forum', ''))
post = escape(form_data.getfirst('post', ''))

logedin = False
if http_cookie_header:
    cookie.load(http_cookie_header)
    if 'sid' in cookie:
        sessionId = cookie['sid'].value
        # the session store is a shelf, so this is shelve.open
        # ('writeback' is a shelve.open parameter, not the builtin open's)
        sessionStore = shelve.open('../session_stores/session-' + sessionId,
                                   writeback=False)
        if sessionStore.get('authenticated'):
            logedin = True

if logedin:
    logout = "<form action='logout.py'><input type='submit' value='Log Out'></form>"
else:
    logout = "<form action='logIn.py'><input type='submit' value='Log In'></form>"

try:
    connection = db.connect('cs1.ucc.ie', 'USER', 'PASSWORD', 'TABLE')
    cursor = connection.cursor(db.cursors.DictCursor)
    if len(forum) < 1:
        cursor.execute('''SELECT DISTINCT forumName FROM forum''')
        forums = cursor.fetchall()
def create_shelve_file():
    stu = Student('tom', 10)
    db = shelve.open('shelve_create_file')
    db['s'] = stu
    db.close()
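# Hedged read-back counterpart to create_shelve_file above; assumes the
# same Student class is importable so shelve can unpickle the stored value.
def read_shelve_file():
    db = shelve.open('shelve_create_file')
    try:
        return db['s']  # a Student instance, rebuilt by pickle on access
    finally:
        db.close()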
def __init__(self, filename):
    self.store = shelve.open(filename)
def save_shelve(mat, fname, key):
    # save object in shelve-db
    '''Save 'mat' under name 'key' in file 'fname'.'''
    db = shelve.open(fname)
    db[key] = mat
    db.close()
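# A matching loader, sketched here for symmetry (the name is an assumption):
def load_shelve(fname, key):
    '''Return the object stored under name 'key' in file 'fname'.'''
    db = shelve.open(fname)
    try:
        return db[key]
    finally:
        db.close()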
def write(self, entity_id, state, attributes):
    with shelve.open(self._filename) as db:
        db[entity_id] = json.dumps({
            STATE: state,
            ATTRIBUTES: attributes,
        })
def __init__(self, chat_id):
    self._dialog = shelve.open(DIALOG, writeback=True)
    self._id = str(chat_id)
    self._state = None
    self._check_state()
def read(self, entity_id):
    with shelve.open(self._filename) as db:
        if entity_id in db.keys():
            return json.loads(db[entity_id])
        else:
            return {}
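# Hedged, self-contained sketch of the class the write/read methods above
# belong to; the class name and the STATE/ATTRIBUTES constants are
# assumptions, not taken from the source.
import json
import shelve

STATE = 'state'
ATTRIBUTES = 'attributes'

class EntityStore:
    def __init__(self, filename):
        self._filename = filename

    def write(self, entity_id, state, attributes):
        with shelve.open(self._filename) as db:
            db[entity_id] = json.dumps({STATE: state, ATTRIBUTES: attributes})

    def read(self, entity_id):
        with shelve.open(self._filename) as db:
            return json.loads(db[entity_id]) if entity_id in db else {}

# usage: EntityStore('states.db').write('light.kitchen', 'on', {'brightness': 254})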
def count_rows():
    db = SQLighter(database_name)
    rowsnum = db.count_rows()
    with shelve.open(shelve_name) as storage:
        storage['rows_count'] = rowsnum
def __init__(self):
    super(CountMsgHandler, self).__init__()
    self.regexp = re.compile(self.Priv_Regexp + "(?P<msg>.*)")
    self.dictionary = shelve.open("countword")
def set_user_game(chat_id, estimated_answer):
    with shelve.open(shelve_name) as storage:
        storage[str(chat_id)] = estimated_answer
# ------------------------------------------------------------
# Filename : mcb.pyw
# Function : multi-clipboard
# Usage:
#   py.exe mcb.pyw save <keyword> - Saves clipboard to keyword.
#   py.exe mcb.pyw <keyword>      - Loads keyword to clipboard.
#   py.exe mcb.pyw list           - Loads all keywords to clipboard.
# CREATE  :
# DESC    :
# HISTORY :
# ------------------------------------------------------------
import os, shelve, pyperclip, sys

os.chdir('D:\\temp\\project')
mcbShelf = shelve.open('mcb')

if len(sys.argv) == 3 and sys.argv[1].lower() == 'save':
    # TODO: Save clipboard content.
    # Three arguments: store the clipboard content under the given keyword.
    mcbShelf[sys.argv[2]] = pyperclip.paste()
elif len(sys.argv) == 2:
    # TODO: List keywords and load content.
    # Two arguments: either list all keywords or load one keyword's content.
    if sys.argv[1].lower() == 'list':
        pyperclip.copy(str(list(mcbShelf.keys())))
    elif sys.argv[1] in mcbShelf:
        pyperclip.copy(mcbShelf[sys.argv[1]])

mcbShelf.close()
import shelve
import sys

if (len(sys.argv) > 2):
    new = sys.argv[2]

if (len(sys.argv) < 2):
    print("Usage: python3 updateDB.py <old shelve>")
    print("   - to read a db file")
    print("Usage: python3 updateDB.py <old shelve> <new shelve>")
    print("   - to create a new db file from the old")
    print()
    quit()

old = sys.argv[1]  # the existing shelve file named <old shelve> in the usage text

print('Opening:', old)
data = {}
olddb = shelve.open(old)
keys = list(olddb.keys())
for k in keys:
    data[k] = olddb[k]
olddb.close()

if (len(sys.argv) == 2):
    print(data)
else:
    print('Found:', keys)

if (len(sys.argv) == 3):
    print('Creating:', new)
    newdb = shelve.open(new)
    for k in keys:
        newdb[k] = data[k]  # copy every entry from the old shelf into the new one
    newdb.close()
def finish_user_game(chat_id):
    with shelve.open(shelve_name) as storage:
        del storage[str(chat_id)]
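# Hedged companion getter for the two game helpers above (the name is an
# assumption): returns the stored answer, or None if no game is running.
def get_user_game(chat_id):
    with shelve.open(shelve_name) as storage:
        return storage.get(str(chat_id))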
def del_gist_id():
    db = shelve.open('gistcheck.db')
    fpath = editor.get_path()
    if fpath in db:
        del db[fpath]
    db.close()
def get_rows_count():
    with shelve.open(shelve_name) as storage:
        rowsnum = storage['rows_count']
        return rowsnum
def get_gist_id():
    db = shelve.open('gistcheck.db')
    gist_id = db.get(editor.get_path(), None)
    db.close()
    return gist_id
#!/usr/bin/env python3
"""Opening an existing shelf.
"""

#end_pymotw_header
import shelve

with shelve.open('test_shelf.db') as s:
    existing = s['key1']

print(existing)
import shelve
import pickle

db = shelve.open(r"C:\Users\labuser\Desktop\simple-db.db")
m = {"first": "lee", "last": "finn", "email": "*****@*****.**"}
db["finn"] = pickle.dumps(m)
mm = pickle.loads(db["finn"])
db.close()
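# Note on the snippet above: shelve pickles values itself, so the explicit
# dumps/loads round-trip is redundant; storing the dict directly is
# equivalent and returns a dict on access.
import shelve

db = shelve.open(r"C:\Users\labuser\Desktop\simple-db.db")
db["finn"] = {"first": "lee", "last": "finn", "email": "*****@*****.**"}
mm = db["finn"]  # unpickled back into a dict by shelve
db.close()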
def set_gist_id(gist_id):
    gist_id = extract_gist_id(gist_id)
    db = shelve.open('gistcheck.db')
    db[editor.get_path()] = gist_id
    db.close()
import sys
import shelve
from beem import Steem
from beem.amount import Amount
from datetime import datetime, timedelta
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

# 200,401,648 STEEM
# 403,671,878,952 VESTS
# 0.0004964 S/V

s = shelve.open("accounts.shelf")
accounts = s['accounts']
s.close()

stm = Steem()
steem_per_mvests = stm.get_steem_per_mvest()

exclude = ['steem', 'steemit']
cur_date = datetime(2018, 11, 23)
intervals = [3, 7, 14, 28, 26 * 7, 52 * 7, 5 * 52 * 7, 100 * 52 * 7]
labels = [
    '<3 days ago', '3-7 days ago', '1-2 weeks ago', '2-4 weeks ago',
    '4-26 weeks ago', '0.5-1 year ago', 'more than\na year ago', 'never'
]

sp_dist = {}
acc_dist = {}
from person import Record
import shelve

filename = raw_input('Data txt file?') or 'peopletext.txt'  # or sys.argv[1]
startrec = '<begin>'
endrec = '<end>'

dbase = shelve.open(filename.replace('.txt', '.db'))
myfile = open(filename)
while True:
    line = myfile.readline()
    if not line:
        break
    if line.startswith(startrec):
        newrec = Record()
        while True:
            line = myfile.readline()
            if line.startswith(endrec):
                break
            field, value = line.split(':')
            value = value.strip()
            setattr(newrec, field, value)
        # store record
        dbase[str(newrec.id())] = newrec
dbase.close()
import shelve

import pandas as pd
from sklearn.grid_search import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import classification_report

train_data = "../data/train.csv"
test_data = "../data/test.csv"
model_name = "station_profile_all_svm"
dbfile = "../data/models"  # shelve database file.

data = pd.read_csv(train_data)

# Encode stations with integers
with shelve.open(dbfile) as db:
    le = db['labelEncoder']
data.DepStation = le.transform(data.DepStation)
data.ArrStation = le.transform(data.ArrStation)

# Subsample the data
data = data.sample(frac=.128)

X = data[["UserID", "DepHour", "DepStation", "DepWeek", "DepMonth", "DepMin"]]
y = data["ArrStation"]

# Type conversion. Just to avoid warnings.
X.DepHour = X.DepHour.astype(float)
X.DepStation = X.DepStation.astype(float)
X.DepWeek = X.DepWeek.astype(float)
X.DepMonth = X.DepMonth.astype(float)
import shelve

db_name = 'local.db'
with shelve.open(db_name) as db:
    db['Country'] = ('Ukraine', 'USA', 'UK')
    for c in db.items():
        print(c)
    for c in db.keys():
        print(c)
    for c in db.values():
        print(c)
def writeReport():
    try:
        if os.path.exists("summary.txt"):
            os.remove('summary.txt')
        if os.path.exists("all_content.txt"):
            os.remove('all_content.txt')
        s = shelve.open('urlText.db')
        f = open('summary.txt', 'w')
        f1 = open('all_content.txt', 'w')
        f.write(f"There are {len(s)} pages found\n")
        f.write('------------------above are pages found------------------------------------------------\n')
        longest = 0
        longest_url = ''
        subdomain = defaultdict(int)
        for url, content in s.items():
            parsed = urlparse(url)
            if re.match(r'.+\.ics\.uci\.edu', parsed.netloc) and parsed.netloc != 'www.ics.uci.edu':
                subdomain[parsed.netloc.lower()] += 1
            for word in stopWord.split():
                if word in content:
                    break
            else:
                continue
            f1.write(content + '\n')
            if longest < len(content.split()):
                longest = len(content.split())
                longest_url = url
        for k, v in sorted(subdomain.items(), key=lambda x: x[0]):
            f.write(f'http://{k}, {v}\n')
        f.write(f'------------------above are {len(subdomain)} subdomains------------------------------------------------\n')
        f.write(f'The page that has most words is {longest_url}, and it has {longest} words\n')
        f.write('------------------above are longest page----------------------------------------------\n')
        f1.close()
        i = 1
        the_dict = computeWordFrequencies(tokenize('all_content.txt'))
        for key, value in sorted(the_dict.items(), key=lambda x: -x[1]):
            if key in stopWord:
                continue
            if i > 50:
                break
            i += 1
            f.write(f'{key}->{value}\n')
        f.write('------------------above are 50 top words except English stop word---------------------\n')
    except:
        f.write("Error occurs\n")
    finally:
        s.close()
        f.close()
def dump(self, a, identifier):
    """Dump NumPy array a with identifier."""
    identifier = identifier.strip()
    fd = shelve.open(self.filename)
    fd[identifier] = a
    fd.close()
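# Hedged counterpart to the dump method above (the method name is an
# assumption); it simply reverses the shelve write.
def load(self, identifier):
    """Return the NumPy array stored under identifier."""
    identifier = identifier.strip()
    fd = shelve.open(self.filename)
    try:
        return fd[identifier]
    finally:
        fd.close()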
def _open(self):
    self._ensure_path()
    self._shelve = shelve.open(self.preferences_path, protocol=2, writeback=True)
class pidgin2imap:
    logdir = "~/.purple/logs/"
    logdir = os.path.expanduser(logdir)
    persistantDataFile = "~/.pidgin2imapdb"
    persistantData = shelve.open(os.path.expanduser(persistantDataFile))
    if not persistantData.has_key("filelist"):
        persistantData["filelist"] = []
    fileList = persistantData["filelist"]

    def __init__(self, imapObject, debug):
        self.imapObject = imapObject
        self.debug = debug

    def parseLogs(self):
        if not os.path.exists(self.logdir):
            print "No logfiles available"
            return False
        if self.debug:
            print "[PIDGIN] LOGFILEDIR: " + self.logdir
        protocols = []
        protocols = os.listdir(self.logdir)
        if len(protocols) == 0:
            print "no protocols available"
            return False
        for protocol in protocols:
            accounts = os.listdir(os.path.join(self.logdir, protocol))
            for account in accounts:
                rosterItems = os.listdir(
                    os.path.join(self.logdir, protocol, account))
                for contact in rosterItems:
                    # don't archive offline/online notifications
                    if contact == ".system":
                        continue
                    logItems = os.listdir(
                        os.path.join(self.logdir, protocol, account, contact))
                    for logfile in logItems:
                        fileName = os.path.join(self.logdir, protocol,
                                                account, contact, logfile)
                        if fileName in self.fileList:
                            pass
                        else:
                            if self.debug:
                                print "[PIDGIN] filename not in List"
                            self.fileList.append(fileName)
                            in_file = open(fileName, "r")
                            body = ""
                            for line in in_file:
                                body += line
                            dateString = logfile[:-4]
                            year = int(dateString[:4])
                            month = int(dateString[5:7])
                            day = int(dateString[8:10])
                            hour = int(dateString[11:13])
                            minute = int(dateString[13:15])
                            second = int(dateString[15:17])
                            timeZone = dateString[17:25]
                            date = datetime.datetime(year, month, day,
                                                     hour, minute, second, 0)
                            self.imapObject.log2imap(contact, account,
                                                     "[pidgin]", body, date, None)
        self.persistantData["filelist"] = self.fileList
        self.persistantData.close()