def update_sqlite_from_csv(db_name, csv_name):
    earliest_day = datetime.datetime.strptime(csv_name[-25:-4].split("_")[0], DATE_FORMAT)
    """
    #TODO: more robust date extraction
    try:
        latest_day = datetime.datetime.strptime(csv_name[-25:-4].split("_")[1], DATE_FORMAT)
        interval = (latest_day - earliest_day).days
    except:
        interval = 1
    #update_wrc(update_key, db_name, earliest_day)
    """
    with SqliteDict(db_name) as db:
        with open(csv_name, 'r') as csv_file:
            reader = csv.DictReader(csv_file)
            for row in reader:
                team_name = row["Tm"]
                for stat_name in ENABLED_STATS:
                    update_key = "{}_{}".format(1, stat_name)  # ugly hack
                    if stat_name == "wRC":
                        new_stat = row[stat_name + "+"]
                    else:
                        new_stat = row[stat_name]
                    if team_name not in db or db[team_name] is None:
                        team_data = {
                            "name": team_name,
                            update_key: {earliest_day: new_stat},
                        }
                    else:  # team name in db and not None
                        team_data = db[team_name]
                        if update_key not in team_data:
                            team_data[update_key] = {earliest_day: new_stat}
                        else:  # update key in team data
                            team_data[update_key][earliest_day] = new_stat
                    db[team_name] = team_data
            db.commit()
        db.commit()
def save_ids_to_cache(l2_ids, l2_locs, cache_file):
    """Save computed locations back to the l2 cache.

    Parameters
    ----------
    l2_ids : list-like
        List of N layer 2 ids
    l2_locs : np.array
        Nx3 array of locations
    cache_file : str
    """
    with SqliteDict(cache_file, encode=orjson.dumps, flag="c") as cache_dict:
        for k, v in zip(l2_ids, l2_locs):
            if not np.any(np.isnan(v)):
                cache_dict[str(k)] = v.tolist()
        cache_dict.commit()
def download_blogtree(db_path, everything=True, full_text=False, uniq=False, number_of_downloaders=4):
    blog_getter = iter_blogposts if everything else first_blog_page
    with SqliteDict(db_path, autocommit=False) as blogpost_db:
        already_downloaded = set(blogpost_db.keys())
        no_blogs = get_number_of_blogs() if everything else ""

        empty_blog_queue = Queue()
        sqlite_writer_queue = Queue()

        writer = Process(target=sqlitedict_writer,
                         args=(db_path, no_blogs, number_of_downloaders, sqlite_writer_queue))
        writer.start()

        workers = [
            Process(target=blog_downloader, args=(empty_blog_queue, sqlite_writer_queue))
            for _ in range(number_of_downloaders)
        ]
        for worker in workers:
            worker.start()

        for cnt, blog in enumerate(blog_getter()):
            if uniq and blog.url in already_downloaded:
                print("skipping", cnt + 1, blog.title)
                continue
            empty_blog_queue.put(blog)

        # put signals for workers that job is done
        for _ in range(number_of_downloaders):
            empty_blog_queue.put(WorkerDone())

        # wait for workers
        for worker in workers:
            worker.join()

        # wait for writer
        writer.join()
async def post(self):
    request = self.request
    data = await request.post()
    try:
        mydict = SqliteDict('./my_db.sqlite', autocommit=True)
        if mydict.get(data["url"]) is not None:
            return web.Response(text=str(mydict.get(data["url"])))
        image = await fetch(session, data["url"])
        nsfw_prob = classify(image)
        text = nsfw_prob.astype(str)
        mydict[data["url"]] = text
        return web.Response(text=text)
    except KeyError:
        return HTTPBadRequest(text="Missing `url` POST parameter")
    except OSError as e:
        if "cannot identify" in str(e):
            raise HTTPUnsupportedMediaType(text="Invalid image")
        else:
            raise e
async def leaderboard(self, message):
    with SqliteDict('./db/level.sqlite', autocommit=True) as level_db:
        await message.channel.send("sorting levels...")
        sorted_levels = {}
        leaderboard = "```\nLeaderboard:\n\n"
        sorted_keys = sorted(level_db, key=level_db.get)
        for e in sorted_keys:
            sorted_levels[e] = level_db[e]
        for i in range(min(10, len(level_db))):
            user = await self.fetch_user(sorted_keys[len(level_db) - i - 1])
            leaderboard += str(i + 1) + ". " + user.name + "#" + user.discriminator + ": " + str(
                parse_level(sorted_levels.get(sorted_keys[len(level_db) - i - 1]))) + "\n"
        leaderboard += "...\n" + message.author.name + "#" + message.author.discriminator + ": " + str(
            get_level(message.author)) + "\n```"
        await message.channel.send(leaderboard)
def flush(self, save_index=False, save_model=False, clear_buffer=False):
    """Commit all changes, clear all caches."""
    if save_index:
        if self.fresh_index is not None:
            self.fresh_index.save(self.location('index_fresh'))
        if self.opt_index is not None:
            self.opt_index.save(self.location('index_opt'))
    if save_model:
        if self.model is not None:
            self.model.save(self.location('model'))
    self.payload.commit()
    if clear_buffer:
        if hasattr(self, 'fresh_docs'):
            try:
                self.fresh_docs.terminate()  # erase all buffered documents + file on disk
            except:
                pass
        self.fresh_docs = SqliteDict(journal_mode=JOURNAL_MODE)  # buffer defaults to a random location in temp
    self.fresh_docs.sync()
def test_recording_system_metadata(self):
    prob = Problem()
    prob.root = ConvergeDiverge()
    prob.root.add_metadata('string', 'just a test')
    prob.root.add_metadata('ints', [1, 2, 3])
    prob.driver.add_recorder(self.recorder)
    self.recorder.options['record_metadata'] = True
    prob.setup(check=False)
    prob.cleanup()  # closes recorders

    # check the system metadata recording
    sqlite_metadata = SqliteDict(filename=self.filename, flag='r', tablename='metadata')
    system_metadata = sqlite_metadata['system_metadata']

    self.assertEqual(len(system_metadata), 2)
    self.assertEqual(system_metadata['string'], 'just a test')
    self.assertEqual(system_metadata['ints'], [1, 2, 3])

    sqlite_metadata.close()
def index():
    workflow_paths = None
    local_search_path = None  # ensure the name is defined when the GitHub branch is taken
    if GITHUB_REPO_NAME:
        workflow_paths = get_workflow_files_from_public_repo(GITHUB_REPO_NAME)
    else:
        if WORKFLOW_SEARCH_DIR is not None:
            local_search_path = WORKFLOW_SEARCH_DIR
        else:
            with SqliteDict(DB_PATH, autocommit=True) as dict_DB:
                local_search_path = dict_DB['workflow_search_path']

    page = render_template(
        'index.html',
        repo_name=GITHUB_REPO_NAME,
        workflow_paths=workflow_paths,
        local_search_path=local_search_path,
        search_path_editable=(WORKFLOW_SEARCH_DIR is None),
    )
    return page
def __init__(self):
    self.__db_file_name = ConfigurationParser.get_config_value(
        ConfigurationKeys.AVG_TWEETS_COUNTER, ConfigurationKeys.DB_FILE_NAME)
    self.__db_table_name = ConfigurationParser.get_config_value(
        ConfigurationKeys.AVG_TWEETS_COUNTER, ConfigurationKeys.DB_TABLE_NAME)
    self.__date_format = ConfigurationParser.get_config_value(
        ConfigurationKeys.AVG_TWEETS_COUNTER, ConfigurationKeys.DATE_FORMAT)

    with SqliteDict(self.__db_file_name, self.__db_table_name) as tweets_counter:
        if 'start_time' not in tweets_counter:
            time_now = datetime.now()
            tweets_counter['start_time'] = time_now.strftime(self.__date_format)
            tweets_counter.commit()
def __getitem__(self, data_idx):
    assert data_idx >= 0
    assert data_idx < len(self)
    # bisect_right returns the index of the leftmost size value greater than data_idx,
    # so subtracting 1 gives the database cell that contains data_idx.
    # Because of the asserts above, the result is always in range.
    db_idx = bisect_right(self.sizes, data_idx) - 1
    local_idx = data_idx - self.sizes[db_idx]
    if db_idx not in self.connections:
        self.connections[db_idx] = SqliteDict(self.db_paths[db_idx], flag='r')
    try:
        return self.connections[db_idx][local_idx]
    except KeyError as ke:
        print("FAILED TO GET", ke)
        print(db_idx)
        print(local_idx)
        print(len(self.connections[db_idx]))
        print(self.db_paths[db_idx])
        raise ke
def get_locs_local(l2_ids, cache_file):
    l2means = []
    is_cached = []
    with SqliteDict(cache_file, decode=orjson.loads, flag="c") as cache_dict:
        for l2id in l2_ids:
            loc = cache_dict.get(str(l2id), None)
            if loc is not None:
                l2means.append(loc)
                is_cached.append(True)
            else:
                is_cached.append(False)
    if len(l2means) > 0:
        l2means = np.vstack(l2means)
    else:
        l2means = np.zeros((0, 3), dtype=float)
    return l2means, np.array(is_cached)
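# A minimal round-trip sketch for the two cache helpers above (save_ids_to_cache and
# get_locs_local). The ids, locations, and cache path below are made up for illustration;
# numpy and orjson are assumed to be installed, as in the original snippets.
import numpy as np

demo_ids = [101, 102, 103]
demo_locs = np.array([[1.0, 2.0, 3.0],
                      [np.nan, np.nan, np.nan],   # NaN rows are skipped on save
                      [4.0, 5.0, 6.0]])
save_ids_to_cache(demo_ids, demo_locs, "l2_cache_demo.sqlite")

means, cached = get_locs_local(demo_ids, "l2_cache_demo.sqlite")
# means has shape (2, 3); cached == [True, False, True] because id 102 was never written
print(means.shape, cached.tolist())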
def test_special_keys(self):
    """integer, float and/or tuple keys"""
    db = SqliteDict()
    db['1'] = 1
    db[1] = 'ONE'
    db[('a', 1)] = 'testtuple'
    db[frozenset([1, 2, '2'])] = 'testfrozenset'

    assert db[1] == 'ONE'
    assert db['1'] == 1
    assert db[('a', 1)] == 'testtuple'
    assert db[frozenset([1, 2, '2'])] == 'testfrozenset'

    # This tests the reverse conversion
    keys = list(db.keys())
    assert len(keys) == 4
    assert '1' in keys
    assert 1 in keys
    assert ('a', 1) in keys
    assert frozenset([1, 2, '2']) in keys
def __init__(self, tree, site, token):
    self.newroot = tk.Toplevel()
    self.newroot.title('快速收录')  # window title: "Quick indexing"
    self.newroot.iconbitmap("favicon.ico")
    self.newroot.wm_attributes('-topmost', 1)
    win_width = self.newroot.winfo_screenwidth()
    win_higth = self.newroot.winfo_screenheight()
    width_adjust = (win_width - 800) / 2
    higth_adjust = (win_higth - 250) / 2
    self.newroot.geometry("%dx%d+%d+%d" % (800, 250, width_adjust, higth_adjust))

    # log widget inside the window
    self.ttlog = ttlog(master=self.newroot)
    self.ttlog.place(x=10, y=70, width=780, height=150)

    # status labels ("Quick indexing in progress, do not interrupt, please wait...")
    self.content = tk.Label(self.newroot, text="正在快速收录中,请不要中断操作,请耐心等待......")
    self.content.place(x=10, y=30)
    self.content2 = tk.Label(self.newroot, text="")
    self.content2.place(x=10, y=60)

    self.tree = tree
    self.site = site
    self.token = token
    self.mydict = SqliteDict('./my_db.sqlite', autocommit=True)

    # start the worker thread
    self.p = Thread(target=self.main)
    self.p.setDaemon(True)
    self.p.start()
    # log: "Quick indexing --> started the normal indexing thread"
    self.ttlog.log("快速收录-->开启普通收录线程.....")

    # handle the window-close button in the top-right corner
    self.newroot.protocol("WM_DELETE_WINDOW", self.close)
def import_file(self, filename):
    if filename.endswith("pkl"):
        files = load(filename)
        for f in files:
            try:
                self.files[f['hash']]
            except KeyError as e:
                self.files[f['hash']] = [f]
                print("adding {}".format(f['name']))
    else:
        files = SqliteDict(filename, autocommit=False)
        for k in files:
            try:
                self.files[k]
            except KeyError as e:
                self.files[k] = files[k]
                print("adding {}".format(k))  # original printed f['name'], which is undefined in this branch
    self.rebuild_names_db()
    self.save()
def __init__(self, source_data, db):
    """ """
    # process current records in db
    self.db = SqliteDict(db, autocommit=True)

    # process csv file
    with smart_open(source_data) as h:
        self.data = []
        fields = h.readline().strip().split(',')
        required_fields = ['task_id', 'model_id', 'topic_no', 'task', 'answer']
        if fields != required_fields:
            raise ValueError('invalid csv file: %s does not match %s' % (fields, required_fields))
        for line in h.readlines():
            record = dict(zip(fields, line.strip().split(',')))
            self.data.append(record)
async def server(websocket, path):
    # is SqliteDict threadsafe? looks like it
    # can replace the sqlite with redis to scale anyway
    with closing(SqliteDict('./cache.sqlite', autocommit=True)) as memoize:
        while 1:
            req = json.loads(await websocket.recv())
            assert sorted(req.keys()) == sorted(['id', 'jsonrpc', 'method', 'params'])
            key = json.dumps((req['method'], req['params']))
            print(f"< {key}")
            if key not in memoize:
                print("> cache miss, fetch...")
                resp = json.loads(await gethnode(json.dumps(req)))
                assert sorted(resp.keys()) == sorted(['id', 'jsonrpc', 'result'])
                #print(f">> {resp}")
                memoize[key] = resp['result']
            presp = {'id': req['id'], 'jsonrpc': req['jsonrpc'], 'result': memoize[key]}
            presp = json.dumps(presp)
            #print(f"> {presp}")
            print(f"> {len(presp)}")
            await websocket.send(presp)
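# A hypothetical client for the caching JSON-RPC relay above, assuming the handler is
# served with the `websockets` library and listening on ws://localhost:8765 (the address
# and the example method/params are made up for illustration).
import asyncio
import json
import websockets

async def call(method, params):
    async with websockets.connect("ws://localhost:8765") as ws:
        await ws.send(json.dumps({"id": 1, "jsonrpc": "2.0",
                                  "method": method, "params": params}))
        return json.loads(await ws.recv())

# repeated calls with the same (method, params) pair are answered from cache.sqlite
print(asyncio.run(call("getblockcount", [])))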
def __open_file(config_filename, force_write):
    """
    :param config_filename:
    :param force_write:
    :return:
    """
    if prompt_for_overwrite(config_filename, force_write):
        try:
            config_dirname = os.path.dirname(config_filename)
            if not os.path.isdir(config_dirname):
                os.makedirs(config_dirname)
            if os.path.exists(config_filename):
                os.remove(config_filename)
            return SqliteDict(config_filename)
        except Exception as e:
            print_exception(e)
    else:
        return None
def __init__(self):
    for att in ['name', 'author', 'version']:
        assert hasattr(self, att), f'Missing attribute {att}'
    self.dirs = AppDirs(
        appname=self.name,
        appauthor=self.author,
        version=self.version,
    )
    self.site_config_dir = Path(self.dirs.site_config_dir)
    os.makedirs(self.site_config_dir, exist_ok=True)
    self.db_service = SqliteDict(
        filename=self.site_config_dir / 'service.db',
        autocommit=True,
    )
    curr_pid = self.db_service.get('pid', -1)
    curr_port = self.db_service.get('port', self.preferred_port)
    self.db_service['pid'] = curr_pid
    self.db_service['port'] = curr_port
def __init__(self, dag_params: dict = None, state_db: str = ":memory:"):
    """FeatureDAG constructor

    Args:
        dag_params (dict, optional): Parameters that are made available to any node in
            the DAG. Defaults to None.
        state_db (str, optional): The name of the sqlite database to store the DAG state
            in. If not supplied then a temporary in-memory database is used.
            Defaults to ":memory:".
    """
    self._nodes = set()
    self._node_connections = set()
    self._node_dot_attr = {}
    self._dot = None
    self._ipython_display_handle = None
    self._dag_params = dag_params

    self._state_dict = SqliteDict(
        state_db, autocommit=True, encode=str, decode=str, tablename="state"
    )
def drop_index(self, keep_model=True):
    """Drop all indexed documents. If `keep_model` is False, also drop the model."""
    modelstr = "" if keep_model else "and model "
    logger.info("deleting similarity index " + modelstr + "from %s" % self.basename)

    # delete indexes
    for index in [self.fresh_index, self.opt_index]:
        if index is not None:
            index.terminate()
    self.fresh_index, self.opt_index = None, None

    # delete payload
    if self.payload is not None:
        self.payload.close()

        fname = self.location('payload')
        try:
            if os.path.exists(fname):
                os.remove(fname)
                logger.info("deleted %s" % fname)
        except Exception as e:
            logger.warning("failed to delete %s" % fname)
        self.payload = SqliteDict(self.location('payload'), autocommit=True, journal_mode=JOURNAL_MODE)

    # optionally, delete the model as well
    if not keep_model and self.model is not None:
        self.model.close()
        fname = self.location('model')
        try:
            if os.path.exists(fname):
                os.remove(fname)
                logger.info("deleted %s" % fname)
        except Exception as e:
            logger.warning("failed to delete %s" % fname)
        self.model = None
    self.flush(save_index=True, save_model=True, clear_buffer=True)
def all_routes(api, dbname):
    """Create an SqliteDict with all routes and their stops."""
    log = logging.getLogger(__name__)

    # All routes with tracking
    rtdicts = api.routes()['route']
    allroutes = SqliteDict(dbname, tablename="routes")

    for rtdict in rtdicts:
        rtobject = Route.fromapi(api, rtdict)
        log.debug("Processing route {}".format(rtobject.name))
        if rtobject.number in allroutes:
            log.debug("SKIP: Route already in database.")
            continue
        else:
            rtdict = {
                'name': rtobject.name,
                'number': rtobject.number,
                'inbound': {
                    s.id: {'location': s.location, 'name': s.name}
                    for s in rtobject.inbound_stops
                },
                'outbound': {
                    s.id: {'location': s.location, 'name': s.name}
                    for s in rtobject.outbound_stops
                },
            }
            allroutes[str(rtobject.number)] = rtdict

    log.debug("Committing changes...")
    allroutes.commit()
    return dbname
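# A small read-side sketch for the routes database created by all_routes() above. The
# dbname is whatever all_routes() was called with; tablename="routes" matches the writer,
# and flag="r" opens the file read-only so nothing can be overwritten by accident.
from sqlitedict import SqliteDict

def print_route_summary(dbname):
    with SqliteDict(dbname, tablename="routes", flag="r") as routes:
        for number, route in routes.items():
            # each value is the dict written above: name, number, inbound, outbound
            print(number, route['name'], len(route['inbound']), len(route['outbound']))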
def _risk_level_fba(entity):
    if app.config["FBA_RISK_SCORE_ENABLE"]:
        risk_level = -1
        iso_timestamp = get_current_iso8601_format()
        fba_risk_level_dict = SqliteDict(
            "{}/{}".format(app.config["DB_DIR"], FBA_DB_FILE), decode=json.loads
        )
        if entity in fba_risk_level_dict:
            risk_level = fba_risk_level_dict[entity]
        fba_risk_level_dict.close()

        response = {
            "user_id": entity,
            "timestamp": iso_timestamp,
            "risk_level": risk_level,
        }
        return jsonify(response), HTTPStatus.OK
    else:
        response = {
            "fba_risk_score_fetch_enable": "Set this flag to True and restart the service in order to enable this endpoint."
        }
        return jsonify(response), HTTPStatus.NOT_IMPLEMENTED
def create_index_fulltext(doi_title_abstract):
    schema = Schema(
        article_doi=ID(stored=True),
        article_title=ID(stored=True),
        article_fulltext=TEXT(analyzer=StemmingAnalyzer(), stored=True)
    )
    ix = index.create_in(
        "C:\\Users\\sahme\\PycharmProjects\\Automating_SLR\\django_project\\indexdir_fulltext",
        schema)
    writer = ix.writer()
    # database = SqliteDict('./SLR_database_updated.sqlite', autocommit=True)
    database2 = SqliteDict('./latest_screening_db.sqlite', autocommit=True)
    for i in doi_title_abstract:
        if i in database2['fulltext']:
            writer.add_document(article_doi=u"" + i,
                                article_title=u"" + doi_title_abstract[i],
                                article_fulltext=u"" + database2['fulltext'][i])
    writer.commit()
    return ix
def sqlite_to_array(num):
    emp_sql = SqliteDict(f"{args.src}empath_value{num}.sqlite", tablename="value", flag="r")
    t_ini = time()
    ids = []
    emp_values = []
    c = 0
    for key, value in emp_sql.items():
        if c % 1000000 == 0:
            print("iteration number ", c, "at", round((time() - t_ini) / 60, 2), "minutes")
        c += 1
        ids.append(key)
        emp_values.append(tuple(value.values()))
        if c % 10000000 == 0:
            save_arrays(num, emp_values, ids, c)  # original omitted `num` here, unlike the final call below
            ids = []
            emp_values = []
    save_arrays(num, emp_values, ids, c)
def stat(dbname):
    database = SqliteDict(dbname)
    print(f"{len(database)} records")
    counts = {}
    l = 0
    for key in database.keys():
        player, stones = eval(key)
        val = database[key]
        for hole_idx in val:
            diff_total, count = val[hole_idx]
            if count not in counts:
                counts[count] = 0
            counts[count] += 1
        # l += 1
        # if l > 10:
        #     break
    print("distribution of moves ever played")
    for a, b in sorted(list(counts.items()), key=lambda v: v[1]):
        print(f"  {b} {a}")
def get_statistics(id):
    with SqliteDict('./dbbookmark.sqlite', autocommit=True) as mydict:
        if id in mydict:
            etag = mydict[id]["etag"]
            request_etag = request.headers.get('ETag')
            if request_etag is None:
                request_etag = -1
            if int(etag) == int(request_etag):
                return Response("", status=f"304 Not Modified, ETag: {etag}",
                                mimetype='application/json')
            else:
                return Response(f"{etag}", status=f"200 OK, ETag: {etag}",
                                mimetype='application/json')
        else:
            return Response("Not Found", status=404, mimetype='application/json')
def crawl():
    options = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2}
    options.add_experimental_option("prefs", prefs)
    #options.add_argument("--start-maximized")
    #options.add_experimental_option("detach", True)
    driver = webdriver.Chrome("../../chromedriver.exe", options=options)
    driver.get("http://www.sauspiel.de")

    # login
    elem = driver.find_element_by_id("ontop_login")
    elem.clear()
    elem.send_keys("xxx")
    elem = driver.find_element_by_id("login_inline_password")
    elem.clear()
    elem.send_keys("xxx")
    elem.send_keys(Keys.RETURN)

    # setup database
    games = SqliteDict('games.sqlite', autocommit=True)
    normal_games = 0
    for i in range(330000000, 1000000000):
        print(i)
        # view game
        driver.get('https://www.sauspiel.de/spiele/' + str(i))
        gt = GameTranscript()
        try:
            gt.fast_parse(driver)
        except:
            print("could not parse game")
            continue
        if len(gt.sonderregeln) == 0:
            normal_games += 1
            print("found " + str(normal_games) + " normal games")
            games[i] = gt
def download_videos(tags_dict, output_path, subsample=None):
    """
    Download videos (e.g. indoor/outdoor scenes) according to their tags.

    :param tags_dict: {tag: tag hash}
    :param output_path: path where videos should be stored
    :param subsample: number of samples per tag to be used
    """
    make_dirs(output_path)
    # mechanism to be able to continue the download after an interruption
    cache = SqliteDict(os.path.join(output_path, 'cache.sqlite'), autocommit=True)

    for tag, _hash in tags_dict.items():
        logging.info(f'downloading videos for tag: {tag}')
        response = requests.get(VIDEOS_CATEGORY_API_URL.format(_hash + '.js'))
        if response.status_code != 200:
            logging.warning(f'Unable to find videos for tag {tag}')
            continue
        _, video_hashes = literal_eval(response.text.strip('p;'))

        # possibility to work only with subsampled data
        if subsample is not None and subsample < len(video_hashes):
            video_hashes = random.sample(video_hashes, k=subsample)

        for video_hash in tqdm(video_hashes):
            response = requests.get(
                VIDEO_URL_API.format(video_hash[:2], video_hash + '.js'))
            if response.status_code != 200:
                logging.warning(f'Unable to find video hashes for {video_hash}')
                continue
            _, youtube_hash = literal_eval(response.text.strip('i;'))
            url = VIDEO_BASE_PATH.format(youtube_hash)
            if url not in cache:
                try:
                    YouTube(url).streams.first().download(output_path)
                    cache[url] = True
                # NOTE KeyError is too general, but unfortunately thrown by
                # the API when missing streamingData
                except (VideoUnavailable, RegexMatchError, KeyError):
                    logging.warning(f'Unable to download video from {url}')
def helper_fn(batch_fn):
    # Use this to iterate over the file.
    # For each claim, first tokenise it and then retrieve_top_five_docs.
    total_itrs = 0
    start_time = time.time()
    top_5_docs_dict = {}
    cid = None

    # Get a local connection to the db, although global would work too
    ividx_dct = SqliteDict('ividx_with_freq.sqlite', decode=decompress_set)
    print('ENTERED PROCESS')
    sys.stdout.flush()

    train_btch_f = open(batch_fn, 'rb')
    train_btch = pickle.load(train_btch_f)
    train_btch_f.close()

    for cid, tokens in train_btch.items():
        if total_itrs == 3000:
            break
        if total_itrs % 50 == 0:
            end_time = time.time()
            total_time = end_time - start_time
            print(total_itrs, total_time)
            sys.stdout.flush()
            start_time = time.time()
        total_itrs += 1
        res = retrieve_top_five_docs(tokens, ividx_dct)
        top_5_doc_items = [inverted_doc_name_dict[str(itx)] for itx in res]
        top_5_docs_dict[cid] = top_5_doc_items

    ividx_dct.close()
    return top_5_docs_dict, cid
def __init__(self):
    # https://pypi.org/project/sqlitedict/
    # If you don't use autocommit (the default, for performance), then don't forget to
    # call mydict.commit() when done with a transaction.
    self.mydict = SqliteDict('../my_db.sqlite', autocommit=True)

    # seed each setting with a default value on first run
    defaults = {
        'targetT': 21,
        'roomT': 23,
        'comfortT': 24,
        'economT': 14,
        'waterT': 35,
        'serversStatus': 0,
        'electroMeterT1': 0,
        'electroMeterT2': 0,
        'warmMeter': 0,
        'waterColdMeter': 0,
        'waterHotMeter': 0,
        'weatherT': 1,
        'flags1': 0,
        'flags2': 0,
        'timetable': {},
        'keepAliveToken': {},
        'keepAliveOPCUA': {},
        'keepAlivePLC': {},
    }
    for key, value in defaults.items():
        if key not in self.mydict:
            self.mydict[key] = value
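# A minimal read-side sketch for the shared settings store initialised above. The relative
# path '../my_db.sqlite' and the keys are taken from the constructor; everything else is
# illustrative. With autocommit=True no explicit commit() is required after writes.
from sqlitedict import SqliteDict

with SqliteDict('../my_db.sqlite', autocommit=True) as settings:
    target = settings.get('targetT', 21)
    room = settings.get('roomT', 23)
    print(f"target temperature: {target}, current room temperature: {room}")
    settings['targetT'] = target + 1   # persisted immediately because autocommit=True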