def test_nested():
    storage = CachingMiddleware(MemoryStorage)
    storage()  # Initialization

    # Write contents
    storage.write(element)

    # Verify contents
    assert element == storage.read()
from tinydb.middlewares import CachingMiddleware
from tinydb.storages import JSONStorage
from tinydb import TinyDB, Query
import webbrowser, random

db = TinyDB('db.json', storage=CachingMiddleware(JSONStorage))


def search(keyword: str):
    keysearch = Query()
    res = db.search(keysearch.keywords.any(keyword))
    for i in res:
        print(i['url'])


def search_list(keyword: str):
    keysearch = Query()
    res = db.search(keysearch.keywords.any(keyword))
    key = []
    for i in res:
        key.append(i['url'])
    return key


go = True
helpmessage = """
Help:
search - interactive search dialog, prints all found links
<any other word> - opens a random link!
help - print help
def _opendb(self):
    self.middleware = CachingMiddleware(JSONStorage)
    self.middleware.WRITE_CACHE_SIZE = 500
    self.db = TinyDB(self.conn_str,
                     storage=self.middleware,
                     default_table=self.default_table)
def setup_caching_write_many():
    global storage
    storage = CachingMiddleware(MemoryStorage)
    storage.WRITE_CACHE_SIZE = 3
    storage()  # Initialize MemoryStorage
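A hedged companion sketch for the setup above (not part of the original benchmark module): with WRITE_CACHE_SIZE = 3, the first two writes stay in the middleware's cache and the third should trigger a flush into the wrapped MemoryStorage. The 'element' payload and the use of the middleware's .storage attribute to reach the wrapped storage are illustrative assumptions.

def caching_write_many_flushes_on_third_write():
    setup_caching_write_many()
    element = {'_default': {'1': {'key': 'value'}}}
    for _ in range(3):
        storage.write(element)
    # After WRITE_CACHE_SIZE writes, the cache should have been flushed
    # to the underlying MemoryStorage.
    assert storage.storage.read() == element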
def __init__(self) -> None:
    self._db = TinyDB("db.json", storage=CachingMiddleware(JSONStorage))
def average_image_group_by_angle(file_index_fn, table_name="hdf5_proc",
                                 angle=0.0, dataset_for_averaging="data",
                                 variable="repetition", description="",
                                 dataset_store="data", date=None,
                                 sample=None, energy=None):
    """Average images by repetition for a single angle.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are processed.
    All data images of the same angle, for the different repetitions,
    are averaged.
    """

    root_path = os.path.dirname(os.path.abspath(file_index_fn))

    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)

    files_query = Query()
    file_index_db = filter_file_index(file_index_db, files_query,
                                      date=date, sample=sample,
                                      energy=energy, angle=angle, ff=False)

    all_file_records = file_index_db.all()
    averages_table = db.table("hdf5_averages")

    # We only have files for a single angle
    if variable == "repetition":
        dates_samples_energies_jjs_angles = []
        for record in all_file_records:
            dates_samples_energies_jjs_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_u"], record["jj_d"], record["angle"]))
        dates_samples_energies_jjs_angles = list(
            set(dates_samples_energies_jjs_angles))
        for date_sample_energy_jj_angle in dates_samples_energies_jjs_angles:
            date = date_sample_energy_jj_angle[0]
            sample = date_sample_energy_jj_angle[1]
            energy = date_sample_energy_jj_angle[2]
            jj_u = date_sample_energy_jj_angle[3]
            jj_d = date_sample_energy_jj_angle[4]
            angle = date_sample_energy_jj_angle[5]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_u == jj_u) &
                         (files_query.jj_d == jj_d) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)
            files = get_file_paths(img_records, root_path)

            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy_jj_angle)
            record = average_and_store(
                complete_group_to_average,
                dataset_for_averaging=dataset_for_averaging,
                variable=variable, description=description,
                dataset_store=dataset_store)
            if record not in averages_table.all():
                averages_table.insert(record)

    # import pprint
    # pobj = pprint.PrettyPrinter(indent=4)
    # print("----")
    # print("average records")
    # for record in records:
    #     pobj.pprint(record)
    # pobj.pprint(averages_table.all())

    db.close()
def storage():
    _storage = CachingMiddleware(MemoryStorage)
    return _storage()  # Initialize MemoryStorage
from werkzeug.datastructures import FileStorage

from sure_tosca.models.base_model_ import Model
from sure_tosca.models.node_template import NodeTemplateModel as NodeTemplateModel
from sure_tosca.models.node_template_map import NodeTemplateMapModel
from sure_tosca.models.tosca_template import ToscaTemplateModel as ToscaTemplateModel
from sure_tosca.service import tosca_helper

# db = TinyDB(storage=CachingMiddleware(MemoryStorage))
db_dir_path = tempfile.gettempdir()
tosca_templates_db_file_path = os.path.join(db_dir_path, "tosca_templates.json")
tosca_templates_db = TinyDB(tosca_templates_db_file_path)
# tosca_templates_db = TinyDB(storage=CachingMiddleware(MemoryStorage))

node_template_db = TinyDB(storage=CachingMiddleware(MemoryStorage))
dsl_definitions_db = TinyDB(storage=CachingMiddleware(MemoryStorage))
relationship_template_db = TinyDB(storage=CachingMiddleware(MemoryStorage))
interface_types_db = TinyDB(storage=CachingMiddleware(MemoryStorage))

logger = logging.getLogger(__name__)
if not getattr(logger, 'handler_set', None):
    logger.setLevel(logging.INFO)
    h = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    h.setFormatter(formatter)
    logger.addHandler(h)
    logger.handler_set = True

root_key = 'root_key'
def main():
    logger.info("Start Main!")
    create_plots: bool = False  # Select True if you want a plot for every prediction window

    # SET USER DATA
    user_data = UserData(bginitial=100.0, cratio=5, idur=4, inputeeffect=None,
                         sensf=41, simlength=13, predictionlength=180,
                         stats=None)

    # Get Database
    db = TinyDB(db_path, storage=CachingMiddleware(JSONStorage))
    logging.info("length of db: {}".format(len(db)))
    logging.info("Valid examples: {}".format(
        len(db.search(where('valid') == True))))
    logging.info("With result: {}".format(
        len(db.search(where('result').exists()))))

    with_result = db.search(where('result').exists())
    lstm_result = list(
        filter(
            lambda x: any(
                list(map(lambda y: "LSTM" in y['predictor'], x['result']))),
            with_result))
    logging.info("lstm res {}".format(len(lstm_result)))
    lstm_cleaned = list(map(clean_lstm, lstm_result))
    db.write_back(lstm_cleaned)
    db.storage.flush()

    with_result = db.search(where('result').exists())
    lstm_result = list(
        filter(
            lambda x: any(
                list(map(lambda y: "LSTM" in y['predictor'], x['result']))),
            with_result))
    logging.info("lstm res {}".format(len(lstm_result)))
    exit()

    all = db.search(where('valid') == True)
    s = pd.Series(list(map(lambda x: x['id'], all)))
    list(
        map(
            lambda id: print("id {} has {} items".format(
                id, len(list(filter(lambda x: x['id'] == id, all))))),
            s.unique()))
    exit()

    get_arima_order_summary(db)
    exit()

    with_result = db.search(where('result').exists())
    arima_result = list(
        filter(
            lambda x: any(
                list(
                    map(lambda y: y['predictor'] == 'Arima Predictor',
                        x['result']))),
            with_result))
    logging.info("arima results: {}".format(len(arima_result)))
    logging.info("length of db: {}".format(len(db)))
    # all = db.all()

    outliers = list(
        filter(
            lambda item: any(
                list(
                    map(lambda result: abs(result['errors'][0]) > 100,
                        item['result']))),
            with_result))
    logging.info("number of outliers: {}".format(len(outliers)))
    list(map(plot, outliers))
    exit()

    logging.info("results with optimizer: {} ".format(
        len(
            list(
                filter(
                    lambda x: any(
                        list(
                            map(lambda y: 'Optimizer' in y['predictor'],
                                x['result']))),
                    with_result)))))

    for item in with_result:
        item['result'] = list(
            filter(lambda x: 'Optimizer' not in x['predictor'],
                   item['result']))
        db.write_back([item])
    db.storage.flush()
    exit()

    logging.info("with result: {}".format(len(with_result)))
    results = list(map(lambda x: x['result'], with_result))
    seven = list(filter(lambda x: len(x) == 7, results))
    logging.info("with 7 results: {}".format(len(seven)))
    le = pd.Series(list(map(len, with_result)))
    logging.info(le.describe())
    exit()

    # Filter Items with LSTM Result
    lstm_result = list(filter(check_lstm, with_result))
    logging.info("number of results with lstm {}".format(len(lstm_result)))
    for item in lstm_result:
        item['result'] = list(
            filter(lambda x: x['predictor'] != 'LSTM Predictor',
                   item['result']))
        db.write_back([item])
    db.storage.flush()
    exit()

    # with_result = db.search(where('doc_id') in list(range(19650, 19700)))
    # res = list(map(lambda x: db.get(doc_id=x), range(19600, 19700)))
    # res = list(filter(lambda x: (x is not None), res))
    # logging.debug(len(res))
    # list(map(plot, res))
    # doc_ids = list(map(lambda x: x.doc_id, res))
    # db.remove(doc_ids=doc_ids)
    # db.storage.flush()
    # logging.info("length of db: {}".format(len(db)))
    # exit()

    outliers = list(
        filter(lambda x: x['result'][0]['errors'][0] > 75, with_result))
    logging.debug(len(outliers))
    # logging.debug(doc_ids)
    list(map(plot, outliers))
    logging.info('end')
    exit()

    for item in db:
        print(item)
        for x in item:
            print(x)
    exit()
def get_cashed_write_db(file_name: str) -> Database:
    """Return a database instance with a cached (write-buffered) interface."""
    return Database(file_name,
                    storage=CachingMiddleware(JSONStorage),
                    sort_keys=True,
                    indent=4)
def align_images(file_index_fn, table_name="hdf5_proc",
                 dataset_for_aligning="data", dataset_reference="data",
                 roi_size=0.5, variable="zpz",
                 align_method='cv2.TM_CCOEFF_NORMED',
                 date=None, sample=None, energy=None, cores=-2,
                 query=None, jj=True):
    """Align images of one experiment by zpz.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are aligned.
    The alignment of the different images will be done in parallel: all
    cores but one are used (value=-2). Each file contains a single image
    to be aligned.
    """

    start_time = time.time()
    root_path = os.path.dirname(os.path.abspath(file_index_fn))

    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)

    files_query = Query()
    file_index_db = filter_file_index(file_index_db, files_query,
                                      date=date, sample=sample,
                                      energy=energy, ff=False)

    if query is not None:
        file_records = file_index_db.search(query)
    else:
        file_records = file_index_db.all()
    n_files = len(file_records)

    couples_to_align = []

    # The goal in this case is to align all the images for a same date,
    # sample, energy and angle, and a variable zpz.
    if variable == "zpz":
        dates_samples_energies_angles = []
        for record in file_records:
            dates_samples_energies_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["angle"]))
        dates_samples_energies_angles = list(
            set(dates_samples_energies_angles))
        for date_sample_energy_angle in dates_samples_energies_angles:
            date = date_sample_energy_angle[0]
            sample = date_sample_energy_angle[1]
            energy = date_sample_energy_angle[2]
            angle = date_sample_energy_angle[3]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.angle == angle))
            if query is not None:
                query_cmd &= query
            h5_records = file_index_db.search(query_cmd)

            # pobj = pprint.PrettyPrinter(indent=4)
            # print("group for align")
            # for rec in h5_records:
            #     pobj.pprint(rec["filename"])

            _get_couples_to_align(couples_to_align, h5_records, root_path)

    # The goal in this case is to align all the images for a same date,
    # sample, jj_offset and angle, and a variable repetition.
    # This is used in the magnetism experiments where many repetitions are
    # necessary for each of the angles. Two different JJ positions are
    # usually used in this kind of experiment, which allows setting the
    # two different circular polarizations (right and left).
    elif variable == "repetition" and jj:
        dates_samples_energies_jjs_angles = []
        for record in file_records:
            dates_samples_energies_jjs_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_u"], record["jj_d"], record["angle"]))
        dates_samples_energies_jjs_angles = list(
            set(dates_samples_energies_jjs_angles))
        for date_sample_energy_jj_angle in dates_samples_energies_jjs_angles:
            date = date_sample_energy_jj_angle[0]
            sample = date_sample_energy_jj_angle[1]
            energy = date_sample_energy_jj_angle[2]
            jj_u = date_sample_energy_jj_angle[3]
            jj_d = date_sample_energy_jj_angle[4]
            angle = date_sample_energy_jj_angle[5]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_u == jj_u) &
                         (files_query.jj_d == jj_d) &
                         (files_query.angle == angle))
            h5_records = file_index_db.search(query_cmd)

            # pobj = pprint.PrettyPrinter(indent=4)
            # print("group for align")
            # for rec in h5_records:
            #     pobj.pprint(rec["filename"])

            _get_couples_to_align(couples_to_align, h5_records, root_path)

    elif variable == "repetition" and not jj:
        dates_samples_energies = []
        for record in file_records:
            dates_samples_energies.append(
                (record["date"], record["sample"], record["energy"]))
        dates_samples_energies = list(set(dates_samples_energies))
        for date_sample_energy in dates_samples_energies:
            date = date_sample_energy[0]
            sample = date_sample_energy[1]
            energy = date_sample_energy[2]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy))
            h5_records = file_index_db.search(query_cmd)

            # pobj = pprint.PrettyPrinter(indent=4)
            # print("group for align")
            # for rec in h5_records:
            #     pobj.pprint(rec["filename"])

            _get_couples_to_align(couples_to_align, h5_records, root_path)

    if couples_to_align:
        Parallel(n_jobs=cores, backend="multiprocessing")(
            delayed(align_and_store_from_fn)(
                couple_to_align,
                dataset_reference=dataset_reference,
                dataset_for_aligning=dataset_for_aligning,
                align_method=align_method,
                roi_size=roi_size)
            for couple_to_align in couples_to_align)

    print("--- Align %d files took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))
    db.close()
def __init__(self, Chat_class, token, db_filename):
    self.bot = telepot.Bot(token)
    self.db = TinyDB(db_filename, storage=CachingMiddleware(JSONStorage))
    self.chat = Chat_class(self.bot, self.db, 0)
    self.Chat_class = Chat_class
def __init__(self):
    self.c = TinyDB('cache.json', storage=CachingMiddleware(JSONStorage))
    obj = getattr(self.c, "_storage")
    # Setting this value very low keeps the data synced to disk at all
    # times, even though this approach is a bit of a hack.
    obj.WRITE_CACHE_SIZE = 1
    self.item = namedtuple('item', ("is_dir", 'name', "st_mtime", "size",
                                    'permission'))
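The snippet above forces a flush after every write by setting WRITE_CACHE_SIZE to 1. A hedged alternative sketch (variable names are illustrative, not from the original project): CachingMiddleware also exposes flush(), and TinyDB's close() flushes once at shutdown, so the cache can instead be pushed to disk only at chosen points.

from tinydb import TinyDB
from tinydb.middlewares import CachingMiddleware
from tinydb.storages import JSONStorage

cached_db = TinyDB('cache.json', storage=CachingMiddleware(JSONStorage))
cached_db.insert({'name': 'example', 'is_dir': False})  # buffered in memory
cached_db.storage.flush()  # push the pending write cache to cache.json now
cached_db.close()          # close() also flushes before closing the file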
def __init__(self, table_name='simulation'):
    self.logger = logging.getLogger(__name__)
    self.db_path = os.environ['DB_FILEPATH']
    self.db = TinyDB(self.db_path, storage=CachingMiddleware(JSONStorage))
    self.table = self.db.table(table_name)
def get_database():
    path = os.path.join(os.path.expanduser('~'), ".whatportis_db.json")
    return TinyDB(path, storage=CachingMiddleware(JSONStorage))
coloredlogs.install(
    level='INFO',
    fmt='%(asctime)s %(filename)s[%(lineno)d]:%(funcName)s %(levelname)s %(message)s'
)
logging.info("Yeah")

### CHECK GPU
print(tf.test.gpu_device_name())
print(device_lib.list_local_devices())

### LOAD DATABASE
path = os.getenv('T1DPATH', '../')
db_path = path + 'data/tinydb/db1.json'
db = TinyDB(db_path, storage=CachingMiddleware(JSONStorage))
print("database loaded with {} entries".format(len(db)))


### PREPARE DATA
def convert_to_int(x):
    try:
        return float(x)
    except:
        logging.error("error {}".format(x))


def sort_index(item):
    item.index = list(map(lambda x: convert_to_int(x), item.index))
    return item.sort_index()
def get_db():
    if 'db' not in g:
        g.db = TinyDB(DATABASE, storage=CachingMiddleware(JSONStorage))
    return g.db
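A minimal companion sketch for the Flask pattern above, assuming an application object named app exists in the same module: closing the database on application-context teardown lets CachingMiddleware flush its write cache back to the JSON file.

@app.teardown_appcontext
def close_db(exception=None):
    db = g.pop('db', None)
    if db is not None:
        db.close()  # flushes CachingMiddleware's cache to DATABASE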
def __init__(self, username):
    # Make a database connection and return it
    self.db = TinyDB(ROOT_DIR / "scripts" / "database" / "db.json",
                     indent=2,
                     storage=CachingMiddleware(JSONStorage))
    self.user = Query()
    self.username = username
    self.result_dir = ROOT_DIR / "scripts" / "results" / username
def average_image_groups(file_index_fn, table_name="hdf5_proc",
                         dataset_for_averaging="data", variable="zpz",
                         description="", dataset_store="data",
                         date=None, sample=None, energy=None,
                         cores=-2, jj=True):
    """Average images of one experiment by zpz.
    If date, sample and/or energy are indicated, only the corresponding
    images for the given date, sample and/or energy are processed.
    The average of the different groups of images will be done in
    parallel: all cores but one are used (value=-2).
    All data images of the same angle, for the different ZPz, are averaged.
    """
    """ TODO: In the future, the average by variable == repetition,
    followed immediately by variable == zpz, should also be made available.
    Finally these three features should exist:
    - average by same angle and different zpz positions (DONE)
    - average by same angle, same zpz and different repetition (ONGOING)
    - average by same angle, first by same zpz and different repetition,
      and afterwards by same angle and different zpz positions (TODO)
    """

    start_time = time.time()
    root_path = os.path.dirname(os.path.abspath(file_index_fn))

    file_index_db = TinyDB(file_index_fn,
                           storage=CachingMiddleware(JSONStorage))
    db = file_index_db
    if table_name is not None:
        file_index_db = file_index_db.table(table_name)

    files_query = Query()
    file_index_db = filter_file_index(file_index_db, files_query,
                                      date=date, sample=sample,
                                      energy=energy, ff=False)

    all_file_records = file_index_db.all()
    n_files = len(all_file_records)
    averages_table = db.table("hdf5_averages")
    averages_table.purge()

    groups_to_average = []
    if variable == "zpz":
        dates_samples_energies_angles = []
        for record in all_file_records:
            dates_samples_energies_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["angle"]))
        dates_samples_energies_angles = list(
            set(dates_samples_energies_angles))
        for date_sample_energy_angle in dates_samples_energies_angles:
            date = date_sample_energy_angle[0]
            sample = date_sample_energy_angle[1]
            energy = date_sample_energy_angle[2]
            angle = date_sample_energy_angle[3]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_zpz = len(img_records)
            central_zpz = 0
            for img_record in img_records:
                central_zpz += img_record["zpz"]
            central_zpz /= round(float(num_zpz), 1)

            files = get_file_paths(img_records, root_path)
            central_zpz_with_group_to_average = [central_zpz]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            central_zpz_with_group_to_average.append(group_to_average)
            central_zpz_with_group_to_average.append(
                date_sample_energy_angle)
            groups_to_average.append(central_zpz_with_group_to_average)

    elif variable == "repetition" and jj:
        dates_samples_energies_jjs_angles = []
        for record in all_file_records:
            dates_samples_energies_jjs_angles.append(
                (record["date"], record["sample"], record["energy"],
                 record["jj_u"], record["jj_d"], record["angle"]))
        dates_samples_energies_jjs_angles = list(
            set(dates_samples_energies_jjs_angles))
        for date_sample_energy_jj_angle in dates_samples_energies_jjs_angles:
            date = date_sample_energy_jj_angle[0]
            sample = date_sample_energy_jj_angle[1]
            energy = date_sample_energy_jj_angle[2]
            jj_u = date_sample_energy_jj_angle[3]
            jj_d = date_sample_energy_jj_angle[4]
            angle = date_sample_energy_jj_angle[5]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy) &
                         (files_query.jj_u == jj_u) &
                         (files_query.jj_d == jj_d) &
                         (files_query.angle == angle))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)

            files = get_file_paths(img_records, root_path)
            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy_jj_angle)
            groups_to_average.append(complete_group_to_average)

    elif variable == "repetition" and not jj:
        dates_samples_energies = []
        for record in all_file_records:
            dates_samples_energies.append(
                (record["date"], record["sample"], record["energy"]))
        dates_samples_energies = list(set(dates_samples_energies))
        for date_sample_energy in dates_samples_energies:
            date = date_sample_energy[0]
            sample = date_sample_energy[1]
            energy = date_sample_energy[2]

            # Raw image records by given date, sample and energy
            query_cmd = ((files_query.date == date) &
                         (files_query.sample == sample) &
                         (files_query.energy == energy))
            img_records = file_index_db.search(query_cmd)
            num_repetitions = len(img_records)

            files = get_file_paths(img_records, root_path)
            complete_group_to_average = [num_repetitions]
            group_to_average = []
            for file in files:
                group_to_average.append(file)
            complete_group_to_average.append(group_to_average)
            complete_group_to_average.append(date_sample_energy)
            groups_to_average.append(complete_group_to_average)

    if groups_to_average[0][1]:
        records = Parallel(n_jobs=cores, backend="multiprocessing")(
            delayed(average_and_store)(
                group_to_average,
                dataset_for_averaging=dataset_for_averaging,
                variable=variable, description=description,
                dataset_store=dataset_store, jj=jj)
            for group_to_average in groups_to_average)
        averages_table.insert_multiple(records)

    print("--- Average %d files by groups, took %s seconds ---\n" %
          (n_files, (time.time() - start_time)))

    # import pprint
    # pobj = pprint.PrettyPrinter(indent=4)
    # print("----")
    # print("average records")
    # for record in records:
    #     pobj.pprint(record)

    db.close()
def get_database():
    return TinyDB(path, storage=CachingMiddleware(JSONStorage))
def storage():
    return CachingMiddleware(MemoryStorage)()
def test_caching_read():
    db = TinyDB(storage=CachingMiddleware(MemoryStorage))
    assert db.all() == []
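A hedged companion to the read test above (not from the original test file): documents inserted through a CachingMiddleware-backed TinyDB are readable through the same handle even before any flush has happened.

def test_caching_read_after_write():
    db = TinyDB(storage=CachingMiddleware(MemoryStorage))
    db.insert({'answer': 42})
    assert db.all() == [{'answer': 42}]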
from tinydb import Query, TinyDB
from tinydb.middlewares import CachingMiddleware
from tinydb.storages import JSONStorage

from config import AppConfig
from db.middleware import ReadOnlyMiddleware

app = Flask(__name__)
app.config.from_object(AppConfig)
sslify = SSLify(app, permanent=True, subdomains=True)

gistdb = TinyDB(
    "db/gistdb.json",
    default_table="gist",
    storage=ReadOnlyMiddleware(CachingMiddleware(JSONStorage))
)


@app.context_processor
def url_for_webpack_asset_processor():
    def url_for_webpack_asset(asset_name):
        with app.open_resource(
                "static/dist/webpack-assets-manifest.json") as file:
            manifest = json.load(file)
        return url_for(
            "static",
            filename="dist/{0}".format(manifest.get(asset_name)))
def __init__(self, db='debug.json'):
    self.db = TinyDB(db, storage=CachingMiddleware(JSONStorage))
from tinydb import TinyDB, Query
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware

db = TinyDB("db_hardware.json", storage=CachingMiddleware(JSONStorage))
db.purge()

axis_table = db.table("axis")
axis_table.purge()
axis_table.insert_multiple([
    {
        "id": "x",
        "limit": 450,
        "feed_rate": 50000,
        "acc": 1500,
        "park": 5
    },
    {
        "id": "y",
        "limit": 400,
        "feed_rate": 50000,
        "acc": 1500,
        "park": 5
    },
    {
        "id": "z",
        "limit": 120,
        "feed_rate": 50000,
        "acc": 1500,
        "park": 59
    },
def tests():
    # Create TinyDB database and insert information
    db_tiny = TinyDB('tiny.json')
    queryTiny = Query()
    count = 0
    print("\nadding information into TinyDB...\n")
    while count < 5000:
        db_tiny.insert({'id': count, 'text': str(random.random())})
        count = count + 1
    print("adding information into TinyDB complete\n")

    # Create SQLite database and insert information
    db_lite = sqlite3.connect('lite.db')
    queryLite = db_lite.cursor()
    queryLite.execute('''CREATE TABLE Strings (id int PRIMARY KEY, text text)''')
    print("adding information into sqlite...\n")
    count = 0
    while count < 5000:
        queryLite.execute("INSERT INTO Strings (id, text) VALUES (" +
                          str(count) + "," + str(random.random()) + ")")
        count = count + 1
    print("adding information into sqlite complete\n")
    db_lite.commit()

    # First test queries
    # Note: time.clock() was removed in Python 3.8; perf_counter() is the
    # closest replacement for these wall-time measurements.
    print("First test\n")
    start_time = time.perf_counter()
    db_tiny.search(queryTiny.id == 4800)  # Query to TinyDB
    timeTiny = (time.perf_counter() - start_time)
    queryLite.execute('''Select * FROM Strings WHERE id=4800''')
    timeLite = ((time.perf_counter() - start_time) - timeTiny)
    print(timeTiny)
    print(timeLite)

    # Create TinyDB database with CachingMiddleware and insert information
    db_tiny_middle = TinyDB('tiny_middleware.json',
                            storage=CachingMiddleware(JSONStorage))
    queryTinyMiddle = Query()
    count = 0
    print("\nadding information into TinyDB CachingMiddleware...\n")
    while count < 5000:
        db_tiny_middle.insert({'text': str(random.random()), 'id': count})
        count = count + 1
    print("adding information into TinyDB CachingMiddleware complete\n")

    # Second test queries
    print("Second test\n")
    start_time = time.perf_counter()
    db_tiny_middle.search(queryTinyMiddle.id == 4800)  # Query to TinyDB
    timeTiny_2 = (time.perf_counter() - start_time)
    queryLite.execute('''Select * FROM Strings WHERE id=4800''')
    timeLite_2 = ((time.perf_counter() - start_time) - timeTiny_2)
    db_lite.close()

    return [
        timeTiny * 1000, timeLite * 1000, timeTiny_2 * 1000, timeLite_2 * 1000
    ]
import sys
import os

from tinydb import TinyDB, Query
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware

db = TinyDB("db_jobs.json", storage=CachingMiddleware(JSONStorage))


def dict_builder(keys, values):
    d = {}
    for i in range(0, len(keys)):
        try:
            d[keys[i]] = values[i]
        except Exception as e:
            print(keys)
            print(values)
            print(e)
            raise e
    return d


def load(file):
    components = []
    with open(file) as f:
        fields = ['id', 'x', 'y', 'rotation', 'value', 'package']
        for line in f:
            print(line.strip())
            d = dict_builder(
                fields,
                line.strip().replace(' ', ' ').replace(' ', ' ').split(','))
            x = d.pop('x')
def __init__(self):
    self.db = TinyDB('db.json', storage=CachingMiddleware(JSONStorage))
    self.players = self.db.table('players')
def test_access_storage():
    assert isinstance(TinyDB(storage=MemoryStorage).storage,
                      MemoryStorage)
    assert isinstance(TinyDB(storage=CachingMiddleware(MemoryStorage)).storage,
                      CachingMiddleware)
path = '/home/eckel/data/dataset/archive/'
database_path = '/home/eckel/master/finale/database/'
domain = sys.argv[1]

files = [f for f in os.listdir(path) if domain.lower() in f]
print(files)

f = open('/home/eckel/data/quad-stats/producturls.csv', 'r')
reader = csv.reader(f, delimiter=',')
product_urls = {line[0]: line[1:len(line)] for line in reader}

max_sample_size = 10000
current_sample_size = 0
number_of_files = len(files)
samples_per_file = max_sample_size / number_of_files

db = TinyDB(database_path + domain + '.json',
            storage=CachingMiddleware(JSONStorage))
CachingMiddleware.WRITE_CACHE_SIZE = 500

for file in files:
    print(file)
    file_path = path + file
    cur_num = 0
    curr_products = product_urls[domain.lower() + '.com']
    curr_products_html = {}
    curr_nonproducts_html = {}
    with gzip.open(file_path, mode='rb') as gzf:
        for record in warc.WARCFile(fileobj=gzf):
            url = record['WARC-Target-URI'].strip()
            html = record.payload.read()
            if url in curr_products:
                curr_products_html[url] = html
    with ThreadPoolExecutor(max_workers=8) as p:
        future_tasks = [p.submit(Requests, i) for i in urls]
    result = [obj.result() for obj in future_tasks if obj.result() is not None]
    return result


from tinydb import TinyDB, where
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware
from collections import namedtuple
import os

Port = namedtuple("Port", ["name", "port", "protocol", "description"])

__BASE_PATH__ = os.path.dirname(os.path.abspath(__file__))
__DATABASE_PATH__ = os.path.join(__BASE_PATH__, 'ports.json')
__DB__ = TinyDB(__DATABASE_PATH__, storage=CachingMiddleware(JSONStorage))


def GetPortInfo(port, like=False):
    """Look up the service running on a port.

    The argument is a port number given as a string; the service name,
    e.g. 'http', is returned. If nothing is found, '检测失效'
    (detection failed) is returned.
    """
    where_field = "port" if port.isdigit() else "name"
    if like:
        ports = __DB__.search(where(where_field).search(port))
    else:
        ports = __DB__.search(where(where_field) == port)
    try:
        return ports[0]['name']
        # flake8: noqa (F812)
        (Query()['InChI'] == identifier) |
        (Query()['InChIKey'] == identifier) |
        (Query()['Original_SMILES'] == identifier) |
        (Query()['Structure_SMILES'] == identifier) |
        (Query()['Canonical_SMILES'] == identifier) |
        (Query()['Isomeric_SMILES'] == identifier)
    )
    return [(record.doc_id, record) for record in results]


if __name__ == "__main__":
    # Access local tinyDB
    db = TinyDB('src/data/tinydb_db.json',
                sort_keys=True,
                indent=4,
                storage=CachingMiddleware(JSONStorage))  # Using caching for faster performance

    try:
        identifiers = [
            # '64-19-7',  # acetic acid >>> pKa = 4.76 at 25 °C
            # '2950-43-8',  # Hydroxylamine-O-sulfonic acid, no result
            # 'InChI=1S/C6H6S/c7-6-4-2-1-3-5-6/h1-5,7H',  # thiophenol
            # 'C1=CC=C(C=C1)O',  # phenol >>> pKa = 9.9
            '596-09-8',
            '141-78-6',
            '110-82-7',
            '67-63-0',
            '75-09-2',
        ]
        for identifier in identifiers: