Example #1
def load_into_sqlite():

    # Update data by id
    by_id = data_by_id()

    with _LogTime("loading data into SQLite"):
        d = sqlitedict.SqliteDict(SQLITE_DB,
                                  table=SQLITE_BY_ID_TABLE,
                                  init=by_id)
    with _LogTime("computing prefixes"):
        pd = {}
        td = {}
        n = 0
        for record in by_id.values():
            (ps, ts) = prefixes_and_tokens(record)
            v = record["id"]
            for p in ps:
                if p not in pd:
                    pd[p] = set()
                pd[p].add(v)
            for t in ts:
                if t not in td:
                    td[t] = set()
                td[t].add(v)
            n += 1
            if not n % 10000:
                print(n)

    with _LogTime("writing prefixes to SQLite"):
        pds = sqlitedict.SqliteDict(SQLITE_DB, table="prefixes", init=pd)
        tds = sqlitedict.SqliteDict(SQLITE_DB, table="tokens", init=td)
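
Note: the `table=` and `init=` keyword arguments above appear to come from a patched or locally wrapped SqliteDict; upstream sqlitedict names the parameter `tablename` and has no `init` argument. A rough sketch of the same load against the stock API (reusing the names from this example):

def load_by_id_stock_api():
    # Sketch: bulk-load a prebuilt dict into a named table with plain sqlitedict.
    by_id = data_by_id()
    with sqlitedict.SqliteDict(SQLITE_DB, tablename=SQLITE_BY_ID_TABLE) as d:
        d.update(by_id)  # dict-style bulk insert
        d.commit()       # flush explicitly; autocommit is off by default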
Example #2
    def test_readonly(self):
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = sqlitedict.SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db['key_two'] = 2
        orig_db.commit()
        orig_db.close()

        readonly_db = sqlitedict.SqliteDict(filename=fname, flag='r')
        self.assertTrue(readonly_db['key'] == 'value')
        self.assertTrue(readonly_db['key_two'] == 2)

        def attempt_write():
            readonly_db['key'] = ['new_value']

        def attempt_update():
            readonly_db.update(key='value2', key_two=2.1)

        def attempt_delete():
            del readonly_db['key']

        def attempt_clear():
            readonly_db.clear()

        def attempt_terminate():
            readonly_db.terminate()

        attempt_funcs = [
            attempt_write, attempt_update, attempt_delete, attempt_clear,
            attempt_terminate
        ]

        for func in attempt_funcs:
            with self.assertRaises(RuntimeError):
                func()
Example #3
def user_page_crawl():
    res = []
    pool = Pool(8)
    idxd = sqlitedict.SqliteDict('./idx_db.db')
    upgd = sqlitedict.SqliteDict('./upg_db.db', autocommit=True)

    def get_upg(uid, source, upgd):
        jo = get_uid_page(uid)
        upgd[source] = jo
        print(dump_json(len(jo['data']), i=2))

    for k, idx_js in idxd.items():
        rj = idx_js
        source = rj['source']
        print(k, dump_json(source))
        # print dump_json(rj,i=2)
        # print rj.keys()
        try:
            uid = rj['media_url'].split('/')[-2]
        except Exception:
            continue
        pool.spawn(get_upg, uid, source, upgd)
    pool.join()
Example #4
    def test_default_reuse_existing_flag_c(self):
        """Re-opening of a database does not destroy it."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = sqlitedict.SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        next_db = sqlitedict.SqliteDict(filename=fname)
        self.assertIn('key', next_db.keys())
        self.assertEqual(next_db['key'], 'value')
Example #5
def match(q, limit=None):
    results = []
    bp = sqlitedict.SqliteDict(SQLITE_DB, table="prefixes")
    bi = sqlitedict.SqliteDict(SQLITE_DB, table="shareable_medication_by_id")
    bt = sqlitedict.SqliteDict(SQLITE_DB, table="tokens")
    #bp = _bp()
    #bi = _bi()
    tokens = tokenize(q)
    for t in tokens[:-1]:
        # Skip tokens of length 1 because the list is too long
        # TODO: In the future, have a globally popular list of
        # ~200 things for each letter to return instead of the
        # overwhelmingly huge list that would be returned now
        if len(t) < 2:
            continue

        # Take the ones where the second part of the tuple is True
        # which means that the string matches a complete token, i.e.
        # "viagra" not "via" (which would just be the beginning of a string)
        results.append(bt[t])

    # For the last token, we take both partial tokens and complete tokens,
    # i.e. "via" and "viagra" both match something that has "viagra" in it
    # TODO: In the future, make it so it's not always the last token that
    # is partial but wherever the cursor is
    if len(tokens[-1]) >= 2:
        results.append(bp[tokens[-1]])

    if not results:
        results.append(set())

    rr1 = set.intersection(*results)

    # If the matching we've done doesn't have any hits, then
    # fallback to looking at the first things you type
    if not limit or len(rr1) < limit:
        r2 = []
        for t in tokens:
            if len(t) >= 2:
                r2.append(bp[t])
        if not r2:
            r2.append(set())
        rr2 = set.intersection(*r2)
        print "rr2=%r" % rr2
        rr2.difference_update(rr1)
    else:
        rr2 = set()

    rr = [bi[x] for x in rr1] + [bi[x] for x in rr2]
    if limit is None:
        return rr
    else:
        return rr[:limit]
Example #6
    def test_overwrite_using_flag_n(self):
        """Re-opening of a database with flag='c' destroys it all."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = sqlitedict.SqliteDict(filename=fname, tablename='sometable')
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        # verify,
        next_db = sqlitedict.SqliteDict(filename=fname,
                                        tablename='sometable',
                                        flag='n')
        self.assertNotIn('key', next_db.keys())
Example #7
def stat_feed():
    from collections import Counter
    cnt = Counter()
    idxd = sqlitedict.SqliteDict('./idx_db.db')
    upgd = sqlitedict.SqliteDict('./upg_db.db')
    for k, idx_js in upgd.items():
        print(dump_json(k), idx_js.keys())  # keys are already str under Python 3
    for k, idx_js in idxd.items():
        uj = idx_js
        # cnt[dump_json([uj['source'],uj['api_meta'][0]])]+=1
        cnt[dump_json([uj['api_meta'][0]])] += 1
        # cnt[dump_json([uj['api_meta']])]+=1
    for k, v in cnt.most_common(3):
        print(k, v)
    print('category_cnt', len(cnt), 'item_cnt', len(idxd))
Example #8
    def test_readonly(self):
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = sqlitedict.SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        readonly_db = sqlitedict.SqliteDict(filename=fname, flag='r')
        self.assertTrue(readonly_db['key'] == 'value')

        def attempt_write():
            readonly_db['key'] = ['new_value']

        with self.assertRaises(RuntimeError):
            attempt_write()
Example #9
    def check_length(self):
        # Load the current sqlitedict
        db = sqlitedict.SqliteDict(self.db_name, 'iterations')

        # Get the number of current iterations
        # Minus one because OpenMDAO uses 1-indexing
        self.num_iters = int(list(db.keys())[-1].split('|')[-1])
Example #10
def cache_topics(rss):
    """Add topics to cache file."""
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        CACHE_FILE)
    with sqlitedict.SqliteDict(path) as db:
        feed_link = rss['args'].source
        dates = db.get(feed_link, {})

        for topic in rss['topics']:
            date = dateutil.parser.parse(topic['date'])
            date = date.strftime('%Y%m%d')

            news = dates.get(date, {})

            link = topic['link']
            if link in news:
                logging.info('Topic with link {} already cached'.format(link))
                continue

            logging.info('Cache topic with link {} from {}'.format(
                link, feed_link))
            news[link] = topic
            dates[date] = news

        db[feed_link] = dates
        db.commit()
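
Note: the closing `db[feed_link] = dates` assignment is what actually persists the nested updates; sqlitedict only serializes a value when its key is assigned, so mutating a previously fetched dict in place is not written back. A small sketch of that mutate-then-reassign pattern (file and key names here are illustrative):

def touch_cached_entry():
    # Sketch: fetch, mutate in RAM, then reassign so sqlitedict re-serializes the value.
    with sqlitedict.SqliteDict('cache.sqlite') as db:
        entry = db.get('feed', {})          # a plain dict once loaded
        entry['latest'] = 'example topic'   # in-place edit, not yet persisted
        db['feed'] = entry                  # reassignment triggers the write
        db.commit()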
Example #11
    def __init__(self):
        super().__init__()
        self.setupUi()
        self.dictionarydb = sqlitedict.SqliteDict("DBS2.db", autocommit=True)
        self.Database = self.dictionarydb.get('Data', [])

        "self.dictionarydb.clear() #database clear"
Example #12
    def openfile(cls, fpath, tablename='vntree0', flag='c'):
        """Class method that opens (load) a vn4 (sqlite) file.

        :param fpath: the file path for the vn4 file. 
        :type fpath: str         
        :returns: root node of tree or `False` if failure. 
        :rtype: Node or bool
        """
        if not os.path.isfile(fpath):
            logger.error("%s.openfile: arg `fpath`=«%s» not valid." % (cls.__name__, fpath))
            return False
        try:
            #with sqlitedict.SqliteDict(fpath, encode=sqlitedict_encode, decode=sqlitedict_decode) as _vndict:
            with sqlitedict.SqliteDict(fpath, tablename=tablename, flag=flag) as _vndict:
                print(f"_vndict={_vndict}")
                pkldata = _vndict["_vntree"]
                #rootnode = _vndict["_vntree"] 
            #print(f"type(pkldata)={type(pkldata)}")
            #print(pkldata)
            rootnode = cls(treedict=pkldata)
            rootnode._vntree_fpath = os.path.abspath(fpath)
        except Exception as err:
            logger.error("%s.openfile: data in file «%s» not valid: %s" % (cls.__name__, fpath, err))
            return False
        for _n in rootnode:
            _n.load_data()
        return rootnode 
Example #13
def main(quiet=True):
    try:
        get_ipython
    except NameError:
        nested = 0
        cfg = load_default_config()
        cfg.TerminalInteractiveShell.prompts_class = CustPrompt
    else:
        print('Running nested copies of IPython. Augmenting configuration...')
        cfg = load_default_config()
        nested = 1

    from IPython.terminal.embed import InteractiveShellEmbed

    cfg.TerminalInteractiveShell.confirm_exit = False
    cfg.TerminalInteractiveShell.debug = True

    ipshell = InteractiveShellEmbed.instance(
        config=cfg, banner1='Welcome to the sqlenv IPython Shell...\n')

    # Setup sqlitedict as `sqdb`
    sqdb_path = os.path.join(os.path.abspath('.'), '.local_db.sqlite')
    sqdb = sqlitedict.SqliteDict(sqdb_path)

    ipshell()
Example #14
def read_sqlite_dict_file(filepath, *, with_dill=False, **kwargs):
    if with_dill:
        sqlitedict = tricks.module_sqlitedict_with_dill(dill_detect_trace=True)
    else:
        import sqlitedict
    with sqlitedict.SqliteDict(filepath, **kwargs) as sd:
        return dict(sd)
Example #15
    def test_irregular_tablenames(self):
        """Irregular table names need to be quoted"""
        db = sqlitedict.SqliteDict(':memory:', tablename='9nine')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        db = sqlitedict.SqliteDict(':memory:', tablename='outer space')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        with self.assertRaisesRegex(ValueError, r'^Invalid tablename '):
            sqlitedict.SqliteDict(':memory:', '"')
Example #16
 def test_commit_nonblocking(self):
     """Coverage for non-blocking commit."""
     # given,
     with sqlitedict.SqliteDict(autocommit=True) as d:
         # exercise: the implicit commit is nonblocking
         d['key'] = 'value'
         d.commit(blocking=False)
Example #17
 def test_with_statement(self):
     """Verify using sqlitedict as a contextmanager . """
     with sqlitedict.SqliteDict() as d:
         self.assertTrue(isinstance(d, sqlitedict.SqliteDict))
         self.assertEqual(dict(d), {})
         self.assertEqual(list(d), [])
         self.assertEqual(len(d), 0)
Example #18
 def set_plan_dict(self, plan_dict):
     id_curr = max([int(k) for k in plan_dict])
     with sqlitedict.SqliteDict(filename=self.filename) as sqldict:
         sqldict['plan_dict'] = plan_dict
         sqldict['id_curr'] = id_curr + 1
         sqldict.commit()
     self.plan_dict_cache = None
Example #19
def index_crawl():
    pool = Pool(8)
    cols = 'news_finance,news_entertainment,news_tech,news_game,news_sports,news_travel,news_car,news_hot,news_military,news_fashion,news_history,news_world,news_discovery,news_regime,news_baby,news_essay'.split(
        ',')
    car_cols = 'car_new_arrival,SUV,car_guide,car_usage'
    idxd = sqlitedict.SqliteDict('./idx_db.db', autocommit=True)

    def fetch_one_col(col, i, idxd):
        try:
            maxhot = str(ts2unix(get_date()) - i * 2000)
            jo = get_index_page(col, maxhot)
            ilist = extract_index_user_list(jo)
            for art in jo['data']:
                key = 'idx_%s' % (art['item_id'])
                art['api_meta'] = [col, maxhot]
                idxd[key] = art
        except Exception as e:
            print(e, col)

    for col in cols[:]:
        for i in range(0, 90):
            pool.spawn(fetch_one_col, col, i, idxd)
    pool.join()
    idxd.close()
Example #20
 def getDB(self):
     '''
         Returns and initializes a singleton instance of the db.
     '''
     if not self.__created:
         self.__db = sql.SqliteDict(self.__dbpath, autocommit=True)
         self.__created = True
     return self.__db
Example #21
 def save(self, id, value):
     plan_dict = self.plan_dict
     if id in plan_dict:
         with sqlitedict.SqliteDict(filename=self.filename) as sqldict:
             sqldict[id] = {'id': id, 'x': plan_dict[id], 'value': value}
             sqldict.commit()
     else:
         raise AttributeError(f'No such key to save: {id}')
Example #22
 def test_directory_notfound(self):
     """Verify RuntimeError: directory does not exist."""
     # given: a non-existent directory,
     folder = tempfile.mkdtemp(prefix='sqlitedict-test')
     os.rmdir(folder)
     # exercise,
     with self.assertRaises(RuntimeError):
         sqlitedict.SqliteDict(filename=os.path.join(folder, 'nonexistent'))
Example #23
    def check_length(self):
        # Load the current sqlitedict
        db = sqlitedict.SqliteDict(self.db_name, 'iterations')
        cr = self.case_reader = SqliteCaseReader(self.db_name)

        # Get the number of current iterations
        # Minus one because OpenMDAO uses 1-indexing
        self.num_iters = int(cr.driver_cases.list_cases()[-1].split('|')[-1])
Example #24
def process(dbname, path, options):
    with sqlitedict.SqliteDict(dbname, autocommit=True) as db:
        if is_tar_gzip(path):
            return process_tgz(db, path, options)
        elif os.path.isfile(path):
            return process_file(db, path, options)
        elif os.path.isdir(path):
            raise NotImplementedError()
Example #25
    def populate_db(self, ids: np.ndarray, sents: np.ndarray):
        db_file = p.abspath(self.sub_dir / f'{self.seed_name}.sqlite')
        id_to_sent = sqld.SqliteDict(db_file, autocommit=True)

        for i in range(ids.shape[0]):
            id_to_sent[str(ids[i])] = str(sents[i])

        id_to_sent.close()
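
Note: with `autocommit=True`, every assignment in the loop above commits individually, which can be slow for large arrays. A sketch of the same load batched into one explicit commit (method name and structure are assumed, not from the original class):

    def populate_db_batched(self, ids: np.ndarray, sents: np.ndarray):
        # Sketch: write all rows first, then flush once instead of committing per item.
        db_file = p.abspath(self.sub_dir / f'{self.seed_name}.sqlite')
        with sqld.SqliteDict(db_file) as id_to_sent:  # autocommit is off by default
            for i in range(ids.shape[0]):
                id_to_sent[str(ids[i])] = str(sents[i])
            id_to_sent.commit()  # single transaction flush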
Example #26
 def __init__(self, path, *, compressed=True):
     self.compressed = compressed
     tablename = 'responses_gzip' if compressed else 'responses'
     self.db = sqlitedict.SqliteDict(path,
                                     tablename=tablename,
                                     autocommit=True,
                                     encode=self.encode,
                                     decode=self.decode)
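
Note: the `self.encode`/`self.decode` methods wired into SqliteDict above are not shown in this snippet. A plausible sketch of a gzip-over-pickle codec pair (implementation assumed, not taken from the original source; needs `import gzip, pickle, sqlite3`):

 def encode(self, obj):
     # Sketch: pickle the object, optionally gzip it, and hand SQLite a BLOB.
     data = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
     if self.compressed:
         data = gzip.compress(data)
     return sqlite3.Binary(data)

 def decode(self, blob):
     # Sketch: reverse of encode(); decompress when the table was written compressed.
     data = bytes(blob)
     if self.compressed:
         data = gzip.decompress(data)
     return pickle.loads(data)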
Example #27
 def __enter__(self):
     try:
         import sqlitedict
     except ImportError:
         error('failed to import sqlitedict; try `pip3 install sqlitedict`')
         raise
     self.db = sqlitedict.SqliteDict(self.dbname, autocommit=True)
     return self
Example #28
 def test_with_statement(self):
     ''' test_with_statement
     '''
     with sqlitedict.SqliteDict() as d:
         self.assertTrue(isinstance(d, sqlitedict.SqliteDict))
         self.assertEqual(dict(d), {})
         self.assertEqual(list(d), [])
         self.assertEqual(len(d), 0)
Example #29
 def _get_linkdb(self, autocommit=False):
     return sqlitedict.SqliteDict(
         filename=str(self._conf_root / 'glink.db'),
         tablename='links',
         journal_mode='WAL',
         autocommit=autocommit,
         encode=partial(json.dumps, ensure_ascii=False),
         decode=json.loads
     )
Example #30
 def test_as_str(self):
     """Verify SqliteDict.__str__()."""
     # given,
     db = sqlitedict.SqliteDict()
     # exercise
     db.__str__()
     # test when db closed
     db.close()
     db.__str__()