Example #1
    def assertMetadataRecorded(self, expected):
        if self.comm.rank != 0:
            return

        db = SqliteDict(self.filename, self.tablename_metadata)
        _assertMetadataRecorded(self, db, expected)
        db.close()
Example #2
File: efa.py Project: deti/efa
def adjust_evernote_font():
    """
    Call for Evernote
    """
    note_info = SqliteDict(conf.db.db_file, autocommit=True)

    notes_in_evernote = list()
    for note in get_notes(get_notebooks()):
        guid = note.guid
        notes_in_evernote.append(guid)
        if guid not in note_info.keys() \
                or note_info[guid][FONT_SIZE] != conf.font_size \
                or note_info[guid][LINE_HEIGHT] != conf.line_height:
            adjust_note(note)
            note_info[guid] = {FONT_SIZE: conf.font_size,
                               LINE_HEIGHT: conf.line_height}

    guids_to_forget = [guid for guid in note_info.keys()
                       if guid not in notes_in_evernote]

    for guid in guids_to_forget:
        logging.debug("Delete guid from DB: {}".format(guid))
        del note_info[guid]

    note_info.close()
Example #3
    def __init__(self, bucket_name, storage_path=None):
        ''' Bucket init

        - if the bucket exists, meta parameter will be ignored

        '''
        if bucket_name and isinstance(bucket_name, (str, unicode)) and re.match(r"^[a-z0-9\.\-_]+$", bucket_name, re.I):
            self._name = bucket_name.strip()
        else:
            raise falcon.HTTPInvalidParam(
                "The parameter shall contain only alpha-numeric characters, value: '%s'" % bucket_name, 
                param_name='name'
            )

        self._bucket_path = None
        if storage_path and os.path.exists(storage_path):
            self._bucket_path = os.path.join(storage_path, self._name)
        else:
            raise falcon.HTTPInternalServerError(
                title='IncorrectStoragePath',
                description='The storage path is incorrect, "%s"' % storage_path
            )

        if self._bucket_path and os.path.exists(self._bucket_path):
            self._meta = SqliteDict(os.path.join(self._bucket_path,'metadata.sqlite'), 'bucket', autocommit=True)
        else:
            self._meta = SqliteDict(':memory:', 'bucket', autocommit=True)
Example #4
def _import_sql_data(data_dir):
    file_path = os.path.join(data_dir, DATA_FILE)

    # Find out what format we have
    with sqlite3.connect(file_path) as conn:
        try:
            conn.execute('select count(*) from zipgun_info')
            zipgun_info = SqliteDict(file_path, tablename='zipgun_info')
            version = zipgun_info.get('version', 0)
        except sqlite3.OperationalError:
            version = 0

    if version == 0:
        country_postal_codes = SqliteDict(file_path)
    elif version == 1:
        country_postal_codes = {}
        for country_code in zipgun_info['country_codes']:
            if country_code in country_postal_codes:
                raise ValueError('Duplicate entry found for {}'.format(
                    country_code))
            country_postal_codes[country_code] = SqliteDict(
                file_path, tablename='zg_{}'.format(country_code),
                journal_mode='OFF')
        zipgun_info.close()
    else:
        raise ValueError('Unknown data file version {}'.format(version))
    return country_postal_codes
Example #5
def _persist_v0(file_path, zg):
    print('Creating db...')
    persisted = SqliteDict(file_path, autocommit=False)
    print('Updating data...')
    persisted.update(zg.country_postal_codes)
    print('Committing data...')
    persisted.commit()
Example #6
    def assertIterationDataRecorded(self, expected, tolerance, root):
        if self.comm.rank != 0:
            return

        db = SqliteDict(self.filename, self.tablename_iterations)
        _assertIterationDataRecorded(self, db, expected, tolerance)
        db.close()
Example #7
def main(data_dir):
    print('Loading data...')
    zg = Zipgun(data_dir, force_text=True)
    print('Creating db...')
    persisted = SqliteDict(os.path.join(data_dir, DATA_FILE), autocommit=False)
    print('Updating data...')
    persisted.update(zg.country_postal_codes)
    print('Committing data...')
    persisted.commit()
    def test_reopen_conn(self):
        """Verify using a contextmanager that a connection can be reopened."""
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        db = SqliteDict(filename=fname)
        with db:
            db['key'] = 'value'
            db.commit()
        with db:
            db['key'] = 'value'
            db.commit()
    def test_tablenames(self):
        fname = norm_file('tests/db/tablenames-test-1.sqlite')
        SqliteDict(fname)
        self.assertEqual(SqliteDict.get_tablenames(fname), ['unnamed'])

        fname = norm_file('tests/db/tablenames-test-2.sqlite')
        with SqliteDict(fname, tablename='table1') as db1:
            self.assertEqual(SqliteDict.get_tablenames(fname), ['table1'])
        with SqliteDict(fname, tablename='table2') as db2:
            self.assertEqual(SqliteDict.get_tablenames(fname), ['table1', 'table2'])

        tablenames = SqliteDict.get_tablenames('tests/db/tablenames-test-2.sqlite')
        self.assertEqual(tablenames, ['table1', 'table2'])
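The test above leans on SqliteDict.get_tablenames() to enumerate the tables stored in a single SQLite file. A minimal stand-alone sketch of the same idea (the file path and table names here are made up):

import os
from sqlitedict import SqliteDict

path = '/tmp/tablenames-demo.sqlite'  # hypothetical scratch file

# Each tablename gets its own key/value namespace inside the same file.
with SqliteDict(path, tablename='users', autocommit=True) as users:
    users['alice'] = {'age': 30}
with SqliteDict(path, tablename='sessions', autocommit=True) as sessions:
    sessions['abc123'] = 'alice'

print(SqliteDict.get_tablenames(path))  # expected: ['users', 'sessions']
os.remove(path)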
Example #10
def basic_usage():
    """SqliteDict引擎会将任意的value转换为
    """
    mydict = SqliteDict("test.sqlite", autocommit=True)
    mydict["integer_value"] = 1
    mydict["real_value"] = 2.2
    mydict["text_value"] = "abc"
    mydict["date_value"] = date.today()
    mydict["datetime_value"] = datetime.now()
    
    # if you don't use with SqliteDict("test.sqlite") as mydict: ...
    # you have to close the connection explicitly
    mydict.close() 
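Reading those values back needs no special handling, since SqliteDict pickles and unpickles them transparently by default. A small follow-up sketch against the same test.sqlite file:

from sqlitedict import SqliteDict

# The context manager closes the connection automatically.
with SqliteDict("test.sqlite") as mydict:
    for key, value in mydict.items():
        # Values come back as their original Python types (int, float, str, date, ...).
        print(key, type(value).__name__, value)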
Example #11
    def assertDatasetEquals(self, expected, tolerance):
        # Close the file to ensure it is written to disk.
        self.recorder.close()
        # self.recorder.out = None

        sentinel = object()

        db = SqliteDict(self.filename, self.tablename)

        for coord, expect in expected:
            iter_coord = format_iteration_coordinate(coord)

            groupings = (
                ("Parameters", expect[0]),
                ("Unknowns", expect[1]),
                ("Residuals", expect[2])
            )

            #### Need to get the record with the key of 'iter_coord'
            actual_group = db[iter_coord]
            timestamp = actual_group['timestamp']

            self.assertTrue(self.t0 <= timestamp and timestamp <= self.t1 )

            for label, values in groupings:
                actual = actual_group[label]
                # If len(actual) == len(expected) and actual <= expected, then
                # actual == expected.
                self.assertEqual(len(actual), len(values))
                for key, val in values:
                    found_val = actual.get(key, sentinel)
                    if found_val is sentinel:
                        self.fail("Did not find key '{0}'".format(key))
                    
                    if isinstance(found_val, _ByObjWrapper):
                        found_val = found_val.val

                    try:
                        assert_rel_error(self, found_val, val, tolerance)
                    except TypeError as error:
                        self.assertEqual(found_val, val)

            del db[iter_coord]
            ######## delete the record with the key 'iter_coord'

        # Having deleted all found values, the file should now be empty.
        ###### Need a way to get the number of records in the main table
        self.assertEqual(len(db), 0)

        db.close()
    def __init__(self, out, **sqlite_dict_args):
        super(SqliteRecorder, self).__init__()

        if MPI and MPI.COMM_WORLD.rank > 0 :
            self._open_close_sqlitedict = False
        else:
            self._open_close_sqlitedict = True

        if self._open_close_sqlitedict:
            sqlite_dict_args.setdefault('autocommit', True)
            self.out = SqliteDict(filename=out, flag='n', tablename='openmdao', **sqlite_dict_args)
            self.out_derivs = SqliteDict(filename=out, flag='w', tablename='openmdao_derivs', **sqlite_dict_args)

        else:
            self.out = None
Example #13
    def assertDatasetEquals(self, expected, tolerance):
        # Close the file to ensure it is written to disk.
        self.recorder.close()
        # self.recorder.out = None

        sentinel = object()

        db = SqliteDict(self.filename, self.tablename)

        ###### Need a way to get a list of the group_names in the order in which they were written and put it in  a variable named order
        order = db['order']
        del db['order']

        for coord, expect in expected:
            iter_coord = format_iteration_coordinate(coord)

            self.assertEqual(order.pop(0), iter_coord)

            groupings = (
                ("Parameters", expect[0]),
                ("Unknowns", expect[1]),
                ("Residuals", expect[2])
            )

            #### Need to get the record with the key of 'iter_coord'
            actual_group = db[iter_coord]

            for label, values in groupings:
                actual = actual_group[label]
                # If len(actual) == len(expected) and actual <= expected, then
                # actual == expected.
                self.assertEqual(len(actual), len(values))
                for key, val in values:
                    found_val = actual.get(key, sentinel)
                    if found_val is sentinel:
                        self.fail("Did not find key '{0}'".format(key))
                    assert_rel_error(self, found_val, val, tolerance)
            del db[iter_coord]
            ######## delete the record with the key 'iter_coord'

        # Having deleted all found values, the file should now be empty.
        ###### Need a way to get the number of records in the main table
        self.assertEqual(len(db), 0)

        # As should the ordering.
        self.assertEqual(len(order), 0)

        db.close()
    def __init__(self, basename, use_locks=True):
        """
        All data will be stored under directory `basename`. If there is a server
        there already, it will be loaded (resumed).

        The server object is stateless in RAM -- its state is defined entirely by its location.
        There is therefore no need to store the server object.
        """
        if not os.path.isdir(basename):
            raise ValueError("%r must be a writable directory" % basename)
        self.basename = basename
        self.lock_update = threading.RLock() if use_locks else gensim.utils.nocm
        try:
            self.fresh_index = SimIndex.load(self.location('index_fresh'))
        except:
            self.fresh_index = None
        try:
            self.opt_index = SimIndex.load(self.location('index_opt'))
        except:
            self.opt_index = None
        try:
            self.model = SimModel.load(self.location('model'))
        except:
            self.model = None
        self.payload = SqliteDict(self.location('payload'), autocommit=True, journal_mode=JOURNAL_MODE)
        # save the opened objects right back. this is not necessary and costs extra
        # time, but is cleaner when there are server location changes (see `check_moved`).
        self.flush(save_index=True, save_model=True, clear_buffer=True)
        logger.info("loaded %s" % self)
Example #15
    def __init__(self, basename, use_locks=False):
        """
        All data will be stored under directory `basename`. If there is a server
        there already, it will be loaded (resumed).

        The server object is stateless in RAM -- its state is defined entirely by its location.
        There is therefore no need to store the server object.
        """
        if not os.path.isdir(basename):
            raise ValueError("%r must be a writable directory" % basename)
        self.basename = basename
        self.use_locks = use_locks
        self.lock_update = threading.RLock() if use_locks else gensim.utils.nocm
        try:
            self.fresh_index = SimIndex.load(self.location('index_fresh'))
        except:
            logger.debug("starting a new fresh index")
            self.fresh_index = None
        try:
            self.opt_index = SimIndex.load(self.location('index_opt'))
        except:
            logger.debug("starting a new optimized index")
            self.opt_index = None
        try:
            self.model = SimModel.load(self.location('model'))
        except:
            self.model = None
        self.payload = SqliteDict(self.location('payload'), autocommit=True, journal_mode=JOURNAL_MODE)
        self.flush(save_index=False, save_model=False, clear_buffer=True)
        logger.info("loaded %s" % self)
Example #16
    def test_driver_records_model_viewer_data(self):
        size = 3

        prob = Problem(Group(), impl=impl)

        G1 = prob.root.add('G1', ParallelGroup())
        G1.add('P1', IndepVarComp('x', np.ones(size, float) * 1.0))
        G1.add('P2', IndepVarComp('x', np.ones(size, float) * 2.0))

        prob.root.add('C1', ABCDArrayComp(size))

        prob.root.connect('G1.P1.x', 'C1.a')
        prob.root.connect('G1.P2.x', 'C1.b')

        prob.driver.add_recorder(self.recorder)

        self.recorder.options['record_metadata'] = True
        prob.setup(check=False)

        prob.cleanup()

        # do some basic tests to make sure the model_viewer_data was recorded correctly
        if self.comm.rank == 0:
            db = SqliteDict(self.filename, self.tablename_metadata)
            model_viewer_data = db['model_viewer_data']
            tr = model_viewer_data['tree']
            self.assertEqual(set(['name', 'type', 'subsystem_type', 'children']), set(tr.keys()))

            names = []
            for ch1 in tr['children']:
                # each is an ordereddict
                names.append(ch1["name"] )
                for ch2 in ch1["children"]:
                    names.append(ch2["name"] )
                    if "children" in ch2:
                        for ch3 in ch2["children"]:
                            names.append(ch3["name"] )

            expected_names = ['G1', 'P1', 'x', 'P2', 'x', 'C1', 'a', 'b',
                        'in_string', 'in_list', 'c', 'd', 'out_string', 'out_list']

            self.assertEqual( sorted(expected_names), sorted(names))

            cl = model_viewer_data['connections_list']
            for c in cl:
                self.assertEqual(set(['src', 'tgt']), set(c.keys()))
            db.close()
Example #17
    def test_recording_system_metadata(self):
        prob = Problem()
        prob.root = ConvergeDiverge()
        prob.root.add_metadata("string", "just a test")
        prob.root.add_metadata("ints", [1, 2, 3])
        prob.driver.add_recorder(self.recorder)
        self.recorder.options["record_metadata"] = True
        prob.setup(check=False)
        prob.cleanup()  # closes recorders

        # check the system metadata recording
        sqlite_metadata = SqliteDict(filename=self.filename, flag="r", tablename="metadata")
        system_metadata = sqlite_metadata["system_metadata"]
        self.assertEqual(len(system_metadata), 2)
        self.assertEqual(system_metadata["string"], "just a test")
        self.assertEqual(system_metadata["ints"], [1, 2, 3])
        sqlite_metadata.close()
Example #18
    def test_recording_model_viewer_data(self):
        prob = Problem()
        prob.root = ConvergeDiverge()
        prob.driver.add_recorder(self.recorder)
        self.recorder.options["record_metadata"] = True
        prob.setup(check=False)
        prob.cleanup()  # closes recorders

        # do some basic tests to make sure the model_viewer_data was recorded
        db = SqliteDict(filename=self.filename, flag="r", tablename="metadata")
        model_viewer_data = db["model_viewer_data"]
        tr = model_viewer_data["tree"]
        self.assertEqual(set(["name", "type", "subsystem_type", "children"]), set(tr.keys()))
        cl = model_viewer_data["connections_list"]
        for c in cl:
            self.assertEqual(set(["src", "tgt"]), set(c.keys()))
        db.close()
    def test_1_theoretical_ion_space_step(self):
        print("test_1_theoretical_ion_space_step")
        ms_digest = MSDigestParameters.parse(self.protein_prospector_file)
        theo_ions = entry_point.generate_theoretical_ion_space(
            self.ms1_matching_output_file, self.glycosylation_sites_file,
            ms_digest.constant_modifications, ms_digest.variable_modifications,
            ms_digest.enzyme, self.num_procs)
        self.assertTrue(os.path.exists(theo_ions))
        self.theoretical_ion_space_file = theo_ions
        theoretical_ions = SqliteDict(theo_ions, tablename="theoretical_search_space")
        sequence_set = theoretical_ions.itervalues()
        peptide_sequences = [
            sequence.Sequence(s["Seq_with_mod"]) for s in sequence_set]
        peptide_mods = set()
        for seq in peptide_sequences:
            for resid, mod in seq:
                peptide_mods.update((m.rule for m in mod))
        print(peptide_mods)
class SqliteDictJsonSerializationTest(unittest.TestCase):
    def setUp(self):
        self.fname = norm_file('tests/db-json/sqlitedict.sqlite')
        self.db = SqliteDict(
            filename=self.fname, tablename='test', encode=json.dumps, decode=json.loads
        )

    def tearDown(self):
        self.db.close()
        os.unlink(self.fname)
        os.rmdir(os.path.dirname(self.fname))

    def get_json(self, key):
        return self.db.conn.select_one('SELECT value FROM test WHERE key = ?', (key,))[0]

    def test_int(self):
        self.db['test'] = -42
        assert self.db['test'] == -42
        assert self.get_json('test') == '-42'

    def test_str(self):
        test_str = u'Test \u30c6\u30b9\u30c8'
        self.db['test'] = test_str
        assert self.db['test'] == test_str
        assert self.get_json('test') == r'"Test \u30c6\u30b9\u30c8"'

    def test_bool(self):
        self.db['test'] = False
        assert self.db['test'] is False
        assert self.get_json('test') == 'false'

    def test_none(self):
        self.db['test'] = None
        assert self.db['test'] is None
        assert self.get_json('test') == 'null'

    def test_complex_struct(self):
        test_value = {
            'version': 2.5,
            'items': ['one', 'two'],
        }
        self.db['test'] = test_value
        assert self.db['test'] == test_value
        assert self.get_json('test') == json.dumps(test_value)
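The test class above swaps the default pickle codec for JSON via the encode/decode parameters, which keeps the stored values readable by other SQLite tools. A minimal sketch of the same pattern outside the test harness (file and table names are made up):

import json
from sqlitedict import SqliteDict

# Store values as JSON text instead of pickled blobs.
db = SqliteDict('/tmp/json-demo.sqlite', tablename='settings',
                encode=json.dumps, decode=json.loads, autocommit=True)
db['retries'] = 3
db['servers'] = ['one', 'two']
assert db['servers'] == ['one', 'two']
db.close()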
class ModelCacheStoreSqlite(ModelCacheStore):

    """ BTree查找实现 """

    def __init__(self, name):
        from sqlitedict import SqliteDict
        self.datadict = SqliteDict(name)

    def sync(self):
        return self.datadict.commit()  # instead of #sync
    def __init__(self, config):
        CacheInterface.__init__(self, config)
        self.db = SqliteDict(self.config[u"cache"][u"file"], autocommit=True)
        self.expiration = self.config[u"cache"].get(u"expiration", 86400)
        def closer():
            try:
                self.db.close()
            except Exception:
                logger.exception("Exception closing file cache")
        atexit.register(closer)
Example #23
class Scribe(object):
    def __init__(self, location, table_name, exp_name):
        filename = "{}/scribe.sqlite".format(location)
        self.book = SqliteDict(filename, autocommit=True, tablename=table_name)
        unique_id = datetime.now().strftime("date_%m.%d_time_%H.%M")
        self.exp_name = exp_name+"_"+unique_id
        self.observation_index = 0


    def record(self, value, type="general"):
        key = "{}; {}; {}".format(self.exp_name, self.observation_index, type)
        self.book[key] = value
        self.observation_index += 1

    observe = record #sometimes i forget which

    def lookup(self, type=None, exp_name=None, ret_sorted=False, strip_keys=False):
        type_func = lambda *args: True
        name_func = lambda *args: True

        if type:
            type_func = lambda x: x[2] == type

        if exp_name:
            name_func = lambda x: exp_name in x[0]

        key_func = lambda x: type_func(x) and name_func(x)
        unpack = lambda x: [f(x.strip()) for f,x in zip([str,int,str],x.split(";"))]
        items = {k:v for k,v in self.book.iteritems() if key_func(unpack(k))}
        if ret_sorted:
            return self.sort_results(items, strip_keys)
        return items

    def sort_results(self, result_dict, only_val_return=False):
        unpack = lambda x: [f(x.strip()) for f,x in zip([str,int,str],x.split(";"))]
        ranker = lambda x: unpack(x[0])[1]
        sorted_items = sorted(result_dict.items(), key=ranker)
        if only_val_return:
            return [v for k,v in sorted_items]
        return sorted_items

    def close(self):
        self.book.close()
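A possible usage sketch for the Scribe class above, assuming it is importable together with its dependencies; the location, table and experiment names are made up. record() files values under a composite key and lookup() filters them back out:

scribe = Scribe(location='/tmp', table_name='runs', exp_name='demo')
scribe.record(0.91, type='accuracy')
scribe.record(0.35, type='loss')
accuracies = scribe.lookup(type='accuracy', exp_name='demo',
                           ret_sorted=True, strip_keys=True)
scribe.close()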
    def test_as_str(self):
        """Verify SqliteDict.__str__()."""
        # given,
        db = SqliteDict()
        # exercise
        db.__str__()
        # test when db closed
        db.close()
        db.__str__()
class FileCache(CacheInterface):
    def __init__(self, config):
        CacheInterface.__init__(self, config)
        self.db = SqliteDict(self.config[u"cache"][u"file"], autocommit=True)
        self.expiration = self.config[u"cache"].get(u"expiration", 86400)
        def closer():
            try:
                self.db.close()
            except Exception:
                logger.exception("Exception closing file cache")
        atexit.register(closer)

    def get(self, key):
        if int(self.db[key + "_expiration"]) - time.time() <= 0:
            raise KeyError("cache key expired")
        return self.db[key]

    def set(self, key, val):
        self.db[key] = val
        self.db[key + "_expiration"] = str(int(time.time()) + self.expiration)
    def __init__(self, out, **sqlite_dict_args):
        super(SqliteRecorder, self).__init__()

        self.model_viewer_data = None

        if MPI and MPI.COMM_WORLD.rank > 0 :
            self._open_close_sqlitedict = False
        else:
            self._open_close_sqlitedict = True

        if self._open_close_sqlitedict:
            sqlite_dict_args.setdefault('autocommit', True)
            self.out_metadata = SqliteDict(filename=out, flag='n', tablename='metadata', **sqlite_dict_args)
            self.out_metadata['format_version'] = format_version
            self.out_iterations = SqliteDict(filename=out, flag='w', tablename='iterations', **sqlite_dict_args)
            self.out_derivs = SqliteDict(filename=out, flag='w', tablename='derivs', **sqlite_dict_args)

        else:
            self.out_metadata = None
            self.out_iterations = None
            self.out_derivs = None
    def test_default_reuse_existing_flag_c(self):
        """Re-opening of a database does not destroy it."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname)
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        next_db = SqliteDict(filename=fname)
        self.assertIn('key', next_db.keys())
        self.assertEqual(next_db['key'], 'value')
    def test_overwrite_using_flag_n(self):
        """Re-opening of a database with flag='c' destroys it all."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname, tablename='sometable')
        orig_db['key'] = 'value'
        orig_db.commit()
        orig_db.close()

        # verify,
        next_db = SqliteDict(filename=fname, tablename='sometable', flag='n')
        self.assertNotIn('key', next_db.keys())
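Taken together, the two tests above illustrate the flag semantics: the default 'c' reuses an existing table, while 'n' recreates it from scratch. A compact sketch of that difference (the temporary path is made up):

from sqlitedict import SqliteDict

path = '/tmp/flag-demo.sqlite'  # hypothetical

with SqliteDict(path) as db:            # flag='c' (default): create if needed, keep contents
    db['key'] = 'value'
    db.commit()

with SqliteDict(path) as db:            # re-open with the default flag: data survives
    assert db['key'] == 'value'

with SqliteDict(path, flag='n') as db:  # flag='n': always start with an empty table
    assert 'key' not in db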
Example #29
    def __init__(self, fname, num_features, shardsize=SHARD_SIZE, topsims=TOP_SIMS):
        """
        Spill index shards to disk after every `shardsize` documents.
        In similarity queries, return only the `topsims` most similar documents.
        """
        self.fname = fname
        self.shardsize = int(shardsize)
        self.topsims = int(topsims)
        self.id2pos = {}  # map document id (string) to index position (integer)
        self.pos2id = {}  # reverse mapping for id2pos; redundant, for performance
        self.id2sims = SqliteDict(self.fname + '.id2sims', journal_mode=JOURNAL_MODE)  # precomputed top similar: document id -> [(doc_id, similarity)]
        self.qindex = gensim.similarities.Similarity(self.fname + '.idx', corpus=None,
            num_best=None, num_features=num_features, shardsize=shardsize)
        self.length = 0
Example #30
    def build_with_bad_flag():
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db = SqliteDict(filename=fname, flag='FOO')
Example #31
def run(config, db_name):
    with SqliteDict(db_name, tablename='tagged_data') as db:
        data = list(db.items())
    with SqliteDict(db_name, tablename='linearized_data') as db:
        finished = set(db.keys())

    data = [datum for datum in data if datum[0] not in finished]

    model = language_model.from_config(config)
    _safety = 2**32

    beam = linearizer.decode.beam
    decode_one = linearizer.utils.decode_one
    gendist = linearizer.utils.gendist
    PartialTree = linearizer.partialtree.PartialTree
    editdist = editdistance.eval

    results = {}
    bar = tqdm(total=len(data), desc='partial to trellis to decoding')

    for idx, (datum, datum_str, _) in data:
        partial = PartialTree.from_list(datum)
        partial.prep()
        difficulty = partial.measure_difficulty()
        model.logger.debug(str(idx) + ' Difficulty: ' + str(difficulty))
        if difficulty > 2**35:
            bad_str = "Skipping.  index={}; difficulty={}".format(
                idx, difficulty)
            model.logger.debug(bad_str)
            bar.update(1)
            continue

        seqs, memos = linearizer.dp.run(partial)
        if len(seqs) == 0:
            bad_str = "Failure.  index={}; difficulty={}".format(
                idx, difficulty)
            model.logger.debug(bad_str)
            bar.update(1)
            continue

        datumstr_as_list = datum_str.split(" ")
        datum_len = len(datumstr_as_list)

        beam_state, step_decisions, best_idx = beam(memos, model)
        genscores = {}
        edscores = {}
        saving_state = {
            'datum': datum_str,
            'beam_state': beam_state,
            'difficulty': difficulty,
            'generation_distance': [],
            'edit_distance': [],
            'beam_solutions': [],
            'beam_scores': []
        }
        seen = set()
        for score, beam_idx in beam_state:
            sentence = decode_one(memos, beam_idx)
            assert beam_idx not in seen
            seen.add(beam_idx)

            gval = gendist(datumstr_as_list, sentence)
            edval = editdist(datumstr_as_list, sentence)

            saving_state['generation_distance'].append(gval)
            saving_state['edit_distance'].append(edval)
            saving_state['beam_solutions'].append(sentence)
            saving_state['beam_scores'].append(score)

        results[idx] = saving_state

        bar.update(1)

        if len(results) > 10:
            with SqliteDict(db_name, tablename='linearized_data') as db:
                db.update(results)
                db.commit()
            results = {}

    if len(results) > 0:
        with SqliteDict(db_name, tablename='linearized_data') as db:
            db.update(results)
            db.commit()
        results = {}

    print("Finished.")
Example #32
class SignupPlugin(Plugin):
    def load(self, ctx):
        self.guild_configs = SqliteDict('./guild_configs.sqlite',
                                        autocommit=True)
        self.signups = SqliteDict('./signups.sqlite', autocommit=True)

    def unload(self, ctx):
        self.guild_configs.close()
        self.signups.close()

    @Plugin.command(
        "config",
        "<admin_channel_id:snowflake> <signup_channel_id:snowflake> <announce_channel_id:snowflake>",
    )
    def on_config(self, event, admin_channel_id, signup_channel_id,
                  announce_channel_id):
        guild_id = str(event.msg.guild.id)
        self.guild_configs[guild_id] = {
            "admin_channel_id": admin_channel_id,
            "signup_channel_id": signup_channel_id,
            "announce_channel_id": announce_channel_id,
        }
        event.msg.reply("Succesfully configured this Discord for use!")

    @Plugin.command(
        "create",
        "<name:str> <tanks:int> <healers:int> <dps:int> <message:str...>")
    def on_create(self, event, name, tanks, healers, dps, message):
        guild_id = str(event.msg.guild.id)
        config = self.guild_configs[guild_id]

        if config is None:
            event.msg.reply(
                "I'm not configured! Please set up your channels first.")

        confirm_message = event.msg.reply(
            "You're creating an event named {} that requires {} tanks, {} healers, and {} dps. Your custom message is \n\n{}\n\nReact to confirm."
            .format(name, tanks, healers, dps, message))

        confirm_message.add_reaction("greentick:612799716161486888")

        self.signups[guild_id] = {
            str(confirm_message.id): {
                "name": name,
                "message": message,
                "tanks": tanks,
                "healers": healers,
                "dps": dps,
                "confirmed": False,
                "announced": False,
            }
        }

        print(self.signups)

    @Plugin.listen("MessageReactionAdd")
    def on_message_reaction_add(self, event):
        # Ignore reactions from the bot itself
        if event.user_id == 612451478485073925:
            return

        message_id = str(event.message_id)
        guild_id = str(event.guild.id)
        admin_channel_id = self.guild_configs[guild_id]["admin_channel_id"]
        admin_channel = self.client.api.channels_get(admin_channel_id)

        if self.signups[guild_id][message_id] is None:
            return

        if self.guild_configs[guild_id] is None:
            return

        # Green check emoji, not bot id
        if event.emoji.id == 612799716161486888:
            confirm_event(self, guild_id, admin_channel, message_id)

        # Cheer emoji, not bot id
        if event.emoji.id == 612778926640726024 and self.signups[guild_id][
                message_id]["confirmed"] is True:
            print('here')
            announce_event(self, guild_id, admin_channel, message_id)
Example #33
class Baidu_ordinary_windows(object):
    """百度普通收录窗体"""
    def __init__(self, tree, site, token):
        # Show the waiting window
        self.newroot = tk.Toplevel()
        self.newroot.title('普通收录')
        self.newroot.iconbitmap("favicon.ico")
        win_width = self.newroot.winfo_screenwidth()
        win_higth = self.newroot.winfo_screenheight()
        width_adjust = (win_width - 800) / 2
        higth_adjust = (win_higth - 250) / 2
        self.newroot.geometry("%dx%d+%d+%d" %
                              (800, 250, width_adjust, higth_adjust))

        # Prompt text
        self.content = tk.Label(self.newroot,
                                text="正在普通收录中,请不要中断操作,请耐心等待......")
        self.content.place(
            x=10,
            y=30,
        )
        self.content2 = tk.Label(self.newroot, text="")
        self.content2.place(
            x=10,
            y=60,
        )

        # Window log
        self.ttlog = ttlog(master=self.newroot)
        self.ttlog.place(x=10, y=70, width=780, height=150)
        self.tree = tree
        self.site = site
        self.token = token
        self.mydict = SqliteDict('./my_db.sqlite', autocommit=True)

        # Start the worker thread
        self.p = Thread(target=self.main)
        self.p.setDaemon(True)
        self.p.start()
        self.ttlog.log("普通收录-->开启普通收录线程.....")

        # Handle clicking the close button in the top-right corner
        self.newroot.protocol("WM_DELETE_WINDOW", self.close)

    def close(self):
        self.ttlog.stop_log()
        self.newroot.destroy()

    # Get URLs that have not been submitted yet
    def get_url(self):
        url_list = []
        for key, value in sorted(self.mydict.iteritems()):
            if value[1] == "未提交":
                url_list.append(value)
        self.ttlog.log("普通收录-->共有没推送的网页链接数 :{} 条!".format(len(url_list)))
        print("共有没普通收录推送的网页链接数 :{} 条!".format(len(url_list)))
        return url_list

    # Check the remaining push quota
    def get_remain(self):
        post_url = "http://data.zz.baidu.com/urls?site={}&token={}".format(
            self.site, self.token)
        headers = {
            'User-Agent': 'curl/7.12.1',
            'Host': 'data.zz.baidu.com',
            'Content-Type': 'text/plain',
            'Content-Length': '83',
        }
        response = requests.post(post_url, headers=headers, data=self.site)
        req = response.text
        if "success" in req:
            req_json = json.loads(req)
            if req_json["remain"] == 0:
                self.ttlog.log(
                    "普通收录-->查询剩余次数,今天普通收录推送任务已经完成,\n当天剩余的可推送url条数: " +
                    req_json["remain"] + "条。")
            else:
                self.ttlog.log(
                    "普通收录-->查询剩余次数,推送成功:" + self.site +
                    '\n当天剩余的可推送url条数: {}条'.format(req_json["remain"]))
            return req_json["remain"]
        else:
            return 0

    # Submit URLs
    def api(self, url):
        post_url = "http://data.zz.baidu.com/urls?site={}&token={}".format(
            self.site, self.token)
        headers = {
            'User-Agent': 'curl/7.12.1',
            'Host': 'data.zz.baidu.com',
            'Content-Type': 'text/plain',
            'Content-Length': '83',
        }
        response = requests.post(post_url, headers=headers, data=url[0])
        req = response.text
        if "success" in req:
            req_json = json.loads(req)
            if req_json["remain"] == 0:
                self.ttlog.log("普通收录-->今天普通收录推送任务已经完成,当天剩余的可推送url条数: 0条。")
            else:
                # Only update the list view if it has been loaded
                tree_len = len(self.tree.get_children())
                if tree_len != 0:
                    print("普通收录-->修改列表")
                    self.tree.item(url[4],
                                   value=(url[0], "已提交", url[2], url[3]))

                # Update the database
                self.mydict[url[0]] = [url[0], "已提交", url[2], url[3], url[4]]
                self.ttlog.log(
                    "普通收录-->推送成功:" + url[0] +
                    '\n当天剩余的可推送url条数: {}条'.format(req_json["remain"]))
        else:
            req_json = json.loads(req)
            self.ttlog.log(r"普通收录-->推送失败:" + req_json["message"] +
                           ",当天可剩余推送数量为0条。")

        return None

    # Main handler
    def main(self):
        # Get URLs that have not been submitted yet
        urls = self.get_url()
        # Check the remaining push quota
        num = self.get_remain()
        # Determine which URLs to push
        post_urls = urls[:num]
        # Ask whether to start processing
        flag = tk.messagebox.askquestion(
            "提交", "本地共有没推送的网页数 :{} 条!\n"
            "当前剩余主动推送的次数 :{} 条!\n"
            "选“是”开始提交,选“否”取消提交".format(len(urls), num))

        if flag == "yes":
            try:
                # Keep the window on top
                self.newroot.wm_attributes('-topmost', 1)
                cpu_num = multiprocessing.cpu_count()
                self.ttlog.log("CPU核心数:" + str(cpu_num))
                self.ttlog.log("开启线程池,能一定程度加速")
                pool = ThreadPool(cpu_num)
                results = pool.map(self.api, post_urls)
                pool.close()
                pool.join()
                self.ttlog.stop_log()
                self.ttlog.log("普通收录-->今日的推送任务完成!")
                self.content.config(text="普通收录-->今日的推送任务完成!")
            except Exception as e:
                self.ttlog.log('错误代码:{}'.format(e))
                self.ttlog.log("Error: unable to start thread")
        else:
            self.ttlog.log("你选择了否,没有推送网页链接")
Example #34
class Update_window(object):
    """sitemap更新窗体"""
    def __init__(self, tree, eblog, sitemap):
        self.newroot = tk.Toplevel()
        self.newroot.title('下载文件中')
        self.newroot.iconbitmap("favicon.ico")
        self.newroot.wm_attributes('-topmost', 1)
        win_width = self.newroot.winfo_screenwidth()
        win_higth = self.newroot.winfo_screenheight()
        width_adjust = (win_width - 800) / 2
        higth_adjust = (win_higth - 250) / 2
        self.newroot.geometry("%dx%d+%d+%d" %
                              (800, 250, width_adjust, higth_adjust))

        # Progress bar
        self.bar = ttk.Progressbar(self.newroot,
                                   length=740,
                                   mode="indeterminate",
                                   orient=tk.HORIZONTAL)
        self.bar.place(
            x=30,
            y=150,
        )
        self.bar.start(10)

        # Prompt text
        self.content = tk.Label(self.newroot, text="正在下载Sitemap.xml文件...")
        self.content.place(
            x=30,
            y=30,
        )
        self.content2 = tk.Label(self.newroot,
                                 text="下载速度和文件大小以及服务器带宽有关,请耐心等待......",
                                 wraplength=740,
                                 justify="left")
        self.content2.place(
            x=30,
            y=60,
        )

        self.eblog = eblog
        self.sitemap = sitemap
        self.tree = tree
        self.mydict = SqliteDict('./my_db.sqlite', autocommit=True)

        # Start the worker thread
        self.p = Thread(target=self.update)
        self.p.setDaemon(True)
        self.p.start()
        self.eblog.log("Sitemap线程:开启sitemap线程,下载Sitemap.xml中...")

        # Handle the close button in the top-right corner
        self.newroot.protocol("WM_DELETE_WINDOW", self.close)

    # Add an item to the list view and return its iid
    def append_item(self, item_list):
        # Inserting at the front or appending at the end both work; position 0 is used here
        item = self.tree.insert("",
                                0,
                                values=(item_list[0], item_list[1],
                                        item_list[2], item_list[3]))
        return item

    # Main handler
    def update(self):
        try:
            with open("sitemap.xml", "wb") as f:
                f.write(requests.get(self.sitemap).content)
            with open("sitemap.xml", 'r', encoding='utf-8') as f:
                xml_data = f.read()
            self.content.configure(text="Sitemap文件下载完成,正在对比分析....")
            urls = re.findall(r'<loc>(.+?)</loc>', xml_data, re.S)
            self.eblog.log("Sitemap线程-->下载Sitemap.xml完成,正在解析xml文件...")

            tuple_list = list(self.mydict.iteritems())
            tree_urls = [i[0] for i in tuple_list]
            # Intersection of sitemap URLs and locally stored URLs
            c = list(set(urls).intersection(set(tree_urls)))

            # Entries in the local tree that are no longer in the sitemap
            tree_urls_ = list(set(tree_urls).difference(set(c)))

            # Keep the intersection as-is; delete local entries not in it, then add sitemap entries not in it
            for key, value in sorted(tuple_list):
                self.content2.config(text="当前处理-->检查" + key)
                if key not in c:
                    # Only delete from the list view if it has been loaded
                    if len(self.tree.get_children()) != 0:
                        self.tree.delete(value[4])

                    del self.mydict[key]
            self.eblog.log("Sitemap线程-->本地删除" + str(tree_urls_))

            # URLs newly added to the sitemap
            urls_ = list(set(urls).difference(set(c)))
            for url in sorted(urls_):
                cur_time = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                iid = self.append_item([url, "未提交", "未提交", cur_time])
                self.mydict[url] = [url, "未提交", "未提交", cur_time, iid]
                self.content2.config(text="当前处理-->正在添加" + url)

            self.eblog.log("Sitemap线程-->本地添加" + str(urls_))
            self.eblog.log("Sitemap线程-->关闭sitemap线程,更新完成。")
            self.close()
        except:
            self.eblog.log(traceback.format_exc())
            self.eblog.log("Sitemap线程-->更新失败")

    def close(self):
        self.newroot.destroy()
Example #35
class SimIndex(gensim.utils.SaveLoad):
    """
    An index of documents. Used internally by SimServer.

    It uses the Similarity class to persist all document vectors to disk (via mmap).
    """
    def __init__(self,
                 fname,
                 num_features,
                 shardsize=SHARD_SIZE,
                 topsims=TOP_SIMS):
        """
        Spill index shards to disk after every `shardsize` documents.
        In similarity queries, return only the `topsims` most similar documents.
        """
        self.fname = fname
        self.shardsize = int(shardsize)
        self.topsims = int(topsims)
        self.id2pos = {
        }  # map document id (string) to index position (integer)
        self.pos2id = {
        }  # reverse mapping for id2pos; redundant, for performance
        self.id2sims = SqliteDict(
            self.fname + '.id2sims', journal_mode=JOURNAL_MODE
        )  # precomputed top similar: document id -> [(doc_id, similarity)]
        self.qindex = gensim.similarities.Similarity(self.fname + '.idx',
                                                     corpus=None,
                                                     num_best=None,
                                                     num_features=num_features,
                                                     shardsize=shardsize)
        self.length = 0

    def save(self, fname):
        tmp, self.id2sims = self.id2sims, None
        super(SimIndex, self).save(fname)
        self.id2sims = tmp

    @staticmethod
    def load(fname):
        result = gensim.utils.SaveLoad.load(fname)
        result.check_moved(fname)
        result.id2sims = SqliteDict(result.fname + '.id2sims',
                                    journal_mode=JOURNAL_MODE)
        return result

    def check_moved(self, fname):
        # Add extra logic to loading: if the location on filesystem changed,
        # update locations of all shard files.
        # The other option was making shard locations relative to a directory name.
        # That way we wouldn't have to update their locations on load, but on the
        # other hand we'd have to pass a dirname to each call that needs their
        # absolute location... annoying.
        if self.fname != fname:
            logger.info(
                "index seems to have moved from %s to %s; updating locations" %
                (self.fname, fname))
            self.fname = fname
            output_prefix = fname + '.idx'
            for shard in self.qindex.shards:
                shard.fname = shard.fname.replace(self.qindex.output_prefix,
                                                  output_prefix, 1)
            self.qindex.output_prefix = output_prefix

    def close(self):
        "Explicitly release important resources (file handles, db, ...)"
        try:
            self.id2sims.close()
        except:
            pass
        try:
            del self.qindex
        except:
            pass

    def terminate(self):
        """Delete all files created by this index, invalidating `self`. Use with care."""
        try:
            self.id2sims.terminate()
        except:
            pass
        import glob
        for fname in glob.glob(self.fname + '*'):
            try:
                os.remove(fname)
                logger.info("deleted %s" % fname)
            except Exception as e:
                logger.warning("failed to delete %s: %s" % (fname, e))
        for val in self.__dict__.keys():
            try:
                delattr(self, val)
            except:
                pass
Example #36
    def __init__(self, name: str, author: str, table: str):
        # DB stores values directly (not encoded as a pickle)
        self.sqlite_dict = SqliteDict(get_sqlite_path(name, author),
                                      table,
                                      encode=lambda x: x,
                                      decode=lambda x: x)
Example #37
from flask import Flask, jsonify, make_response, abort, redirect, url_for, render_template
from flask.ext.httpauth import HTTPBasicAuth
from flask.ext.sqlalchemy import SQLAlchemy
from sqlitedict import SqliteDict

auth = HTTPBasicAuth()

app = Flask(__name__)
app.config['SECRET_KEY'] = 'the secret key'
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///db.sqlite'

flixrdb = SqliteDict('./movies.sqlite', autocommit=True)
db = SQLAlchemy(app)


class Movies(db.Model):
    __tablename__ = 'movies'
    id = db.Column(db.Integer, primary_key=True)
    social_id = db.Column(db.String(64), nullable=False, unique=True)
    nickname = db.Column(db.String(64), nullable=False)
    email = db.Column(db.String(64), nullable=True)


@app.route('/api/v1.0/movies/<int:movie_id>', methods=['GET', 'POST'])
@auth.login_required
def get_movie(movie_id):
    try:
        return jsonify({'movie': flixrdb[movie_id]})
    except:
        return make_response(jsonify({'error': 'Movie not found'}), 404)
Example #38
    def OptimizationHistory(self):
        """
        Reads in database history file and stores contents.
        Function information is stored as a dict in func_data,
        variable information is stored as a dict in var_data,
        and bounds information is stored as a dict in bounds.
        """

        # Initialize dictionaries for design variables and unknowns.
        # The data is saved redundantly in dicts for all iterations and then
        # for major iterations as well.
        self.func_data_all = {}
        self.func_data_major = {}
        self.var_data_all = {}
        self.var_data_major = {}
        db = {}
        self.num_iter = 0

        # Loop over each history file name provided by the user.
        for histIndex, histFileName in enumerate(self.histList):

            # If they only have one history file, we don't change the keys' names
            if len(self.histList) == 1:
                histIndex = ""
            else:  # If multiple history files, append letters to the keys,
                # such that 'key' becomes 'key_A', 'key_B', etc
                histIndex = "_" + chr(histIndex + ord("A"))
            self.histIndex = histIndex

            try:  # This is the classic method of storing history files
                db = shelve.open(histFileName, "r")
                OpenMDAO = False
            except:  # Bare except because error is not in standard Python. # noqa: E722
                # If the db has the 'iterations' tag, it's an OpenMDAO db.
                db = SqliteDict(histFileName, "iterations")
                OpenMDAO = True

                # Need to do this since in py3 db.keys() is a generator object
                keys = [i for i in db.keys()]

                # If it has no 'iterations' tag, it's a pyOptSparse db.
                if keys == []:
                    OpenMDAO = False
                    db = SqliteDict(histFileName)

            # Specific instructions for OpenMDAO databases
            if OpenMDAO:

                # Get the number of iterations by looking at the largest number
                # in the split string names for each entry in the db
                if major_python_version == 3:
                    for string in db.keys():
                        string = string.split("|")
                else:
                    string = db.keys()[-1].split("|")

                nkey = int(string[-1])
                self.solver_name = string[0]

                # Initalize a list detailing if the iterations are major or minor
                self.iter_type = np.zeros(nkey)

                # Get the keys of the database where derivatives were evaluated.
                # These correspond to major iterations, while no derivative
                # info is calculated for gradient-free linesearches.
                deriv_keys = SqliteDict(histFileName, "derivs").keys()
                self.deriv_keys = [
                    int(key.split("|")[-1]) for key in deriv_keys
                ]

                # Save information from the history file for the funcs.
                self.DetermineMajorIterations(db, OpenMDAO=OpenMDAO)

                # Save information from the history file for the unknowns.
                self.SaveDBData(db,
                                self.func_data_all,
                                self.func_data_major,
                                OpenMDAO=OpenMDAO,
                                data_str="Unknowns")

                # Save information from the history file for the design variables.
                self.SaveDBData(db,
                                self.var_data_all,
                                self.var_data_major,
                                OpenMDAO=OpenMDAO,
                                data_str="Parameters")

                # Add labels to OpenMDAO variables.
                # Corresponds to constraints, design variables, and objective.
                try:
                    db = SqliteDict(histFileName, "metadata")
                    self.SaveOpenMDAOData(db)

                except KeyError:  # Skip metadata info if not included in OpenMDAO hist file
                    pass

            else:

                # Get the number of iterations
                nkey = int(db["last"]) + 1
                self.nkey = nkey

                # Initalize a list detailing if the iterations are major or minor
                self.iter_type = np.zeros(nkey)

                # Check to see if there is bounds information in the db file.
                # If so, add them to self.bounds to plot later.
                try:
                    info_dict = db["varInfo"].copy()
                    info_dict.update(db["conInfo"])
                    # Got to be a little tricky here since we're modifying
                    # info_dict; if we simply loop over it with the generator
                    # from Python3, it will contain the new keys and then the
                    # names will be mangled incorrectly.
                    bounds_dict = {}
                    scaling_dict = {}
                    for key in info_dict.keys():
                        bounds_dict[key + histIndex] = {
                            "lower": info_dict[key]["lower"],
                            "upper": info_dict[key]["upper"],
                        }
                        scaling_dict[key + histIndex] = info_dict[key]["scale"]
                    self.bounds.update(bounds_dict)
                    self.scaling.update(scaling_dict)
                except KeyError:
                    pass

                # Check to see if there is proper saved info about iter type
                if "iu0" in db["0"].keys():
                    if db[db["last"]]["iu0"] > 0:
                        self.storedIters = True
                    else:
                        self.storedIters = False
                else:
                    self.storedIters = False

                # Save information from the history file for the funcs.
                self.DetermineMajorIterations(db, OpenMDAO=OpenMDAO)

                # Save information from the history file for the funcs.
                self.SaveDBData(db,
                                self.func_data_all,
                                self.func_data_major,
                                OpenMDAO=OpenMDAO,
                                data_str="funcs")

                # Save information from the history file for the design variables.
                self.SaveDBData(db,
                                self.var_data_all,
                                self.var_data_major,
                                OpenMDAO=OpenMDAO,
                                data_str="xuser")

        # Set the initial dictionaries to reference all iterations.
        # Later this can be set to reference only the major iterations.
        self.func_data = self.func_data_all
        self.var_data = self.var_data_all

        # Find the maximum length of any variable in the dictionaries and
        # save this as the number of iterations.
        for data_dict in [self.func_data, self.var_data]:
            for key in data_dict.keys():
                length = len(data_dict[key])
                if length > self.num_iter:
                    self.num_iter = length
Example #39
limiter = Limiter(key_func=get_remote_address)

cache_supported_backends = {
    None: __cache_module.NullCache,
    'memcached': __cache_module.MemcachedCache,
    'redis': __cache_module.RedisCache
}

__cache_uri = os.environ.get('CACHE_SERVICE')

if __cache_uri:
    try:
        # example __cache_uri is 'redis:dev_redis_1:6379'
        [__cache_type, __url, __port] = __cache_uri.split(':')
    except ValueError:
        raise ImproperlyConfigured('CACHE_SERVICE is wrongly formatted. Use "redis:dev_redis_1:6379" as example.')
    if __cache_type == 'redis':
        cache = __cache_module.RedisCache(host=__url, port=__port, default_timeout=os.environ.get('CACHE_TIMEOUT'))
    elif __cache_type == 'memcached':
        cache = __cache_module.MemcachedCache(
            servers=["{url}:{port}".format(url=__url, port=__port)],
            default_timeout=os.environ.get('CACHE_TIMEOUT')
        )
    else:
        raise ImproperlyConfigured('Unknown cache service, only Memcached and Redis are supported at the moment.')
else:
    cache = __cache_module.NullCache

credentials_store = SqliteDict('flask_oidc.db', autocommit=True)
openid_connect = OpenIDConnect(credentials_store=credentials_store)
Example #40
def grep_in_blogtree(blogtree_path, username, registered, string, case_sensitive):
    with SqliteDict(blogtree_path) as serialized:
        for blog in tqdm(serialized.itervalues(), total=len(serialized)):
            grep_in_blog(blog, username, registered, string, case_sensitive)
Example #41
class BackendDbHandler(object):
    """Table structure

    target_pages: A table to save URL where folklore is. Key-value pair. {url_string: TargetPage object}
    target_html: A table to save HTML of folklore. Key-value pair. {url_string: ExtractedPage object}
    """
    def __init__(self, path_db_file: str, interval: int = 3):
        self.db_target_pages = SqliteDict(path_db_file,
                                          autocommit=True,
                                          tablename='target_pages',
                                          encode=json.dumps,
                                          decode=json.loads)
        self.db_html = SqliteDict(path_db_file,
                                  autocommit=True,
                                  tablename='target_html',
                                  encode=json.dumps,
                                  decode=json.loads)
        self.interval = interval

    def save_target_urls(self, target_urls: List[str]):
        """Save target URL into DB."""
        for url in target_urls:
            if url not in self.db_target_pages:
                data, errs = TargetPage(strict=True).load({
                    'page_url': url,
                    'status': False,
                    'note': '',
                    'extracted_at': ''
                })
                self.db_target_pages[url] = data
            else:
                logger.info('URL={} is already in target. Skip.'.format(url))
        else:
            self.db_target_pages.commit()

    def run_html_extraction(self,
                            is_force_retry: bool = False,
                            limit: int = -1):
        """Fetch every pending target page and save its HTML into the DB."""
        processed = 0
        for url, page_obj in tqdm(list(self.db_target_pages.items())):
            if page_obj['status'] is False or is_force_retry is True:
                try:
                    html_doc = requests.get(url).text
                    error_msg = ''
                    status = True
                except requests.RequestException as e:
                    html_doc = ''
                    error_msg = str(e)
                    status = False

                data, errs = ExtractedPage(strict=True).load({
                    'page_url': url,
                    'status': status,
                    'html_document': html_doc,
                    'note': error_msg,
                    'extracted_at': datetime.now().__str__()
                })
                self.db_html[url] = data

                page_obj['status'] = status
                page_obj['extracted_at'] = datetime.now().__str__()
                self.db_target_pages[url] = page_obj

                processed += 1
                time.sleep(self.interval)
                if processed == limit:
                    logger.info('Terminated by limit={}'.format(limit))
                    break
            else:
                logger.info('URL={} is already extracted. Skip.'.format(url))
        self.db_target_pages.commit()
        self.db_html.commit()

    def show_extracted_html(self) -> List[Dict[str, Any]]:
        results = []
        for url, obj_ in self.db_html.items():
            data, errs = ExtractedPage(strict=True).load(obj_)
            if data['status']:
                results.append(obj_)
        return results
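A minimal sketch of how this handler might be driven end to end; the file name, URLs, and import context are assumptions, since the listing does not show them:

# Hedged usage sketch (paths and URLs are placeholders, not from the listing).
handler = BackendDbHandler(path_db_file='folklore.sqlite', interval=1)
handler.save_target_urls(['https://example.com/folklore/1',
                          'https://example.com/folklore/2'])
handler.run_html_extraction(limit=10)
for extracted in handler.show_extracted_html():
    print(extracted['page_url'], len(extracted['html_document']))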
Exemple #42
0
    def test_overwrite_using_flag_w(self):
        """Re-opening of a database with flag='w' destroys only the target table."""
        # given,
        fname = norm_file('tests/db/sqlitedict-override-test.sqlite')
        orig_db_1 = SqliteDict(filename=fname, tablename='one')
        orig_db_1['key'] = 'value'
        orig_db_1.commit()
        orig_db_1.close()

        orig_db_2 = SqliteDict(filename=fname, tablename='two')
        orig_db_2['key'] = 'value'
        orig_db_2.commit()
        orig_db_2.close()

        # verify, when re-opening table space 'one' with flag='w', we destroy
        # its contents.  However, when re-opening table space 'two' with the
        # default flag='c', its contents remain.
        next_db_1 = SqliteDict(filename=fname, tablename='one', flag='w')
        self.assertNotIn('key', next_db_1.keys())

        next_db_2 = SqliteDict(filename=fname, tablename='two')
        self.assertIn('key', next_db_2.keys())
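Outside the test harness, the same flag semantics can be exercised directly; the file path below is only an example:

from sqlitedict import SqliteDict

with SqliteDict('/tmp/flags-demo.sqlite', tablename='one') as one:
    one['key'] = 'value'
    one.commit()

# Re-opening the same table with flag='w' drops its previous contents,
# while other tables in the same file are left untouched.
with SqliteDict('/tmp/flags-demo.sqlite', tablename='one', flag='w') as wiped:
    assert 'key' not in wiped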
Exemple #43
0
    def test_irregular_tablenames(self):
        """Irregular table names need to be quoted"""
        db = SqliteDict(':memory:', tablename='9nine')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        db = SqliteDict(':memory:', tablename='outer space')
        db['key'] = 'value'
        db.commit()
        self.assertEqual(db['key'], 'value')
        db.close()

        with self.assertRaisesRegexp(ValueError, r'^Invalid tablename '):
            SqliteDict(':memory:', '"')
Exemple #44
0
    def __init__(self, lm, cache_db):
        self.lm = lm
        self.cache_db = cache_db
        os.makedirs(os.path.dirname(cache_db), exist_ok=True)
        self.dbdict = SqliteDict(cache_db, autocommit=True)
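Only the constructor is shown above; one plausible use of `self.dbdict` is to memoise language-model calls keyed by the input text. The `score()` method below and the `self.lm.score()` interface are assumptions for illustration, not part of the original class:

    def score(self, text):
        # Return a cached result when available; otherwise query the wrapped
        # language model and persist the answer (autocommit=True writes it out).
        if text in self.dbdict:
            return self.dbdict[text]
        result = self.lm.score(text)  # assumed interface of the wrapped lm
        self.dbdict[text] = result
        return result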
Exemple #45
0
def get_db():
    return SqliteDict('db/mails_db.db', "mails", autocommit=True)
Exemple #46
0
    def load(fname):
        result = gensim.utils.SaveLoad.load(fname)
        result.check_moved(fname)
        result.id2sims = SqliteDict(result.fname + '.id2sims',
                                    journal_mode=JOURNAL_MODE)
        return result
Exemple #47
0
    def __init__(self, path):
        self.sd = SqliteDict(path, autocommit=True)
Exemple #48
0
class SimServer(object):
    """
    Top-level functionality for similarity services. A similarity server takes
    care of::

    1. creating semantic models
    2. indexing documents using these models
    3. finding the most similar documents in an index.

    An object of this class can be shared across a network via Pyro to answer remote
    client requests. It is thread-safe. Reading (answering similarity queries) is
    safe from multiple processes concurrently; modifying (training/indexing) is
    serialized internally via locking.
    """
    def __init__(self, basename, use_locks=True):
        """
        All data will be stored under directory `basename`. If there is a server
        there already, it will be loaded (resumed).

        The server object is stateless in RAM -- its state is defined entirely by its location.
        There is therefore no need to store the server object.
        """
        if not os.path.isdir(basename):
            raise ValueError("%r must be a writable directory" % basename)
        self.basename = basename
        self.lock_update = threading.RLock(
        ) if use_locks else gensim.utils.nocm
        try:
            self.fresh_index = SimIndex.load(self.location('index_fresh'))
        except:
            self.fresh_index = None
        try:
            self.opt_index = SimIndex.load(self.location('index_opt'))
        except:
            self.opt_index = None
        try:
            self.model = SimModel.load(self.location('model'))
        except:
            self.model = None
        self.payload = SqliteDict(self.location('payload'),
                                  autocommit=True,
                                  journal_mode=JOURNAL_MODE)
        # save the opened objects right back. this is not necessary and costs extra
        # time, but is cleaner when there are server location changes (see `check_moved`).
        self.flush(save_index=True, save_model=True, clear_buffer=True)
        logger.info("loaded %s" % self)

    def location(self, name):
        return os.path.join(self.basename, name)

    @gensim.utils.synchronous('lock_update')
    def flush(self, save_index=False, save_model=False, clear_buffer=False):
        """Commit all changes, clear all caches."""
        if save_index:
            if self.fresh_index is not None:
                self.fresh_index.save(self.location('index_fresh'))
            if self.opt_index is not None:
                self.opt_index.save(self.location('index_opt'))
        if save_model:
            if self.model is not None:
                self.model.save(self.location('model'))
        self.payload.commit()
        if clear_buffer:
            if hasattr(self, 'fresh_docs'):
                try:
                    self.fresh_docs.terminate(
                    )  # erase all buffered documents + file on disk
                except:
                    pass
            self.fresh_docs = SqliteDict(
                journal_mode=JOURNAL_MODE
            )  # buffer defaults to a random location in temp
        self.fresh_docs.sync()

    def close(self):
        """Explicitly close open file handles, databases etc."""
        try:
            self.payload.close()
        except:
            pass
        try:
            self.model.close()
        except:
            pass
        try:
            self.fresh_index.close()
        except:
            pass
        try:
            self.opt_index.close()
        except:
            pass
        try:
            self.fresh_docs.terminate()
        except:
            pass

    def __del__(self):
        """When the server went out of scope, make an effort to close its DBs."""
        self.close()

    @gensim.utils.synchronous('lock_update')
    def buffer(self, documents):
        """
        Add a sequence of documents to be processed (indexed or trained on).

        Here, the documents are simply collected; real processing is done later,
        during the `self.index` or `self.train` calls.

        `buffer` can be called repeatedly; the result is the same as if it was
        called once, with a concatenation of all the partial document batches.
        The point is to save memory when sending large corpora over network: the
        entire `documents` must be serialized into RAM. See `utils.upload_chunked()`.

        A call to `flush()` clears this documents-to-be-processed buffer (`flush`
        is also implicitly called when you call `index()` and `train()`).
        """
        logger.info("adding documents to temporary buffer of %s" % (self))
        for doc in documents:
            docid = doc['id']
            #            logger.debug("buffering document %r" % docid)
            if docid in self.fresh_docs:
                logger.warning("asked to re-add id %r; rewriting old value" %
                               docid)
            self.fresh_docs[docid] = doc
        self.fresh_docs.sync()

    @gensim.utils.synchronous('lock_update')
    def train(self, corpus=None, method='auto', clear_buffer=True):
        """
        Create an indexing model. Will overwrite the model if it already exists.
        All indexes become invalid, because documents in them use a now-obsolete
        representation.

        The model is trained on documents previously entered via `buffer`,
        or directly on `corpus`, if specified.
        """
        if corpus is not None:
            # use the supplied corpus only (erase existing buffer, if any)
            self.flush(clear_buffer=True)
            self.buffer(corpus)
        if not self.fresh_docs:
            msg = "train called but no training corpus specified for %s" % self
            logger.error(msg)
            raise ValueError(msg)
        if method == 'auto':
            numdocs = len(self.fresh_docs)
            if numdocs < 1000:
                logging.warning(
                    "too few training documents; using simple log-entropy model instead of latent semantic indexing"
                )
                method = 'logentropy'
            else:
                method = 'lsi'
        self.model = SimModel(self.fresh_docs, method=method)
        self.flush(save_model=True, clear_buffer=clear_buffer)

    @gensim.utils.synchronous('lock_update')
    def index(self, corpus=None, clear_buffer=True):
        """
        Permanently index all documents previously added via `buffer`, or
        directly index documents from `corpus`, if specified.

        The indexing model must already exist (see `train`) before this function
        is called.
        """
        if not self.model:
            msg = 'must initialize model for %s before indexing documents' % self.basename
            logger.error(msg)
            raise AttributeError(msg)

        if corpus is not None:
            # use the supplied corpus only (erase existing buffer, if any)
            self.flush(clear_buffer=True)
            self.buffer(corpus)

        if not self.fresh_docs:
            msg = "index called but no training corpus specified for %s" % self
            logger.error(msg)
            raise ValueError(msg)

        if not self.fresh_index:
            logger.info("starting a new fresh index for %s" % self)
            self.fresh_index = SimIndex(self.location('index_fresh'),
                                        self.model.num_features)
        self.fresh_index.index_documents(self.fresh_docs, self.model)
        if self.opt_index is not None:
            self.opt_index.delete(self.fresh_docs.keys())
        logger.info("storing document payloads")
        for docid in self.fresh_docs:
            payload = self.fresh_docs[docid].get('payload', None)
            if payload is None:
                # TODO HACK: exit on first doc without a payload (=assume all docs have payload, or none does)
                break
            self.payload[docid] = payload
        self.flush(save_index=True, clear_buffer=clear_buffer)

    @gensim.utils.synchronous('lock_update')
    def optimize(self):
        """
        Precompute top similarities for all indexed documents. This speeds up
        `find_similar` queries by id (but not queries by fulltext).

        Internally, documents are moved from a fresh index (=no precomputed similarities)
        to an optimized index (precomputed similarities). Similarity queries always
        query both indexes, so this split is transparent to clients.

        If you add documents later via `index`, they go to the fresh index again.
        To precompute top similarities for these new documents too, simply call
        `optimize` again.

        """
        if self.fresh_index is None:
            logger.warning("optimize called but there are no new documents")
            return  # nothing to do!

        if self.opt_index is None:
            logger.info("starting a new optimized index for %s" % self)
            self.opt_index = SimIndex(self.location('index_opt'),
                                      self.model.num_features)

        self.opt_index.merge(self.fresh_index)
        self.fresh_index.terminate()  # delete old files
        self.fresh_index = None
        self.flush(save_index=True)

    @gensim.utils.synchronous('lock_update')
    def drop_index(self, keep_model=True):
        """Drop all indexed documents. If `keep_model` is False, also dropped the model."""
        modelstr = "" if keep_model else "and model "
        logger.info("deleting similarity index " + modelstr +
                    "from %s" % self.basename)
        for index in [self.fresh_index, self.opt_index]:
            if index is not None:
                index.terminate()
        self.fresh_index, self.opt_index = None, None
        if not keep_model and self.model is not None:
            self.model.close()
            fname = self.location('model')
            try:
                os.remove(fname)
                logger.info("deleted %s" % fname)
            except Exception:
                logger.warning("failed to delete %s" % fname)
            self.model = None
        self.flush(save_index=True, save_model=True, clear_buffer=True)
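A minimal sketch of the workflow described by the docstrings above (buffer documents, train a model, index them, then optimize); the 'tokens' field and the directory path are assumptions about the document format expected by `SimModel`:

server = SimServer('/tmp/simserver')   # must be an existing, writable directory
docs = [{'id': 'doc_%i' % i, 'tokens': ['token_%i' % i, 'shared']}
        for i in range(10)]
server.buffer(docs)           # collect documents; nothing is processed yet
server.train(method='auto')   # <1000 docs, so this falls back to log-entropy
server.index(docs)            # index the documents with the trained model
server.optimize()             # precompute top similarities for id queries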
Exemple #49
0
class ToolDocumentCache:
    def __init__(self, cache_dir):
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.cache_file = os.path.join(self.cache_dir, 'cache.sqlite')
        self.writeable_cache_file = None
        self._cache = None
        self.disabled = False
        self._get_cache(create_if_necessary=True)

    def close(self):
        self._cache and self._cache.close()

    def _get_cache(self, flag='r', create_if_necessary=False):
        try:
            if create_if_necessary and not os.path.exists(self.cache_file):
                # Create database if necessary using 'c' flag
                self._cache = SqliteDict(self.cache_file,
                                         flag='c',
                                         encode=encoder,
                                         decode=decoder,
                                         autocommit=False)
                if flag == 'r':
                    self._cache.flag = flag
            else:
                cache_file = self.writeable_cache_file.name if self.writeable_cache_file else self.cache_file
                self._cache = SqliteDict(cache_file,
                                         flag=flag,
                                         encode=encoder,
                                         decode=decoder,
                                         autocommit=False)
        except sqlite3.OperationalError:
            log.warning('Tool document cache unavailable')
            self._cache = None
            self.disabled = True

    @property
    def cache_file_is_writeable(self):
        return os.access(self.cache_file, os.W_OK)

    def reopen_ro(self):
        self._get_cache(flag='r')
        self.writeable_cache_file = None

    def get(self, config_file):
        try:
            tool_document = self._cache.get(config_file)
        except sqlite3.OperationalError:
            log.debug("Tool document cache unavailable")
            return None
        if not tool_document:
            return None
        if tool_document.get(
                'tool_cache_version') != CURRENT_TOOL_CACHE_VERSION:
            return None
        if self.cache_file_is_writeable:
            for path, modtime in tool_document['paths_and_modtimes'].items():
                if os.path.getmtime(path) != modtime:
                    return None
        return tool_document

    def _make_writable(self):
        if not self.writeable_cache_file:
            self.writeable_cache_file = tempfile.NamedTemporaryFile(
                dir=self.cache_dir, suffix='cache.sqlite.tmp', delete=False)
            if os.path.exists(self.cache_file):
                shutil.copy(self.cache_file, self.writeable_cache_file.name)
            self._get_cache(flag='c')

    def persist(self):
        if self.writeable_cache_file:
            self._cache.commit()
            os.rename(self.writeable_cache_file.name, self.cache_file)
            self.reopen_ro()

    def set(self, config_file, tool_source):
        try:
            if self.cache_file_is_writeable:
                self._make_writable()
                to_persist = {
                    'document': tool_source.to_string(),
                    'macro_paths': tool_source.macro_paths,
                    'paths_and_modtimes': tool_source.paths_and_modtimes(),
                    'tool_cache_version': CURRENT_TOOL_CACHE_VERSION,
                }
                try:
                    self._cache[config_file] = to_persist
                except RuntimeError:
                    log.debug("Tool document cache not writeable")
        except sqlite3.OperationalError:
            log.debug("Tool document cache unavailable")

    def delete(self, config_file):
        if self.cache_file_is_writeable:
            self._make_writable()
            try:
                del self._cache[config_file]
            except (KeyError, RuntimeError):
                pass

    def __del__(self):
        if self.writeable_cache_file:
            try:
                os.unlink(self.writeable_cache_file.name)
            except Exception:
                pass
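A rough usage sketch for the cache above; `encoder`, `decoder` and `CURRENT_TOOL_CACHE_VERSION` come from the surrounding project, and the `FakeToolSource` stand-in below is purely illustrative:

cache = ToolDocumentCache(cache_dir='/tmp/tool_cache')

class FakeToolSource:
    """Illustrative stand-in for the real tool_source object."""
    macro_paths = []

    def to_string(self):
        return '<tool/>'

    def paths_and_modtimes(self):
        return {}

cache.set('/tools/example.xml', FakeToolSource())
cache.persist()                        # atomically replaces cache.sqlite
document = cache.get('/tools/example.xml')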
Exemple #50
0
class Db:
    """The data handling object for pgpgram.

    Args:
        verbose (int): level of verbosity.
    """

    config_path = config.get_config_dir()
    data_path = config.get_data_dir()
    cache_path = config.get_cache_dir()
    executable_path = dirname(realpath(__file__))
    files_db_path = path_join(config.get_config_dir(), "files.db")
    names_db_path = path_join(config.get_config_dir(), "names.db")

    def __init__(self, verbose=0):
        self.verbose = verbose

        if exists(self.files_db_path):
            self.files = SqliteDict(self.files_db_path, autocommit=False)
        else:
            self.files = SqliteDict(self.files_db_path, autocommit=False)
            self.from_pickle_to_db()
                    
        if exists(self.names_db_path):
            self.file_names = SqliteDict(self.names_db_path, autocommit=False)
        else:
            self.rebuild_names_db()
       
        # Load configuration from disk into 'config' attribute
        try:
            self.config = load(path_join(self.config_path, "config.pkl"))

        except FileNotFoundError as e:
            # Init configuration
            if verbose > 0:
                pprint("Config file not found in path, initializing")

            self.config = {"db key":random_id(20)}

            # Paths
            index_dir = path_join(self.data_path, "index")
            tdlib_dir = path_join(self.data_path, 'tdlib')
            tdlib_config_symlink = path_join(self.config_path, "tdlib")
            tdlib_documents_dir = path_join(self.cache_path, "documents")
            tdlib_documents_symlink = path_join(tdlib_dir, "documents")

            # Init paths
            if not exists(index_dir):
                mkdir(index_dir)

            if not exists(tdlib_dir):
                mkdir(tdlib_dir)
                mkdir(tdlib_documents_dir)
                symlink(tdlib_dir, tdlib_config_symlink)
                symlink(tdlib_documents_dir, tdlib_documents_symlink)


        # Load index
        # try:
        #     self.index = load(path_join(self.data_path, "index.pkl"))
        # except:
        #     if verbose > 0:
        #          print("index still not built")
        self.save()

    

    def from_pickle_to_db(self):
        files_pickle_path = path_join(self.config_path, "files.pkl")
        if exists(files_pickle_path):
            if self.verbose:
                print("converting files pickle to proper db")
            pickle_files = load(files_pickle_path)
            for f in pickle_files:
                self.files[f['hash']] = [f]

    def rebuild_names_db(self):
        print("Building names database")
        try:
            rm(self.names_db_path)
        except FileNotFoundError as e:
            pass
        self.file_names = SqliteDict(self.names_db_path, autocommit=False)
        for hash in self.files:
            for document in self.files[hash]:
                try:
                    name = document['name']
                    db_name_documents = self.file_names[name]
                except KeyError as e:
                    db_name_documents = []
                    
                db_name_documents.append(document)
                self.file_names[name] = db_name_documents
        print("read {} entries".format(len(self.files)))
  

    def save(self):
        """Save db

            Formats db in a format compatible with trovotutto,
            builds the trovotutto index and then save the following to disk:
            - search index
            - files list
            - configuration
        """
        #pgpgram_db = PGPgramDb(self, filetype="any", exclude=[], update=True)
        #self.index = Index(pgpgram_db, slb=3, verbose=self.verbose)
        #save(self.index, path_join(self.data_path, "index.pkl"))
        self.files.commit()
        self.file_names.commit()
        save(self.config, path_join(self.config_path, "config.pkl"))

    def search(self, query, 
                     path=getcwd(), 
                     filetype="any", 
                     exclude=[], 
                     results_number=10,
                     reverse=True,
                     verbose=0):

        if filetype != "any" or path != getcwd():
            word_shortest = min([len(w) for w in query.split(" ")])
            pgpgram_db_kwargs = {'path': path,
                                 'filetype': filetype,
                                 'exclude': exclude,
                                 'update': True}

        # To update for db usage
            #pgpgram_db = PGPgramDb(self, **pgpgram_db_kwargs)
            #self.index = Index(pgpgram_db, slb=word_shortest, verbose=verbose)

        #results = self.index.search(query)

        #self.display_results(results[:results_number], reverse=reverse)

        # if results != []:
        #     choice = int(input("Select file to restore (number): "))
        #     f = next(self.files[d][0] for d in self.files if self.files[d][0]['path'] == results[choice])["name"]
        #     restore = Restore(f, download_directory=getcwd(), verbose=verbose) 

    def display_results(self, results, reverse=True):
        lines = []
        for i,f in enumerate(results):
            g = f.split("/")
            result = {"title": "{}{}. {}{}{}".format(color.GREEN + color.BOLD,
                                                     i,
                                                     color.BLUE,
                                                     g[-1],
                                                     color.END),
                      "subtitle": "{}{}{}\n".format(color.GRAY,
                                                    f,
                                                    color.END)}
            lines.append(result)

        if reverse: lines.reverse()
        
        for result in lines:
            print(result['title'])
            print(result['subtitle'])

    def import_file(self, filename):
        if filename.endswith("pkl"):
            files = load(filename)
            for f in files:
                try: 
                    self.files[f['hash']]
                except KeyError as e:
                    self.files[f['hash']] = [f]
                    print("adding {}".format(f['name']))
        else:
            files = SqliteDict(filename, autocommit=False)
            for k in files:
                try: 
                    self.files[k]
                except KeyError as e:
                    self.files[k] = files[k]
                    print("adding {}".format(f['name']))
            self.rebuild_names_db()
               
        self.save()
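One detail worth noting in `rebuild_names_db` above: values fetched from a SqliteDict are deserialized copies, so mutating them in place persists nothing; the list has to be written back under its key. A standalone sketch of that pattern (the file name is a placeholder):

from sqlitedict import SqliteDict

names = SqliteDict('/tmp/names-demo.sqlite', autocommit=False)
documents = [{'name': 'a.txt'}, {'name': 'a.txt'}, {'name': 'b.txt'}]
for document in documents:
    entries = names.get(document['name'], [])
    entries.append(document)             # mutate a local copy...
    names[document['name']] = entries    # ...then reassign to persist it
names.commit()
names.close()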
Exemple #51
0
class Gdrive:
    def __init__(self, config, token_path, cache_path):
        self.cfg = config
        self.token_path = token_path
        self.cache_path = cache_path
        self.token = None
        self.cache = None

    def first_run(self):
        # token file
        if not os.path.exists(self.token_path):
            # token.json does not exist, lets do the first run auth process
            print(
                "Visit %s and authorize against the account you wish to use" %
                self.authorize_url())
            auth_code = raw_input('Enter authorization code: ')
            if self.first_access_token(auth_code) and self.token is not None:
                self.dump_token()
            else:
                logger.error(
                    "Failed to authorize with the supplied client_id/client_secret/auth_code..."
                )
                return False
        else:
            self.token = utils.load_json(self.token_path)

        # cache file
        self.cache = SqliteDict(self.cache_path,
                                tablename='cache',
                                encode=json.dumps,
                                decode=json.loads,
                                autocommit=False)
        return True

    def authorize_url(self):
        payload = {
            'client_id': self.cfg['GDRIVE']['CLIENT_ID'],
            'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
            'response_type': 'code',
            'access_type': 'offline',
            'scope': 'https://www.googleapis.com/auth/drive'
        }
        url = 'https://accounts.google.com/o/oauth2/v2/auth?' + urlencode(
            payload)
        return url

    def first_access_token(self, auth_code):
        logger.info("Requesting access token for auth code %r", auth_code)
        payload = {
            'code': auth_code,
            'client_id': self.cfg['GDRIVE']['CLIENT_ID'],
            'client_secret': self.cfg['GDRIVE']['CLIENT_SECRET'],
            'grant_type': 'authorization_code',
            'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
        }
        success, resp, data = self._make_request(
            'https://www.googleapis.com/oauth2/v4/token',
            data=payload,
            headers={},
            request_type='post')
        if success and resp.status_code == 200:
            logger.info("Retrieved first access token!")
            self.token = data
            self.token['page_token'] = ''
            return True
        else:
            logger.error("Error retrieving first access_token:\n%s", data)
            return False

    def refresh_access_token(self):
        logger.debug("Renewing access token...")
        payload = {
            'refresh_token': self.token['refresh_token'],
            'client_id': self.cfg['GDRIVE']['CLIENT_ID'],
            'client_secret': self.cfg['GDRIVE']['CLIENT_SECRET'],
            'grant_type': 'refresh_token',
        }
        success, resp, data = self._make_request(
            'https://www.googleapis.com/oauth2/v4/token',
            data=payload,
            headers={},
            request_type='post')
        if success and resp.status_code == 200 and 'access_token' in data:
            logger.info("Renewed access token!")

            refresh_token = self.token['refresh_token']
            page_token = self.token['page_token']
            self.token = data
            if 'refresh_token' not in self.token or not self.token[
                    'refresh_token']:
                self.token['refresh_token'] = refresh_token
            self.token['page_token'] = page_token
            self.dump_token()
            return True
        else:
            logger.error("Error renewing access token:\n%s", data)
            return False

    def get_changes_first_page_token(self):
        success, resp, data = self._make_request(
            'https://www.googleapis.com/drive/v3/changes/startPageToken',
            params={'supportsTeamDrives': self.cfg['GDRIVE']['TEAMDRIVE']})
        if success and resp.status_code == 200:
            if 'startPageToken' not in data:
                logger.error(
                    "Failed to retrieve startPageToken from returned startPageToken:\n%s",
                    data)
                return False
            self.token['page_token'] = data['startPageToken']
            self.dump_token()
            return True
        else:
            logger.error("Error retrieving first page token:\n%s", data)
            return False

    def get_changes(self):
        success, resp, data = self._make_request(
            'https://www.googleapis.com/drive/v3/changes',
            params={
                'pageToken':
                self.token['page_token'],
                'pageSize':
                1000,
                'includeRemoved':
                True,
                'includeTeamDriveItems':
                self.cfg['GDRIVE']['TEAMDRIVE'],
                'supportsTeamDrives':
                self.cfg['GDRIVE']['TEAMDRIVE'],
                'fields':
                'changes(file(md5Checksum,mimeType,modifiedTime,'
                'name,parents,teamDriveId,trashed),'
                'fileId,removed,teamDrive(id,name),'
                'teamDriveId),newStartPageToken,nextPageToken'
            })
        if success and resp.status_code == 200:
            # page token logic
            if data is not None and 'nextPageToken' in data:
                self.token['page_token'] = data['nextPageToken']
                self.dump_token()
            elif data is not None and 'newStartPageToken' in data:
                self.token['page_token'] = data['newStartPageToken']
                self.dump_token()
            else:
                logger.error(
                    "Unexpected response while polling for changes from page %s:\n%s",
                    str(self.token['page_token']), data)
                return False, data
            return True, data
        else:
            logger.error("Error getting page changes for page_token %r:\n%s",
                         self.token['page_token'], data)
            return False, data

    def get_id_metadata(self, item_id, teamdrive_id=None):
        # return cache from metadata if available
        cached_metadata = self._get_cached_metadata(item_id)
        if cached_metadata:
            return True, cached_metadata

        # does item_id match teamdrive_id?
        if teamdrive_id is not None and item_id == teamdrive_id:
            success, resp, data = self._make_request(
                'https://www.googleapis.com/drive/v3/teamdrives/%s' %
                str(item_id))
            if success and resp.status_code == 200 and 'name' in data:
                # we successfully retrieved this teamdrive info; let's place a mimeType key in the result
                # so we know it needs to be cached
                data['mimeType'] = 'application/vnd.google-apps.folder'
        else:
            # retrieve file metadata
            success, resp, data = self._make_request(
                'https://www.googleapis.com/drive/v3/files/%s' % str(item_id),
                params={
                    'supportsTeamDrives':
                    self.cfg['GDRIVE']['TEAMDRIVE'],
                    'fields':
                    'id,md5Checksum,mimeType,modifiedTime,name,parents,'
                    'trashed,teamDriveId'
                })
        if success and resp.status_code == 200:
            return True, data
        else:
            logger.error("Error retrieving metadata for item %r:\n%s", item_id,
                         data)
            return False, data

    def get_id_file_paths(self, item_id, teamdrive_id=None):
        file_paths = []
        added_to_cache = 0

        try:

            def get_item_paths(obj_id,
                               path,
                               paths,
                               new_cache_entries,
                               teamdrive_id=None):
                success, obj = self.get_id_metadata(obj_id, teamdrive_id)
                if not success:
                    return new_cache_entries

                teamdrive_id = teamdrive_id if 'teamDriveId' not in obj else obj[
                    'teamDriveId']

                # add item object to cache if we know its not from cache
                if 'mimeType' in obj:
                    # we know this is a new item fetched from the api, because the cache does not store this field
                    self.add_item_to_cache(
                        obj['id'], obj['name'],
                        [] if 'parents' not in obj else obj['parents'])
                    new_cache_entries += 1

                if path.strip() == '':
                    path = obj['name']
                else:
                    path = os.path.join(obj['name'], path)

                if 'parents' in obj and obj['parents']:
                    for parent in obj['parents']:
                        new_cache_entries += get_item_paths(
                            parent, path, paths, new_cache_entries,
                            teamdrive_id)

                if (not obj or 'parents' not in obj
                        or not obj['parents']) and len(path):
                    paths.append(path)
                    return new_cache_entries
                return new_cache_entries

            added_to_cache += get_item_paths(item_id, '', file_paths,
                                             added_to_cache, teamdrive_id)
            if added_to_cache:
                logger.debug("Dumping cache due to new entries!")
                self.dump_cache()

            if len(file_paths):
                return True, file_paths
            else:
                return False, file_paths

        except Exception:
            logger.exception("Exception retrieving filepaths for '%s': ",
                             item_id)

        return False, []

    # cache
    def add_item_to_cache(self, item_id, item_name, item_parents):
        if self.cfg['GDRIVE'][
                'SHOW_CACHE_MESSAGES'] and item_id not in self.cache:
            logger.info("Added '%s' to cache: %s", item_id, item_name)
        self.cache[item_id] = {'name': item_name, 'parents': item_parents}
        return

    def remove_item_from_cache(self, item_id):
        if self.cache.pop(item_id, None):
            return True
        return False

    # dump jsons
    def dump_token(self):
        utils.dump_json(self.token_path, self.token)
        return

    def dump_cache(self):
        self.cache.commit()
        return

    ############################################################
    # INTERNALS
    ############################################################

    # cache
    def _get_cached_metadata(self, item_id):
        if item_id in self.cache:
            return self.cache[item_id]
        return None

    # requests
    @backoff.on_predicate(backoff.expo,
                          lambda x: not x[0] and
                          ('error' in x[2] and 'code' in x[2]['error'] and x[2]
                           ['error']['code'] != 401),
                          max_tries=8)
    def _make_request(self,
                      url,
                      headers=None,
                      data=None,
                      params=None,
                      request_type='get'):
        refreshed_token = False

        while True:
            if headers is None and self.token:
                auth_headers = {
                    'Authorization': 'Bearer %s' % self.token['access_token'],
                }
            else:
                auth_headers = {}

            resp = None
            if request_type == 'get':
                resp = requests.get(
                    url,
                    params=params,
                    headers=headers if headers is not None else auth_headers,
                    timeout=30)
            elif request_type == 'post':
                resp = requests.post(
                    url,
                    data=data,
                    headers=headers if headers is not None else auth_headers,
                    timeout=30)
            else:
                return False, resp, {
                    'error': {
                        'code':
                        401,
                        'message':
                        'Invalid request_type was supplied to _make_request'
                    }
                }

            # response logic
            try:
                data = resp.json()
            except ValueError:
                logger.exception(
                    "Exception while decoding response from Google Drive for data:\n%s\nTraceback: ",
                    resp.text)
                return False, resp, {
                    'error': {
                        'code': resp.status_code,
                        'message':
                        'Failed to json decode Google Drive response'
                    }
                }

            if 'error' in data and 'code' in data['error'] and (
                    'message' in data['error']
                    and 'Invalid Credentials' in data['error']['message']):
                # the token has expired.
                if not refreshed_token:
                    refreshed_token = True
                    self.refresh_access_token()
                    continue
                else:
                    # attempt was already made to refresh token
                    return False, resp, data

            if resp.status_code == 200:
                return True, resp, data
            else:
                return False, resp, data
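A sketch of how the pieces above are typically strung together; `cfg` stands for the loaded configuration dict and the file paths are placeholders:

gdrive = Gdrive(config=cfg, token_path='token.json', cache_path='cache.sqlite')
if gdrive.first_run():
    if not gdrive.token.get('page_token'):
        gdrive.get_changes_first_page_token()
    ok, changes = gdrive.get_changes()
    if ok:
        for change in changes.get('changes', []):
            found, paths = gdrive.get_id_file_paths(
                change['fileId'], change.get('teamDriveId'))
            if found:
                print(change['fileId'], paths)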
Exemple #52
0
class Tree_control(object):
    """列表加载窗体"""
    def __init__(self, tree, eblog):
        self.newroot = tk.Toplevel()
        self.newroot.title('Loading list')
        self.newroot.iconbitmap("favicon.ico")
        self.newroot.wm_attributes('-topmost', 1)
        win_width = self.newroot.winfo_screenwidth()
        win_higth = self.newroot.winfo_screenheight()
        width_adjust = (win_width - 400) / 2
        higth_adjust = (win_higth - 250) / 2
        self.newroot.geometry("%dx%d+%d+%d" %
                              (400, 250, width_adjust, higth_adjust))

        # Progress bar
        self.__showFlag = True
        self.__width = 300
        self.__heigth = 20
        self.__sleep = 0
        self.bar = ttk.Progressbar(self.newroot,
                                   length=self.__width,
                                   mode="indeterminate",
                                   orient=tk.HORIZONTAL)
        self.bar.pack(expand=True)
        self.bar.start(10)

        # Status labels
        self.content2 = tk.Label(self.newroot,
                                 text="Loading the list; please do not interrupt the operation and wait patiently......")
        self.content2.place(
            x=50,
            y=30,
        )
        self.content = tk.Label(self.newroot, text="")
        self.content.place(
            x=50,
            y=60,
        )
        self.eblog = eblog
        self.tree = tree
        self.mydict = SqliteDict('./my_db.sqlite', autocommit=True)

        # Start the worker thread
        self.p = Thread(target=self.add_item)
        self.p.daemon = True
        self.p.start()

        # Handle clicking the window close button
        self.newroot.protocol("WM_DELETE_WINDOW", self.close)

    # Load items into the tree
    def add_item(self):
        len_items = len(self.mydict)
        i = 0
        for key, value in sorted(self.mydict.iteritems()):
            i = i + 1
            self.content.config(text="Processing link " + str(i) + " of " +
                                str(len_items))
            self.tree.insert("",
                             0,
                             iid=value[4],
                             values=(value[0], value[1], value[2], value[3]))
        self.close()
        return 1

    def close(self):
        self.newroot.destroy()
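For context, `add_item` above reads `value[0]`..`value[3]` as tree columns and `value[4]` as the tree iid, so my_db.sqlite is expected to hold five-element values. A hedged sketch of a writer that produces such records (the field meanings are guesses, not from the listing):

from sqlitedict import SqliteDict

with SqliteDict('./my_db.sqlite', autocommit=True) as mydict:
    mydict['0001'] = ('Example title', 'https://example.com', 'pending',
                      '2024-01-01', 'row-0001')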
Exemple #53
0
class SqliteRecorder(BaseRecorder):
    """ Recorder that saves cases in an SQLite dictionary.

    Args
    ----
    sqlite_dict_args : dict
        Dictionary of any additional arguments for the SQL db.

    Options
    -------
    options['record_metadata'] :  bool(True)
        Tells recorder whether to record variable attribute metadata.
    options['record_unknowns'] :  bool(True)
        Tells recorder whether to record the unknowns vector.
    options['record_params'] :  bool(False)
        Tells recorder whether to record the params vector.
    options['record_resids'] :  bool(False)
        Tells recorder whether to record the residuals vector.
    options['record_derivs'] :  bool(True)
        Tells recorder whether to record derivatives that are requested by a `Driver`.
    options['includes'] :  list of strings
        Patterns for variables to include in recording.
    options['excludes'] :  list of strings
        Patterns for variables to exclude in recording (processed after includes).
    """
    def __init__(self, out, **sqlite_dict_args):
        super(SqliteRecorder, self).__init__()

        if MPI and MPI.COMM_WORLD.rank > 0:
            self._open_close_sqlitedict = False
        else:
            self._open_close_sqlitedict = True

        if self._open_close_sqlitedict:
            sqlite_dict_args.setdefault('autocommit', True)
            self.out = SqliteDict(filename=out,
                                  flag='n',
                                  tablename='openmdao',
                                  **sqlite_dict_args)
            self.out_derivs = SqliteDict(filename=out,
                                         flag='w',
                                         tablename='openmdao_derivs',
                                         **sqlite_dict_args)

        else:
            self.out = None

    def record_metadata(self, group):
        """Stores the metadata of the given group in a sqlite file using
        the variable name for the key.

        Args
        ----
        group : `System`
            `System` containing vectors
        """

        params = group.params.iteritems()
        #resids = group.resids.iteritems()
        unknowns = group.unknowns.iteritems()

        data = OrderedDict([
            ('format_version', format_version),
            ('Parameters', dict(params)),
            ('Unknowns', dict(unknowns)),
        ])

        self.out['metadata'] = data

    def record_iteration(self, params, unknowns, resids, metadata):
        """
        Stores the provided data in the sqlite file using the iteration
        coordinate for the key.

        Args
        ----
        params : dict
            Dictionary containing parameters. (p)

        unknowns : dict
            Dictionary containing outputs and states. (u)

        resids : dict
            Dictionary containing residuals. (r)

        metadata : dict, optional
            Dictionary containing execution metadata (e.g. iteration coordinate).
        """

        data = OrderedDict()
        iteration_coordinate = metadata['coord']
        timestamp = metadata['timestamp']

        group_name = format_iteration_coordinate(iteration_coordinate)

        data['timestamp'] = timestamp
        data['success'] = metadata['success']
        data['msg'] = metadata['msg']

        if self.options['record_params']:
            data['Parameters'] = self._filter_vector(params, 'p',
                                                     iteration_coordinate)

        if self.options['record_unknowns']:
            data['Unknowns'] = self._filter_vector(unknowns, 'u',
                                                   iteration_coordinate)

        if self.options['record_resids']:
            data['Residuals'] = self._filter_vector(resids, 'r',
                                                    iteration_coordinate)

        self.out[group_name] = data

    def record_derivatives(self, derivs, metadata):
        """Writes the derivatives that were calculated for the driver.

        Args
        ----
        derivs : dict or ndarray depending on the optimizer
            Dictionary containing derivatives

        metadata : dict, optional
            Dictionary containing execution metadata (e.g. iteration coordinate).
        """

        data = OrderedDict()
        iteration_coordinate = metadata['coord']
        timestamp = metadata['timestamp']

        group_name = format_iteration_coordinate(iteration_coordinate)

        data['timestamp'] = timestamp
        data['success'] = metadata['success']
        data['msg'] = metadata['msg']
        data['Derivatives'] = derivs

        self.out_derivs[group_name] = data

    def close(self):
        """Closes `out`"""

        if self._open_close_sqlitedict:
            if self.out is not None:
                self.out.close()
                self.out = None
            if self.out_derivs is not None:
                self.out_derivs.close()
                self.out_derivs = None
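A hedged sketch of reading the recorded cases back with SqliteDict, assuming the recorder wrote to a file named 'cases.sqlite'; the table names match those used in `__init__` above:

from sqlitedict import SqliteDict

with SqliteDict('cases.sqlite', tablename='openmdao', flag='r') as cases:
    metadata = cases.get('metadata')
    for coord, case in cases.items():
        if coord == 'metadata':
            continue
        print(coord, case['timestamp'], case['success'])

with SqliteDict('cases.sqlite', tablename='openmdao_derivs', flag='r') as derivs:
    for coord, case in derivs.items():
        print(coord, case['msg'])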
Exemple #54
0
def file_reader_generator(file_object):
    while True:
        data = file_object.readline()
        if not data:
            break
        yield data


print('Loading claimToDocsDict')
claimToDocsDict_f = open('claimToDocsDict_train.pickle', 'rb')
claimToDocsDict = pickle.load(claimToDocsDict_f)
claimToDocsDict_f.close()

print('Loading Claims')
training_db = SqliteDict('training_db.sqlite', decode=decompress_set)

print('Loading wiki corpus')
conn = sqlite3.connect('wiki_corpus.db')
c = conn.cursor()


def flatten_list(lst):
    flattened = [item for nstd in lst for item in nstd]
    return flattened


translator = str.maketrans('', '', string.punctuation)


def tokenise_line(line):
Exemple #55
0
    def __init__(self):
        SqliteDict.__init__(self,
                            filename=ActiniaConfig.GRAPH_DB,
                            autocommit=True)
Exemple #56
0
    def load(self, ctx):
        self.guild_configs = SqliteDict('./guild_configs.sqlite',
                                        autocommit=True)
        self.signups = SqliteDict('./signups.sqlite', autocommit=True)
Exemple #57
0
import os
import pandas as pd
from math import inf
from sqlitedict import SqliteDict
from statsmodels.tsa.ar_model import AutoReg

cache = SqliteDict('precompute.db', autocommit=True)
# A US manufacturer buys raw materials in multiple currencies
purchases = pd.read_excel('Purchases.xlsx')

# For each of those currencies, find the best model to forecast prices
best_model = {}
for currency in purchases.currency:
    print('Currency', currency)
    data = pd.read_excel(f'{currency}.xlsx')
    data = data[data[currency] > 0]
    best_aic, best_fit, best_lags = inf, None, None
    check_lags = cache.get(
        currency, (3, 5, 7, 10, 14, 28, 60, 90, 120, 183, 365, 730, 1095))
    for lags in check_lags:
        print('    Lags', lags)
        model = AutoReg(data[currency], lags=lags)
        fit = model.fit()
        if fit.aic < best_aic:
            best_aic, best_fit, best_lags = fit.aic, fit, lags
    cache[currency] = (best_lags, )
    best_model[currency] = best_fit

# Estimate next month's price increase assuming the same volume as today
forecasted_value = 0
for index, row in purchases.iterrows():
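    # The listing is truncated here; a rough sketch of a continuation, assuming
    # purchases also carries a 'volume' column and that "next month" means a
    # 30-step-ahead forecast (both are assumptions, not in the original script).
    prices = pd.read_excel(f'{row.currency}.xlsx')
    prices = prices[prices[row.currency] > 0]
    fit = best_model[row.currency]
    preds = pd.Series(fit.predict(start=len(prices), end=len(prices) + 29))
    price_change = float(preds.iloc[-1]) - float(prices[row.currency].iloc[-1])
    forecasted_value += row['volume'] * price_change

print('Estimated increase in purchase cost next month:', forecasted_value)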
Exemple #58
0
# Creates a sqlite for each category

from sqlitedict import SqliteDict

splits = [i*10000000 for i in range(0, 8)]
source = './../data/sqlite/split_texts/'
path = "./../data/sqlite/community_texts/"
actual_category = "none"
category_dict = SqliteDict(f"{path}AL.sqlite", tablename="value", journal_mode="OFF")
text_dict = SqliteDict(f"{source}text_dict_{0}.sqlite", tablename="value", flag="r")

for num in splits:
    category_dict.commit()
    text_dict.close()
    text_dict = SqliteDict(f"{source}text_dict_{num}.sqlite", tablename="value", flag="r")

    print(num)
    for id_c, value in text_dict.items():
        # Only reopen the per-category database when the category changes,
        # and always write the current record to the open category database.
        if value["category"] != actual_category:
            category_dict.commit()
            category_dict.close()
            actual_category = value["category"]
            category_dict = SqliteDict(f"{path}{actual_category}.sqlite", tablename="value", journal_mode="OFF")
        category_dict[id_c] = value

category_dict.commit()
category_dict.close()
Exemple #59
0
def init_db(base_path):
    mydict = SqliteDict(base_path, autocommit=True)
    return mydict