Esempio n. 1
0
class MongoSessionFactory(object):
    """Load and persist ``Session`` objects in a MongoDB collection.

    Every session is stored as one document holding the session payload
    under ``data`` and the time of the last save under ``last_access``.
    """

    def __init__(self, database, collection='sessions', ttl=None, **kwargs):
        """Connect to MongoDB and prepare the session collection.

        Args:
            database: Name of the database that holds the sessions.
            collection: Name of the collection inside ``database``.
            ttl: Optional lifetime in seconds. When given, an index with
                ``expireAfterSeconds=ttl`` is created on ``last_access``.
            **kwargs: Passed through unchanged to ``MongoClient``.
        """
        self.collection = MongoClient(**kwargs)[database][collection]
        # Every construction drops all existing indexes of the collection,
        # which also removes an index left behind by an earlier run (for
        # example one that was built with a different ``ttl``).
        self.collection.drop_indexes()
        if ttl is not None:
            # create_index takes the same arguments as the deprecated
            # ensure_index alias; the indexes were just dropped, so there
            # is nothing for ensure_index's caching to save.
            self.collection.create_index('last_access', expireAfterSeconds=ttl)

    def load(self, id=None):
        """Return the stored session for ``id``, or a fresh empty one.

        The lookup is best-effort: a missing, malformed or unknown ``id``,
        or any error raised while querying, yields a new ``Session``
        instead of propagating.

        Args:
            id: Session id as previously returned by ``save``, or ``None``.

        Returns:
            A ``Session``; ``data`` and ``id`` are only populated when a
            matching document was found.
        """
        session = Session()
        if id is None:
            # ObjectId(None) would mint a brand-new id that cannot match any
            # stored document, so skip the pointless database round trip.
            return session
        try:
            doc = self.collection.find_one({'_id': ObjectId(id)})
            if doc is not None:
                session.data = doc['data']
                session.id = id
        except Exception:
            # Still deliberately lenient, but unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit through.
            import logging
            logging.getLogger(__name__).debug(
                'could not load session %r', id, exc_info=True)
        return session

    def save(self, session):
        """Persist ``session`` and return its id as a string.

        Args:
            session: Object exposing ``data`` and ``id``; an ``id`` of
                ``None`` means the session has not been stored before.

        Returns:
            ``str`` of the document's ``_id``.
        """
        doc = {
            'data': session.data,
            'last_access': datetime.utcnow(),
        }
        if session.id is not None:
            doc['_id'] = ObjectId(session.id)
        # NOTE(review): Collection.save is a legacy PyMongo API (deprecated
        # in 3.x, removed in 4.x). It is kept because the return below
        # relies on save() filling in doc['_id'] for new documents.
        self.collection.save(doc)
        return str(doc['_id'])
Esempio n. 2
0
class cache_session(Session):
    """``Session`` whose ``request`` caches response bodies in MongoDB.

    NOTE(review): the base class is presumably ``requests.Session`` (the
    ``request`` signature and the ``.text`` attribute match) -- confirm
    against the file's imports.
    """

    def __init__(self, db_uri, dbname='tmp', colname='cache', expire_time=None, disabled=False):
        """Open the cache collection and optionally set up expiry.

        Args:
            db_uri: Connection URI handed to ``MongoClient``.
            dbname: Database that holds the cache collection.
            colname: Name of the cache collection.
            expire_time: Optional lifetime in seconds; when truthy an index
                with ``expireAfterSeconds=expire_time`` is (re)built on
                ``cache_time``.
            disabled: When true, cached entries are never returned; fresh
                responses are still written to the collection.
        """
        self.col = MongoClient(db_uri)[dbname][colname]
        self.disabled = disabled
        if expire_time:
            # An index of that name may carry a different expiry, so it is
            # rebuilt. As before, this drops *all* indexes of the collection.
            if self.col.index_information().get('cache_time_-1'):
                self.col.drop_indexes()
            self.col.create_index([("cache_time", DESCENDING)], expireAfterSeconds=expire_time)
        super(cache_session, self).__init__()

    def request(self, method, url,
                params=None,
                data=None,
                headers=None,
                cookies=None,
                files=None,
                auth=None,
                timeout=None,
                allow_redirects=True,
                proxies=None,
                hooks=None,
                stream=None,
                verify=None,
                cert=None,
                json=None):
        """Return the response body for a request, from cache when possible.

        Unlike the base class, this returns the body text rather than a
        response object.

        The cache key is the SHA-1 of the method, URL, headers, files,
        data, json, params, auth, cookies and hooks. ``timeout``,
        ``allow_redirects``, ``proxies``, ``stream``, ``verify`` and
        ``cert`` are forwarded to the real request but are not part of
        the key.

        Returns:
            The cached ``html`` field on a hit, otherwise the ``.text`` of
            the freshly performed request (which is then stored).
        """
        fingerprint = {
            'method': method.upper(),
            'url': url,
            'headers': headers,
            'files': files,
            'data': data or {},
            'json': json,
            'params': params or {},
            'auth': auth,
            'cookies': cookies,
            'hooks': hooks,
        }
        req_to_str = '&'.join("%s=%s" % (k, v) for k, v in fingerprint.items())
        # hashlib needs bytes: Python 2 ``str`` passes through untouched,
        # text (``unicode`` / Python 3 ``str``) is encoded explicitly.
        if not isinstance(req_to_str, bytes):
            req_to_str = req_to_str.encode('utf-8')
        key = sha1(req_to_str).hexdigest()

        # Skip the lookup entirely when caching is disabled.
        if not self.disabled:
            cached_one = self.col.find_one({'key': key})
            if cached_one:
                print('cached')
                return cached_one['html']

        # Forward the caller's arguments; previously every keyword was
        # hard-coded to None/default, so params, headers, body, auth,
        # timeout etc. never reached the real request.
        online_req = super(cache_session, self).request(
            method, url,
            params=params,
            data=data,
            headers=headers,
            cookies=cookies,
            files=files,
            auth=auth,
            timeout=timeout,
            allow_redirects=allow_redirects,
            proxies=proxies,
            hooks=hooks,
            stream=stream,
            verify=verify,
            cert=cert,
            json=json,
        )

        html = online_req.text
        self.col.insert_one({'key': key, 'html': html, 'cache_time': datetime.utcnow()})
        return html
		return json.load(fp)


if __name__ == '__main__':
	# Import a JSON wiktionary dump into MongoDB, one document per
	# (language, synset) pair.
	parser = argparse.ArgumentParser()
	parser.add_argument("inputPath", type=str, help="path to a wiktionary dump")
	# parser.add_argument("outputPath", type=str, help="path to the outputed json file")

	args = parser.parse_args()

	inputPath = args.inputPath
	# outputPath = args.outputPath
	# extractAndDump(inputPath, outputPath)

	# The collection is named after the dump's base file name, cut at its
	# first dot.
	collection = MongoClient()['wiktionary'][os.path.basename(inputPath).split('.')[0]]
	# Start from a clean slate. Dropping the collection removes its indexes
	# as well, so no separate drop_indexes() call is needed.
	collection.drop()

	collection.create_indexes([
		IndexModel([('language', 1), ('synset', 1)])
	])
	# The loops below read the dump as {language: {synset: document}}.
	data = load_json(inputPath)

	# Count languages from 1 so the progress label runs "1/N" .. "N/N"
	# instead of "0/N" .. "N-1/N".
	for i, (lang, v) in enumerate(data.items(), 1):
		for synset, doc in tqdm(v.items(), "Importing {} ({}/{})".format(lang, i, len(data))):
			# Keys present in ``doc`` take precedence over the two
			# explicit fields because ``**doc`` is unpacked last.
			collection.insert_one({
				'language': lang,
				'synset': synset,
				**doc
			})
Esempio n. 4
0
class cache_session(Session):
    """``Session`` whose ``request`` caches response bodies in MongoDB.

    NOTE(review): the base class is presumably ``requests.Session`` (the
    ``request`` signature and the ``.text`` attribute match) -- confirm
    against the file's imports.
    """

    def __init__(self, db_uri, dbname='tmp', colname='cache', expire_time=None, disabled=False, url_only=True):
        """Open the cache collection and optionally set up expiry.

        Args:
            db_uri: Connection URI handed to ``MongoClient``.
            dbname: Database that holds the cache collection.
            colname: Name of the cache collection.
            expire_time: Optional lifetime in seconds; when truthy an index
                with ``expireAfterSeconds=expire_time`` is (re)built on
                ``cache_time``.
            disabled: When true, cached entries are never returned; fresh
                responses are still written to the collection.
            url_only: When true the cache key is derived from the URL
                alone, so requests that differ only in method, headers,
                body etc. share one cache entry.
        """
        self.col = MongoClient(db_uri)[dbname][colname]
        self.disabled = disabled
        self.url_only = url_only
        if expire_time:
            # An index of that name may carry a different expiry, so it is
            # rebuilt. As before, this drops *all* indexes of the collection.
            if self.col.index_information().get('cache_time_-1'):
                self.col.drop_indexes()
            self.col.create_index([("cache_time", DESCENDING)], expireAfterSeconds=expire_time)
        super(cache_session, self).__init__()

    def request(self, method, url,
                params=None,
                data=None,
                headers=None,
                cookies=None,
                files=None,
                auth=None,
                timeout=None,
                allow_redirects=True,
                proxies=None,
                hooks=None,
                stream=None,
                verify=None,
                cert=None,
                json=None):
        """Return the response body for a request, from cache when possible.

        Unlike the base class, this returns the body text rather than a
        response object.

        With ``url_only`` set, the cache key is the SHA-1 of the URL only.
        Otherwise it covers the method, URL, headers, files, data, json,
        params, auth, cookies and hooks. ``timeout``, ``allow_redirects``,
        ``proxies``, ``stream``, ``verify`` and ``cert`` are forwarded to
        the real request but are never part of the key.

        Returns:
            The cached ``html`` field on a hit, otherwise the ``.text`` of
            the freshly performed request (which is then stored).
        """
        if self.url_only:
            fingerprint = {
                'url': url,
            }
        else:
            fingerprint = {
                'method': method.upper(),
                'url': url,
                'headers': headers,
                'files': files,
                'data': data or {},
                'json': json,
                'params': params or {},
                'auth': auth,
                'cookies': cookies,
                'hooks': hooks,
            }
        req_to_str = '&'.join("%s=%s" % (k, v) for k, v in fingerprint.items())
        # hashlib needs bytes: Python 2 ``str`` passes through untouched,
        # text (``unicode`` / Python 3 ``str``) is encoded explicitly.
        if not isinstance(req_to_str, bytes):
            req_to_str = req_to_str.encode('utf-8')
        key = sha1(req_to_str).hexdigest()

        # Skip the lookup entirely when caching is disabled.
        if not self.disabled:
            cached_one = self.col.find_one({'key': key})
            if cached_one:
                print('cached')
                return cached_one['html']

        # Forward the caller's arguments; previously every keyword was
        # hard-coded to None/default, so params, headers, body, auth,
        # timeout etc. never reached the real request.
        online_req = super(cache_session, self).request(
            method, url,
            params=params,
            data=data,
            headers=headers,
            cookies=cookies,
            files=files,
            auth=auth,
            timeout=timeout,
            allow_redirects=allow_redirects,
            proxies=proxies,
            hooks=hooks,
            stream=stream,
            verify=verify,
            cert=cert,
            json=json,
        )

        html = online_req.text
        self.col.insert_one({'key': key, 'html': html, 'cache_time': datetime.utcnow()})
        return html