def test_replication_close_open(self):
    """Check that a replicated read-only handle only sees flushed writes.

    A writable temporary database receives one object and is flushed; a
    second handle is then opened on the same path with ``writable=False``
    and ``replicated=True``.  The reader must report one document, keep
    reporting one while a second object is added but not yet flushed, and
    report two once the writer flushes again.
    """
    class F(object):
        x = 9

    class FS(xodb.Schema):
        language = 'en'
        x = xodb.Integer

    # Writable side: a temporary database with the F -> FS mapping.
    writer = xodb.temp()
    writer.map(F, FS)

    first = F()
    writer.add(first)
    path = writer.db_path
    writer.flush()

    # Read-only side, opened on the very same on-disk path.
    reader = xodb.open(path, writable=False, replicated=True)
    assert len(reader) == 1

    # A pending (unflushed) add must not be visible to the reader yet.
    second = F()
    writer.add(second)
    assert len(reader) == 1

    # After the flush the reader observes both documents.
    writer.flush()
    assert len(reader) == 2
def wikit(): num = 0 if len(sys.argv) > 1: db = xodb.open('test_et4', writable=False) last = db.backend.get_doccount() with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f: current = None batch = [] for event, element in etree.iterparse(f, events=('start', 'end')): if event == 'start': if element.tag.endswith('page'): current = Page() redirect = False if element.tag.endswith('title'): current.title = element.text if element.tag.endswith('text'): if element.text: current.text = element.text if element.tag.endswith('redirect'): redirect = True if event == 'end': if element.tag.endswith('page'): # num += 1 # if num < last: # print 'Skipping ', num, " ", last, " ", current.title # element.clear() # continue if current.text and current.title and not redirect: if not current.title.startswith( ('Template:', 'Category:', 'File:')): print "Pumping ", current.title try: batch.append(current) if len(batch) > BATCH_SIZE: source.send(dumps(batch)) batch = [] except Exception: log.exception('wtf') element.clear()
def wikit(): num = 0 if len(sys.argv) > 1: db = xodb.open('test_et4', writable=False) last = db.backend.get_doccount() with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f: current = None batch = [] for event, element in etree.iterparse(f, events=('start', 'end')): if event == 'start': if element.tag.endswith('page'): current = Page() redirect = False if element.tag.endswith('title'): current.title = element.text if element.tag.endswith('text'): if element.text: current.text = element.text if element.tag.endswith('redirect'): redirect = True if event == 'end': if element.tag.endswith('page'): # num += 1 # if num < last: # print 'Skipping ', num, " ", last, " ", current.title # element.clear() # continue if current.text and current.title and not redirect: if not current.title.startswith(('Template:', 'Category:', 'File:')): print "Pumping ", current.title try: batch.append(current) if len(batch) > BATCH_SIZE: source.send(dumps(batch)) batch = [] except Exception: log.exception('wtf') element.clear()
def run(section, worker_cls):
    """Read a config file, open the database read-only and run a worker.

    The config file path is taken from ``sys.argv[1]``.  From its
    *section* the options ``worker_url``, ``log_file`` and ``db_path`` are
    read.  ``log_file`` is a ``%``-format template that receives the
    process name (``SUPERVISOR_PROCESS_NAME`` from the environment, or
    ``'tester'`` when unset).

    :param section: name of the config section to read.
    :param worker_cls: callable invoked as
        ``worker_cls(name, worker_url, db)``; the returned object's
        ``run()`` method is then called.
    :raises SystemExit: with code ``-1`` when no config file was given on
        the command line.
    """
    # sys.argv always holds the program name, so a config file argument is
    # only present when there are at least two entries.  (The former
    # ``< 1`` test could never be true and let ``sys.argv[1]`` raise an
    # IndexError.)  Checked first so that a usage error has no side effects.
    if len(sys.argv) < 2:
        print("usage: %s config_file" % sys.argv[0])
        sys.exit(-1)

    import xodb
    from xodb.tools.signals import register_signals
    register_signals()

    name = os.environ.get('SUPERVISOR_PROCESS_NAME', 'tester')

    from ConfigParser import ConfigParser
    config = ConfigParser()
    config.read(sys.argv[1])
    worker_url = config.get(section, 'worker_url')
    log_file = config.get(section, 'log_file')
    db_path = config.get(section, 'db_path')

    # One log file per process: the template is filled with the name.
    logging.basicConfig(filename=log_file % name, level=logging.DEBUG)

    db = xodb.open(db_path, writable=False)
    w = worker_cls(name, worker_url, db)
    logging.debug('Running worker on %s', worker_url)
    w.run()
# Sink process: pulls pickled batches of objects from a ZeroMQ PULL socket
# and adds every object to the writable 'test_et4' database.
import atexit
import logging
import zmq
import xodb
import schemas
from cPickle import loads

db = xodb.open('test_et4', writable=True)
db.map(schemas.Page, schemas.PageSchema)

# Make sure pending writes are flushed when the interpreter exits.
atexit.register(db.flush)

logging.basicConfig(level=logging.DEBUG)

ctx = zmq.Context()
sink = ctx.socket(zmq.PULL)
# The receive high-water mark is set before bind so it applies to the
# connections accepted on this endpoint.
sink.setsockopt(zmq.RCVHWM, 1000)
sink.bind('tcp://127.0.0.1:9123')

log = logging.getLogger(__name__)

while True:
    # SECURITY: unpickling executes arbitrary code supplied by the sender.
    # This is only acceptable because the socket is bound to the loopback
    # interface; do not expose this endpoint to untrusted peers.
    batch = loads(sink.recv())
    for page in batch:
        # The whole per-item step is guarded: printing the title used to
        # sit outside the try block, so a single object with a missing or
        # unprintable title aborted the consumer loop.
        try:
            print(page.title)
            db.add(page)
        except Exception:
            log.exception('error')
import bz2 import xodb import logging from lxml import etree from schemas import Page, PageSchema log = logging.getLogger(__name__) db = xodb.open('/home/michel/xap/ms_test2') db.map(Page, PageSchema) redirects = {} with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f: current = None for event, element in etree.iterparse(f, events=('start', 'end')): if event == 'start': if element.tag.endswith('page'): current = Page() redirect = False if element.tag.endswith('title'): current.title = element.text if element.tag.endswith('text'): if element.text: current.text = element.text if element.tag.endswith('redirect'): redirect = True if event == 'end': if element.tag.endswith('page'): if current.text and current.title and not redirect: try:
import bz2 import xodb import logging from lxml import etree from schemas import Page, PageSchema log = logging.getLogger(__name__) db = xodb.open('/home/michel/xap/ms_test2') db.map(Page, PageSchema) redirects = {} with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f: current = None for event, element in etree.iterparse(f, events=('start', 'end')): if event == 'start': if element.tag.endswith('page'): current = Page() redirect = False if element.tag.endswith('title'): current.title = element.text if element.tag.endswith('text'): if element.text: current.text = element.text if element.tag.endswith('redirect'): redirect = True if event == 'end':
# Interactive helpers for querying the read-only 'test_et4' database.
from pprint import pprint
from collections import OrderedDict
import xodb
from schemas import Page, PageSchema
import json

db = xodb.open('test_et4', writable=False)
db.map(Page, PageSchema)


def e(q, l, count=10, limit=1025):
    """Print the result of ``db.expand`` for query *q* as indented JSON.

    Every item of *l* becomes one ``(item, count, limit, 1, 1)`` tuple in
    the list handed to ``db.expand``; the call is made with
    ``language='en'``.
    """
    specs = []
    for item in l:
        specs.append((item, count, limit, 1, 1))
    expansion = db.expand(q, specs, language='en')
    print(json.dumps(expansion, indent=4))


def q(terms, limit=20):
    """Print the titles of documents matching *terms* as an entity.

    *terms* is lower-cased and its whitespace collapsed to single spaces,
    then wrapped into an ``entity:"..."`` query.  Titles are printed only
    when ``db.estimate`` reports more than 20 matches; at most *limit*
    results are requested.

    NOTE(review): as far as this excerpt shows, nothing happens when the
    estimate is 20 or lower - confirm against the full file.
    """
    normalized = " ".join(terms.strip().lower().split())
    entity_term = 'entity:"%s"' % normalized

    # is it an entity?
    if not (db.estimate(entity_term) > 20):
        return

    for hit in db.query(entity_term, language='en', limit=limit):
        print(hit.title)