Example #1
0
    def __init__(self):
        """Set the index file path, open it, and build the queue consumer.

        Calls ``self.open_ix(create=True)`` on the file under ``BASE_PATH``
        and then constructs a ``Consumer`` from a ``Props`` object configured
        for the "wikiindex" exchange.
        """
        self.filename = "%s/corpus/ix/h5.ix" % BASE_PATH
        self.open_ix(create=True)

        # Fill in the consumer settings on a local handle, then publish them
        # on the instance; the same Props object is shared with the Consumer.
        props = Props()
        props.backpressure = 100
        props.exchange = "wikiindex"
        self.p = props
        self.queue = Consumer(props)
Example #2
0
    def __init__(self):
        """Prepare the index file and the message-queue consumer.

        The index file lives at ``<BASE_PATH>/corpus/ix/h5.ix`` and is opened
        through ``self.open_ix(create=True)``. The consumer is configured with
        a backpressure of 100 on the "wikiindex" exchange.
        """
        self.filename = "%s/corpus/ix/h5.ix" % BASE_PATH
        self.open_ix(create=True)

        # One Props instance, reachable both as self.p and by the Consumer.
        settings = self.p = Props()
        settings.backpressure = 100
        settings.exchange = "wikiindex"
        self.queue = Consumer(settings)
Example #3
0
class Index(object):
    """Store category index entries received from a message queue.

    Each tuple message ``(cat, filename, start, end)`` is written as one row
    per category into the ``/index/index`` table of the HDF5 file at
    ``<BASE_PATH>/corpus/ix/h5.ix``. Any non-tuple message closes and reopens
    the file.

    NOTE(review): ``openFile``/``createGroup``/``createTable`` look like the
    PyTables 2.x camelCase API -- confirm against the installed version.
    """

    # Handles into the open HDF5 file; all three are set by open_ix().
    group = None
    table = None
    entry = None

    def __init__(self):
        """Open the index file (creating its nodes if needed) and set up the consumer."""
        self.filename = "%s/corpus/ix/h5.ix" % BASE_PATH
        self.open_ix(create=True)

        self.p = Props()
        self.p.backpressure = 100
        self.p.exchange = "wikiindex"
        self.queue = Consumer(self.p)

    def open_ix(self, create=False):
        """Open the index file in append mode and bind group, table and row.

        :param create: when true, create the ``/index`` group and the
            ``/index/index`` table if they do not exist yet. When false the
            nodes are expected to be present already.
        """
        self.h5 = openFile(self.filename, mode="a", title="Wiki Index")

        # The file is opened in append mode, so on any run after the first the
        # nodes already exist; creating them again would fail on the duplicate
        # node. Only create what is actually missing.
        if create and "/index" not in self.h5:
            self.group = self.h5.createGroup("/", 'index', 'Wiki Index')
        else:
            self.group = self.h5.root.index

        if create and "/index/index" not in self.h5:
            self.table = self.h5.createTable(self.group, 'index', IndexEntry,
                                             "Wiki Index")
        else:
            self.table = self.h5.root.index.index

        self.entry = self.table.row

    def consume(self):
        """Start the consumer, routing messages on the exchange to on_message."""
        self.queue.start(self.on_message, self.p.exchange)

    def on_message(self, message):
        """Handle one pickled queue message.

        :param message: pickled payload. A tuple ``(cat, filename, start,
            end)`` appends one row per item of ``cat``; anything else closes
            and reopens the index file.
        """
        # SECURITY: pickle.loads can execute arbitrary code while decoding.
        # Only safe if every producer on this queue is trusted.
        msg = pickle.loads(message)
        if isinstance(msg, tuple):
            cat, filename, start, end = msg

            print(cat)

            # The filename is the same for every category; encode it once.
            encoded_filename = filename.encode("ascii", "ignore")
            for c in cat:
                # Characters outside ASCII are dropped, not rejected.
                self.entry['cat'] = c.encode('ascii', "ignore")
                self.entry['filename'] = encoded_filename
                self.entry['start'] = start
                # NOTE(review): the value unpacked as `end` is stored under
                # 'length' -- confirm whether the sender passes a length or an
                # end offset.
                self.entry['length'] = end
                self.entry.append()
                print(c)
        else:
            # Non-tuple payload: close and reopen the file (presumably to
            # flush pending rows to disk -- confirm with the producer).
            self.h5.close()
            self.open_ix()
Example #4
0
class Index(object):
    """Queue-fed writer for the wiki category index.

    Consumes pickled messages from the "wikiindex" exchange. A tuple message
    ``(cat, filename, start, end)`` becomes one table row per category in the
    HDF5 file at ``<BASE_PATH>/corpus/ix/h5.ix``; any other message makes the
    writer close and reopen that file.

    NOTE(review): ``openFile``/``createGroup``/``createTable`` appear to be
    the PyTables 2.x camelCase names -- verify against the installed version.
    """

    # Set by open_ix(): the '/index' group, its 'index' table, and the
    # table's row object used for appending.
    group = None
    table = None
    entry = None

    def __init__(self):
        """Open the index file and configure the queue consumer."""
        self.filename = "%s/corpus/ix/h5.ix" % BASE_PATH
        self.open_ix(create=True)

        self.p = Props()
        self.p.backpressure = 100
        self.p.exchange = "wikiindex"
        self.queue = Consumer(self.p)

    def open_ix(self, create=False):
        """Open the HDF5 file for appending and bind its index nodes.

        :param create: if true, the ``/index`` group and ``/index/index``
            table are created when absent; existing nodes are reused. If
            false, both nodes must already exist.
        """
        self.h5 = openFile(self.filename, mode="a", title="Wiki Index")

        # Append mode keeps earlier contents, so a restart finds the nodes
        # already in place. Creating them unconditionally would fail on the
        # duplicate name, hence the membership checks.
        need_group = create and "/index" not in self.h5
        if need_group:
            self.group = self.h5.createGroup("/", 'index', 'Wiki Index')
        else:
            self.group = self.h5.root.index

        need_table = create and "/index/index" not in self.h5
        if need_table:
            self.table = self.h5.createTable(self.group, 'index', IndexEntry, "Wiki Index")
        else:
            self.table = self.h5.root.index.index

        self.entry = self.table.row

    def consume(self):
        """Begin consuming from the configured exchange with on_message as callback."""
        self.queue.start(self.on_message, self.p.exchange)

    def on_message(self, message):
        """Decode one queue message and act on it.

        :param message: pickled payload; either a ``(cat, filename, start,
            end)`` tuple to index, or any other object to trigger a
            close-and-reopen of the index file.
        """
        # SECURITY: unpickling untrusted bytes can run arbitrary code; this
        # assumes all producers publishing to the exchange are trusted.
        msg = pickle.loads(message)

        if not isinstance(msg, tuple):
            # Control message: cycle the file handle (presumably so pending
            # rows reach disk -- confirm with the producer side).
            self.h5.close()
            self.open_ix()
            return

        cat, filename, start, end = msg

        print(cat)

        # Loop-invariant: encode the filename once for all categories.
        ascii_filename = filename.encode("ascii", "ignore")
        for c in cat:
            # Non-ASCII characters are silently dropped by "ignore".
            self.entry['cat'] = c.encode('ascii', "ignore")
            self.entry['filename'] = ascii_filename
            self.entry['start'] = start
            # NOTE(review): 'length' receives the value named `end`; check
            # whether producers send a length or an end offset here.
            self.entry['length'] = end
            self.entry.append()
            print(c)