Ejemplo n.º 1
0
def build_master_index(cache=False):
    """Merge every node's on-disk index file into one in-memory index.

    For each node returned by ``localnode.nodes()`` the file
    ``<GPFS_STORAGE>/<node>_index`` is read as a sequence of records of
    ``IndexEntry.size()`` bytes, each decoded with ``IndexEntry.unpack``.

    Args:
        cache: When true, every key of the finished index is additionally
            stored through ``memcached.set``.

    Returns:
        A dict mapping ``str(entry.id)`` to a list of
        ``(index, offset, chunk_size)`` tuples, in the order they were read.
    """
    master_index = {}
    record_size = IndexEntry.size()
    for node in localnode.nodes():
        node_index_file = os.path.join(GPFS_STORAGE, "%s_index" % node)
        # The file is consumed as fixed-size records handed to
        # IndexEntry.unpack, so read raw bytes rather than decoded text.
        with open(node_index_file, 'rb') as f:
            while True:
                chunk = f.read(record_size)
                # An empty read means EOF. Truthiness works for both str and
                # bytes, whereas comparing against '' never matches bytes.
                # NOTE: a short trailing record is still passed to unpack,
                # exactly as before.
                if not chunk:
                    break
                index_entry = IndexEntry.unpack(chunk)
                index_content = (
                    index_entry.index,
                    index_entry.offset,
                    index_entry.chunk_size,
                )
                # setdefault keeps the result a plain dict (no implicit
                # key creation on later lookups by callers).
                master_index.setdefault(str(index_entry.id), []).append(
                    index_content)

    if cache:
        print("putting it to memcache")
        for key in master_index:
            memcached.set(key, master_index[key])
        print("done")
    return master_index
Ejemplo n.º 2
0
def process():
    """Rewrite this node's share of the master index into output files.

    The master index is built with ``build_master_index()``. Hashes are
    numbered in iteration order, and only those whose position modulo the
    number of nodes equals ``localnode.index_offset`` are handled here.
    For each handled hash, ``load_hash32`` supplies a mapping of word to
    content, and every word is written with ``write_data_main``.

    Output files come from the ``gen_files()`` generator: the first
    ``next()`` yields an ``(outfile, outfile_name)`` pair, and sending the
    current ``outfile`` back yields the next pair.

    Returns:
        A dict mapping each written word to
        ``{"file": outfile_name, "start": start_pos, "chunk_size": n}``
        where ``n`` is ``end_pos - start_pos``.
    """
    print("Starting up...")
    offset = localnode.index_offset
    nnodes = len(localnode.nodes())
    words_index = build_master_index()
    new_index = {}
    print("Processing hash by hash...")
    it = gen_files()
    # Built-in next() works on Python 2.6+ and 3; it.next() is 2-only.
    outfile, outfile_name = next(it)
    for n, hash32 in enumerate(words_index):
        # Skip hashes that belong to another node's slice.
        if n % nnodes != offset:
            continue

        data = load_hash32(hash32, words_index)
        if len(data) == 0:
            continue

        # Iterate keys and look the value up: same pairs as iteritems(),
        # without relying on a Python-2-only method.
        for word in data:
            content = data[word]
            print("writing data for: %s" % word)
            start_pos, end_pos, has_space = write_data_main(
                outfile, word, content)
            new_index[word] = {
                "file": outfile_name,
                "start": start_pos,
                "chunk_size": end_pos - start_pos,
            }
            print("Bytes: %d" % end_pos)
            if not has_space:
                # Current file is full: hand it back and get a fresh one.
                print("%s is full" % outfile)
                outfile, outfile_name = it.send(outfile)
                print("moving on to %s" % outfile)

    # Hand the last file back to the generator; presumably this lets it
    # finalize/close the file -- confirm against gen_files().
    it.send(outfile)

    return new_index