def migrate(muri, dbname, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    Documents whose 'stype' equals the MongoDB storage type are read from
    MongoDB, written to HDFS, read back and cross-checked against the
    originals. Only when every document matches is the 'stype' of the
    migrated MongoDB documents set to the HDFS storage type.

    Args:
        muri: URI handed to MongoStorage.
        dbname: second argument handed to MongoStorage (presumably the
            database name -- confirm against MongoStorage).
        huri: URI handed to HdfsStorage.

    Returns:
        None. The process is terminated with sys.exit(1) when the
        cross-check fails, in which case MongoDB is left untouched.
    """
    # NOTE(review): a later definition migrate(muri, huri) in this file
    # rebinds the same name; confirm which signature callers rely on.
    mstg = MongoStorage(muri, dbname)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    mdocs = mstg.read({'stype': mstg.stype})

    # do nothing if no documents are found
    if not mdocs:
        return

    # collect the ids up front: the cross-check below strips 'wmaid'
    # from the documents in place
    mids = [d['wmaid'] for d in mdocs]

    # store data to HDFS
    wmaid = hstg.write(mdocs)

    # read data back from HDFS; materialize it so it can be counted
    hdocs = list(hstg.read(wmaid))

    # zip() stops at the shorter input, so a short read from HDFS would
    # otherwise pass the cross-check with documents silently missing
    if len(hdocs) != len(mdocs):
        print("ERROR", "document count mismatch", len(mdocs), len(hdocs))
        sys.exit(1)

    # now we can compare MongoDB docs with HDFS docs, a la cross-check
    for mdoc, hdoc in zip(mdocs, hdocs):
        # drop WMArchive keys, which are expected to differ between storages
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    mstg.update(mids, {'$set': {'stype': hstg.stype}})
def migrate(muri, huri):
    """Migrate data from MongoDB (muri) to HDFS (huri).

    Documents whose 'stype' equals the MongoDB storage type are read from
    MongoDB, written to HDFS, read back and cross-checked against the
    originals. Only when every document matches is the 'stype' of the
    migrated MongoDB documents set to the HDFS storage type.

    Args:
        muri: URI handed to MongoStorage.
        huri: URI handed to HdfsStorage.

    Returns:
        None. The process is terminated with sys.exit(1) when the
        cross-check fails, in which case MongoDB is left untouched.
    """
    mstg = MongoStorage(muri)
    hstg = HdfsStorage(huri)

    # read data from MongoDB
    mdocs = mstg.read({'stype': mstg.stype})

    # do nothing if no documents are found
    if not mdocs:
        return

    # collect the ids up front: the cross-check below strips 'wmaid'
    # from the documents in place
    mids = [d['wmaid'] for d in mdocs]

    # store data to HDFS
    wmaid = hstg.write(mdocs)

    # read data back from HDFS; materialize it so it can be counted
    hdocs = list(hstg.read(wmaid))

    # zip() stops at the shorter input, so a short read from HDFS would
    # otherwise pass the cross-check with documents silently missing
    if len(hdocs) != len(mdocs):
        print("ERROR", "document count mismatch", len(mdocs), len(hdocs))
        sys.exit(1)

    # now we can compare MongoDB docs with HDFS docs, a la cross-check
    for mdoc, hdoc in zip(mdocs, hdocs):
        # drop WMArchive keys, which are expected to differ between storages
        for key in ('stype', 'wmaid'):
            mdoc.pop(key, None)
            hdoc.pop(key, None)
        if mdoc != hdoc:
            print("ERROR", mdoc, hdoc)
            sys.exit(1)

    # update status attributes of docs in MongoDB
    mstg.update(mids, {'$set': {'stype': hstg.stype}})
def write(fin, huri):
    """Write given file into HDFS.

    Args:
        fin: name of the local file whose content is written.
        huri: URI handed to HdfsStorage; the part after the first ':'
            (or the whole string when it contains no ':') is used as the
            destination path.
    """
    hstg = HdfsStorage(huri)
    # read through a context manager so the file handle is always closed
    with open(fin) as istream:
        data = istream.read()
    path = huri.split(':', 1)[-1]
    hstg.dump(data, path)