Example #1
0
    def setUp(self):
        """Generate the design candidates used by the tests in this case.

        Runs TPCCTestCase.setUp, builds a Designer from a RawConfigParser
        that has been passed through configutil.setDefaultValues, and stores
        the result of generateDesignCandidates() in self.dc.

        The fixture fails if no candidates object is returned, or if any
        candidate index key starts with constants.REPLACE_KEY_DOLLAR_PREFIX.
        """
        TPCCTestCase.setUp(self)

        config = RawConfigParser()
        configutil.setDefaultValues(config)

        self.designer = Designer(config, self.metadata_db, self.dataset_db)
        self.dc = self.designer.generateDesignCandidates(self.collections, self.workload)
        self.assertIsNotNone(self.dc)

        # Make sure that we don't have any invalid candidate keys.
        # A TestCase assertion is used instead of a bare `assert` so the check
        # still runs under `python -O` and is reported like the check above.
        for col_name in self.collections.iterkeys():
            for index_keys in self.dc.indexKeys[col_name]:
                for key in index_keys:
                    self.assertFalse(
                        key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX),
                        "Unexpected candidate key '%s.%s'" % (col_name, key))
Example #2
0
## ==============================================
## main
## ==============================================
if __name__ == '__main__':
    # Command-line entry point: import every *.json dump found in the input
    # directory into MongoDB via `mongoimport`, one collection per file. The
    # MongoDB host and target database come from the configuration file.
    # NOTE(review): the description/help strings still say "CSV", but the
    # loader globs for *.json and runs mongoimport with --type json -- confirm
    # the intended wording before changing the user-facing text.
    aparser = argparse.ArgumentParser(description="CSV File Loader")
    aparser.add_argument('input', help='CSV Input Data Dump Directory')
    aparser.add_argument('--config', type=file, help='Path to %s configuration file' % constants.PROJECT_NAME)
    aparser.add_argument('--debug', action='store_true', help='Enable debug log messages.')
    args = vars(aparser.parse_args())
    if args['debug']:
        LOG.setLevel(logging.DEBUG)

    if not args['config']:
        LOG.error("Missing configuration file")
        print("")
        aparser.print_usage()
        sys.exit(1)

    # argparse has already opened the file (type=file), but only its path is
    # needed below, so release the handle instead of leaking it.
    config_path = os.path.realpath(args['config'].name)
    args['config'].close()
    LOG.debug("Loading configuration file '%s'", config_path)

    config = RawConfigParser()
    configutil.setDefaultValues(config)
    config.read(config_path)

    db_host = config.get(configutil.SECT_MONGODB, 'host')
    db_name = config.get(configutil.SECT_MONGODB, 'dataset_db')
    for data_file in glob.glob(os.path.join(args["input"], "*.json")):
        # The collection is named after the file without its extension. The
        # glob only matches *.json, so strip that suffix (not ".csv").
        collection = os.path.splitext(os.path.basename(data_file))[0]

        # Pass an argument list with no shell, so paths or names containing
        # spaces or shell metacharacters reach mongoimport unmodified.
        cmd = [
            "mongoimport",
            "--host=%s" % db_host,
            "--db", db_name,
            "--collection", collection,
            "--file", data_file,
            "--type", "json",
        ]
        subprocess.check_call(cmd)
        LOG.info("Loaded %s.%s", db_name, collection)
    ## FOR
## IF