def main():
    """Compile, upload, and register the Impala UDF shared objects.

    Usage: %prog DATABASE

    Optionally rebuilds the lib*.so files (--make), pushes them to a
    hard-coded HDFS location under /user/cloudera (--put), and registers
    the UDFs with Impala in the given database unless --noact is set.
    """
    parser = optparse.OptionParser('usage: %prog DATABASE')
    parser.add_option("-m", "--make", action="store_true", dest="make",
                      default=False,
                      help="Remake the shared object files (calls make).")
    # fixed help typo: "Put the the" -> "Put the"
    parser.add_option("-p", "--put", action="store_true", dest="put",
                      default=False,
                      help="Put the shared objects into HDFS")
    parser.add_option("-n", "--noact", action="store_true", dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.print_usage()
        return
    # compile the lib*.so files
    if options.make:
        doit("make -B all")
    # put them into HDFS so impala can load them
    if options.put:
        for lb, tar in libs:
            # the rm may fail if the file does not exist yet; that's fine
            doit('hadoop fs -rm /user/cloudera/%s' % tar, mayfail=True)
            doit('hadoop fs -mkdir -p /user/cloudera')
            doit('hadoop fs -put %s /user/cloudera/%s' % (lb, tar))
    # register the functions with impala
    for q in queries:
        print(q)
    if not options.noact:
        iutil.impala_shell_exec(queries, args[0])
def main():
    """Generate the queries that build a synthetic data table and
    (optionally) run them over Impala.

    Usage: %prog DATABASE OUTPUT_TABLE_NAME
    """
    # fixed usage-string typo: OUTPUT_TALBE_NAME -> OUTPUT_TABLE_NAME
    parser = optparse.OptionParser('usage: %prog DATABASE OUTPUT_TABLE_NAME')
    # fixed metavar typo: EXAMPPLES -> EXAMPLES
    parser.add_option("-r", "--nrows", dest="n", default=10, type=int,
                      help="number of examples (rows) to create",
                      metavar="EXAMPLES")
    parser.add_option("-c", "--mcols", dest="m", default=3, type=int,
                      help="number of features (columns) to create",
                      metavar="FEATURES")
    parser.add_option("-n", "--noact", action="store_true", dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_usage()
        return
    n = options.n
    m = options.m
    name = args[1]
    db = args[0]
    # sol is returned alongside the queries but only the queries are used here
    sol, qrs = generate_all_queries(name, m, n)
    for q in qrs:
        print(q)
    if not options.noact:
        iutil.impala_shell_exec(qrs, database=db)
def main():
    """Compile, upload, and register the Impala UDF shared objects.

    Usage: %prog DATABASE

    Like the hard-coded variant, but the HDFS destination directory is
    configurable via --path; the path is substituted into any query that
    contains a %s placeholder before execution.
    """
    parser = optparse.OptionParser('usage: %prog DATABASE')
    parser.add_option("-m", "--make", action="store_true", dest="make",
                      default=False,
                      help="Remake the shared object files (calls make).")
    # fixed help typo: "Put the the" -> "Put the"
    parser.add_option("-p", "--put", action="store_true", dest="put",
                      default=False,
                      help="Put the shared objects into HDFS")
    parser.add_option("-n", "--noact", action="store_true", dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    parser.add_option("-o", "--path", default='/user/cloudera/lib',
                      help="abs path (dir) on HDFS to put the shared objects")
    (options, args) = parser.parse_args()
    if len(args) < 1:
        parser.print_usage()
        return
    # compile the lib*.so files
    if options.make:
        doit("make -B all")
    # put them into HDFS so impala can load them
    if options.put:
        for lb, tar in libs:
            # the rm may fail if the file does not exist yet; that's fine
            doit('hadoop fs -rm %s' % os.path.join(options.path, tar),
                 mayfail=True)
            doit('hadoop fs -mkdir -p %s' % options.path)
            doit('hadoop fs -put %s %s' % (lb, os.path.join(options.path, tar)))
    # register the functions with impala; queries with a %s placeholder get
    # the HDFS path bound in, the rest are passed through unchanged
    bound_queries = []
    for q in queries:
        try:
            bound_query = q % options.path
            bound_queries.append(bound_query)
            print(bound_query)
        except TypeError:
            # no placeholder in this query
            bound_queries.append(q)
            print(q)
    if not options.noact:
        iutil.impala_shell_exec(bound_queries, args[0])
def train_svm(mod_table=None, dat_table=None, label=None, arr=None,
              step=None, mu=None, decay=None, epochs=None, database=None,
              noact=False):
    """Build the SGD query sequence that trains an SVM and optionally run it.

    Creates the model table, then appends one epoch query per epoch with a
    step size that is multiplied by `decay` after every epoch.  All queries
    are printed; they are executed over Impala unless `noact` is true.
    """
    plan = [iutil.make_model_table(mod_table)]
    lr = step
    for epoch in xrange(1, epochs + 1):
        plan.append(
            svm_epoch(mod_table, dat_table, label, arr, epoch, step=lr, mu=mu))
        lr = lr * decay
    # loss computation is currently disabled:
    # qry.append(svm_loss(mod_table, dat_table, label, arr, epoch=epochs))
    for q in plan:
        print(q)
    if not noact:
        iutil.impala_shell_exec(plan, database=database)
def train_svm(mod_table=None, dat_table=None, label=None, arr=None,
              step=None, mu=None, decay=None, epochs=None, database=None,
              noact=False):
    """Assemble and optionally execute the SVM SGD training queries.

    One model-table creation query followed by `epochs` epoch queries; the
    step size decays geometrically by `decay` between epochs.  Queries are
    always printed, and submitted to Impala unless `noact` is set.
    """
    qry = [iutil.make_model_table(mod_table)]
    epoch = 1
    while epoch <= epochs:
        qry.append(svm_epoch(mod_table, dat_table, label, arr, epoch,
                             step=step, mu=mu))
        step = step * decay
        epoch += 1
    # loss computation is currently disabled:
    # qry.append(svm_loss(mod_table, dat_table, label, arr, epoch=epochs))
    for q in qry:
        print(q)
    if not noact:
        iutil.impala_shell_exec(qry, database=database)
def main():
    """Train a logistic regression model over an Impala table via SGD.

    Usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]

    Requires --db and --table; builds one query per epoch with a decaying
    step size and submits them to Impala unless --noact is given.
    """
    # argument parsing
    parser = optparse.OptionParser(
        'usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]')
    parser.add_option("-b", "--db", dest="database", default=None,
                      help="the database which holds data table",
                      metavar="DB")
    parser.add_option("-t", "--table", dest="table", default=None,
                      help="data table to iterate over", metavar="TABLE")
    # fixed help typos: "story iteratoin" -> "store iteration"
    parser.add_option(
        "-y", "--history", dest="history", default='history',
        help="name of table to store iteration history (default history)",
        metavar="HIST")
    parser.add_option("-n", "--noact", action="store_true", dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    parser.add_option("-s", "--step", dest="step", default=0.1, type="float",
                      help="step size for SGD (default 0.1)")
    parser.add_option("-d", "--decay", dest="decay", default=0.95,
                      type="float",
                      help="step size decay (default 0.95)")
    # fixed help typo: "defualt" -> "default"
    parser.add_option("-u", "--mu", dest="mu", default=0, type="float",
                      help="regularizer weight (default 0)")
    parser.add_option("-e", "--epochs", dest="epochs", default=1, type="int",
                      help="number of epochs to run (default 1)")
    (options, args) = parser.parse_args()
    # we need at least 1 label and 1 feature to train
    if len(args) < 2:
        parser.print_usage()
        return
    if options.database is None:
        print('use --db to specify a database to use.')
        return
    if options.table is None:
        print('use --table to specify the data table.')
        return
    # list of queries we will submit to impala
    qry = []
    mod_table = options.history
    dat_table = options.table
    step = options.step
    mu = options.mu
    label = args[0]
    # the query which creates an array out of the feature columns
    arr = 'toarray(%s)' % (', '.join(
        map(lambda f: '%s.%s' % (dat_table, f), args[1:])))
    qry.append(iutil.make_model_table(mod_table))
    # create all the queries, decaying the step size after each epoch
    for i in xrange(1, options.epochs + 1):
        qry.append(
            logr_epoch(mod_table, dat_table, label, arr, i, step=step, mu=mu))
        step = step * options.decay
    # submit a query to compute the loss (currently disabled)
    # qry.append(logr_loss(mod_table, dat_table, label, arr, epoch=options.epochs))
    # print the queries for reference
    for q in qry:
        print(q)
    # submit the queries to impala
    if not options.noact:
        iutil.impala_shell_exec(qry, database=options.database)
def main():
    """Train a logistic regression model over an Impala table via SGD.

    Usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]

    Requires --db and --table; one epoch query is generated per epoch with
    a geometrically decaying step size, and the queries are executed over
    Impala unless --noact is given.
    """
    # argument parsing
    parser = optparse.OptionParser(
        'usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]')
    parser.add_option("-b", "--db", dest="database", default=None,
                      help="the database which holds data table",
                      metavar="DB")
    parser.add_option("-t", "--table", dest="table", default=None,
                      help="data table to iterate over", metavar="TABLE")
    # fixed help typos: "story iteratoin" -> "store iteration"
    parser.add_option("-y", "--history", dest="history", default='history',
                      help="name of table to store iteration history "
                           "(default history)",
                      metavar="HIST")
    parser.add_option("-n", "--noact", action="store_true", dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    parser.add_option("-s", "--step", dest="step", default=0.1, type="float",
                      help="step size for SGD (default 0.1)")
    parser.add_option("-d", "--decay", dest="decay", default=0.95,
                      type="float",
                      help="step size decay (default 0.95)")
    # fixed help typo: "defualt" -> "default"
    parser.add_option("-u", "--mu", dest="mu", default=0, type="float",
                      help="regularizer weight (default 0)")
    parser.add_option("-e", "--epochs", dest="epochs", default=1, type="int",
                      help="number of epochs to run (default 1)")
    (options, args) = parser.parse_args()
    # we need at least 1 label and 1 feature to train
    if len(args) < 2:
        parser.print_usage()
        return
    if options.database is None:
        print('use --db to specify a database to use.')
        return
    if options.table is None:
        print('use --table to specify the data table.')
        return
    # list of queries we will submit to impala
    qry = []
    mod_table = options.history
    dat_table = options.table
    step = options.step
    mu = options.mu
    label = args[0]
    # the query which creates an array out of the feature columns
    arr = 'toarray(%s)' % (', '.join(
        map(lambda f: '%s.%s' % (dat_table, f), args[1:])))
    qry.append(iutil.make_model_table(mod_table))
    # create all the queries, decaying the step size after each epoch
    for i in xrange(1, options.epochs + 1):
        qry.append(logr_epoch(mod_table, dat_table, label, arr, i,
                              step=step, mu=mu))
        step = step * options.decay
    # submit a query to compute the loss (currently disabled)
    # qry.append(logr_loss(mod_table, dat_table, label, arr, epoch=options.epochs))
    # print the queries for reference
    for q in qry:
        print(q)
    # submit the queries to impala
    if not options.noact:
        iutil.impala_shell_exec(qry, database=options.database)