Beispiel #1
0
def main():
  """Build, upload and register the shared objects for one database.

  Command line: ``%prog [-m] [-p] [-n] DATABASE``.

  -m/--make   pass ``make -B all`` to ``doit``.
  -p/--put    copy every ``(local, target)`` pair of the module-level
              ``libs`` to ``/user/cloudera/<target>`` with ``hadoop fs``.
  -n/--noact  only print the module-level ``queries``; do not hand them to
              ``iutil.impala_shell_exec``.

  Prints the usage line and returns without doing anything else when the
  DATABASE argument is missing.
  """
  parser = optparse.OptionParser('usage: %prog DATABASE')

  parser.add_option("-m", "--make",
                    action="store_true", dest="make", default=False,
                    help="Remake the shared object files (calls make).")
  parser.add_option("-p", "--put",
                    action="store_true", dest="put", default=False,
                    help="Put the the shared objects into HDFS")
  parser.add_option("-n", "--noact",
                    action="store_true", dest="noact", default=False,
                    help="just print queries, don't execute over impala")

  (options, args) = parser.parse_args()

  if len(args) < 1:
    parser.print_usage()
    return

  # compile the lib*.so files
  if options.make:
    doit("make -B all")

  # put them into HDFS so impala can load them
  if options.put:
    # The target directory is identical for every library, so it is created
    # once up front instead of once per uploaded file.
    doit('hadoop fs -mkdir -p /user/cloudera')
    for lb, tar in libs:
      # A stale copy may or may not exist, hence mayfail=True on the removal.
      doit('hadoop fs -rm /user/cloudera/%s' % tar, mayfail=True)
      doit('hadoop fs -put %s /user/cloudera/%s' % (lb, tar))

  # register the functions with impala
  for q in queries:
    print(q)
  if not options.noact:
    iutil.impala_shell_exec(queries, args[0])
def main():
  """Print the generated queries and, unless --noact, run them on impala.

  Command line: ``%prog [-r ROWS] [-c COLS] [-n] DATABASE TABLE_NAME``.

  Exactly two positional arguments are required; otherwise the usage line
  is printed and the function returns.  The queries come from
  ``generate_all_queries(TABLE_NAME, COLS, ROWS)``, are printed one by one
  and are then passed to ``iutil.impala_shell_exec`` for DATABASE.
  """
  parser = optparse.OptionParser('usage: %prog DATABASE OUTPUT_TALBE_NAME')
  parser.add_option(
      "-r", "--nrows",
      dest="n", type=int, default=10, metavar="EXAMPPLES",
      help="number of examples (rows) to create")
  parser.add_option(
      "-c", "--mcols",
      dest="m", type=int, default=3, metavar="FEATURES",
      help="number of features (columns) to create")
  parser.add_option(
      "-n", "--noact",
      dest="noact", action="store_true", default=False,
      help="just print queries, don't execute over impala")

  options, args = parser.parse_args()
  if len(args) != 2:
    parser.print_usage()
    return

  db, name = args

  # generate_all_queries returns a pair; only its second element (the
  # queries) is used here.
  _sol, qrs = generate_all_queries(name, options.m, options.n)

  for query in qrs:
    print(query)

  if options.noact:
    return
  iutil.impala_shell_exec(qrs, database=db)
Beispiel #3
0
def main():
    """Build, upload and register the shared objects for one database.

    Command line: ``%prog [-m] [-p] [-n] [-o PATH] DATABASE``.

    -m/--make   pass ``make -B all`` to ``doit``.
    -p/--put    copy every ``(local, target)`` pair of the module-level
                ``libs`` to ``PATH/<target>`` with ``hadoop fs``.
    -n/--noact  only print the queries; do not hand them to
                ``iutil.impala_shell_exec``.
    -o/--path   HDFS directory for the shared objects
                (default ``/user/cloudera/lib``).

    Each entry of the module-level ``queries`` is %-formatted with PATH;
    entries for which that raises TypeError (for example because they
    contain no placeholder) are used verbatim.

    Prints the usage line and returns without doing anything else when the
    DATABASE argument is missing.
    """
    # HDFS paths are always '/'-separated, whatever os.path does locally.
    import posixpath

    parser = optparse.OptionParser('usage: %prog DATABASE')

    parser.add_option("-m",
                      "--make",
                      action="store_true",
                      dest="make",
                      default=False,
                      help="Remake the shared object files (calls make).")
    parser.add_option("-p",
                      "--put",
                      action="store_true",
                      dest="put",
                      default=False,
                      help="Put the the shared objects into HDFS")
    parser.add_option("-n",
                      "--noact",
                      action="store_true",
                      dest="noact",
                      default=False,
                      help="just print queries, don't execute over impala")
    parser.add_option("-o",
                      "--path",
                      default='/user/cloudera/lib',
                      help="abs path (dir) on HDFS to put the shared objects")

    (options, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_usage()
        return

    # compile the lib*.so files
    if options.make:
        doit("make -B all")

    # put them into HDFS so impala can load them
    if options.put:
        # The directory is the same for every library: create it once
        # instead of once per uploaded file.
        doit('hadoop fs -mkdir -p %s' % options.path)
        for lb, tar in libs:
            target = posixpath.join(options.path, tar)
            # A stale copy may or may not exist, hence mayfail=True.
            doit('hadoop fs -rm %s' % target, mayfail=True)
            doit('hadoop fs -put %s %s' % (lb, target))

    # register the functions with impala
    bound_queries = []
    for q in queries:
        # Only the substitution itself is guarded, so a TypeError raised
        # anywhere else is not mistaken for "query has no placeholder".
        try:
            bound_query = q % options.path
        except TypeError:
            bound_query = q
        bound_queries.append(bound_query)
        print(bound_query)
    if not options.noact:
        iutil.impala_shell_exec(bound_queries, args[0])
Beispiel #4
0
def train_svm(mod_table=None, dat_table=None, label=None, arr=None,
              step=None, mu=None, decay=None, epochs=None, database=None,
              noact=False):
  """Print and, unless ``noact`` is set, execute the SVM training queries.

  The query list starts with ``iutil.make_model_table(mod_table)`` and is
  followed by one ``svm_epoch(...)`` query per epoch number 1..``epochs``.
  The first epoch receives ``step`` unchanged; each later epoch receives
  the previous epoch's step multiplied by ``decay``.

  ``step``, ``decay`` and ``epochs`` default to None but take part in
  arithmetic, so callers have to supply them.

  Every query is printed.  Unless ``noact`` is true the list is then passed
  to ``iutil.impala_shell_exec`` together with ``database``.
  """
  statements = [iutil.make_model_table(mod_table)]

  current_step = step
  for epoch in xrange(1, epochs + 1):
    statements.append(
        svm_epoch(mod_table, dat_table, label, arr, epoch,
                  step=current_step, mu=mu))
    current_step = current_step * decay
  # Disabled: a trailing svm_loss(mod_table, dat_table, label, arr,
  # epoch=epochs) query is not appended.

  for statement in statements:
    print(statement)

  if noact:
    return
  iutil.impala_shell_exec(statements, database=database)
Beispiel #5
0
def main():
  """Build, upload and register the shared objects for one database.

  Command line: ``%prog [-m] [-p] [-n] [-o PATH] DATABASE``.

  -m/--make   pass ``make -B all`` to ``doit``.
  -p/--put    copy every ``(local, target)`` pair of the module-level
              ``libs`` to ``PATH/<target>`` with ``hadoop fs``.
  -n/--noact  only print the queries; do not hand them to
              ``iutil.impala_shell_exec``.
  -o/--path   HDFS directory for the shared objects
              (default ``/user/cloudera/lib``).

  Each entry of the module-level ``queries`` is %-formatted with PATH;
  entries for which that raises TypeError (for example because they contain
  no placeholder) are used verbatim.

  Prints the usage line and returns without doing anything else when the
  DATABASE argument is missing.
  """
  # HDFS paths are always '/'-separated, whatever os.path does locally.
  import posixpath

  parser = optparse.OptionParser('usage: %prog DATABASE')

  parser.add_option("-m", "--make",
                    action="store_true", dest="make", default=False,
                    help="Remake the shared object files (calls make).")
  parser.add_option("-p", "--put",
                    action="store_true", dest="put", default=False,
                    help="Put the the shared objects into HDFS")
  parser.add_option("-n", "--noact",
                    action="store_true", dest="noact", default=False,
                    help="just print queries, don't execute over impala")
  parser.add_option("-o", "--path", default='/user/cloudera/lib',
                    help="abs path (dir) on HDFS to put the shared objects")

  (options, args) = parser.parse_args()

  if len(args) < 1:
    parser.print_usage()
    return

  # compile the lib*.so files
  if options.make:
    doit("make -B all")

  # put them into HDFS so impala can load them
  if options.put:
    # The directory is the same for every library: create it once instead
    # of once per uploaded file.
    doit('hadoop fs -mkdir -p %s' % options.path)
    for lb, tar in libs:
      target = posixpath.join(options.path, tar)
      # A stale copy may or may not exist, hence mayfail=True.
      doit('hadoop fs -rm %s' % target, mayfail=True)
      doit('hadoop fs -put %s %s' % (lb, target))

  # register the functions with impala
  bound_queries = []
  for q in queries:
    # Only the substitution itself is guarded, so a TypeError raised
    # anywhere else is not mistaken for "query has no placeholder".
    try:
      bound_query = q % options.path
    except TypeError:
      bound_query = q
    bound_queries.append(bound_query)
    print(bound_query)
  if not options.noact:
    iutil.impala_shell_exec(bound_queries, args[0])
Beispiel #6
0
def train_svm(mod_table=None,
              dat_table=None,
              label=None,
              arr=None,
              step=None,
              mu=None,
              decay=None,
              epochs=None,
              database=None,
              noact=False):
    """Print and, unless ``noact`` is set, execute the SVM training queries.

    The query list starts with ``iutil.make_model_table(mod_table)`` and is
    followed by one ``svm_epoch(...)`` query per epoch number 1..``epochs``.
    The first epoch receives ``step`` unchanged; each later epoch receives
    the previous epoch's step multiplied by ``decay``.

    ``step``, ``decay`` and ``epochs`` default to None but take part in
    arithmetic, so callers have to supply them.

    Every query is printed.  Unless ``noact`` is true the list is then
    passed to ``iutil.impala_shell_exec`` together with ``database``.
    """
    statements = [iutil.make_model_table(mod_table)]

    current_step = step
    for epoch in xrange(1, epochs + 1):
        epoch_query = svm_epoch(mod_table, dat_table, label, arr, epoch,
                                step=current_step, mu=mu)
        statements.append(epoch_query)
        current_step = current_step * decay
    # Disabled: a trailing svm_loss(mod_table, dat_table, label, arr,
    # epoch=epochs) query is not appended.

    for statement in statements:
        print(statement)

    if noact:
        return
    iutil.impala_shell_exec(statements, database=database)
Beispiel #7
0
def main():
    """Build the per-epoch ``logr_epoch`` queries, print them, maybe run them.

    Command line:
    ``%prog --db DB --table TABLE [options] LABEL_COL FEATURE_COL [...]``.

    Returns early (after printing the usage line or a hint) when fewer than
    two positional arguments are given or when --db / --table is missing.

    Otherwise the query list consists of
    ``iutil.make_model_table(<history table>)`` followed by one
    ``logr_epoch(...)`` query for each epoch 1..--epochs, with the step
    multiplied by --decay after every epoch.  All queries are printed and,
    unless --noact is given, passed to ``iutil.impala_shell_exec``.
    """
    # ---- command line ----------------------------------------------------
    parser = optparse.OptionParser(
        'usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]')
    parser.add_option("-b", "--db", dest="database", default=None,
                      metavar="DB",
                      help="the database which holds data table")
    parser.add_option("-t", "--table", dest="table", default=None,
                      metavar="TABLE",
                      help="data table to iterate over")
    parser.add_option(
        "-y", "--history", dest="history", default='history',
        metavar="HIST",
        help="name of table to story iteratoin history (default history)")
    parser.add_option("-n", "--noact", dest="noact", default=False,
                      action="store_true",
                      help="just print queries, don't execute over impala")
    parser.add_option("-s", "--step", dest="step", default=0.1,
                      type="float",
                      help="step size for SGD (default 0.1)")
    parser.add_option("-d", "--decay", dest="decay", default=0.95,
                      type="float",
                      help="step size decay (default 0.95)")
    parser.add_option("-u", "--mu", dest="mu", default=0, type="float",
                      help="regularizer weight (defualt 0)")
    parser.add_option("-e", "--epochs", dest="epochs", default=1,
                      type="int",
                      help="number of epochs to run (default 1)")

    options, args = parser.parse_args()

    # ---- validation ------------------------------------------------------
    # One label column plus at least one feature column are required.
    if len(args) < 2:
        parser.print_usage()
        return

    if options.database is None:
        print('use --db to specify a database to use.')
        return
    if options.table is None:
        print('use --table to specify the data table.')
        return

    # ---- query construction ----------------------------------------------
    mod_table = options.history
    dat_table = options.table
    label = args[0]
    feature_columns = args[1:]

    # toarray(<table>.<col>, ...) expression over all feature columns.
    qualified = ', '.join('%s.%s' % (dat_table, col)
                          for col in feature_columns)
    arr = 'toarray(%s)' % qualified

    statements = [iutil.make_model_table(mod_table)]

    current_step = options.step
    for epoch in xrange(1, options.epochs + 1):
        statements.append(
            logr_epoch(mod_table, dat_table, label, arr, epoch,
                       step=current_step, mu=options.mu))
        current_step = current_step * options.decay

    # Disabled: a trailing logr_loss(mod_table, dat_table, label, arr,
    # epoch=options.epochs) query is not appended.

    # ---- output / execution ----------------------------------------------
    for statement in statements:
        print(statement)

    if options.noact:
        return
    iutil.impala_shell_exec(statements, database=options.database)
Beispiel #8
0
def main():
  """Build the per-epoch ``logr_epoch`` queries, print them, maybe run them.

  Command line:
  ``%prog --db DB --table TABLE [options] LABEL_COL FEATURE_COL [...]``.

  Returns early (after printing the usage line or a hint) when fewer than
  two positional arguments are given or when --db / --table is missing.

  Otherwise the query list consists of
  ``iutil.make_model_table(<history table>)`` followed by one
  ``logr_epoch(...)`` query for each epoch 1..--epochs, with the step
  multiplied by --decay after every epoch.  All queries are printed and,
  unless --noact is given, passed to ``iutil.impala_shell_exec``.
  """
  parser = optparse.OptionParser(
      'usage: %prog LABEL_COL FEATURE_COL [FEATURE_COL ...]')
  parser.add_option(
      "-b", "--db",
      dest="database",
      default=None,
      help="the database which holds data table",
      metavar="DB")
  parser.add_option(
      "-t", "--table",
      dest="table",
      default=None,
      help="data table to iterate over",
      metavar="TABLE")
  parser.add_option(
      "-y", "--history",
      dest="history",
      default='history',
      help="name of table to story iteratoin history (default history)",
      metavar="HIST")
  parser.add_option(
      "-n", "--noact",
      action="store_true",
      dest="noact",
      default=False,
      help="just print queries, don't execute over impala")
  parser.add_option(
      "-s", "--step",
      dest="step",
      default=0.1,
      type="float",
      help="step size for SGD (default 0.1)")
  parser.add_option(
      "-d", "--decay",
      dest="decay",
      default=0.95,
      type="float",
      help="step size decay (default 0.95)")
  parser.add_option(
      "-u", "--mu",
      dest="mu",
      default=0,
      type="float",
      help="regularizer weight (defualt 0)")
  parser.add_option(
      "-e", "--epochs",
      dest="epochs",
      default=1,
      type="int",
      help="number of epochs to run (default 1)")

  options, args = parser.parse_args()

  # One label column plus at least one feature column are required.
  if len(args) < 2:
    parser.print_usage()
    return

  if options.database is None:
    print('use --db to specify a database to use.')
    return
  if options.table is None:
    print('use --table to specify the data table.')
    return

  history_table = options.history
  data_table = options.table
  label = args[0]

  # toarray(<table>.<col>, ...) expression over all feature columns.
  column_refs = ['%s.%s' % (data_table, col) for col in args[1:]]
  arr = 'toarray(%s)' % ', '.join(column_refs)

  # The model table query comes first, then one query per epoch.
  pending = [iutil.make_model_table(history_table)]

  step = options.step
  for epoch in xrange(1, options.epochs + 1):
    pending.append(
        logr_epoch(history_table, data_table, label, arr, epoch,
                   step=step, mu=options.mu))
    step = step * options.decay

  # Disabled: a trailing logr_loss(mod_table, dat_table, label, arr,
  # epoch=options.epochs) query is not appended.

  # Echo the queries for reference before (optionally) running them.
  for query in pending:
    print(query)

  if options.noact:
    return
  iutil.impala_shell_exec(pending, database=options.database)