Example 1
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--input_dir', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--overwrite', action='store_true')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  for i in range(len(tickers)):
    ticker = tickers[i]
    if ticker in SKIPPED_TICKERS:
      logging.warning('%d/%d: skipped %s' % (i+1, len(tickers), ticker))
      continue
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))

    input_path = '%s/%s.csv' % (args.input_dir, ticker)
    output_path = '%s/%s.csv' % (args.output_dir, ticker)
    if not path.isfile(input_path):
      logging.warning('Input file does not exist: %s' % input_path)
      continue
    if path.isfile(output_path) and not args.overwrite:
      logging.warning('Output file exists and not overwritable: %s'
          % output_path)
      continue
    parse(input_path, output_path)
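
Most of the command-line scripts on this page call a project-specific utils.setup_logging(args.verbose) whose implementation is not shown here. As a rough sketch only (the real helper in these projects may differ), a verbosity-flag variant usually just configures the root logger, so that the later logging.info/logging.warning calls honor the flag:

import logging

def setup_logging(verbose=False):
  # Hypothetical sketch: map a --verbose flag onto the root logger level.
  level = logging.DEBUG if verbose else logging.INFO
  logging.basicConfig(
      format='%(asctime)s %(levelname)s %(message)s', level=level)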
Example 2
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--input_dir', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--overwrite', action='store_true')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  # Sanity check.
  assert args.input_dir != args.output_dir

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
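    # Tickers containing '^' (e.g. index tickers like ^GSPC) are stored with '_' in their file names.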
    input_path = '%s/%s.csv' % (args.input_dir, ticker.replace('^', '_'))
    if not path.isfile(input_path):
      logging.warning('Input file is missing: %s' % input_path)
      continue
    output_path = '%s/%s.csv' % (args.output_dir, ticker.replace('^', '_'))
    if path.isfile(output_path) and not args.overwrite:
      logging.warning('Output file exists and not overwritable: %s'
                      % output_path)
      continue
    sample(input_path, output_path)
Example 3
def main():
    _mkdirs(SRCDIR, INSTALLDIR)
    setup_logging()
    fetch_and_build()
    for db in ('sqlite3', 'mysql'):
        shell('rm -rf {}/*'.format(INSTALLDIR))
        setup_and_test(db)
Example 4
    def __init__(self, name, port, pin, scale_factor, zero_point):
        logger = logging.getLogger('log')
        setup_logging(name)

        try:
            import RPi.GPIO as GPIO
        except ImportError:
            logger.critical('[Servo Socket]: GPIO not configured properly!')
            sys.exit(1)

        self.port = port
        self.pin = pin
        self.scale_factor = scale_factor
        self.zero_point = zero_point

        # Configure the servo
        GPIO.setmode(GPIO.BOARD)
        GPIO.setup(self.pin, GPIO.OUT)

        # Define the socket parameters
        HOST = ''
        PORT = self.port

        connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        connection.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

        # Bind socket to local host and port
        try:
            connection.bind((HOST, PORT))
        except socket.error as msg:
            logger.critical('[Servo Socket]: Bind failed. Error Code: %s Message: %s'
                            % (msg.errno, msg.strerror))
            sys.exit(1)
Example 5
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--input_dir', required=True)
  parser.add_argument('--from_ticker', default='')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    lines = fp.read().splitlines()
  tickers = []
  for line in lines:
    if line >= args.from_ticker:
      tickers.append(line)
  logging.info('Processing %d tickers' % len(tickers))

  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
    input_path = '%s/%s.csv' % (args.input_dir, ticker.replace('^', '_'))
    if not path.isfile(input_path):
      logging.warning('Input file does not exist: %s' % input_path)
      continue
    validate(input_path)
Example 6
def start_tracker():
    """Start the Torrent Tracker.
    """
    # parse commandline options
    parser = OptionParser()
    parser.add_option('-p', '--port', help='Tracker Port', default=0)
    parser.add_option('-b', '--background', action='store_true', default=False,
                      help='Start in background')
    parser.add_option('-d', '--debug', action='store_true', default=False,
                      help='Debug mode')
    (options, args) = parser.parse_args()

    # setup directories
    utils.create_pytt_dirs()
    # setup logging
    utils.setup_logging(options.debug)

    try:
        # start the torrent tracker
        run_app(int(options.port) or utils.get_config().getint('tracker',
                                                               'port'))
    except KeyboardInterrupt:
        logging.info('Tracker Stopped.')
        utils.close_db()
        sys.exit(0)
    except Exception as ex:
        logging.fatal('%s' % str(ex))
        utils.close_db()
        sys.exit(-1)
Example 7
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--total_assets_path', required=True)
  parser.add_argument('--intangible_assets_path', required=True)
  parser.add_argument('--total_liabilities_path', required=True)
  parser.add_argument('--prices_path', required=True)
  parser.add_argument('--outstanding_shares_path', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  ta_map = utils.read_map(args.total_assets_path)
  tl_map = utils.read_map(args.total_liabilities_path)
  p_map = utils.read_map(args.prices_path)
  s_map = utils.read_map(args.outstanding_shares_path)
  tickers = ta_map.keys() & tl_map.keys() & p_map.keys() & s_map.keys()

  # intangible assets are 0 by default
  ia_map = dict()
  for t in tickers:
    ia_map[t] = 0.0
  ia_part = utils.read_map(args.intangible_assets_path)
  for k, v in ia_part.items():
    ia_map[k] = v

  with open(args.output_path, 'w') as fp:
    for ticker in sorted(tickers):
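      # Tangible book value (total assets - intangible assets - total liabilities) per share, relative to price.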
      output = ((ta_map[ticker] - ia_map[ticker] - tl_map[ticker])
                / s_map[ticker] / p_map[ticker])
      print('%s %f' % (ticker, output), file=fp)
Example 8
def run():
    """
    Main loop. Run this TA forever.
    """

    try:
        meta_configs, stanza_configs = conf.parse_modinput_configs(
            sys.stdin.read())
    except Exception as ex:
        _LOGGER.error("Failed to setup config for manager TA: %s", ex.message)
        _LOGGER.error(traceback.format_exc())
        raise

    if not stanza_configs:
        _LOGGER.info("No config, exiting...")
        return 0

    if stanza_configs:
        loglevel = stanza_configs[0].get("loglevel", "INFO")
        _LOGGER.info("Setup logging level=%s", loglevel)
        for log_file in all_logs:
            utils.setup_logging(log_file, loglevel, True)

    ta_manager = tm.TAManager(meta_configs, stanza_configs[0])
    _setup_signal_handler(ta_manager)
    ta_manager.run()
Example 9
    def __init__(self, name):
        logger = logging.getLogger('log')
        setup_logging()

        try:
            import smbus
        except ImportError:
            logger.critical('[Arduino Socket]: SMBUS not configured properly!')
            sys.exit(1)

        arduino_device = None  # Global arduino_device variable
        states = None

        # Define the socket parameters
        HOST = ''
        PORT = 7893

        connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        connection.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

        # Bind socket to local host and port
        try:
            connection.bind((HOST, PORT))
        except socket.error as msg:
            logger.critical('[Arduino Socket]: Bind failed. Error Code: %s Message: %s'
                            % (msg.errno, msg.strerror))
            sys.exit(1)
Example 10
def main():
    _mkdirs(SRCDIR, INSTALLDIR)
    setup_logging()
    fetch_and_build()
    for db in ('sqlite3', 'mysql'):
        if db == 'mysql':
            shell('mysqladmin -u root password %s' % MYSQL_ROOT_PASSWD)
        for i in ('prompt', 'auto'):
            shell('rm -rf {}/*'.format(INSTALLDIR))
            setup_and_test(db, i)
Example 11
def setup():
    global copied, uploaded, last_scanned, warnings
    copied = open_shelf("copied.db")
    uploaded = open_shelf("uploaded.db")
    last_scanned = []

    log_path = os.path.join(PROJECT_PATH, "smugsync.log")
    utils.setup_logging(log_path)
    warnings = StringIO.StringIO()
    handler = logging.StreamHandler(warnings)
    handler.setLevel(logging.WARNING)
    logging.getLogger("").addHandler(handler)
Example 12
def start():
    """ Запуск планировщика """
    setup_logging(logging.DEBUG if settings.DEBUG is True else logging.INFO)
    queue = Queue()

    # Start scheduler subprocess
    Process(target=scheduler_process, args=(queue, os.getpid())).start()

    
    # To support Ctrl+C in debug mode
    if not settings.DEBUG:
        Thread(target=amqp_thread, args=(queue, )).start()
    else:
        Process(target=amqp_thread, args=(queue, )).start()
Example 13
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--price_dir', required=True)
  parser.add_argument('--yyyy_mm', required=True)
  parser.add_argument('--k', default='12')
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  k = int(args.k)
  assert k > 0

  volume_map = dict()
  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
    input_path = '%s/%s.csv' % (args.price_dir, ticker.replace('^', '_'))
    if not path.isfile(input_path):
      logging.warning('Input file is missing: %s' % input_path)
      continue

    with open(input_path, 'r') as fp:
      lines = fp.read().splitlines()
    vmap = dict()
    assert len(lines) > 0
    for j in range(1, len(lines)):
      d, o, h, l, c, v, a = lines[j].split(',')
      d = d[:7]
      if args.yyyy_mm < d: continue
      if distance(args.yyyy_mm, d) >= k: break
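      # Aggregate dollar volume per month: share volume times adjusted close.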
      v = float(v) * float(a)
      if d in vmap: vmap[d] += v
      else: vmap[d] = v
    assert len(vmap) <= k
    if len(vmap) < k:  #max(1, k/2):
      logging.warning('Could not find enough data for %s' % ticker)
      continue
    volume_map[ticker] = sum(vmap.values()) / len(vmap)

  with open(args.output_path, 'w') as fp:
    for ticker in sorted(volume_map.keys()):
      print('%s %f' % (ticker, volume_map[ticker]), file=fp)
Example 14
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--price_sample_dir', required=True)
  parser.add_argument('--market_sample_path', required=True)
  parser.add_argument('--yyyy_mm', required=True)
  parser.add_argument('--k', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)
  k = int(args.k)
  assert k > 0

  market_samples = read_samples(args.market_sample_path)
  curr_date = args.yyyy_mm
  prev_date = compute_date(curr_date, k)
  logging.info('current date = %s, previous date = %s' % (curr_date, prev_date))
  assert curr_date in market_samples
  assert prev_date in market_samples

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  excess_map = dict()
  for i in range(len(tickers)):
    ticker = tickers[i]
    assert ticker.find('^') == -1  # ^GSPC should not be in tickers.
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
    stock_sample_path = '%s/%s.csv' % (args.price_sample_dir, ticker)
    if not path.isfile(stock_sample_path):
      logging.warning('Input file does not exist: %s' % stock_sample_path)
      continue
    stock_samples = read_samples(stock_sample_path)
    if (curr_date not in stock_samples
        or prev_date not in stock_samples):
      logging.warning('Insufficient data for %s' % ticker)
      continue
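    # Excess return of the stock over the market between prev_date and curr_date.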
    excess = compute_excess(
        stock_samples[prev_date], stock_samples[curr_date],
        market_samples[prev_date], market_samples[curr_date])
    excess_map[ticker] = excess
  with open(args.output_path, 'w') as fp:
    for ticker in sorted(excess_map.keys()):
      print('%s %f' % (ticker, excess_map[ticker]), file=fp)
Example 15
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--from_ticker', default='')
  parser.add_argument('--report_type', required=True)
  parser.add_argument('--input_dir', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  rt = args.report_type
  assert rt in TYPE_MAP, (
      'report_type must be one of %s' % TYPE_MAP.keys())
  (req_map, opt_map, add_map, skip_map) = TYPE_MAP[rt]

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  total, opts, quarterly = 0, 0, 0
  common_keys = None
  for i in range(len(tickers)):
    ticker = tickers[i]
    if ticker < args.from_ticker or ticker in SKIPPED_TICKERS:
      logging.info('%d/%d: skipped %s' % (i+1, len(tickers), ticker))
      continue
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))

    input_path = '%s/%s.csv' % (args.input_dir, ticker)
    if not path.isfile(input_path):
      logging.warning('Input file does not exist: %s' % input_path)
      continue
    keys, has_opt, is_quarterly = validate(
        input_path, ticker, req_map, opt_map, add_map, skip_map)
    if common_keys is None:
      common_keys = keys
    else:
      common_keys &= keys
    if has_opt:
      opts += 1
    if is_quarterly:
      quarterly += 1
    total += 1
  logging.info('%d out of %d have optional metrics' % (opts, total))
  logging.info('%d out of %d are consecutive quarters' % (quarterly, total))
  logging.info('Common keys: %s' % common_keys)
Example 16
    def __init__(self, meta_configs, stanza_configs):
        """
        @meta_configs: a dict like object, implement dict.get/[] like
        interfaces to get the value for a key. meta_configs shall at least
        contain
        {"server_uri": uri, "checkpoint_dir": dir, "session_key": key}
        key/value pairs
        @stanza_configs: a list like object containing a list of dict
        like object. Each element shall implement dict.get/[] like interfaces
        to get the value for a key. Each element in the list shall at least
        contain
        """

        import timer_queue as tq
        import ta_configure_manager as conf_mgr
        import servers
        import ta_conf_client as tcc

        self.meta_configs = meta_configs
        appname = utils.get_appname_from_path(op.abspath(__file__))
        meta_configs["appname"] = appname
        self.wakeup_queue = Queue.Queue()
        self.conf_manager = conf_mgr.TAConfigureManager(meta_configs)
        self.timer_queue = tq.TimerQueue()
        self.pub_server = servers.PubServer(stanza_configs)
        self.rep_server = servers.RepServer(stanza_configs,
                                            self._handle_request)
        self.conf_client = tcc.TAConfClient(stanza_configs["repserver"],
                                            meta_configs["server_uri"],
                                            meta_configs["session_key"])
        self._state_logger = utils.setup_logging("ta_state")
        self._started = False
Example 17
    def __init__(self,
                 take_ownership=True, # Tor dies when the Crawler does
                 torrc_config={"CookieAuth": "1"},
                 tor_log="/var/log/tor/tor.log",
                 tor_cell_log="/var/log/tor/tor_cell_seq.log",
                 control_port=9051,
                 socks_port=9050, 
                 run_in_xvfb=True,
                 tbb_path=join("/opt","tbb","tor-browser_en-US"),
                 tb_log_path=join(_log_dir,"firefox.log"),
                 tb_tor_cfg=USE_RUNNING_TOR,
                 page_load_timeout=20,
                 wait_on_page=5,
                 wait_after_closing_circuits=0,
                 restart_on_sketchy_exception=True,
                 additional_control_fields={},
                 db_handler=None):

        self.logger = setup_logging(_log_dir, "crawler")

        self.torrc_config = torrc_config
        self.socks_port = find_free_port(socks_port, control_port)
        self.torrc_config.update({"SocksPort": str(self.socks_port)})
        self.control_port = find_free_port(control_port, self.socks_port)
        self.torrc_config.update({"ControlPort": str(self.control_port)})
        self.torrc_config.update({"Log": "INFO file {}".format(tor_log)})
        self.logger.info("Starting tor process with config "
                         "{torrc_config}.".format(**locals()))
        self.tor_process = launch_tor_with_config(config=self.torrc_config,
                                                  take_ownership=take_ownership)
        self.authenticate_to_tor_controlport()

        self.logger.info("Opening cell log stream...")
        self.cell_log = open(tor_cell_log, "rb")

        if run_in_xvfb:
            self.logger.info("Starting Xvfb...")
            self.run_in_xvfb = True
            self.virtual_framebuffer = start_xvfb()

        self.logger.info("Starting Tor Browser...")
        self.tb_driver = TorBrowserDriver(tbb_path=tbb_path,
                                          tor_cfg=tb_tor_cfg,
                                          tbb_logfile_path=tb_log_path,
                                          socks_port=self.socks_port,
                                          control_port=self.control_port)

        self.wait_after_closing_circuits = wait_after_closing_circuits
        self.page_load_timeout = page_load_timeout
        self.tb_driver.set_page_load_timeout(page_load_timeout)
        self.wait_on_page = wait_on_page
        self.restart_on_sketchy_exception = restart_on_sketchy_exception

        self.control_data = self.get_control_data(page_load_timeout,
                                                  wait_on_page,
                                                  wait_after_closing_circuits,
                                                  additional_control_fields)
        self.db_handler = db_handler
        if db_handler:
            self.crawlid = self.db_handler.add_crawl(self.control_data)
Example 18
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--report_type', required=True)
  parser.add_argument('--period', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--overwrite', action='store_true')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  rt = args.report_type
  assert rt == 'is' or rt == 'bs' or rt == 'cf', (
      'report_type must be one of "is", "bs" and "cf"')
  p = args.period
  assert p == '3' or p == '12', 'period must be "3" or "12"'

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  sl, fl = [], []  # Lists of tickers succeeded/failed to download.
  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))

    output_path = '%s/%s.csv' % (args.output_dir, ticker)
    dl = False
    if path.isfile(output_path):
      action = 'skipping'
      if args.overwrite:
        remove(output_path)
        action = 'overwriting'
        dl = True
      logging.warning('Output file exists: %s, %s' % (output_path, action))
    else: dl = True

    if dl:
      ok = download(ticker, rt, p, output_path)
      if ok: sl.append(ticker)
      else: fl.append(ticker)
  logging.info('Downloaded %d tickers, failed %d tickers'
               % (len(sl), len(fl)))
  logging.info('Downloaded tickers: %s' % sl)
  logging.info('Failed tickers: %s' % fl)
Example 19
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--er1_path', required=True)
  parser.add_argument('--er12_path', required=True)
  parser.add_argument('--tv2mc_path', required=True)
  parser.add_argument('--er2_path', required=True)
  parser.add_argument('--e2p_path', required=True)
  parser.add_argument('--roe_path', required=True)
  parser.add_argument('--b2p_path', required=True)
  parser.add_argument('--er6_path', required=True)
  parser.add_argument('--cf2p_path', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  er1_map = utils.read_map(args.er1_path)
  er12_map = utils.read_map(args.er12_path)
  tv2mc_map = utils.read_map(args.tv2mc_path)
  er2_map = utils.read_map(args.er2_path)
  e2p_map = utils.read_map(args.e2p_path)
  roe_map = utils.read_map(args.roe_path)
  b2p_map = utils.read_map(args.b2p_path)
  er6_map = utils.read_map(args.er6_path)
  cf2p_map = utils.read_map(args.cf2p_path)
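  # Only tickers present in every factor map get a combined score.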
  tickers = (er1_map.keys() & er12_map.keys() & tv2mc_map.keys()
             & er2_map.keys() & e2p_map.keys() & roe_map.keys()
             & b2p_map.keys() & er6_map.keys() & cf2p_map.keys())
  logging.info('%d tickers' % len(tickers))
  logging.info('total weight: %f' %
      (ER1 + ER12 + TV2MC + ER2 + E2P + ROE + B2P + ER6 + CF2P))

  with open(args.output_path, 'w') as fp:
    for t in sorted(tickers):
      score = (er1_map[t] * ER1
               + er12_map[t] * ER12
               + tv2mc_map[t] * TV2MC
               + er2_map[t] * ER2
               + e2p_map[t] * E2P
               + roe_map[t] * ROE
               + b2p_map[t] * B2P
               + er6_map[t] * ER6
               + cf2p_map[t] * CF2P) / 100  # accounting for %
      print('%s %f' % (t, score), file=fp)
Example 20
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--net_income_path', required=True)
  parser.add_argument('--total_equity_path', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  ni_map = utils.read_map(args.net_income_path)
  e_map = utils.read_map(args.total_equity_path)
  tickers = ni_map.keys() & e_map.keys()

  with open(args.output_path, 'w') as fp:
    for ticker in sorted(tickers):
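      # Return on equity: net income divided by total equity.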
      output = ni_map[ticker] / e_map[ticker]
      print('%s %f' % (ticker, output), file=fp)
Example 21
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--prices_path', required=True)
  parser.add_argument('--outstanding_shares_path', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  p_map = utils.read_map(args.prices_path)
  s_map = utils.read_map(args.outstanding_shares_path)
  tickers = p_map.keys() & s_map.keys()

  with open(args.output_path, 'w') as fp:
    for ticker in sorted(tickers):
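      # Market capitalization: share price times shares outstanding.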
      output = p_map[ticker] * s_map[ticker]
      print('%s %f' % (ticker, output), file=fp)
Example 22
    def setup_logging(self, level='DEBUG'):
        """
        Sets up the generic logging of the worker

        :param level: level of the logging, INFO, DEBUG, WARN
        :return: no return
        """

        return utils.setup_logging(__file__, self.__class__.__name__)
Example 23
    def test_setup_logging(self):
        logname = "ta_frmk_unittest"
        logfile = utils.make_splunk_path(["var", "log", "splunk",
                                          "%s.log" % logname])
        try:
            os.remove(logfile)
        except OSError:
            pass
        logger = utils.setup_logging(logname, "DEBUG")
        logger.debug("ta_unittest_frmk_debug")
        logger.info("ta_unittest_frmk_info")
        logger.error("ta_unittest_frmk_error")
        utils.setup_logging(logname, "INFO", True)
        logger.debug("ta_unittest_frmk_debug")
        logger.info("ta_unittest_frmk_info")
        logger.error("ta_unittest_frmk_error")
        utils.setup_logging(logname, "ERROR", True)
        logger.debug("ta_unittest_frmk_debug")
        logger.info("ta_unittest_frmk_info")
        logger.error("ta_unittest_frmk_error")

        with open(logfile) as f:
            logs = f.readlines()

        self.assertEqual(len(logs), 6)

        m = re.search(r"DEBUG\s+\d+\s+-\s+ta_unittest_frmk_debug$", logs[0])
        self.assertIsNotNone(m)

        m = re.search(r"INFO\s+\d+\s+-\s+ta_unittest_frmk_info$", logs[1])
        self.assertIsNotNone(m)

        m = re.search(r"ERROR\s+\d+\s+-\s+ta_unittest_frmk_error$", logs[2])
        self.assertIsNotNone(m)

        m = re.search(r"INFO\s+\d+\s+-\s+ta_unittest_frmk_info$", logs[3])
        self.assertIsNotNone(m)

        m = re.search(r"ERROR\s+\d+\s+-\s+ta_unittest_frmk_error$", logs[4])
        self.assertIsNotNone(m)

        m = re.search(r"ERROR\s+\d+\s+-\s+ta_unittest_frmk_error$", logs[5])
        self.assertIsNotNone(m)
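
The unit test above exercises a different flavor of setup_logging: it takes a log name and level, writes to a per-name log file under Splunk's var/log/splunk directory, returns the logger, and re-applies the level when the third argument is true. A minimal sketch under those assumptions (the path layout, rotation settings, and format string below are illustrative guesses, not the actual implementation):

import logging
import logging.handlers
import os

def setup_logging(name, level="INFO", refresh=False):
    # Hypothetical sketch: return a named logger writing to var/log/splunk/<name>.log.
    log_dir = os.path.join("var", "log", "splunk")  # assumed location
    os.makedirs(log_dir, exist_ok=True)
    logger = logging.getLogger(name)
    logger.setLevel(getattr(logging, level.upper(), logging.INFO))
    if refresh:
        # Drop existing handlers so the new level takes effect without duplicate output.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
    if not logger.handlers:
        handler = logging.handlers.RotatingFileHandler(
            os.path.join(log_dir, "%s.log" % name), maxBytes=1 << 20, backupCount=3)
        handler.setFormatter(logging.Formatter(
            "%(asctime)s %(levelname)s %(process)d - %(message)s"))
        logger.addHandler(handler)
    return logger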
Example 24
    def __init__(self, repserver_ip_port, splunkd_uri, session_key):
        self._req_client = cb.ReqClient(repserver_ip_port)
        self._splunkd_uri = splunkd_uri
        self._session_key = session_key
        self._conf_thr = threading.Thread(target=self._monitor_and_generate)
        self._shutdown_q = Queue.Queue()
        self._tasks_need_resent = {}
        self._heartbeat_logger = utils.setup_logging("ta_heartbeat")
        self._started = False
        self._stopped = False
Example 25
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--input_path', required=True)
  parser.add_argument('--output_data_path', required=True)
  parser.add_argument('--output_index_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  with open(args.input_path, 'r') as fp:
    lines = fp.read().splitlines()

  # This block below is to keep the output data in sync with the ones
  # produced by split_data_for_cv.py.  I.e. the date and ticker of each
  # input line are swapped (such that date goes before ticker), and the
  # lines are sorted (by date and then by ticker).

  # Swap date and ticker in place.
  item_count = -1
  for i in range(len(lines)):
    items = lines[i].split(' ')
    if item_count < 0: item_count = len(items)
    else: assert item_count == len(items)
    items[0], items[1] = items[1], items[0]
    lines[i] = ' '.join(items)
  # This will sort lines by entry and then ticker.
  lines.sort()

  data_fp = open(args.output_data_path, 'w')
  index_fp = open(args.output_index_path, 'w')
  for line in lines:
    items = line.split(' ')
    assert len(items) > 3
    data = '%s %s' % (utils.make_label(float(items[2]), False),
                      ' '.join(items[3:]))
    index = ' '.join(items[:2])
    print(data, file=data_fp)
    print(index, file=index_fp)

  data_fp.close()
  index_fp.close()
Example 26
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--scores_path', required=True)
  parser.add_argument('--prices_path', required=True)
  parser.add_argument('--mc_path', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  s_map = utils.read_map(args.scores_path)
  p_map = utils.read_map(args.prices_path)
  mc_map = utils.read_map(args.mc_path)
  tickers = s_map.keys() & p_map.keys() & mc_map.keys()

  with open(args.output_path, 'w') as fp:
    for ticker in sorted(tickers):
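      # Keep only tickers above the minimum price and market cap thresholds.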
      if p_map[ticker] < MIN_PRICE: continue
      if mc_map[ticker] < MIN_MC: continue
      print('%s %f' % (ticker, s_map[ticker]), file=fp)
Example 27
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--overwrite', action='store_true')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  sl, fl = [], []  # Lists of tickers succeeded/failed to download.
  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))

    output_path = '%s/%s.csv' % (args.output_dir, ticker.replace('^', '_'))
    dl = False
    if path.isfile(output_path):
      action = 'skipping'
      if args.overwrite:
        remove(output_path)
        action = 'overwriting'
        dl = True
      logging.warning('Output file exists: %s, %s' % (output_path, action))
    else: dl = True

    if dl:
      ok = download(ticker, output_path)
      if ok: sl.append(ticker)
      else: fl.append(ticker)
  logging.info('Downloaded %d tickers, failed %d tickers'
               % (len(sl), len(fl)))
  logging.info('Downloaded tickers: %s' % sl)
  logging.info('Failed tickers: %s' % fl)
Example 28
File: main.py Project: forter/boten
def main(conf_file):
    utils.setup_logging(False)
    logger = logging.getLogger("boten")
    config = boten.core.get_config(init=conf_file)
    sqs_conn = sqs.connect_to_region(config['config']['aws_region'])

    queue = sqs_conn.get_queue(config['config']['queue_name'])
    bots = init_bots()
    logger.info('bots loaded [{}]'.format(",".join(bots.keys())))
    while True:
        logger.info('polling for new job')
        with utils.poll_sqs(queue) as payload:
            logger.info('Got new job')
            bot_name = payload['command'][1:]
            if payload['token'] != config[bot_name]['slack_token']:
                logger.warning('Got unauthorized slack command')
                logger.warning(payload)
                continue
            payload['subcommand'] = payload['text'].partition(' ')[0]
            payload['args'] = payload['text'].partition(' ')[2]
            p = multiprocessing.Process(target=run_payload, args=(bots[bot_name], payload, logger))
            p.start()
Example 29
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--sample_dir', required=True)
  parser.add_argument('--market_sample_path', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--er_months', default=ER_MONTHS)
  parser.add_argument('--ev_months', default=EV_MONTHS)
  parser.add_argument('--overwrite', action='store_true')
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  market_samples = utils.read_samples(args.market_sample_path)
  er_months = [int(m) for m in args.er_months.split(',')]
  ev_months = [int(m) for m in args.ev_months.split(',')]

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  for i in range(len(tickers)):
    ticker = tickers[i]
    assert ticker.find('^') == -1  # ^GSPC should not be in tickers.
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
    stock_sample_path = '%s/%s.csv' % (args.sample_dir, ticker)
    if not path.isfile(stock_sample_path):
      logging.warning('Input file does not exist: %s' % stock_sample_path)
      continue
    # The output format is no longer csv.  Use txt instead.
    output_path = '%s/%s.txt' % (args.output_dir, ticker)
    if path.isfile(output_path) and not args.overwrite:
      logging.warning('Output file exists: %s, skipping' % output_path)
      continue
    stock_samples = utils.read_samples(stock_sample_path)
    compute_features(stock_samples, market_samples, er_months, ev_months,
                     output_path)
Example 30
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--ticker_file', required=True)
  parser.add_argument('--price_sample_dir', required=True)
  parser.add_argument('--yyyy_mm', required=True)
  parser.add_argument('--output_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  # Tickers are listed one per line.
  with open(args.ticker_file, 'r') as fp:
    tickers = fp.read().splitlines()
  logging.info('Processing %d tickers' % len(tickers))

  price_map = dict()
  for i in range(len(tickers)):
    ticker = tickers[i]
    logging.info('%d/%d: %s' % (i+1, len(tickers), ticker))
    input_path = '%s/%s.csv' % (args.price_sample_dir, ticker.replace('^', '_'))
    if not path.isfile(input_path):
      logging.warning('Input file is missing: %s' % input_path)
      continue
    with open(input_path, 'r') as fp:
      lines = fp.read().splitlines()
    found = False
    for line in lines:
      if line.startswith(args.yyyy_mm):
        d, v, p = line.split(' ')
        price_map[ticker] = float(p)
        found = True
        break
    if not found:
      logging.warning('Could not find current price data for %s' % ticker)
  with open(args.output_path, 'w') as fp:
    for ticker in sorted(price_map.keys()):
      print('%s %.2f' % (ticker, price_map[ticker]), file=fp)
Example 31
def main(args):
    "Put all the pieces together"
    if args.dump_per_instance_results:
        args.dump = True
    if args.dump:
        args.disable_tqdm = True
        if len(args.logfile.name) == 0:
            basename_fusion = [
                str(i.with_suffix('').with_name(i.stem)) for i in args.snapshot
            ]
            args.logfile = Path('-'.join(basename_fusion) + '_corpus-eval')
        if args.logfile.exists():
            raise ValueError(
                f'{args.logfile} already exists. Please provide a logfile or '
                'backup existing results.')
    setup_logging(args)

    logging.info('Corpus Retrieval Evaluation for CAL/MCN')
    logging.info(f'Git revision hash: {get_git_revision_hash()}')
    load_hyperparameters(args)
    logging.info(args)

    engine_prm = {}
    if args.arch == 'MCN':
        args.dataset = 'UntrimmedMCN'
        args.engine = 'MomentRetrievalFromProposalsTable'
    elif args.arch == 'SMCN':
        args.dataset = 'UntrimmedSMCN'
        args.engine = 'MomentRetrievalFromClipBasedProposalsTable'
    else:
        raise ValueError('Unknown/unsupported architecture')

    logging.info('Loading dataset')
    dataset_novisual = True
    dataset_cues = {feat: None for feat in args.tags}
    if args.h5_path:
        for i, key in enumerate(args.tags):
            dataset_cues[key] = {'file': args.h5_path[i]}
        dataset_novisual = False
        clip_length = None
    else:
        clip_length = args.clip_length
    proposals_interface = proposals.__dict__[args.proposal_interface](
        args.min_length, args.scales, args.stride)
    dataset_setup = dict(json_file=args.test_list,
                         cues=dataset_cues,
                         loc=args.loc,
                         context=args.context,
                         debug=args.debug,
                         eval=True,
                         no_visual=dataset_novisual,
                         proposals_interface=proposals_interface,
                         clip_length=clip_length)
    dataset = dataset_untrimmed.__dict__[args.dataset](**dataset_setup)
    if args.arch == 'SMCN':
        logging.info('Set padding on UntrimmedSMCN dataset')
        dataset.set_padding(False)

    logging.info('Setting up models')
    models_dict = {}
    for i, key in enumerate(args.snapshot_tags):
        arch_setup = dict(
            visual_size=dataset.visual_size[key],
            lang_size=dataset.language_size,
            max_length=dataset.max_words,
            embedding_size=args.embedding_size,
            visual_hidden=args.visual_hidden,
            lang_hidden=args.lang_hidden,
            visual_layers=args.visual_layers,
        )
        models_dict[key] = model.__dict__[args.arch](**arch_setup)
        filename = args.snapshot[i].with_suffix('.pth.tar')
        snapshot_ = torch.load(filename,
                               map_location=lambda storage, loc: storage)
        models_dict[key].load_state_dict(snapshot_['state_dict'])
        models_dict[key].eval()

    logging.info('Creating database (indexing the corpus)')
    engine = corpus.__dict__[args.engine](dataset, models_dict, **engine_prm)
    engine.indexing()

    logging.info('Launch evaluation...')
    # log-scale up to the end of the database
    if len(args.topk) == 1 and args.topk[0] == 0:
        exp = int(np.floor(np.log10(engine.num_moments)))
        args.topk = [10**i for i in range(0, exp + 1)]
        args.topk.append(engine.num_moments)
    num_instances_retrieved = []
    judge = CorpusVideoMomentRetrievalEval(topk=args.topk)
    args.n_display = max(int(args.n_display * len(dataset.metadata)), 1)
    for it, query_metadata in tqdm(enumerate(dataset.metadata),
                                   disable=args.disable_tqdm):
        result_per_query = engine.query(
            query_metadata['language_input'],
            return_indices=args.dump_per_instance_results)
        if args.dump_per_instance_results:
            vid_indices, segments, proposals_ind = result_per_query
        else:
            vid_indices, segments = result_per_query
        judge.add_single_predicted_moment_info(query_metadata,
                                               vid_indices,
                                               segments,
                                               max_rank=engine.num_moments)
        num_instances_retrieved.append(len(vid_indices))
        if args.disable_tqdm and (it + 1) % args.n_display == 0:
            logging.info(f'Processed queries [{it}/{len(dataset.metadata)}]')

        if args.dump_per_instance_results:
            # TODO: wrap-up this inside a class. We could even dump in a
            # non-blocking thread using a Queue
            if it == 0:
                filename = args.logfile.with_suffix('.h5')
                fid = h5py.File(filename, 'x')
                if args.reduced_dump:
                    fid_vi = fid.create_dataset(name='vid_indices',
                                                chunks=True,
                                                shape=(len(dataset),
                                                       dataset.num_videos),
                                                dtype='int64')
                else:
                    fid.create_dataset(name='proposals',
                                       data=engine.proposals,
                                       chunks=True)
                    fid_vi = fid.create_dataset(name='vid_indices',
                                                chunks=True,
                                                shape=(len(dataset), ) +
                                                vid_indices.shape,
                                                dtype='int64')
                    fid_pi = fid.create_dataset(name='proposals_ind',
                                                chunks=True,
                                                shape=(len(dataset), ) +
                                                proposals_ind.shape,
                                                dtype='int64')

            if args.reduced_dump:
                fid_vi[it, ...] = pd.unique(vid_indices.numpy())
            else:
                fid_vi[it, ...] = vid_indices
                fid_pi[it, ...] = proposals_ind

    if args.dump_per_instance_results:
        fid.close()

    logging.info('Summarizing results')
    num_instances_retrieved = np.array(num_instances_retrieved)
    logging.info(f'Number of queries: {len(judge.map_query)}')
    logging.info(f'Number of proposals: {engine.num_moments}')
    retrieved_proposals_median = int(np.median(num_instances_retrieved))
    retrieved_proposals_min = int(num_instances_retrieved.min())
    if (num_instances_retrieved != engine.num_moments).any():
        logging.info('Triggered approximate search')
        logging.info('Median numbers of retrieved proposals: '
                     f'{retrieved_proposals_median:d}')
        logging.info('Min numbers of retrieved proposals: '
                     f'{retrieved_proposals_min:d}')
    result = judge.evaluate()
    _ = [logging.info(f'{k}: {v}') for k, v in result.items()]
    if args.dump:
        filename = args.logfile.with_suffix('.json')
        logging.info(f'Dumping results into: {filename}')
        with open(filename, 'x') as fid:
            for key, value in result.items():
                result[key] = float(value)
            result['snapshot'] = [str(i) for i in args.snapshot]
            result['corpus'] = str(args.test_list)
            result['topk'] = args.topk
            result['iou_threshold'] = judge.iou_thresholds
            result['median_proposals_retrieved'] = retrieved_proposals_median
            result['min_proposals_retrieved'] = retrieved_proposals_min
            result['date'] = datetime.now().isoformat()
            result['git_hash'] = get_git_revision_hash()
            json.dump(result, fid, indent=1)
Example 32
def run(local_rank: int, config: Any, *args: Any, **kwargs: Any):
    """function to be run by idist.Parallel context manager."""

    # ----------------------
    # make a certain seed
    # ----------------------
    rank = idist.get_rank()
    manual_seed(config.seed + rank)

    # -----------------------
    # create output folder
    # -----------------------

    if rank == 0:
        now = datetime.now().strftime("%Y%m%d-%H%M%S")
        name = f"{config.dataset}-backend-{idist.backend()}-{now}"
        path = Path(config.output_dir, name)
        path.mkdir(parents=True, exist_ok=True)
        config.output_dir = path.as_posix()

    config.output_dir = Path(idist.broadcast(config.output_dir, src=0))

    # -----------------------------
    # datasets and dataloaders
    # -----------------------------

    train_dataset, num_channels = get_datasets(config.dataset, config.data_path)

    train_dataloader = idist.auto_dataloader(
        train_dataset,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        {% if use_distributed_training and not use_distributed_launcher %}
        persistent_workers=True,
        {% endif %}
    )

    # ------------------------------------------
    # model, optimizer, loss function, device
    # ------------------------------------------

    device = idist.device()
    netD, netG, optimizerD, optimizerG, loss_fn, lr_scheduler = initialize(config, num_channels)

    # -----------------------------
    # trainer and evaluator
    # -----------------------------
    ws = idist.get_world_size()
    real_labels = torch.ones(config.batch_size // ws, device=device)
    fake_labels = torch.zeros(config.batch_size // ws, device=device)
    fixed_noise = torch.randn(config.batch_size // ws, config.z_dim, 1, 1, device=device)

    trainer = create_trainers(
        config=config,
        netD=netD,
        netG=netG,
        optimizerD=optimizerD,
        optimizerG=optimizerG,
        loss_fn=loss_fn,
        device=device,
        real_labels=real_labels,
        fake_labels=fake_labels,
    )

    # -------------------------------------------
    # setup engines logger with python logging
    # print training configurations
    # -------------------------------------------

    logger = setup_logging(config)
    log_basic_info(logger, config)
    trainer.logger = logger

    # -------------------------------------
    # ignite handlers and ignite loggers
    # -------------------------------------

    to_save = {'netD': netD, 'netG': netG, 'optimizerD': optimizerD, 'optimizerG': optimizerG, 'trainer': trainer}
    optimizers = {'optimizerD': optimizerD, 'optimizerG': optimizerG}
    best_model_handler, es_handler, timer_handler = get_handlers(
        config=config,
        model={'netD': netD, 'netG': netG},
        trainer=trainer,
        evaluator=trainer,
        metric_name='errD',
        es_metric_name='errD',
        to_save=to_save,
        lr_scheduler=lr_scheduler,
        output_names=["errD", "errG", "D_x", "D_G_z1", "D_G_z2"],
    )

    # setup ignite logger only on rank 0
    if rank == 0:
        logger_handler = get_logger(config=config, trainer=trainer, optimizers=optimizers)

    # -----------------------------------
    # resume from the saved checkpoints
    # -----------------------------------

    if config.resume_from:
        resume_from(to_load=to_save, checkpoint_fp=config.resume_from)

    # --------------------------------------------------
    # adding handlers using `trainer.on` decorator API
    # --------------------------------------------------

    @trainer.on(Events.EPOCH_COMPLETED)
    def save_fake_example(engine):
        fake = netG(fixed_noise)
        path = config.output_dir / (FAKE_IMG_FNAME.format(engine.state.epoch))
        vutils.save_image(fake.detach(), path, normalize=True)

    # --------------------------------------------------
    # adding handlers using `trainer.on` decorator API
    # --------------------------------------------------
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_real_example(engine):
        img, y = engine.state.batch
        path = config.output_dir / (REAL_IMG_FNAME.format(engine.state.epoch))
        vutils.save_image(img, path, normalize=True)

    # -------------------------------------------------------------
    # adding handlers using `trainer.on` decorator API
    # -------------------------------------------------------------
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        if timer_handler:
            logger.info(f"Epoch {engine.state.epoch} done. Time per batch: {timer_handler.value():.3f}[s]")
            timer_handler.reset()

    @trainer.on(Events.ITERATION_COMPLETED(every=config.log_every_iters))
    @idist.one_rank_only()
    def print_logs(engine):
        fname = config.output_dir / LOGS_FNAME
        columns = ["iteration", ] + list(engine.state.metrics.keys())
        values = [str(engine.state.iteration), ] + [str(round(value, 5)) for value in engine.state.metrics.values()]

        with open(fname, "a") as f:
            if f.tell() == 0:
                print("\t".join(columns), file=f)
            print("\t".join(values), file=f)
        message = f"[{engine.state.epoch}/{config.max_epochs}][{engine.state.iteration % len(train_dataloader)}/{len(train_dataloader)}]"
        for name, value in zip(columns, values):
            message += f" | {name}: {value}"
        logger.info(message)

    # -------------------------------------------------------------
    # adding handlers using `trainer.on` decorator API
    # -------------------------------------------------------------
    @trainer.on(Events.EPOCH_COMPLETED)
    def create_plots(engine):
        try:
            import matplotlib as mpl

            mpl.use("agg")

            import matplotlib.pyplot as plt
            import pandas as pd

        except ImportError:
            warnings.warn("Loss plots will not be generated -- pandas or matplotlib not found")

        else:
            df = pd.read_csv(config.output_dir / LOGS_FNAME, delimiter="\t", index_col="iteration")
            _ = df.plot(subplots=True, figsize=(20, 20))
            _ = plt.xlabel("Iteration number")
            fig = plt.gcf()
            path = config.output_dir / PLOT_FNAME

            fig.savefig(path)

    # --------------------------------
    # print metrics to the stderr
    # with `add_event_handler` API
    # for training stats
    # --------------------------------

    trainer.add_event_handler(Events.ITERATION_COMPLETED(every=config.log_every_iters), log_metrics, tag="train")

    # ------------------------------------------
    # setup if done. let's run the training
    # ------------------------------------------

    trainer.run(train_dataloader, max_epochs=config.max_epochs, epoch_length=config.train_epoch_length)

    # ------------------------------------------------------------
    # close the logger after the training completed / terminated
    # ------------------------------------------------------------

    if rank == 0:
        from ignite.contrib.handlers.wandb_logger import WandBLogger

        if isinstance(logger_handler, WandBLogger):
            # why handle differently for wandb ?
            # See : https://github.com/pytorch/ignite/issues/1894
            logger_handler.finish()
        elif logger_handler:
            logger_handler.close()

    # -----------------------------------------
    # where is my best and last checkpoint ?
    # -----------------------------------------

    if best_model_handler is not None:
        logger.info("Last and best checkpoint: %s", best_model_handler.last_checkpoint)
Example 33
    ])

    valid_transform = transforms.Compose([
        transforms.CenterCrop(args.crop_size),
        # transforms.RandomHorizontalFlip(), # do we need to flip when eval?
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    train_dataloader = get_loader(opt, mode='train', transform=train_transform)
    valid_dataloader = get_loader(opt, mode='val', transform=valid_transform)

    print('load the dataset into memory...')
    print(
        'total iterations in training phase : {} \ntotal iterations in validation phase : {}'
        .format(len(train_dataloader), len(valid_dataloader)))

    trainer = Trainer(opt, train_dataloader, valid_dataloader)
    trainer.train()
    print('done')


if __name__ == "__main__":
    args = parse_opt()

    setup_logging(os.path.join('log.txt'))
    logging.info("\nrun arguments: %s",
                 json.dumps(vars(args), indent=4, sort_keys=True))

    main(args)
    print('done')
Example 34
def main():
    utils.setup_logging()
    logging.info('Script Started')
    logging.debug('args: ' + str(sys.argv))
    # If the environment variables are not present, we assume the script is
    # running locally rather than in OpenShift.
    if not set(('COURSE_ID', 'CATEGORY_NAME')).issubset(environ):
        if len(sys.argv) != 3:
            logging.error(
                "If running the script in OpenShift you may have misspelled the environment "
                "variables COURSE_ID/CATEGORY_NAME, or when running the script locally the "
                "command line arguments (paths to the properties files) are missing; usage: "
                "'python groupsforsections.py /config.yaml /security.yaml'")
            sys.exit(1)
        config_file = sys.argv[1]
        security_file = sys.argv[2]

        logging.debug('reading the file %s ' % basename(security_file))

        with open(security_file, 'r') as yml_file:
            sf = yaml.safe_load(yml_file)

        if not sf or CONST_CANVAS not in sf:
            logging.error('The key \'canvas\' is missing ')
            sys.exit(1)

        logging.debug('reading the file %s ' % basename(config_file))

        with open(config_file, 'r') as yml_file:
            cfg = yaml.safe_load(yml_file)

        if not cfg or CONST_COURSE not in cfg:
            logging.error('The key \'course\' is missing ')
            sys.exit(1)

        if not sf[CONST_CANVAS] or CONST_TOKEN not in sf[CONST_CANVAS] or CONST_URL not in sf[CONST_CANVAS] or \
                not cfg[CONST_COURSE] or CONST_ID not in cfg[CONST_COURSE] or CONST_GRP_CAT_NAME not in cfg[CONST_COURSE]:
            logging.error(
                "Some of the keys are missing from the properties files %s:  %s , %s:   %s"
                % (basename(security_file), '"canvas keys missing" '
                   if sf[CONST_CANVAS] is None else sf[CONST_CANVAS].keys(),
                   basename(config_file), '"course keys missing"'
                   if cfg[CONST_COURSE] is None else cfg[CONST_COURSE].keys()))
            sys.exit(1)

        course_id = cfg[CONST_COURSE][CONST_ID]
        group_category_name = cfg[CONST_COURSE][CONST_GRP_CAT_NAME]
        canvas_token = sf[CONST_CANVAS][CONST_TOKEN]
        canvas_url = sf[CONST_CANVAS][CONST_URL]
    else:
        course_id = environ['COURSE_ID']
        group_category_name = environ['CATEGORY_NAME']
        with open("/usr/local/secret-volume/canvas-url", 'r') as url:
            canvas_url = url.read()
        with open("/usr/local/secret-volume/canvas-token", 'r') as token:
            canvas_token = token.read()

    if not course_id or not group_category_name or not canvas_token or not canvas_url:
        logging.error(
            "some of the configurations from properties file are missing: "
            "course_id = " + str(course_id) + " ; group_category_name = " +
            str(group_category_name) + " ; canvas_url = " + str(canvas_url) +
            " ;  canvas_token = " +
            (str(canvas_token) if canvas_token is None else "Not_Shown"))
        sys.exit(1)

    logging.debug('Canvas Token: ' + canvas_token)
    logging.info('Canvas URL: ' + canvas_url)
    logging.info('Course Id: ' + course_id)
    logging.info('Group Category Name: ' + group_category_name)

    # instantiating the class
    groups_for_section_class = GroupsForSections(canvas_token, canvas_url)
    # this hold the list of users that needs to be added to a group, group => users
    groups_to_users_dict = {}

    group_category_id = create_group_category(group_category_name,
                                              groups_for_section_class,
                                              course_id)

    if group_category_id is None:
        logging.error('Group category "%s" is not created for course %s ' %
                      (group_category_name, course_id))
        sys.exit(1)

    sections = get_sections_for_course({}, groups_for_section_class, course_id)

    if sections is None or not sections:
        logging.error(
            'No sections in the course or error in getting sections for the course: '
            + course_id)
        sys.exit(1)

    logging.info(
        'Total # of sections that are in course %s are %d and are %s ' %
        (course_id, len(sections), sections.keys()))
    for section_id in sections:
        users = get_users_in_section(groups_for_section_class, [],
                                     str(section_id))

        if users is None:
            logging.error('Could not get users in section %s(%s): ' %
                          (section_id, sections[section_id]))
            sys.exit(1)

        logging.info('section %s (%s) has %s users : ' %
                     (section_id, sections[section_id], str(len(users))))

        # creating one group for each section in course.
        group_id = create_group(groups_for_section_class,
                                str(group_category_id), sections[section_id],
                                course_id)

        if group_id is None:
            logging.error('Could not create group for section %s(%s): ' %
                          (section_id, sections[section_id]))
            sys.exit(1)

        logging.info(
            'Group %s created for section %s (%s)' %
            (str(group_id), section_id, sections[section_id]))

        # map all the users in this section to the corresponding group
        groups_to_users_dict[group_id] = users

    failed_groups_to_users_dict = defaultdict(list)
    success_groups_to_users_dict = defaultdict(list)

    # adding users to the group
    for group, users in groups_to_users_dict.items():
        for user in users:
            membership_id = add_users_to_group(groups_for_section_class, group,
                                               user)
            if membership_id is None:
                logging.error('User %s was not added to group %s' %
                              (user, group))
                failed_groups_to_users_dict[group].append(user)
            else:
                success_groups_to_users_dict[group].append(user)
                logging.info(
                    'User %s was added to group %s with membership id %s'
                    % (user, group, str(membership_id)))

    # log the total number of users expected in each group
    logging.info("**** Expected users per group:")
    for group in groups_to_users_dict:
        logging.info('%d users should be added to the group %s' %
                     (len(groups_to_users_dict[group]), group))

    # log the number of users successfully added to each group
    if success_groups_to_users_dict:
        logging.info("**** Users successfully added to groups:")
        for group in success_groups_to_users_dict:
            logging.info('%d users successfully added to the group %s' %
                         (len(success_groups_to_users_dict[group]), group))

    # log the users that could not be added to a group
    if failed_groups_to_users_dict:
        logging.error("**** Users that could not be added to groups:")
        for group in failed_groups_to_users_dict:
            users = ','.join(failed_groups_to_users_dict[group])
            logging.info(
                '%d users were not added to group %s: %s' %
                (len(failed_groups_to_users_dict[group]), group, users))

    logging.info('script ran successfully')
Example n. 35
import os
import sys
import antlr3
import inspect, importlib

from PyFuncLexer import PyFuncLexer
from PyFuncParser import PyFuncParser

from splunk import Intersplunk as si

import utils

logger = utils.setup_logging("pyfunc")


def parse_func(pfunc):
    char_stream = antlr3.ANTLRStringStream(pfunc)
    lexer = PyFuncLexer(char_stream)
    tokens = antlr3.CommonTokenStream(lexer)
    tokens.fillBuffer()
    parser = PyFuncParser(tokens)
    return parser.pyfunc()


def find_func(func):
    pkg = '.'.join(func.packages)
    module = importlib.import_module(pkg)
    members = inspect.getmembers(module)
    flist = [f for n, f in members if inspect.isfunction(f) and n == func.name]

    if len(flist) >= 1:
        return flist[0]
    return None
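
# Hypothetical usage (sketch, assuming the node returned by parse_func exposes
# the `.packages` and `.name` attributes accessed above):
#   func_node = parse_func("math.sqrt")
#   fn = find_func(func_node)   # resolves to math.sqrt, or None if not found
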
def run_experiment(args):
    import os
    # set environment variables for tensorflow
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import inspect
    import shutil
    import numpy as np
    import tensorflow as tf

    from collections import OrderedDict
    import matplotlib.pyplot as plt
    plt.switch_backend('Agg')

    import utils
    import paramgraphics
    import nn
    from tensorflow.contrib.framework.python.ops import arg_scope
    # import tensorflow.contrib.layers as layers

    # ----------------------------------------------------------------
    # Arguments and Settings
    args.message = 'LBT-GAN-celebA_' + args.message
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # copy file for reproducibility
    logger, dirname = utils.setup_logging(args)
    script_fn = inspect.getfile(inspect.currentframe())
    script_src = os.path.abspath(script_fn)
    script_dst = os.path.abspath(os.path.join(dirname, script_fn))
    shutil.copyfile(script_src, script_dst)
    logger.info("script copied from %s to %s" % (script_src, script_dst))

    # print arguments
    for k, v in sorted(vars(args).items()):
        logger.info("  %20s: %s" % (k, v))

    # get arguments
    batch_size = args.batch_size
    batch_size_est = args.batch_size_est
    gen_lr = args.gen_lr
    dis_lr = args.dis_lr
    est_lr = args.est_lr
    lambda_gan = args.lambda_gan
    beta1 = 0.5
    epsilon = 1e-8
    max_iter = args.max_iter
    viz_every = args.viz_every
    z_dim, vae_z_dim = utils.get_ints(args.z_dims)
    unrolling_steps = args.unrolling_steps
    assert unrolling_steps > 0
    n_viz = args.n_viz

    # ----------------------------------------------------------------
    # Dataset
    from dataset import load_celebA, DataSet
    train_x, test_x = load_celebA()
    train_x = train_x * 2. - 1.
    test_x = test_x * 2. - 1.

    dtrain = DataSet(train_x, None)
    dtest = DataSet(test_x, None)

    # data_channel = 3
    x_dim = 64 * 64 * 3
    dim_input = (64, 64)

    # ----------------------------------------------------------------
    # Model setup
    logger.info("Setting up model ...")

    def discriminator(x, Reuse=tf.AUTO_REUSE, is_training=True):

        def leaky_relu(x, alpha=0.2):
            return tf.maximum(alpha * x, x)

        with tf.variable_scope("discriminator", reuse=Reuse):

            x = tf.reshape(x, [batch_size, 64, 64, 3])
            lx = tf.layers.dropout(x, 0.2, training=is_training)

            conv1 = tf.layers.conv2d(
                lx, 64, 5, 2, use_bias=True, padding='same')
            conv1 = leaky_relu(conv1)

            conv2 = tf.layers.conv2d(
                conv1, 128, 5, 2, use_bias=False, padding='same')
            conv2 = tf.layers.batch_normalization(conv2, training=is_training)
            conv2 = leaky_relu(conv2)

            conv3 = tf.layers.conv2d(
                conv2, 256, 5, 2, use_bias=False, padding='same')
            conv3 = tf.layers.batch_normalization(conv3, training=is_training)
            conv3 = leaky_relu(conv3)

            conv4 = tf.layers.conv2d(
                conv3, 512, 5, 2, use_bias=False, padding='same')
            conv4 = tf.layers.batch_normalization(conv4, training=is_training)
            conv4 = leaky_relu(conv4)
            conv4 = tf.layers.flatten(conv4)

            fc2 = tf.layers.dense(conv4, 1)
            return fc2

    def generator(z, Reuse=tf.AUTO_REUSE, flatten=True, is_training=True):
        if args.g_nonlin == 'relu':
            # print("Use Relu in G")
            nonlin = tf.nn.relu
        else:
            # print("Use tanh in G")
            nonlin = tf.nn.tanh
        # nonlin = tf.nn.relu if args.g_nonlin == 'relu' else tf.nn.tanh

        # norm_prms = {'is_training': is_training, 'decay': 0.9, 'scale': False}
        with tf.variable_scope("generator", reuse=Reuse):
            lx = tf.layers.dense(z, 4 * 4 * 512)
            lx = tf.reshape(lx, [-1, 4, 4, 512])
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            lx = tf.layers.conv2d_transpose(
                lx, 256, 5, 2, use_bias=False, padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            lx = tf.layers.conv2d_transpose(
                lx, 128, 5, 2, use_bias=False, padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            lx = tf.layers.conv2d_transpose(
                lx, 64, 5, 2, use_bias=False, padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            lx = tf.layers.conv2d_transpose(lx, 3, 5, 2, padding='same')
            lx = tf.nn.tanh(lx)

            if flatten is True:
                lx = tf.layers.flatten(lx)
            return lx

    nonlin = tf.nn.relu

    def compute_est_samples(z, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator"):
            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(
                        z, vae_z_dim, 200 * 2, "dense1", nonlinearity=nonlin)
                    h_dec_2 = nn.dense(
                        h_dec_1,
                        200 * 2,
                        500 * 2,
                        "dense2",
                        nonlinearity=nonlin)
                    x_mean = nn.dense(
                        h_dec_2, 500 * 2, x_dim, "dense3", nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)
                    return x_mean

    def compute_est_ll(x, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator", reuse=reuse):
            logvae_x_var = tf.get_variable(
                "logvae_x_var", (),
                tf.float32,
                trainable=True,
                initializer=tf.constant_initializer(-1))

            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("encoder", reuse=reuse):
                    h_enc_1 = nn.dense(
                        x, x_dim, 500 * 2, "dense1", nonlinearity=nonlin)
                    # h_enc_1 = nn.batch_norm(h_enc_1, "bn1", 129, 2)
                    h_enc_2 = nn.dense(
                        h_enc_1,
                        500 * 2,
                        200 * 2,
                        "dense2",
                        nonlinearity=nonlin)
                    # h_enc_2 = nn.batch_norm(h_enc_2, "bn2", 128, 2)
                    z_mean = nn.dense(
                        h_enc_2,
                        200 * 2,
                        vae_z_dim,
                        "dense3",
                        nonlinearity=None)
                    z_logvar = nn.dense(
                        h_enc_2,
                        200 * 2,
                        vae_z_dim,
                        "dense4",
                        nonlinearity=None)
                epsilon = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
                z = z_mean + tf.exp(0.5 * z_logvar) * epsilon

                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(
                        z, vae_z_dim, 200 * 2, "dense1", nonlinearity=nonlin)
                    # h_dec_1 = nn.batch_norm(h_dec_1, "bn1", 127, 2)
                    h_dec_2 = nn.dense(
                        h_dec_1,
                        200 * 2,
                        500 * 2,
                        "dense2",
                        nonlinearity=nonlin)
                    # h_dec_2 = nn.batch_norm(h_dec_2, "bn2", 128, 2)
                    x_mean = nn.dense(
                        h_dec_2, 500 * 2, x_dim, "dense3", nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)

        vae_x_var = tf.exp(logvae_x_var)
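        # ELBO = E_q[log N(x | x_mean, vae_x_var)] - KL(q(z | x) || N(0, I)):
        # a Gaussian reconstruction term with a single learned variance
        # exp(logvae_x_var), minus the closed-form Gaussian KL term.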
        elbo = tf.reduce_mean(
            tf.reduce_sum(
                -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(vae_x_var) -
                tf.layers.flatten(tf.square(x - x_mean)) / (2 * vae_x_var),
                axis=1) -
            tf.reduce_sum(
                -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar)),
                axis=1))
        return elbo, tf.nn.tanh(x_mean)

    def compute_est_updated_with_SGD(x, lr=0.001, params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        for key, g in zip(params, grads):
            new_params[key] += lr * g
        return elbo, new_params

    def compute_est_updated_with_Adam(x,
                                      lr=0.001,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      epsilon=1e-7,
                                      decay=0.,
                                      params=None,
                                      adam_params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        new_adam_params = adam_params.copy()
        new_adam_params['iterations'] += 1
        lr = lr * \
            (1. / (1. + decay *
                   tf.cast(adam_params['iterations'], tf.float32)))
        t = tf.cast(new_adam_params['iterations'], tf.float32)
        lr_t = lr * (tf.sqrt(1. - tf.pow(beta_2, t)) / (1. - tf.pow(beta_1, t)))
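        # lr_t applies the standard Adam bias correction
        # lr * sqrt(1 - beta_2^t) / (1 - beta_1^t). The loop below builds new
        # symbolic copies of m, v and the parameters, so gradients can flow
        # through these estimator updates when they are unrolled. Note the
        # ascent step (+) since the ELBO is maximized, and that epsilon is
        # added inside the sqrt here.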
        for key, g in zip(params, grads):
            new_adam_params['m_' + key] = (
                beta_1 * adam_params['m_' + key]) + (1. - beta_1) * g
            new_adam_params['v_' + key] = tf.stop_gradient(
                (beta_2 * adam_params['v_' + key]) +
                (1. - beta_2) * tf.square(g))
            new_params[
                key] = params[key] + lr_t * new_adam_params['m_' + key] / tf.sqrt(
                    new_adam_params['v_' + key] + epsilon)
        return elbo, new_params, new_adam_params

    lr = tf.placeholder(tf.float32)
    data = tf.placeholder(tf.float32, shape=(batch_size, x_dim))

    # Construct generator and estimator nets
    est_params_dict = OrderedDict()
    _, _ = compute_est_ll(data, params=est_params_dict)
    gen_noise = tf.random_normal((batch_size_est, z_dim), dtype=tf.float32)
    samples_gen = generator(gen_noise)
    vae_noise = tf.random_normal((batch_size_est, vae_z_dim), dtype=tf.float32)
    samples_est = tf.nn.sigmoid(
        compute_est_samples(z=vae_noise, params=est_params_dict))
    # for key in est_params_dict:
    #    print(key, est_params_dict[key])

    adam_params_dict = OrderedDict()
    with tf.variable_scope("adam"):
        adam_params_dict['iterations'] = tf.Variable(
            0, dtype=tf.int64, name='iterations')
        for key in est_params_dict:
            adam_params_dict['m_' + key] = tf.Variable(
                tf.zeros_like(est_params_dict[key]), name='m_' + key)
            adam_params_dict['v_' + key] = tf.Variable(
                tf.zeros_like(est_params_dict[key]), name='v_' + key)

    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
    est_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "estimator")
    adam_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "adam")

    # unrolling estimator updates
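    # The generator's samples are passed through `unrolling_steps` simulated
    # Adam updates of the estimator; the unrolled parameters are then used both
    # to update the estimator (e_train_op) and to evaluate the ELBO of real
    # data that enters the generator objective.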
    cur_params = est_params_dict
    cur_adam_params = adam_params_dict
    elbo_genx_at_steps = []
    for _ in range(unrolling_steps):
        samples_gen = generator(
            tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
        elbo_genx_step, cur_params, cur_adam_params = compute_est_updated_with_Adam(
            samples_gen,
            lr=lr,
            beta_1=beta1,
            epsilon=epsilon,
            params=cur_params,
            adam_params=cur_adam_params)
        elbo_genx_at_steps.append(elbo_genx_step)

    # estimator update
    updates = []
    for key in est_params_dict:
        updates.append(tf.assign(est_params_dict[key], cur_params[key]))
    for key in adam_params_dict:
        updates.append(tf.assign(adam_params_dict[key], cur_adam_params[key]))
    e_train_op = tf.group(*updates, name="e_train_op")

    # Optimize the generator on the unrolled ELBO loss
    unrolled_elbo_data, _ = compute_est_ll(data, params=cur_params)
    # unrolled_elbo_samp, _ = compute_est_ll(
    #     tf.stop_gradient(samples_gen), params=cur_params)

    # GAN-loss for discriminator and generator
    samples_gen_gan = generator(
        tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
    fake_D_output = discriminator(samples_gen_gan)
    real_D_output = discriminator(data)
    # print(fake_D_output, real_D_output)
    ganloss_g = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(fake_D_output), logits=fake_D_output))
    ganloss_D_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(fake_D_output), logits=fake_D_output))
    ganloss_D_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(real_D_output), logits=real_D_output))

    use_e_sym = tf.placeholder(tf.float32, shape=(), name="use_E")
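    # Generator objective: non-saturating GAN loss plus, in LBT mode, the
    # negative unrolled ELBO of real data, gated by use_E so the estimator
    # term can be switched on only after a warm-up period.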
    if args.lbt:
        logger.info("Using lbt")
        object_g = lambda_gan * ganloss_g - use_e_sym * unrolled_elbo_data
    else:
        logger.info("Using GAN")
        object_g = lambda_gan * ganloss_g  # - use_e_sym * unrolled_elbo_data

    # object_g = -1 * unrolled_elbo_data
    object_d = ganloss_D_fake + ganloss_D_real
    dis_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 "discriminator")

    g_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, "generator")
    g_train_opt = tf.train.AdamOptimizer(
        learning_rate=gen_lr, beta1=beta1, epsilon=epsilon)
    # g_train_opt = tf.train.RMSPropOptimizer(learning_rate=gen_lr, epsilon=epsilon)
    g_grads = g_train_opt.compute_gradients(object_g, var_list=gen_vars)
    # g_grads_clipped = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in g_grads]
    g_grads_, g_vars_ = zip(*g_grads)
    g_grads_clipped_, g_grads_norm_ = tf.clip_by_global_norm(g_grads_, 5.)
    g_grads_clipped = zip(g_grads_clipped_, g_vars_)
    if args.clip_grad:
        logger.info("Clipping gradients of generator parameters.")
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads_clipped)
    else:
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads)
        # g_train_op = g_train_opt.apply_gradients(g_grads)

    d_train_opt = tf.train.AdamOptimizer(
        learning_rate=dis_lr, beta1=beta1, epsilon=epsilon)
    d_train_op = d_train_opt.minimize(object_d, var_list=dis_vars)

    # ----------------------------------------------------------------
    # Training
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    if args.model_path:
        saver.restore(sess, args.model_path)

    # # print variables
    # logger.info("Generator parameters:")
    # for p in gen_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Estimator parameters:")
    # for p in est_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Adam parameters:")
    # for p in adam_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))

    elbo_vals = []
    ganloss_vals = []
    tgan_g, tgan_d_fake, tgan_d_real = 0., 0., 0.
    elbo_genx_val, elbo_data_val, gradients_norm = -np.inf, -np.inf, 0
    use_e_flag = 0.

    for i in range(max_iter + 1):

        x_mini_batch = dtrain.next_batch(batch_size)[0].reshape(
            [batch_size, x_dim])

        if i > 3000:
            use_e_flag = 1.
            for _ in range(args.n_est):
                elbo_genx_val, _ = sess.run(
                    [elbo_genx_at_steps[-1], e_train_op],
                    feed_dict={lr: 3. * est_lr})

        for _ in range(args.n_dis):
            _, tgan_g, tgan_d_real, tgan_d_fake = sess.run(
                [d_train_op, ganloss_g, ganloss_D_real, ganloss_D_fake],
                feed_dict={data: x_mini_batch})

        elbo_data_val, gradients_norm, _ = sess.run(
            [unrolled_elbo_data, g_grads_norm_, g_train_op],
            feed_dict={
                data: x_mini_batch,
                lr: est_lr,
                use_e_sym: use_e_flag
            })
        elbo_vals.append([elbo_genx_val, elbo_data_val])
        ganloss_vals.append([tgan_g, tgan_d_real, tgan_d_fake])

        # visualization
        if i % viz_every == 0:
            np_samples_gen, np_samples_est, np_data = sess.run(
                [samples_gen, samples_est, data],
                feed_dict={data: x_mini_batch})
            np_samples_est = np_samples_est.reshape([-1, 64, 64, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 64 * 64 * 3])
            np_samples_gen = np_samples_gen.reshape([-1, 64, 64, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 64 * 64 * 3])
            np_data = np_data.reshape([-1, 64, 64, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 64 * 64 * 3])

            np_samples_est = np_samples_est / 2. + 0.5
            np_samples_gen = np_samples_gen / 2. + 0.5
            np_data = np_data / 2. + 0.5

            paramgraphics.mat_to_img(
                np_samples_gen[:n_viz],
                dim_input,
                colorImg=True,
                save_path=os.path.join(dirname,
                                       'sample_' + str(i) + '_gen.png'))
            paramgraphics.mat_to_img(
                np_data[:n_viz],
                dim_input,
                colorImg=True,
                save_path=os.path.join(dirname,
                                       'sample_' + str(i) + '_dat.png'))
            paramgraphics.mat_to_img(
                np_samples_est[:n_viz],
                dim_input,
                colorImg=True,
                save_path=os.path.join(dirname,
                                       'sample_' + str(i) + '_est.png'))

            fig = plt.figure(figsize=(6, 4))
            plt.plot(
                elbo_vals,
                '.',
                markersize=2,
                markeredgecolor='none',
                linestyle='none',
                alpha=min(1.0, 0.01 * max_iter / (i + 1)))
            plt.ylim((-200.0, 0.0))
            legend = plt.legend(('elbo_genx', 'elbo_data'), markerscale=6)
            for lh in legend.legendHandles:
                lh._legmarker.set_alpha(1.)
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(os.path.join(dirname, 'curve.png'), bbox_inches='tight')
            plt.close(fig)

        # training log
        if i % viz_every == 0:
            elbo_genx_ma_val, elbo_data_ma_val = np.mean(
                elbo_vals[-200:], axis=0)
            logger.info(
                "Iter %d: gradients norm = %.4f. samples LL = %.4f, data LL = %.4f."
                % (i, gradients_norm, elbo_genx_ma_val, elbo_data_ma_val))
            logger.info(
                "Iter %d: gan_g = %.4f. gan_d_real = %.4f, gan_d_fake = %.4f." %
                (i, tgan_g, tgan_d_real, tgan_d_fake))

        if i % args.model_every == 0:
            saver.save(sess, os.path.join(dirname, 'model_' + str(i)))
Example n. 37
def main():

    stats_start_time = time.time()

    # setup logger
    logger = utils.setup_logging()
    logger.info("logger set up")

    # accept input
    search_string = raw_input("Enter search keyword ")
    try:
        crawl_limit = int(raw_input("Enter maximum number of pages to crawl "))
    except ValueError:
        logger.error("number of pages in not an integer")
        return
    if crawl_limit < 11:
        logger.error("no crawling required")
        return
    logger.info("starting search for %s by crawling %d pages", search_string, crawl_limit)

    # fetch initial pages
    while True:
        logger.info("fetching initial seed links for :: %s", search_string)
        initial_urls = utils.fetch_seed(search_string)
        logger.info("%d initial seed links fetched", len(initial_urls))
        if len(initial_urls) > 0:
            break

    # setup initial data

    # page_heap --> used to store type page which contains url, promise, depth
    # page_heap --> ordered by promise, largest promise on top
    page_heap = []
    # relevance is used to store relevance of crawled urls
    # url--> relevance
    relevance = {}
    # mapping to store incoming links from other urls
    # url -> [url1, url2...url_n]
    # this is mapped as an inverted graph,
    # e.g. url1 has incoming links from [url2, url3]
    links = {}
    # pages_crawled, stats_errors, relevant_count are used to track crawler stats
    pages_crawled = 0
    stats_errors = 0
    relevant_count = 0
    black_list = ["php", "pdf", "jpg", "png", "mailto", "comment", "advertising", "javascript",
                  "cite", "cite_note", "picture", "image", "photo", "#", ".mp3", ".mp4"]
    # output file
    output_file = open("crawler.txt", "w");

    # push initial seed urls to heap
    for url in initial_urls:
        if FOCUSSED_CRAWL:
            heapq.heappush(page_heap, page.Page(url, 10, 0))
        else:
            page_heap.append(page.Page(url, 10, 0))
        links[url] = ["www.google.com"]

    # setup loop to crawl the web
    # Flow:
    #   1. Pop page off the heap
    #   2. Fetch page
    #   3. Compute & store relevance
    #   4. If page was too deep, don't dig page for links
    #   5. Find all links in the page
    #   6. For all link
    #       1.  if we are seeing the url for the first time add to heap
    #       2. If we are seeing the url before, update promise in heap
    #   7. Repeat
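    # Note: heapq is a min-heap, so keeping the largest promise on top
    # presumably relies on page.Page defining its ordering on negated promise.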
    while pages_crawled < crawl_limit and len(page_heap) > 0:
        if FOCUSSED_CRAWL:
            next_page_to_crawl = heapq.heappop(page_heap)
        else:
            next_page_to_crawl = page_heap.pop(0)
        next_page_url = next_page_to_crawl.url

        try:
            if not utils.can_crawl(next_page_url):
                logger.info("not allowed to crawl %s", next_page_url)
                del links[next_page_url]
                continue
        except IOError:
            logger.error("error connecting to %s", next_page_url)
            continue
        try:
            logger.info("trying to fetch page :: %s", next_page_url)
            next_page = requests.get(next_page_url, timeout=1)
        except requests.exceptions.RequestException:
            logger.error("exception fetching page :: %s", next_page_url)
            stats_errors = stats_errors+1
            continue
        if next_page.status_code != 200:
            logger.error("error fetching page :: %s", next_page.status_code)
            stats_errors = stats_errors+1
            continue

        pages_crawled = pages_crawled + 1
        page_relevance = utils.compute_relevance(next_page.text, search_string)
        # scale cosine threshold to 0-100
        if page_relevance > COSINE_RELEVANCE_THRESHOLD*100:
            relevant_count = relevant_count + 1

        # write output to file
        output = str(pages_crawled)+" "+next_page_url+"\n"
        output_string = "   time: "+str(datetime.datetime.time(datetime.datetime.now())) +\
                        " size:"+str(len(next_page.content))+" relevance:"+str(page_relevance)
        if FOCUSSED_CRAWL:
            output_string = output_string+" promise:"+str(next_page_to_crawl.promise)+"\n\n"
        else:
            output_string = output_string + "\n\n"
        output_file.write(output)
        output_file.write(output_string)
        output_file.flush()

        relevance[next_page_url] = page_relevance
        old_domain = urlparse(next_page_url).netloc

        links_on_page = utils.get_links_on_page(next_page_url, next_page.text)
        for url in links_on_page:
            # check if url has already been visited
            if url in relevance:
                logger.info("ignoring already visited url :: %s", url)
                continue
            # check if url is blacklisted
            if utils.is_blacklisted_url(black_list, url):
                logger.info("ignoring blacklisted url :: %s", url)
                continue
            # check if page is soon to be visited (present in page_heap)
            if page.Page(url, 0, 0) in page_heap:
                # update url promise if we are in focussed mode only
                # no need to update promise in bfs
                if FOCUSSED_CRAWL:
                    logger.info("new pointer to %s , updating promise", url)
                    utils.update_url_promise(url, next_page_url, relevance, links, page_heap, crawl_limit)
                continue

            # At this point, we know we are seeing the page for the first time
            # add page to heap, create first link for page
            logger.info("new link %s found, adding to page_heap", url)

            # check if we are crawling too deep into a domain
            new_domain = urlparse(url).netloc
            depth = 0
            if new_domain == old_domain:
                depth = next_page_to_crawl.depth + 1
            if depth >= MAX_DEPTH_TO_CRAWL:
                continue

            # compute predicted promise
            predicted_promise = utils.compute_promise(next_page_url, url, relevance, search_string)
            new_page = page.Page(url, predicted_promise, depth)
            if FOCUSSED_CRAWL:
                heapq.heappush(page_heap, new_page)
            else:
                page_heap.append(new_page)
            links[url] = [next_page_url]

        # an optimization to keep the heap bounded so heap operations stay O(log(crawl_limit))
        if len(page_heap) > crawl_limit:
            logger.info("trimming heap")
            del page_heap[int(math.ceil(crawl_limit * 0.8)):]

        # delete incoming links to a page for 'search in links' optimization
        # we will not be using this data again as we don't visit seen urls again
        try:
            del links[next_page_url]
        except KeyError:
            logger.error("error removing graph links to :: %s", next_page_url)

    # log stats to file
    output_file.write("\n~~~~~~~~~~~~~~~~~~~Stats~~~~~~~~~~~~~~~~\n\n")
    harvest_percentage = str(100*float(relevant_count)/float(crawl_limit))
    output_file.write("harvest rate   : "+harvest_percentage+" percent\n")
    output_file.write("4xx errors     : "+str(stats_errors)+"\n")
    output_file.write("execution time : "+str((time.time()-stats_start_time)/60)+" minutes\n")
    output_file.write("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    output_file.flush()
    output_file.close()
Example n. 38
def main(cfg):

    device = torch.device('cuda' if cfg.cuda else 'cpu')

    autoenc = DeepLabv3Plus()
    model = Siamese(autoenc,
                    in_channels=3,
                    n_edges=cfg.n_edges,
                    sp_pool_use_max=cfg.sp_pooling_max)
    if (cfg.checkpoint_autoenc is not None):
        print('loading checkpoint {}'.format(cfg.checkpoint_autoenc))
        state_dict = torch.load(cfg.checkpoint_autoenc,
                                map_location=lambda storage, loc: storage)
        autoenc.load_state_dict(state_dict)
    elif (cfg.checkpoint_siam is not None):
        print('loading checkpoint {}'.format(cfg.checkpoint_siam))
        state_dict = torch.load(cfg.checkpoint_siam,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(state_dict)

    autoenc.to(device)
    model.to(device)

    transf = iaa.Sequential([
        iaa.Invert(0.5) if 'Dataset1' in 'Dataset' +
        cfg.train_dir else iaa.Noop(),
        iaa.SomeOf(3, [
            iaa.Affine(scale={
                "x": (1 - cfg.aug_scale, 1 + cfg.aug_scale),
                "y": (1 - cfg.aug_scale, 1 + cfg.aug_scale)
            },
                       rotate=(-cfg.aug_rotate, cfg.aug_rotate),
                       shear=(-cfg.aug_shear, cfg.aug_shear)),
            iaa.SomeOf(1, [
                iaa.AdditiveGaussianNoise(scale=cfg.aug_noise * 255),
                iaa.GaussianBlur(sigma=(0., cfg.aug_blur)),
                iaa.GammaContrast((0., cfg.aug_gamma))
            ]),
            iaa.Fliplr(p=0.5),
            iaa.Flipud(p=0.5)
        ]), rescale_augmenter
    ])

    transf_normal = Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])

    dl_train = Loader(pjoin(cfg.in_root, 'Dataset' + cfg.train_dir),
                      augmentation=transf,
                      n_segments=cfg.n_segments_train,
                      delta_segments=cfg.delta_segments_train,
                      normalization=transf_normal)

    dl_test = torch.utils.data.ConcatDataset([
        Loader(pjoin(cfg.in_root, 'Dataset' + d),
               augmentation=transf,
               n_segments=cfg.n_segments_test,
               delta_segments=cfg.delta_segments_test,
               normalization=transf_normal) for d in cfg.test_dirs
    ])

    dataloader_train = DataLoader(dl_train,
                                  batch_size=cfg.batch_size,
                                  sampler=SubsetRandomSampler(
                                      cfg.n_frames_epoch * cfg.train_frames),
                                  collate_fn=dl_train.collate_fn,
                                  drop_last=True,
                                  num_workers=cfg.n_workers)

    dataloader_test = DataLoader(dl_test,
                                 batch_size=cfg.batch_size,
                                 collate_fn=dl_train.collate_fn,
                                 sampler=torch.utils.data.RandomSampler(
                                     dl_test,
                                     replacement=True,
                                     num_samples=cfg.batch_size),
                                 num_workers=cfg.n_workers)

    dataloaders = {'train': dataloader_train, 'test': dataloader_test}

    d = datetime.datetime.now()

    ds_dir = os.path.split('Dataset' + cfg.train_dir)[-1]

    run_dir = pjoin(cfg.out_dir,
                    '{}_{:%Y-%m-%d_%H-%M}_{}'.format(ds_dir, d, cfg.exp_name))

    if (not os.path.exists(run_dir)):
        os.makedirs(run_dir)

    # Save cfg
    with open(pjoin(run_dir, 'cfg.yml'), 'w') as outfile:
        yaml.dump(cfg.__dict__, stream=outfile, default_flow_style=False)

    # convert batch to device
    batch_to_device = lambda batch: {
        k: v.to(device) if (isinstance(v, torch.Tensor)) else v
        for k, v in batch.items()
    }
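    # Parameter groups: the encoder and ASPP of the autoencoder are trained at
    # cfg.lr_autoenc, while its decoder and the two linear heads of the siamese
    # branch are trained at cfg.lr_siam.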

    optimizer = optim.SGD(params=[{
        'params': model.autoenc.encoder.parameters(),
        'lr': cfg.lr_autoenc
    }, {
        'params': model.autoenc.aspp.parameters(),
        'lr': cfg.lr_autoenc
    }, {
        'params': model.autoenc.decoder.parameters(),
        'lr': cfg.lr_siam
    }, {
        'params': model.linear1.parameters(),
        'lr': cfg.lr_siam
    }, {
        'params': model.linear2.parameters(),
        'lr': cfg.lr_siam
    }],
                          momentum=cfg.momentum,
                          weight_decay=cfg.decay)

    utls.setup_logging(run_dir)
    logger = logging.getLogger('siam')

    logger.info('run_dir: {}'.format(run_dir))

    train(cfg, model, dataloaders, run_dir, batch_to_device, optimizer, logger)

    logger.info('training siam')
Example n. 39
import utils

log = utils.setup_logging(__name__)


class Watson:
    def __init__(self, wml_client, wos_client):
        self.wml_client = wml_client
        self.wos_client = wos_client

    def get_service_provider_by_name(self, service_provider_name):
        service_providers = self.wos_client.service_providers.list(
        ).result.service_providers
        log.debug("Service providers size: " + str(len(service_providers)))
        service_provider_id = None
        for service_provider in service_providers:
            if service_provider.entity.name == service_provider_name:
                service_provider_id = service_provider.metadata.id
                log.debug("Found the service_provider: {}".format(
                    service_provider_id))
        return service_provider_id
Example n. 40
def run_experiment(args):
    import os
    # set environment variables for theano
    os.environ['THEANO_FLAGS'] = "lib.cnmem=" + str(args.mem) + ",device=gpu" + str(args.gpu)

    import threading
    import Queue
    import inspect
    import shutil
    import time
    import logging
    import six
    import collections
    import itertools
    import random
    import numpy as np
    import scipy
    import theano
    import theano.tensor as T
    import lasagne
    import lasagne.layers as ll
    import lasagne.nonlinearities as ln
    import parmesan

    import layers
    import utils
    import cfdataset

#----------------------------------------------------------------
# Arguments and Settings
    floatX = theano.config.floatX
    logger = logging.getLogger()
    np.random.seed(args.seed)

    # copy file for reproducibility
    dirname = utils.setup_logging(args.message, args.loglv)
    script_src = os.path.abspath(inspect.getfile(inspect.currentframe()))
    script_dst = os.path.join(dirname, os.path.split(script_src)[1])
    shutil.copyfile(script_src, script_dst)

    # print arguments
    args_dict = collections.OrderedDict(sorted(vars(args).items()))
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))

    # get arguments
    D_u, D_v = args.D_u, args.D_v
    lr = args.lr
    weight_decay = args.weight_decay
    lookahead = args.lookahead
    max_epoch = args.max_epoch
    batch_size_u, batch_size_v = args.batch_size_u, args.batch_size_v
    nonlin_enc = layers.get_nonlin(args.nonlin_enc)
    nonlin_dec = layers.get_nonlin(args.nonlin_dec)
    negative_ratio = args.negative_ratio

#----------------------------------------------------------------
# Dataset
    dataset = cfdataset.CF_implicit_data(name=args.dataset)

    N_u, N_v = dataset.N_users, dataset.N_items
    T_matrix = dataset.T_matrix.astype(floatX)
    R_matrix = dataset.R_matrix.astype(floatX)
    R_negative_matrix = 1 - R_matrix
    assert np.all(R_matrix == (T_matrix > 0.5))
    assert np.all((R_negative_matrix == 1) == (T_matrix == 0))

    R_test = dataset.R_latest
    T_matrix[np.arange(N_u), R_test] = 0
    R_matrix[np.arange(N_u), R_test] = 0
    assert np.all(R_matrix == (T_matrix > 0.5))

    R_matrix_for_test = R_matrix.copy()

    R_valid = dataset.R_2nd_latest
    T_matrix[np.arange(N_u), R_valid] = 0
    R_matrix[np.arange(N_u), R_valid] = 0
    assert np.all(R_matrix == (T_matrix > 0.5))

    N_interaction = dataset.N_interaction - N_u * 2

    assert np.all(R_valid != R_test)
    assert np.all(R_matrix_for_test[np.arange(N_u), R_valid] == 1)
    assert np.all(R_matrix_for_test[np.arange(N_u), R_test] == 0)
    assert np.all(R_matrix[np.arange(N_u), R_valid] == 0)
    assert np.all(R_matrix[np.arange(N_u), R_test] == 0)
    assert np.all(T_matrix[np.arange(N_u), R_valid] == 0)
    assert np.all(T_matrix[np.arange(N_u), R_test] == 0)
    assert N_interaction == np.count_nonzero(R_matrix)
    assert N_interaction + N_u == np.count_nonzero(R_matrix_for_test)

    logger.info("%d users, %d items, %d training interactions (%d total, 2 * %d held out for validation and test)." % (N_u, N_v, N_interaction, dataset.N_interaction, N_u))

#----------------------------------------------------------------
# numpy variables
    # encoded vectors
    np_enc_u_h = np.zeros((N_u, D_u), dtype=floatX)
    np_enc_v_h = np.zeros((N_v, D_v), dtype=floatX)

#----------------------------------------------------------------
# Symbolic variables
    sym_lr = T.fscalar('lr')

    sym_Ru_pos = T.fmatrix('Ru_pos')
    sym_dr_Ru_pos = T.fscalar('dr_Ru_pos')
    sym_uid_origin_pos = T.ivector('uid_origin_pos')
    sym_uid_minibatch_pos = T.ivector('uid_minibatch_pos')

    sym_Ru_neg = T.fmatrix('Ru_neg')
    sym_dr_Ru_neg = T.fscalar('dr_Ru_neg')
    sym_uid_origin_neg = T.ivector('uid_origin_neg')
    sym_uid_minibatch_neg = T.ivector('uid_minibatch_neg')

    sym_Rv = T.fmatrix('Rv')
    sym_dr_Rv = T.fscalar('dr_Rv')
    sym_vid_origin_pos = T.ivector('vid_origin_pos')
    sym_vid_minibatch_pos = T.ivector('vid_minibatch_pos')
    sym_vid_origin_neg = T.ivector('vid_origin_neg')
    sym_vid_minibatch_neg = T.ivector('vid_minibatch_neg')

    sym_R_minibatch = T.fvector('R_minibatch')

#----------------------------------------------------------------
# Model setup (training model)
    logger.info("Setting up model ...")

    # Input layers
    l_in_Ru_pos = ll.InputLayer((None, N_v), input_var=sym_Ru_pos, name='l_in_Ru_pos')
    l_in_uid_origin_pos = ll.InputLayer((None,), input_var=sym_uid_origin_pos, name='l_in_uid_origin_pos')
    l_in_uid_minibatch_pos = ll.InputLayer((None,), input_var=sym_uid_minibatch_pos, name='l_in_uid_minibatch_pos')

    l_in_Ru_neg = ll.InputLayer((None, N_v), input_var=sym_Ru_neg, name='l_in_Ru_neg')
    l_in_uid_origin_neg = ll.InputLayer((None,), input_var=sym_uid_origin_neg, name='l_in_uid_origin_neg')
    l_in_uid_minibatch_neg = ll.InputLayer((None,), input_var=sym_uid_minibatch_neg, name='l_in_uid_minibatch_neg')

    l_in_Rv = ll.InputLayer((None, N_u), input_var=sym_Rv, name='l_in_Rv')
    l_in_vid_origin_pos = ll.InputLayer((None,), input_var=sym_vid_origin_pos, name='l_in_vid_origin_pos')
    l_in_vid_minibatch_pos = ll.InputLayer((None,), input_var=sym_vid_minibatch_pos, name='l_in_vid_minibatch_pos')
    l_in_vid_origin_neg = ll.InputLayer((None,), input_var=sym_vid_origin_neg, name='l_in_vid_origin_neg')
    l_in_vid_minibatch_neg = ll.InputLayer((None,), input_var=sym_vid_minibatch_neg, name='l_in_vid_minibatch_neg')

    # Dropout layers
    l_in_Ru_pos = ll.DropoutLayer(l_in_Ru_pos, p=sym_dr_Ru_pos, rescale=False, name='Dropout-l_in_Ru_pos')
    l_in_Ru_neg = ll.DropoutLayer(l_in_Ru_neg, p=sym_dr_Ru_neg, rescale=False, name='Dropout-l_in_Ru_neg')
    l_in_Rv = ll.DropoutLayer(l_in_Rv, p=sym_dr_Rv, rescale=False, name='Dropout-l_in_Rv')

    # User encoder model h(Ru)
    l_enc_u_h_pos = ll.DenseLayer(l_in_Ru_pos, num_units=D_u, nonlinearity=nonlin_enc, name='l_enc_u_h_pos')
    l_enc_u_h_neg = ll.DenseLayer(l_in_Ru_neg, num_units=D_u, nonlinearity=nonlin_enc, W=l_enc_u_h_pos.W, b=l_enc_u_h_pos.b, name='l_enc_u_h_neg')

    # Item encoder model h(Rv)
    l_enc_v_h = ll.DenseLayer(l_in_Rv, num_units=D_v, nonlinearity=nonlin_enc, name='l_enc_v_h')

    # User decoder model s(h(Ru))
    l_dec_u_s_pos = layers.SimpleDecodeLayer([l_enc_u_h_pos, l_in_vid_origin_pos, l_in_uid_minibatch_pos], num_units=N_v, nonlinearity=None, name='l_dec_u_s_pos')
    l_dec_u_s_neg = layers.SimpleDecodeLayer([l_enc_u_h_neg, l_in_vid_origin_neg, l_in_uid_minibatch_neg], num_units=N_v, V=l_dec_u_s_pos.V, Q=l_dec_u_s_pos.Q, b=l_dec_u_s_pos.b, nonlinearity=None, name='l_dec_u_s_neg')
    l_dec_u_s_all = ll.ConcatLayer([l_dec_u_s_pos, l_dec_u_s_neg], axis=0)

    # Item decoder model s(h(Rv))
    l_dec_v_s_pos = layers.SimpleDecodeLayer([l_enc_v_h, l_in_uid_origin_pos, l_in_vid_minibatch_pos], num_units=N_u, nonlinearity=None, name='l_dec_v_s_pos')
    l_dec_v_s_neg = layers.SimpleDecodeLayer([l_enc_v_h, l_in_uid_origin_neg, l_in_vid_minibatch_neg], num_units=N_u, V=l_dec_v_s_pos.V, Q=l_dec_v_s_pos.Q, b=l_dec_v_s_pos.b, nonlinearity=None, name='l_dec_v_s_neg')
    l_dec_v_s_all = ll.ConcatLayer([l_dec_v_s_pos, l_dec_v_s_neg], axis=0)

    # Likelihood model p(R)
    l_uv_s_train = ll.ElemwiseSumLayer([l_dec_u_s_all, l_dec_v_s_all], name='l_uv_s_train')
    l_r_train = ll.NonlinearityLayer(l_uv_s_train, nonlinearity=ln.sigmoid, name='l_r_train')
    l_uv_s_test = ll.ElemwiseSumLayer([l_dec_u_s_pos, l_dec_v_s_pos], name='l_uv_s_test')
    l_r_test = ll.NonlinearityLayer(l_uv_s_test, nonlinearity=ln.sigmoid, name='l_r_test')

#----------------------------------------------------------------
# Likelihood and RMSE
    # training
    p_r_train, = ll.get_output([l_r_train], deterministic=False)

    log_p_r = T.mean(parmesan.distributions.log_bernoulli(sym_R_minibatch, p_r_train, eps=1e-6))
    regularization = lasagne.regularization.regularize_network_params([l_r_train], lasagne.regularization.l2)
    cost_function = - log_p_r + weight_decay * regularization

    SE_train = T.sum(T.sqr(sym_R_minibatch - p_r_train))

    # test
    sym_enc_u_h = T.fmatrix('enc_u_h')
    sym_enc_v_h = T.fmatrix('enc_v_h')
    enc_u_h_out, enc_v_h_out = ll.get_output([l_enc_u_h_pos, l_enc_v_h], deterministic=True)
    p_r_test, = ll.get_output([l_r_test], inputs={l_enc_u_h_pos:sym_enc_u_h, l_enc_v_h:sym_enc_v_h}, deterministic=True)
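    # Evaluation scores 1 held-out positive followed by 100 sampled negatives
    # per user (101 scores per row). Sorting each row in descending order and
    # taking argmin over the resulting index array finds where column 0 (the
    # positive item) lands, i.e. its rank (0 = top).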
    test_scores = p_r_test.reshape((-1, 101))
    ranking = test_scores.argsort()[:,::-1].argmin(axis=1)

#----------------------------------------------------------------
# Gradients
    clip_grad = 1
    max_norm = 5

    params = ll.get_all_params([l_r_train,], trainable=True)
    for p in params:
        logger.debug("%s: %s" % (p, p.get_value().shape))

    grads = T.grad(cost_function, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

    #updates = lasagne.updates.adam(cgrads, params, beta1=0.9, beta2=0.999, epsilon=1e-4, learning_rate=sym_lr)
    updates, sym_vars_list = utils.adam(cgrads, params, beta1=0.9, beta2=0.999, epsilon=1e-4, learning_rate=sym_lr)

    # Polyak-style exponential moving average of the parameters (rate 0.01),
    # used by the "avg model" evaluation below
    params_avg = []
    for param in params:
        value = param.get_value(borrow=True)
        params_avg.append(theano.shared(np.zeros(value.shape, dtype=value.dtype),
                              broadcastable=param.broadcastable,
                              name=param.name + '_avg'))
    avg_updates = [(a, a + 0.01 * (p - a)) for p, a in zip(params, params_avg)]
    avg_givens = [(p, a) for p, a in zip(params, params_avg)]
    all_updates = updates.items() + avg_updates

#----------------------------------------------------------------
# Compile
    # training function
    logger.info("Compiling train_model ...")
    train_model = theano.function(
            inputs=[sym_lr,
                sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos,
                sym_uid_origin_neg, sym_uid_minibatch_neg, sym_vid_origin_neg, sym_vid_minibatch_neg,
                sym_Ru_pos, sym_Ru_neg, sym_Rv,
                sym_R_minibatch, sym_dr_Ru_pos, sym_dr_Ru_neg, sym_dr_Rv],
            outputs=[log_p_r, SE_train],
            updates=all_updates,
            )

    # encoders
    logger.info("Compiling encode_model ...")
    u_encode_model = theano.function(inputs=[sym_Ru_pos], outputs=enc_u_h_out)
    v_encode_model = theano.function(inputs=[sym_Rv], outputs=enc_v_h_out)

    u_encode_avg_model = theano.function(inputs=[sym_Ru_pos], outputs=enc_u_h_out, givens=avg_givens, on_unused_input='ignore')
    v_encode_avg_model = theano.function(inputs=[sym_Rv], outputs=enc_v_h_out, givens=avg_givens, on_unused_input='ignore')

    # test function
    logger.info("Compiling test_model ...")
    test_model = theano.function(
            inputs=[sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos, sym_enc_u_h, sym_enc_v_h],
            outputs=[ranking],
            )

    test_avg_model = theano.function(
            inputs=[sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos, sym_enc_u_h, sym_enc_v_h],
            outputs=[ranking],
            givens=avg_givens, on_unused_input='ignore',
            )

#----------------------------------------------------------------
# Predict function
    def compute_hidden_for(for_which_set='test', avg_model=False):
        assert for_which_set in ['valid', 'test']
        if for_which_set == 'valid':
            R_matrix_cond = R_matrix
        else:
            R_matrix_cond = R_matrix_for_test

        # precompute hidden representations
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)
            # create user mini-batch
            u_batch_ids = np.arange(u_start, u_end).astype('int32')
            # create conditionals
            Ru_minibatch = R_matrix_cond[u_batch_ids,:]
            # encode
            if avg_model:
                np_enc_u_h[u_batch_ids] = u_encode_avg_model(Ru_minibatch)
            else:
                np_enc_u_h[u_batch_ids] = u_encode_model(Ru_minibatch)

        v_end = 0
        while v_end < N_v:
            v_start, v_end = v_end, min(v_end + batch_size_v, N_v)
            # create item mini-batch
            v_batch_ids = np.arange(v_start, v_end).astype('int32')
            # create conditionals
            Rv_minibatch = R_matrix_cond[:,v_batch_ids].T
            # encode
            if avg_model:
                np_enc_v_h[v_batch_ids] = v_encode_avg_model(Rv_minibatch)
            else:
                np_enc_v_h[v_batch_ids] = v_encode_model(Rv_minibatch)

    def predict_once(which_set='test', avg_model=False):
        assert which_set in ['valid', 'test']
        if which_set == 'valid':
            R_predict = R_valid
        else:
            R_predict = R_test

        # test statistics
        rankings = []

        # loop users
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)

            # create user mini-batch and item mini-batch
            u_batch_ids = np.arange(u_start, u_end).astype('int32')

            vid_negative = np.asarray([np.random.choice(np.where(row)[0], 100, replace=False) for row in R_negative_matrix[u_batch_ids]], dtype='int32')
            vid = np.concatenate([R_predict[u_batch_ids].reshape(-1,1), vid_negative], axis=1).flatten()
            uid_origin = np.repeat(u_batch_ids, 101)
            uid_minibatch = uid_origin - u_start

            # get encoded vectors
            Ru_encoded = np_enc_u_h[u_batch_ids]

            if avg_model:
                rankings_minibatch, = test_avg_model(uid_origin, uid_minibatch, vid, vid, Ru_encoded, np_enc_v_h)
            else:
                rankings_minibatch, = test_model(uid_origin, uid_minibatch, vid, vid, Ru_encoded, np_enc_v_h)
            rankings.append(rankings_minibatch)

        rankings = np.concatenate(rankings)
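        # HR@10: fraction of users whose held-out positive ranks in the top 10.
        # NDCG@10: 1 / log2(rank + 2) for hits within the top 10, 0 otherwise.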
        HR = np.mean(rankings < 10)
        NDCG = np.mean((rankings < 10) / np.log2(rankings + 2))

        return HR, NDCG

    def predict(which_set='test', avg=10, avg_model=False):
        compute_hidden_for(for_which_set=which_set, avg_model=avg_model)
        HR_list = []
        NDCG_list = []
        for i in range(avg):
            hr, ndcg = predict_once(which_set=which_set, avg_model=avg_model)
            HR_list.append(hr)
            NDCG_list.append(ndcg)
        HR_mean = np.mean(HR_list)
        NDCG_mean = np.mean(NDCG_list)
        HR_std = np.std(HR_list)
        NDCG_std = np.std(NDCG_list)
        # print info after test finished
        eval_msg = which_set if not avg_model else which_set + ' (avg model)'
        logger.critical("%-20s HR = %.3f +- %.3f, NDCG = %.3f +- %.3f." % (eval_msg, HR_mean, HR_std, NDCG_mean, NDCG_std))
        return HR_mean, NDCG_mean

#----------------------------------------------------------------
# Training
    best_valid_result = - np.inf
    best_model = None
    best_auxiliary = None
    n_epocs_without_improvement = 0

    minibatch_queue = Queue.Queue(maxsize=10)

    # function for preparing minibatches
    def prepare_minibatch(minibatch_list):
        # loop mini-batches
        for u_batch_ids, v_batch_ids in minibatch_list:
            Rv_minibatch = R_matrix[:,v_batch_ids].T
            Rv_minibatch[:,u_batch_ids] = 0
            Ru_minibatch_neg = R_matrix[u_batch_ids,:]
            #Ru_minibatch_neg[:,v_batch_ids] = 0

            # create training samples mini-batch
            T_matrix_minibatch = T_matrix[np.ix_(u_batch_ids, v_batch_ids)]
            T_matrix_minibatch_sparse = scipy.sparse.coo_matrix(T_matrix_minibatch)
            n_interactions_minibatch = T_matrix_minibatch_sparse.count_nonzero()
            Ru_minibatch_pos = ((T_matrix[u_batch_ids[T_matrix_minibatch_sparse.row]] < T_matrix_minibatch_sparse.data.reshape(n_interactions_minibatch, 1)) & (T_matrix[u_batch_ids[T_matrix_minibatch_sparse.row]] > 0)).astype(floatX)

            uid_minibatch_pos = np.arange(n_interactions_minibatch).astype('int32')
            uid_origin_pos = u_batch_ids[T_matrix_minibatch_sparse.row]
            vid_minibatch_pos = T_matrix_minibatch_sparse.col
            vid_origin_pos = v_batch_ids[vid_minibatch_pos]

            R_matrix_negative_minibatch = 1 - R_matrix[np.ix_(u_batch_ids, v_batch_ids)]
            R_matrix_negative_minibatch_sparse = scipy.sparse.coo_matrix(R_matrix_negative_minibatch)
            n_negative_total = R_matrix_negative_minibatch_sparse.count_nonzero()
            assert n_negative_total + n_interactions_minibatch == u_batch_ids.size * v_batch_ids.size
            choice_negative = np.random.choice(n_negative_total, min(n_negative_total, np.int(n_interactions_minibatch * negative_ratio)), replace=False)

            uid_minibatch_neg = R_matrix_negative_minibatch_sparse.row[choice_negative]
            uid_origin_neg = u_batch_ids[uid_minibatch_neg]
            vid_minibatch_neg = R_matrix_negative_minibatch_sparse.col[choice_negative]
            vid_origin_neg = v_batch_ids[vid_minibatch_neg]

            R_minibatch = np.concatenate([np.ones_like(T_matrix_minibatch_sparse.data), R_matrix_negative_minibatch_sparse.data[choice_negative] * 0])

            n_pred_step = R_minibatch.shape[0]
            if n_pred_step == 0:
                raise ValueError('No interactions in this minibatch.')

            dr_Ru_pos = min(max(1 - 2 * np.random.rand(), 0), 0.8)
            dr_Ru_neg = 0.2
            dr_Rv = min(max(1 - 2 * np.random.rand(), 0), 0.8)
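            # The dropout rates above are resampled per minibatch:
            # max(1 - 2*U(0, 1), 0) is 0 with probability ~0.5 and otherwise
            # uniform on (0, 1), capped at 0.8; the negative user branch uses a
            # fixed rate of 0.2.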

            # package everything into a tuple
            data_minibatch_package = (
                    uid_origin_pos, uid_minibatch_pos, vid_origin_pos, vid_minibatch_pos,
                    uid_origin_neg, uid_minibatch_neg, vid_origin_neg, vid_minibatch_neg,
                    Ru_minibatch_pos, Ru_minibatch_neg, Rv_minibatch,
                    R_minibatch, dr_Ru_pos, dr_Ru_neg, dr_Rv)

            # enqueue
            minibatch_queue.put((n_pred_step, data_minibatch_package))

    logger.warning("Training started.")
    # loop epoch
    for epoch in range(1, 1+max_epoch):
        epoch_start_time = time.time()

        # training statistics
        LL_epoch, SE_epoch = 0, 0
        n_pred_epoch = 0

        u_order = np.array_split(np.random.permutation(N_u).astype('int32'), N_u // batch_size_u + 1)
        v_order = np.array_split(np.random.permutation(N_v).astype('int32'), N_v // batch_size_v + 1)
        minibatch_order = list(itertools.product(u_order, v_order))
        random.shuffle(minibatch_order)

        n_threads = 5
        n_minibatch_thread = len(minibatch_order) // n_threads + 1
        for t in range(n_threads):
            thr = threading.Thread(target=prepare_minibatch, args=(minibatch_order[t*n_minibatch_thread:(t+1)*n_minibatch_thread],))
            thr.setDaemon(True)
            thr.start()

        for step in range(len(minibatch_order)):
            n_pred_step, data_minibatch_package = minibatch_queue.get()
            # update parameters and calculate likelihood and RMSE
            LL_step, SE_step = train_model(lr, *data_minibatch_package)
            minibatch_queue.task_done()
            LL_epoch += LL_step * n_pred_step
            SE_epoch += SE_step
            n_pred_epoch += n_pred_step

        assert minibatch_queue.qsize() == 0

        # print info after epoch finished
        LL_epoch /= n_pred_epoch
        RMSE_epoch = np.sqrt(SE_epoch/n_pred_epoch)

        epoch_end_time = time.time()
        logger.info("Epoch %d, training RMSE = %f, LL = %f (%d training ratings). Elapsed time %.1fs." % (epoch, RMSE_epoch, LL_epoch, n_pred_epoch, epoch_end_time-epoch_start_time))

        # validation
        HR_valid, NDCG_valid = predict('valid')
        HR_test, NDCG_test = predict('test')
        HR_test, NDCG_test = predict('test', avg_model=True)

        # termination
        #if NDCG_valid > best_valid_result:
        if HR_valid > best_valid_result:
            n_epocs_without_improvement = 0
            #best_valid_result = NDCG_valid
            best_valid_result = HR_valid
            best_model = ll.get_all_param_values([l_r_train,], trainable=True)
            best_auxiliary = utils.get_all_shvar_values(sym_vars_list)
            logger.debug("New best model found!")
        else:
            n_epocs_without_improvement += 1
            if n_epocs_without_improvement >= lookahead:
                ll.set_all_param_values([l_r_train,], best_model, trainable=True)
                utils.set_all_shvar_values(sym_vars_list, best_auxiliary)
                if lr > 1e-5:
                    n_epocs_without_improvement = 0
                    lr /= 4
                    logger.error("Learning rate = %f now." % lr)
                else:
                    logger.error("Training finished.")
                    break

#----------------------------------------------------------------
# Test
    HR_test, NDCG_test = predict('test')
    HR_test, NDCG_test = predict('test', avg_model=True)

#----------------------------------------------------------------
# Summarization
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))
Esempio n. 41
0
import sys, os
import xml.dom.minidom
import subprocess
import signal, utils
import splunk.entity as en
from service import Protocol

logger = utils.setup_logging("rpcstart")

SCHEME = """<scheme>
    <title>Splunk RPC Startup</title>
    <description>Start up RPC service server.</description>
    <use_external_validation>true</use_external_validation>
    <streaming_mode>xml</streaming_mode>

    <endpoint>
        <args>
            <arg name="name">
                <title>Resource name</title>
                <description> Java RPC server name
                </description>
            </arg>

            <arg name="javapath">
                <title>Java Installation</title>
            </arg>

            <arg name="options">
                <title>Java Options</title>
            </arg>
Esempio n. 42
0
def main(
    mistakes_path: Path,
    outdir: Path,
    plan_iters: int = 10,
    optim: Literal["sgd", "adam"] = "sgd",
    lr: float = 0.1,
    momentum: bool = False,
    nesterov: bool = False,
    extra_inits: bool = False,
    replications: Optional[str] = None,
    log_time: bool = False,
    log_best_inits: bool = False,
    n_traj_max: Optional[int] = None,
    verbosity: Literal["INFO", "DEBUG"] = "INFO",
):
    outdir = Path(outdir)
    experiment_dir = outdir / make_experiment(
        optim, lr, plan_iters, momentum, nesterov, extra_inits
    )
    experiment_dir.mkdir(parents=True, exist_ok=True)

    setup_logging(verbosity=verbosity, log_path=experiment_dir / "log.txt")

    if replications is not None:
        replication_indices = parse_replications(replications)
        mistakes_paths = [
            Path(mistakes_path) / str(index) / "planner_mistakes.pkl"
            for index in replication_indices
        ]
    else:
        mistakes_paths = [Path(mistakes_path)]

    if optim == "sgd":
        optimizer = SGD(learning_rate=lr, momentum=momentum, nesterov=nesterov)
    elif optim == "adam":
        optimizer = Adam(learning_rate=lr)

    env = LegacyEnv(reward=np.zeros(4))

    starts, rewards, better_trajs = collect_mistakes(
        mistakes_paths=mistakes_paths, n_max=n_traj_max
    )

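    # Optional constant, length-50 control sequences used as extra
    # initializations for the trajectory optimizer when extra_inits is set.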
    init_controls = (
        np.array(
            [
                [[0.0, 1.0]] * 50,
                [[0.0, -1.0]] * 50,
                [[-0.5, -1.0]] * 50,
                [[0.5, -1.0]] * 50,
                [[0.5, 1.0]] * 50,
                [[-0.5, 1.0]] * 50,
            ]
        )
        if extra_inits
        else None
    )

    logging.info("Making trajectories")
    opt_trajs, losses = make_opt_trajs(
        traj_opt=TrajOptimizer(
            n_planner_iters=plan_iters,
            optim=optimizer,
            init_controls=init_controls,
            log_best_init=log_best_inits,
        ),
        rewards=rewards,
        starts=starts,
        log_time=log_time,
    )

    logging.info("Rolling out trajectories")
    returns = np.empty((len(starts), 2))
    for i, (start, reward_weights, opt_traj, policy_traj, loss) in enumerate(
        zip(starts, rewards, opt_trajs, better_trajs, losses)
    ):
        env.reward = reward_weights

        traj_opt_return = rollout(actions=opt_traj, env=env, start=start)
        policy_return = rollout(actions=policy_traj, env=env, start=start)

        assert (
            abs(traj_opt_return + loss) < 0.001
        ), f"Rollout={traj_opt_return} and loss={loss}, differ by too much. start={start}, reward={reward_weights}"

        returns[i, 0] = traj_opt_return
        returns[i, 1] = policy_return

        logging.debug(
            f"Traj opt return={traj_opt_return}, loss={loss}, policy_return={policy_return}, delta={traj_opt_return-policy_return}"
        )

    np.save(experiment_dir / "returns.npy", returns)

    deltas = returns[:, 0] - returns[:, 1]

    logging.info(
        f"Mean delta={np.mean(deltas)}, mean better={np.mean(deltas > 0)*100:.1f}%, optim={optim}, lr={lr}, n={plan_iters}, momentum={momentum}, nesterov={nesterov}, extra inits={extra_inits}"
    )

    plot_returns(returns, experiment_dir)
Esempio n. 43
0
def main() -> None:
    # Parse start arguments
    parser = argparse.ArgumentParser(
        description="Moonraker - Klipper API Server")
    parser.add_argument("-c",
                        "--configfile",
                        default="~/moonraker.conf",
                        metavar='<configfile>',
                        help="Location of moonraker configuration file")
    parser.add_argument("-l",
                        "--logfile",
                        default="/tmp/moonraker.log",
                        metavar='<logfile>',
                        help="log file name and location")
    parser.add_argument("-n",
                        "--nologfile",
                        action='store_true',
                        help="disable logging to a file")
    cmd_line_args = parser.parse_args()
    cfg_file = cmd_line_args.configfile
    app_args = {'config_file': cfg_file}

    # Setup Logging
    version = utils.get_software_version()
    if cmd_line_args.nologfile:
        app_args['log_file'] = ""
    else:
        app_args['log_file'] = os.path.normpath(
            os.path.expanduser(cmd_line_args.logfile))
    app_args['software_version'] = version
    ql, file_logger, warning = utils.setup_logging(app_args)
    if warning is not None:
        app_args['log_warning'] = warning

    if sys.version_info < (3, 7):
        msg = f"Moonraker requires Python 3.7 or above.  " \
            f"Detected Version: {sys.version}"
        logging.info(msg)
        print(msg)
        ql.stop()
        exit(1)

    # Start asyncio event loop and server
    event_loop = EventLoop()
    alt_config_loaded = False
    estatus = 0
    while True:
        try:
            server = Server(app_args, file_logger, event_loop)
            server.load_components()
        except confighelper.ConfigError as e:
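            # On a configuration error, fall back to the most recent working
            # config backup before giving up.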
            backup_cfg = confighelper.find_config_backup(cfg_file)
            if alt_config_loaded or backup_cfg is None:
                logging.exception("Server Config Error")
                estatus = 1
                break
            app_args['config_file'] = backup_cfg
            app_args['config_warning'] = (
                f"Server configuration error: {e}\n"
                f"Loaded server from most recent working configuration:"
                f" '{app_args['config_file']}'\n"
                f"Please fix the issue in moonraker.conf and restart "
                f"the server.")
            alt_config_loaded = True
            continue
        except Exception:
            logging.exception("Moonraker Error")
            estatus = 1
            break
        try:
            event_loop.register_callback(server.server_init)
            event_loop.start()
        except Exception:
            logging.exception("Server Running Error")
            estatus = 1
            break
        if server.exit_reason == "terminate":
            break
        # Restore the original config and clear the warning
        # before the server restarts
        if alt_config_loaded:
            app_args['config_file'] = cfg_file
            app_args.pop('config_warning', None)
            alt_config_loaded = False
        event_loop.close()
        # Since we are running outside of the server
        # it is ok to use a blocking sleep here
        time.sleep(.5)
        logging.info("Attempting Server Restart...")
        for _ in range(5):
            # Sometimes the new loop does not properly instantiate.
            # Give 5 attempts before raising an exception
            new_loop = asyncio.new_event_loop()
            if not new_loop.is_closed():
                break
            logging.info("Failed to create an open event loop, "
                         "retrying in .5 seconds...")
            time.sleep(.5)
        else:
            raise RuntimeError("Unable to create a new open event loop")
        asyncio.set_event_loop(new_loop)
        event_loop.reset()
    event_loop.close()
    logging.info("Server Shutdown")
    ql.stop()
    exit(estatus)
Esempio n. 44
0
        '--functions',
        help='name of the function file',
        required=True,
        default='/storage/users/cnalab/apkdata-tanya/binary/new.large.sframe')
    parser.add_argument(
        '--net', help='name of a network file (extraction only for anchors)')
    parser.add_argument('--output', help='output path', required=True)
    args = parser.parse_args()
    #test_file = '/storage/users/cnalab/apkdata-tanya/binary/test-tc-1000.npy'

    #if test_file:
    #    print(f"Reading test file: {test_file}")
    #    test_apns = np.load(test_file)

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    net_file = args.net
    logging.info(f"Reading net file {net_file}")
    gamma, net = load_net(net_file)
    test_apns = list(net.keys())

    logging.info(f"Extracted apn: {len(test_apns)}")

    setup_turi()
    tc.config.set_runtime_config('TURI_DEFAULT_NUM_PYLAMBDA_WORKERS', 16)

    logging.info('Loading functions')
    mw = load_functions_partition(directory='', name=args.functions)

    logging.info('Filter started')
    test_f = mw.filter_by(values=test_apns, column_name='apk')
Esempio n. 45
0
parser.add_argument('--resume',
                    '-r',
                    action='store_true',
                    help='resume from checkpoint')
parser.add_argument('--results_dir',
                    metavar='RESULTS_DIR',
                    default='./results',
                    help='results dir')
parser.add_argument('--resume_dir', default=None, help='resume dir')
args = parser.parse_args()

args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
save_path = os.path.join(args.results_dir, args.save)
if not os.path.exists(save_path):
    os.makedirs(save_path)
setup_logging(os.path.join(save_path, 'log.txt'))
logging.info("saving to %s", save_path)
logging.info("run arguments: %s", args)

use_cuda = torch.cuda.is_available()
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
Esempio n. 46
0
    ap.add_argument('installdir')
    ap.add_argument('tarball')
    args = ap.parse_args()

    if not exists(args.installdir):
        print('directory {} does not exist'.format(args.installdir))
        sys.exit(1)

    if os.listdir(args.installdir):
        print('directory {} is not empty'.format(args.installdir))
        sys.exit(1)

    if not exists(args.tarball):
        print('file {} does not exist'.format(args.tarball))
        sys.exit(1)

    m = re.match(r'^.*?_([\d\.]+).*?\.tar\.gz$', basename(args.tarball))
    version = m.group(1)

    cfg = ServerConfig(installdir=args.installdir,
                       tarball=args.tarball,
                       version=version)
    setup_server(cfg, args.db)
    start_server(cfg)
    create_test_user(cfg)


if __name__ == '__main__':
    setup_logging()
    main()
Esempio n. 47
0
def main():
    # Parse start arguments
    parser = argparse.ArgumentParser(
        description="Moonraker - Klipper API Server")
    parser.add_argument("-c",
                        "--configfile",
                        default="~/moonraker.conf",
                        metavar='<configfile>',
                        help="Location of moonraker configuration file")
    parser.add_argument("-l",
                        "--logfile",
                        default="/tmp/moonraker.log",
                        metavar='<logfile>',
                        help="log file name and location")
    parser.add_argument("-n",
                        "--nologfile",
                        action='store_true',
                        help="disable logging to a file")
    system_args = parser.parse_args()

    # Setup Logging
    version = utils.get_software_version()
    if system_args.nologfile:
        log_file = ""
    else:
        log_file = os.path.normpath(os.path.expanduser(system_args.logfile))
    system_args.logfile = log_file
    system_args.software_version = version
    ql, file_logger = utils.setup_logging(log_file, version)

    if sys.version_info < (3, 7):
        msg = f"Moonraker requires Python 3.7 or above.  " \
            f"Detected Version: {sys.version}"
        logging.info(msg)
        print(msg)
        ql.stop()
        exit(1)

    # Start IOLoop and Server
    io_loop = IOLoop.current()
    estatus = 0
    while True:
        try:
            server = Server(system_args, file_logger)
        except Exception:
            logging.exception("Moonraker Error")
            estatus = 1
            break
        try:
            server.start()
            io_loop.start()
        except Exception:
            logging.exception("Server Running Error")
            estatus = 1
            break
        # Since we are running outside of the server
        # it is ok to use a blocking sleep here
        time.sleep(.5)
        logging.info("Attempting Server Restart...")
    io_loop.close(True)
    logging.info("Server Shutdown")
    ql.stop()
    exit(estatus)
Esempio n. 48
0
import RPi.GPIO as GPIO
import time

import sensor_repo as sr
import utils

CONTEXT = "vlotter"
PIN = 36

utils.setup_logging(CONTEXT)
repo = sr.sensor_repo()

GPIO.setmode(GPIO.BOARD)
GPIO.setup(PIN, GPIO.IN)

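# Crude debounce: report "1" only after more than 100 consecutive low
# readings (roughly 10 s at one poll per 0.1 s); any other reading resets
# the stored value to "0" after a 5 s pause.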
try:
    repo.set_value(CONTEXT, "0")
    high_level_cnt = 0
    while True:
        if GPIO.input(PIN) == 0:
            high_level_cnt += 1
        else:
            time.sleep(5)
            utils.retry_if_none(lambda: repo.set_value(CONTEXT, "0"))

            high_level_cnt = 0

        if high_level_cnt > 100:
            utils.retry_if_none(lambda: repo.set_value(CONTEXT, "1"))

        time.sleep(0.1)
Esempio n. 49
0
import sys, os
import xml.dom.minidom
import utils
import opc
import splunk.entity as en

logger = utils.setup_logging("opcmeasure")

SCHEME = """<scheme>
    <title>OPC DA Collector</title>
    <description>Setup opc measure.</description>
    <use_external_validation>true</use_external_validation>
    <streaming_mode>xml</streaming_mode>

    <endpoint>
        <args>
            <arg name="name">
                <title>OPC DA Collector</title>
                <description>OPC measure name
                </description>
            </arg>

            <arg name="server">
                <title>Opc Server</title>
                <description>Opc Server alias that is configured in opcservers.conf.</description>
            </arg>
            
            <arg name="measures">
                <title>Measure Items</title>
                <description>Separated with semicolon ; if multiple.</description>
            </arg>
Esempio n. 50
0
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)

    vocab = build_vocab()
    model = CaptionModel(cnn,
                         vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(args.optimizer,
                                 params=model.parameters(),
                                 lr=args.lr)
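    # Per-epoch optimization regime: exponential learning-rate decay with
    # fixed momentum and weight decay.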
    regime = lambda e: {
        'lr': args.lr * (args.lr_decay**e),
        'momentum': args.momentum,
        'weight_decay': args.weight_decay
    }
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                captions = captions.cuda(non_blocking=True)
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            perplexity.update(math.exp(err.data[0]))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logging.info(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                        epoch,
                        i,
                        len(data),
                        phase='TRAINING' if training else 'EVALUATING',
                        batch_time=batch_time,
                        data_time=data_time,
                        perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(optimizer, epoch, regime)
        # Train
        train_perp = forward(model,
                             train_data,
                             training=True,
                             optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'.format(
                         epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
Esempio n. 51
0
import logging
import argparse

from utils import setup_logging
from telegram.client import Telegram
"""
Prints short description of a webpage (using Telegram's instant view)

Usage:
    python examples/get_instant_view.py api_id api_hash phone https://hackernoon.com/im-harvesting-credit-card-numbers-and-passwords-from-your-site-here-s-how-9a8cb347c5b5
"""

if __name__ == '__main__':
    setup_logging(level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('api_id',
                        help='API id')  # https://my.telegram.org/apps
    parser.add_argument('api_hash', help='API hash')
    parser.add_argument('phone', help='Phone')
    parser.add_argument('url', help='Webpage URL')
    args = parser.parse_args()

    tg = Telegram(
        api_id=args.api_id,
        api_hash=args.api_hash,
        phone=args.phone,
        database_encryption_key='changeme1234',
    )
    # you must call login method before others
    tg.login()
Esempio n. 52
0
import argparse

from telegram_api.client import Telegram
import utils

if __name__ == '__main__':
    utils.setup_logging()

    parser = argparse.ArgumentParser()
    utils.add_api_args(parser)
    utils.add_proxy_args(parser)
    args = parser.parse_args()

    tg = Telegram(api_id=args.api_id,
                  api_hash=args.api_hash,
                  phone=args.phone,
                  database_encryption_key='changeme1234',
                  proxy_server=args.proxy_server,
                  proxy_port=args.proxy_port,
                  proxy_type=utils.parse_proxy_type(args))
    # you must call login method before others
    tg.login()

    result = tg.get_me()
    result.wait()
    print(result.update)
Esempio n. 53
0
    formatter_class=RawTextHelpFormatter,
    description='''Script for adding data into the data-acquisition service.
Input data must be in json format. Example data can be created using create_simple_data.py. The data format (json) is a list of dictionaries; each dict contains basic information on a dataset:
    [
      {
        "category": DATA_CATEGORY,
        "orgUUID": ORGANISATION NAME (NOT UUID, uuid will be found from name),
        "publicRequest": false/true,
        "source": URL OF FILE TO ADD,
        "title": FILE NAME
      },
      {
          ...
      },
      ...
    ]

''')
parser.add_argument(
    'token',
    help="OAUTH token. For delete and insert it must have admin privileges")
parser.add_argument('file', help="Input file in json format")
parser.add_argument('--debug', action="store_true", help="Debug logging")

args = parser.parse_args()

from cf_config import URL, CF_URL

setup_logging(debug=args.debug)
parse_and_send_data(args.token, args.file)
Esempio n. 54
0
                    choices=[0, 1, 2, 3, 4])
parser.add_argument("-inspect", default=False, action="store_true")
parser.add_argument("-hidden", type=int, default=128)
parser.add_argument("-c", default=0.1, type=float)
parser.add_argument("-xi", default=0.1, type=float)
parser.add_argument("-lr", default=0.0001, type=float)
parser.add_argument("-nepochs", default=20, type=int)
args = parser.parse_args()

config = {
    'overwrite_name':
    'si-h%d-lr%g-c%g-xi%g-dataset%d' %
    (args.hidden, args.lr, args.c, args.xi, args.dataset),
}

utils.setup_logging(args.seed, config['overwrite_name'])
print("Seed: %d" % args.seed)
session_config = utils.set_seed(args.seed, args.dataset)
n_permute_tasks, it, layer_sizes = utils.setup_dataset(args.dataset,
                                                       args.inspect)

config = {
    **config,
    'c': args.c,
    'xi': args.xi,
    'lr': args.lr,
}

if args.hidden is not None:
    layer_sizes = layer_sizes[:1] + [
        args.hidden for ln in range(len(layer_sizes) - 2)
Esempio n. 55
0
import splunk.entity as en
import jdbc, utils

logger = utils.setup_logging("rpcinits")


def load_db(config):
    ents = en.getEntities(["admin", "conf-inputs"],
                          namespace="splunk-demo-opcda",
                          owner="nobody",
                          sessionKey=config["session_key"],
                          hostPath=config["server_uri"])
    # logger.debug("%s" % ents)
    for dbn, dbv in [(n, v) for n, v in ents.items()
                     if n.startswith("database://")]:
        name = dbn.replace("database://", "")
        logger.debug("name=%s" % name)
        logger.debug("values=%s" % dbv)
        jdbc.updateDatabase(name, dbv["dburl"], dbv["jdbcdriver"], dbv["user"],
                            dbv["password"], dbv["parameters"])
Esempio n. 56
0
import sys
sys.path.append("..")

import logging
from dag import Experiment, Recipe
import dill
import os
import utils

logger = logging.getLogger("main")
utils.setup_logging(debug=True)

directory = "../output/08-06-19_seed4"
experiment = Experiment(directory=directory)

# this materializes immediately
x = experiment.spawn_new_tree(
    dataset_name="mnist",
    model_name="models.LeNet",
    init_schema="",
    optimizer_name="sgd",
    seed=4,
)

x = Recipe(train={"n_epochs": 30})(x)

for _ in range(20):
    # finetune
    pruned = Recipe(
        prune_schema="../schemas/pruning_schema_lenet_structuredl1.py", )(x)
    x = Recipe(
Esempio n. 57
0
from threading import Thread

import cli_args as cli
from cli_args import LOG_LEVEL, GRPC_HOST
from cli_args import setup_cli_args
from flask import Flask
from location_client import LocationClient
from utils import setup_logging
from utils import sleep

if __name__ == "__main__":
    # Parse CLI args
    args = setup_cli_args(cli.grpc_host, cli.verbose)

    # Setup logging
    setup_logging(level=args[LOG_LEVEL])

    http = Flask(__name__)

    @http.route("/count")
    def val_count():
        global count
        return "Read {0} values".format(count)

    def read_values():
        global count
        while True:
            print("Got location: {0}".format(client.get_xy()))
            count += 1

    # Start client
Esempio n. 58
0
def test_setup_logging(tmp_path):
    config = Namespace(verbose=True, output_dir=tmp_path)
    logger = setup_logging(config)
    assert logger.level == logging.INFO
    assert isinstance(logger, logging.Logger)
    assert next(tmp_path.rglob("*.log")).is_file()
Esempio n. 59
0
def main(args):
    # Setup logging
    logger = setup_logging(args)

    # Read params of model
    params = fetch_model_params(args.model)

    # Fetch appropriate input functions
    input_fn = params.get("input_fn", "sequential_input")
    if input_fn == "sequential_input":
        input_fn = sequential_input
    elif input_fn == "generic_text":
        input_fn = generic_text
    pred_input_fn = pred_input
    handle_pred_output_fn = handle_pred_output

    # get current step
    current_step = int(estimator_lib._load_global_step_from_checkpoint_dir(params["model_path"]))
    logger.info(f"Current step {current_step}")

    if params["mlm_training"]:
        mlm_sample_text_fn = partial(mlm_sample_text, params)
        input_fn = partial(generic_text, sample_text_fn=mlm_sample_text_fn)
        if args.check_dataset:
            check_dataset(input_fn, params)


    # Fetch encoder per params
    encoder = fetch_encoder(params)

    pred_input_fn = partial(pred_input_fn, path_to_prompt=args.prompt, logger=logger, enc=encoder)

    # Sample from Dataset if check dataset flag is on
    if args.check_dataset:
        check_dataset(input_fn, params, global_step=current_step)

    # Confirm deletion of checkpoint files if --new flag is set
    if args.new:
        if yes_or_no(f"Are you sure you want to remove '{params['model_path']}' to start afresh?"):
            remove_gs_or_filepath(params["model_path"])
        else:
            exit()

    # Save config to logdir for experiment management
    save_config(params, params["model_path"])

    # Add to params: auto_layout, auto_layout_and_mesh_shape, use_tpu, num_cores
    mesh_shape = mtf.convert_to_shape(params["mesh_shape"])
    params["num_cores"] = mesh_shape.size
    params["auto_layout"] = args.auto_layout
    params["auto_layout_and_mesh_shape"] = args.auto_layout_and_mesh_shape
    params["use_tpu"] = args.tpu is not None
    params["gpu_ids"] = args.gpu_ids
    params["steps_per_checkpoint"] = args.steps_per_checkpoint
    # Expand attention types param
    params["attention_types"] = expand_attention_types_params(params["attention_types"])
    assert len(params["attention_types"]) == params["n_layer"]  # Assert that the length of expanded list = num layers
    params["predict_batch_size"] = params.get("predict_batch_size", 1)  # Default to 1
    params["predict"] = args.predict
    params['model'] = params.get("model", "GPT") # Default model selection to GPT since it's the only option for now
    params["export"] = args.export
    # Set sampling parameters
    params["sampling_use_entmax"] = args.entmax_sampling

    # Sample quality of MoE models suffers when using the faster sampling method, so default to slow_sampling if
    # moe layers are present
    params["slow_sampling"] = params["moe_layers"] is not None

    logger.info(f"params = {params}")

    # Get eval tasks from params
    eval_tasks = params.get("eval_tasks", [])
    has_predict_or_eval_steps_or_eval_tasks = params["predict_steps"] > 0 or params["eval_steps"] > 0 or len(
        eval_tasks) > 0

    for t in eval_tasks:
        assert t in task_descriptors, f"Eval task '{t}' is not known"
        task_descriptors[t]["init_fn"](params)

    # Set up TPUs and Estimator
    if args.tpu == "colab":
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver() if params["use_tpu"] else None
    else:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(args.tpu) if params["use_tpu"] else None

    config = tpu_config.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=params["model_path"],
        save_checkpoints_steps=None,  # Disable the default saver
        save_checkpoints_secs=None,  # Disable the default saver
        log_step_count_steps=params["iterations"],
        save_summary_steps=params["iterations"],
        tpu_config=tpu_config.TPUConfig(
            num_shards=mesh_shape.size,
            iterations_per_loop=params["iterations"],
            num_cores_per_replica=1,
            per_host_input_for_training=tpu_config.InputPipelineConfig.BROADCAST))

    estimator = tpu_estimator.TPUEstimator(
        use_tpu=params["use_tpu"],
        model_fn=model_fn,
        config=config,
        train_batch_size=params["train_batch_size"],
        eval_batch_size=params["train_batch_size"],
        predict_batch_size=params["predict_batch_size"],
        params=params)

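    # Build one TPUEstimator per eval task, sharing the base RunConfig but
    # tagging each with its task name via params.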
    def _make_task_estimator(task):
        task_params = params.copy()
        task_params["eval_task"] = task
        return tpu_estimator.TPUEstimator(
            use_tpu=params["use_tpu"],
            model_fn=model_fn,
            config=config,
            train_batch_size=params["train_batch_size"],
            eval_batch_size=params["eval_batch_size"],
            predict_batch_size=params["predict_batch_size"],
            params=task_params)

    eval_task_estimators = {
        task: _make_task_estimator(task)
        for task in eval_tasks
    }

    if args.export:
        export_model(estimator, "export", params)
        return

    if args.predict:
        # Predict
        predictions = estimator.predict(input_fn=pred_input_fn)
        logger.info("Predictions generated")
        enc = fetch_encoder(params)
        handle_pred_output_fn(predictions, logger, enc, params, out_name=f"predictions_{args.sacred_id}_{current_step}")
        return

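    # Append eval results as one JSON line per (task, step), converting
    # numpy scalars to native Python types first.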
    def save_eval_results(task, eval_results):
        def as_python(x):
            if isinstance(x, numpy.generic):
                return x.item()
            return x
        eval_results = {k: as_python(v) for k, v in eval_results.items()}
        with open(f'eval_{args.sacred_id}.jsonl', 'a') as fh:
            json.dump({'task': task, 'current_step': current_step, **eval_results}, fh)
            fh.write('\n')

    def run_eval():
        logger.info("Running evaluation...")
        eval_results = estimator.evaluate(
                input_fn=partial(input_fn, eval=True),
                steps=params["eval_steps"])
        logger.info(f"Eval results: {eval_results}")
        save_eval_results('validation', eval_results)

    def run_eval_tasks():
        for task in eval_tasks:
            logger.info(f"Starting evaluation task '{task}'")
            task_info = task_descriptors[task]["get_task_info_fn"](params)
            task_estimator = eval_task_estimators[task]
            task_input_fn = task_descriptors[task]["input_fn"]
            eval_results = task_estimator.evaluate(
                input_fn=task_input_fn,
                steps=task_info["n_steps"],
                name=task)
            logger.info(f"Eval task '{task}' results: {eval_results}")
            save_eval_results(task, eval_results)
    
    if args.eval:
        run_eval_tasks()
        if params["eval_steps"] > 0:
            run_eval()
        return


    elif has_predict_or_eval_steps_or_eval_tasks:
        # Eval and train - stop and predict and/or eval every checkpoint
        while current_step < params["train_steps"]:
            next_checkpoint = min(current_step + args.steps_per_checkpoint,
                                  params["train_steps"])

            estimator.train(input_fn=partial(input_fn, global_step=current_step, eval=False), max_steps=next_checkpoint)
            current_step = next_checkpoint

            if params["predict_steps"] > 0:
                logger.info("Running prediction...")
                predictions = estimator.predict(input_fn=pred_input_fn)
                enc = fetch_encoder(params)
                handle_pred_output_fn(predictions, logger, enc, params, out_name=f"predictions_{args.sacred_id}_{current_step}")

            if params["eval_steps"] > 0:
                run_eval()

            if eval_tasks:
                run_eval_tasks()
                
        return
    else:
        # Else, just train, don't stop and restart
        estimator.train(input_fn=partial(input_fn, global_step=current_step, eval=False), max_steps=params["train_steps"])
Esempio n. 60
0
        super(IPKernelApp, self).initialize(
            argv)  # Skipping IPKernelApp.initialize on purpose

        self.init_connection_file()
        self.init_poller()
        self.init_sockets()
        self.init_heartbeat()
        self.init_signal()

    def initialize_kernel(self):
        self.kernel = ForwardingKernel(parent=self)


if __name__ == '__main__':
    RABBIT_MQ_ADDRESS = (os.environ['MQ_HOST'], int(os.environ['MQ_PORT']))
    RABBIT_MQ_CREDENTIALS = (os.environ['MQ_USER'], os.environ['MQ_PASS'])
    MISSED_HEARTBEAT_LIMIT = int(os.environ['MISSED_HEARTBEAT_LIMIT'])
    HEARTBEAT_INTERVAL = float(os.environ['HEARTBEAT_INTERVAL'])
    KERNEL_NAME = os.environ['KERNEL_NAME']

    app = ForwardingKernelApp.instance()
    app.initialize()

    setup_logging(
        os.path.join(
            os.getcwd(), 'forwarding_kernel_logs', 'kernel_{}.log'.format(
                ForwardingKernel.get_kernel_id(app.connection_file))))

    app.initialize_kernel()
    app.start()