Esempio n. 1
0
    def initialize(self):
        """Set up name, status, scheduler and default parallelism.

        A name of the form ``"oc" + random_time_str()`` is generated when
        ``self.name`` is empty. The scheduler is chosen from ``self.mode``
        (local, process, standalone, factory or mesos); only the local
        scheduler marks the instance as ``isLocal``.

        Exits the process with status 2 when a mode's required option is
        missing (``--workertype`` for standalone, ``--warehouse`` for
        factory) and with status 1 when the mode is not recognised.
        """
        # Fall back to a generated name when none was supplied.
        if not self.name:
            self.name = "oc" + random_time_str()

        self.status = ManagerStatus(self.name)
        self.status.mode = self.mode

        mode = self.mode

        # Some modes cannot run without a companion command-line option.
        if mode == 'standalone':
            if not self.options.workertype:
                logger.error(
                    "when --mode is standalone, --workertype must be specified")
                sys.exit(2)
        elif mode == 'factory':
            if not self.options.warehouse:
                logger.error(
                    "when --mode is factory, --warehouse must be specified")
                sys.exit(2)

        # One factory per supported mode. Lambdas keep construction lazy so
        # only the selected scheduler class is looked up and instantiated.
        builders = {
            'local': lambda: LocalScheduler(self),
            'process': lambda: MultiProcessScheduler(
                self, self.options.parallel),
            'standalone': lambda: StandaloneScheduler(
                self, self.options.workertype),
            'factory': lambda: FactoryScheduler(
                self, Conf.getWareHouseAddr(), self.options.warehouse),
            'mesos': lambda: MesosScheduler(
                self, Conf.getMesosMaster(), self.options),
        }

        build = builders.get(mode)
        if build is None:
            logger.error(
                "error mode, --mode should be one of [local, process, standalone, factory, mesos]"
            )
            sys.exit(1)

        self.scheduler = build()
        self.isLocal = mode == 'local'

        # An explicit --parallel value wins over the scheduler's own default.
        self.defaultParallelism = (self.options.parallel
                                   or self.scheduler.defaultParallelism())

        self.initialized = True
Esempio n. 2
0
def _build_factory_mesos_parser():
    """Build the command-line option parser used by start_factory_mesos."""
    parser = OptionParser(
        usage="Usage: python factorymesos.py [options] <command>")
    # Stop option parsing at the first positional argument.
    parser.allow_interspersed_args = False
    parser.add_option("-s",
                      "--master",
                      type="string",
                      default="",
                      help="url of master (mesos://172.31.252.180:5050)")
    parser.add_option("-f",
                      "--factory",
                      type="string",
                      default="",
                      help="host:port of master (172.31.252.180:6666)")
    parser.add_option(
        "-w",
        "--warehouse_addr",
        type="string",
        default="",
        help=
        "kafka-172.31.252.182:9092|mysql-172.31.254.25:3306,db,username,password"
    )
    parser.add_option("-p",
                      "--task_per_node",
                      type="int",
                      default=0,
                      help="max number of tasks on one node (default: 0)")
    parser.add_option("-I",
                      "--image",
                      type="string",
                      help="image name for Docker")
    parser.add_option("-V",
                      "--volumes",
                      type="string",
                      help="volumes to mount into Docker")
    parser.add_option("-r",
                      "--retry",
                      type="int",
                      default=0,
                      help="retry times when failed (default: 0)")
    parser.add_option(
        "-e",
        "--config",
        type="string",
        default="/work/opencluster/config.ini",
        help=
        "absolute path of configuration file(default:/work/opencluster/config.ini)"
    )

    parser.add_option("-g",
                      "--group",
                      type="string",
                      default='',
                      help="which group to run (default: ''")
    parser.add_option(
        "-q",
        "--quiet",
        action="store_true",
        help="be quiet",
    )
    parser.add_option(
        "-v",
        "--verbose",
        action="store_true",
        help="show more useful log",
    )
    return parser


def start_factory_mesos():
    """Run the factory Mesos scheduler until it stops, then exit.

    Steps performed:

    1. Parse command-line options; ``--master`` and ``--warehouse_addr``
       fall back to values read from ``Conf`` when not given.
    2. Configure logging, create a ``FactoryMesos`` scheduler and start a
       ``MesosSchedulerDriver`` for it.
    3. Spawn task fetchers: one MySQL poller when the warehouse address has
       more than two comma-separated fields, otherwise one Kafka consumer
       per scheduler priority.
    4. Loop, calling ``sched.check(driver)`` every 0.5 s and reviving offers
       when none arrived for roughly a minute, until the scheduler stops or
       the user interrupts.

    The process always terminates through ``sys.exit(sched.status)``.
    """
    global pyroLoopCondition

    parser = _build_factory_mesos_parser()
    (options, command) = parser.parse_args()
    # parse_args() always returns a truthy Values object, so the former
    # `if not options:` help-and-exit branch could never run and was dropped.
    # NOTE(review): if the intent was to require a <command>, test `command`.

    if options.config:
        Conf.setConfigFile(options.config)

    options.master = options.master or Conf.getMesosMaster()
    options.warehouse_addr = options.warehouse_addr or Conf.getWareHouseAddr()

    # The factory server address used to be split into host/port locals here,
    # but nothing in this function read them, so that dead code was removed.
    # The --factory option itself is still accepted.

    if options.quiet:
        options.logLevel = logging.ERROR
    elif options.verbose:
        options.logLevel = logging.DEBUG
    else:
        options.logLevel = logging.INFO
    setLogger(Conf.getFactoryServiceName(), "MESOS", options.logLevel)

    # Setting MESOS_EXPLICIT_ACKNOWLEDGEMENTS (to any non-empty value)
    # switches the driver away from implicit acknowledgements.
    implicitAcknowledgements = 1
    if os.getenv("MESOS_EXPLICIT_ACKNOWLEDGEMENTS"):
        implicitAcknowledgements = 0
    sched = FactoryMesos(options, command, implicitAcknowledgements)

    driver = MesosSchedulerDriver(sched, sched.framework, options.master,
                                  implicitAcknowledgements)
    driver.start()
    logger.debug("Mesos Scheudler driver started")

    warehouse_addrs = options.warehouse_addr.split(",")

    def fetchTasksFromMySQL():
        """Poll t_task for pending rows and hand them to the scheduler."""
        global pyroLoopCondition
        mysqlIpAndPort = warehouse_addrs[0].split(":")
        last_data_time = time.time()

        # NOTE(review): a fresh connection is opened on every pass and there
        # is no pause while the table is empty until MAX_EMPTY_TASK_PERIOD
        # has elapsed - confirm this polling rate is acceptable.
        while pyroLoopCondition:
            db = MySQLdb.connect(host=mysqlIpAndPort[0],
                                 port=int(mysqlIpAndPort[1]),
                                 db=warehouse_addrs[1],
                                 user=warehouse_addrs[2],
                                 passwd=warehouse_addrs[3])
            try:
                cur = db.cursor()
                curUpt = db.cursor()
                try:
                    dataResults = cur.execute(
                        "select task_id,task_desc,task_start_time,status from t_task where status=0 order by priority asc limit 200"
                    )
                    results = cur.fetchmany(dataResults)
                    for r in results:
                        # SECURITY: unpickling executes arbitrary code if the
                        # warehouse contents are not fully trusted.
                        sched.append_task(cPickle.loads(r[1]))
                        # Parameterized so the driver quotes/escapes task_id
                        # instead of splicing it into the statement text.
                        curUpt.execute(
                            "update t_task set task_start_time=now(),status=1 where task_id=%s",
                            (r[0],))
                    if results:
                        db.commit()
                        last_data_time = time.time()
                        driver.reviveOffers()
                finally:
                    # Release both cursors even when a query or unpickle fails.
                    cur.close()
                    curUpt.close()
            finally:
                db.close()

            # Back off only after the connection has been released.
            if sched.tasks_total_len() > MAX_WAITING_TASK:
                time.sleep(2)
            if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                time.sleep(10)

    def fetchTasksFromKafka(priority):
        """Consume the 'OpenCluster<priority>' topic into the scheduler."""
        global pyroLoopCondition

        consumer = KafkaConsumer('OpenCluster%s' % priority,
                                 bootstrap_servers=[options.warehouse_addr],
                                 group_id="cnlab",
                                 auto_commit_enable=True,
                                 auto_commit_interval_ms=30 * 1000,
                                 auto_offset_reset='smallest')

        last_data_time = time.time()
        while pyroLoopCondition:
            for message in consumer.fetch_messages():
                # NOTE(review): every message is logged at ERROR level;
                # confirm this is intentional rather than leftover debugging.
                logger.error("%s:%s:%s: key=%s ", message.topic,
                             message.partition, message.offset, message.key)
                # SECURITY: unpickling executes arbitrary code if the topic
                # can be written to by untrusted producers.
                sched.append_task(cPickle.loads(message.value))
                consumer.task_done(message)
                last_data_time = time.time()
            if sched.tasks_len(priority) > MAX_WAITING_TASK:
                time.sleep(2)
            if time.time() - last_data_time > MAX_EMPTY_TASK_PERIOD:
                time.sleep(10)

    # More than two comma-separated fields selects the MySQL warehouse.
    # NOTE(review): the MySQL fetcher indexes warehouse_addrs[3], so exactly
    # three fields would fail inside the fetcher - confirm expected format.
    if len(warehouse_addrs) > 2:
        spawn(fetchTasksFromMySQL)
    else:
        for i in range(1, sched.priority_size + 1):
            spawn(fetchTasksFromKafka, i)

    def handler(signm, frame):
        """Stop the scheduler with status 3 on SIGTERM/SIGABRT."""
        logger.warning("got signal %d, exit now", signm)
        sched.stop(3)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGABRT, handler)

    try:
        while not sched.stopped:
            time.sleep(0.5)
            sched.check(driver)

            now = time.time()
            # Random 0-5 s of jitter is added to the 60 s offer timeout.
            if now > sched.last_offer_time + 60 + random.randint(0, 5):
                logger.warning("too long to get offer, reviving...")
                sched.last_offer_time = now
                driver.reviveOffers()

    except KeyboardInterrupt:
        logger.warning(
            'stopped by KeyboardInterrupt. The Program is exiting gracefully! Please wait...'
        )
        sched.stop(4)

    # Signal the fetcher loops to finish, then give them time to wind down.
    pyroLoopCondition = False

    time.sleep(5)
    driver.stop(False)
    sys.exit(sched.status)