예제 #1
0
 def __init__(self, package_name):
     """Record the package name, set default artifact names and start logging.

     :param package_name: name of the package; it is stored on the instance
         and used as the prefix of the start-up log message.
     """
     # Raise the threshold of the 'dtlpy' logger to WARN before anything else.
     dtlpy_logger = logging.getLogger('dtlpy')
     dtlpy_logger.setLevel(logging.WARN)

     self.package_name = package_name

     # Default file / directory names used by this object.
     self.path_to_logs = 'logger.conf'
     self.path_to_tensorboard_dir = 'runs'
     self.path_to_metrics = 'metrics.json'

     # The instance logger is built by the project's init_logging helper.
     self.logger = init_logging(__name__, filename=self.path_to_logs)
     startup_message = self.package_name + ' initialized'
     self.logger.info(startup_message)
예제 #2
0
 def __init__(self, package_name, service_name):
     """Record package/service names, set default artifact names, start logging.

     :param package_name: name of the package; it is stored on the instance
         and used as the prefix of the start-up log message.
     :param service_name: name of the service; stored on the instance.
     """
     self.service_name = service_name
     self.package_name = package_name

     # Default file / directory names used by this object.
     self.path_to_logs = 'logger.conf'
     self.path_to_tensorboard_dir = 'runs'
     self.path_to_metrics = 'metrics.json'
     self.path_to_best_checkpoint = 'checkpoint.pt'

     # The instance logger is built by the project's init_logging helper.
     self.logger = init_logging(__name__, filename=self.path_to_logs)
     startup_message = self.package_name + ' initialized'
     self.logger.info(startup_message)
예제 #3
0
def create_app():
    """Build the VMaaS application, start its HTTP server and load the cache.

    The order of the steps matters: the server is bound and started first
    (``start`` is where the process forks), and only afterwards are logging,
    the database cache and the websocket reconnect timer set up.
    """
    application = Application()

    http_server = tornado.httpserver.HTTPServer(application)
    http_server.bind(PUBLIC_API_PORT)
    # Number of server processes: environment override, else MAX_SERVERS.
    worker_count = int(os.getenv("MAX_VMAAS_SERVERS", MAX_SERVERS))
    http_server.start(worker_count)  # forking starts here
    init_logging(worker_count)
    LOGGER.info("Starting (version %s).", VMAAS_VERSION)
    hotcache_setting = os.getenv("HOTCACHE_ENABLED", "YES")
    LOGGER.info('Hotcache enabled: %s', hotcache_setting)

    # Everything below must run only after forking.
    BaseHandler.db_cache = Cache()
    load_cache_to_apis()

    # Connect once right away, then keep retrying on a timer.
    application.websocket_reconnect()
    # presumably the interval constant is in seconds (PeriodicCallback
    # expects milliseconds) - confirm against its definition
    reconnect_period = WEBSOCKET_RECONNECT_INTERVAL * 1000
    application.reconnect_callback = PeriodicCallback(
        application.websocket_reconnect, reconnect_period)
    application.reconnect_callback.start()
예제 #4
0
def main():
    """Create the cryptochat application and run it until it is shut down.

    Installs SIGTERM/SIGINT handlers, initialises logging and the database,
    binds and starts the HTTP server, attaches the API objects to
    ``BaseHandler`` and finally blocks inside the Tornado IO loop.
    """

    async def shutdown():
        """Stop the server, wait ``_SHUTDOWN_TIMEOUT``, then stop the IO loop."""
        server.stop()
        # presumably a grace period for requests still in flight
        await tornado.gen.sleep(_SHUTDOWN_TIMEOUT)
        tornado.ioloop.IOLoop.current().stop()
        LOGGER.info("Server was successfully shut down.")

    def exit_handler(sig, frame):  # pylint: disable=unused-argument
        """Signal handler: log the signal's name and schedule ``shutdown``."""
        # signal.Signals maps a signal number straight to its canonical name,
        # so there is no need to scan and invert the signal module's namespace.
        LOGGER.warning("Registered %s, shutting down.", signal.Signals(sig).name)
        tornado.ioloop.IOLoop.instance().add_callback_from_signal(shutdown)

    signal.signal(signal.SIGTERM, exit_handler)
    signal.signal(signal.SIGINT, exit_handler)

    init_logging()
    cryptochat_db = DB(DATABASE_LOCATION)

    cryptochat_app = Application()
    server = tornado.httpserver.HTTPServer(cryptochat_app)
    server.bind(PUBLIC_API_PORT)
    server.start()
    LOGGER.info("Starting cryptochat (version %s).", SERVER_VERSION)

    # All API objects share the same database handle and are exposed to the
    # request handlers as class attributes of BaseHandler.
    BaseHandler.messages_new_api = MessagesNewAPI(cryptochat_db)
    BaseHandler.messages_updates_api = MessagesUpdatesAPI(cryptochat_db)
    BaseHandler.users_api = UsersAPI(cryptochat_db)
    BaseHandler.chats_api = ChatsAPI(cryptochat_db)
    BaseHandler.chats_user_api = ChatsUserAPI(cryptochat_db)
    BaseHandler.contacts_new_api = ContactsAPI(cryptochat_db)

    # Blocks until shutdown() stops the loop.
    tornado.ioloop.IOLoop.current().start()
예제 #5
0
import psutil
import glob
import shutil
import time
import torch.optim as optim
from tqdm import tqdm
from . import csv_eval
from dataloader import *

from networks import get_model
from torch.utils.data import DataLoader

from logging_utils import logginger, init_logging

# Logger for this module, created through the project's logging helper.
logger = logginger(__name__)
# Separate logger named 'Memory'; presumably it writes to mem_log.log.
mem_log = init_logging('Memory', 'mem_log.log')

# Report at import time whether a CUDA device can be used.
print('CUDA available: %s' % (torch.cuda.is_available(),))


class ModelTrainer:
    def __init__(self, device_index=0):
        self.device = torch.device(
            type='cuda',
            index=device_index) if torch.cuda.is_available() else torch.device(
                type='cpu')

    def load(self,
             data_path,
             save_trial_id,
             resume_trial_id=None,
예제 #6
0
파일: zazu.py 프로젝트: karangurtu/ZazuML
        zazu_service = dl.services.get('zazu')
        # get project id for billing bla bla bla
        dataset_obj = get_dataset_obj(configs['dataloop'])
        id = dataset_obj.project.id

        if args.search:
            zazu_service.execute(function_name='search',
                                 execution_input=inputs,
                                 project_id=id)
        if args.predict:
            zazu_service.execute(function_name='predict',
                                 execution_input=inputs,
                                 project_id=id)

    else:
        logger = init_logging(__name__)
        this_path = path = os.getcwd()
        configs_path = os.path.join(this_path, 'configs.json')
        configs = ConfigSpec(configs_path)
        opt_model = OptModel()
        opt_model.add_child_spec(configs, 'configs')
        zazu = ZaZu(opt_model, remote=args.remote)
        if args.search:
            zazu.find_best_model()
            zazu.hp_search()
        if args.train:
            zazu.train_new_model()
        if args.predict:
            zazu.run_inference()
        if args.predict_once:
            zazu.one_time_inference('/home/noam/0120122798.jpg',
예제 #7
0
def main():
    """Set up logging, log the client version, run the app and exit.

    The value returned by ``app.run()`` is passed to ``sys.exit`` and thus
    becomes the process exit status.
    """
    init_logging()
    LOGGER.info("Starting (version %s).", CLIENT_VERSION)
    sys.exit(app.run())
예제 #8
0
    def __init__(self, configs, time, test_dataset_id, query):
        """Continuously train, evaluate and (re)deploy the best checkpoint.

        Sets up the Dataloop project, the test dataset and the 'retinanet'
        model, then enters an endless loop (``while 1`` with no ``break``), so
        as written this constructor does not return normally. Each cycle:

        1. executes the 'search' function of the 'zazu' service and polls
           until the execution status is 'success';
        2. downloads the execution artifacts and renames 'checkpoint0.pt' to
           a name that contains the execution id;
        3. computes predictions for the new checkpoint and, when a 'check0'
           checkpoint already exists on the model, for that one as well;
        4. if the new checkpoint's metric is higher, re-uploads the old
           'check0' under another name, uploads the new one as 'check0' and
           launches or updates the predict service;
        5. saves the metrics plot, uploads it together with the cycle's log
           file as artifacts and sleeps for ``time`` (logged as seconds).

        :param configs: JSON string; decoded with ``json.loads`` and expected
            to contain ``['dataloop']['project']``.
        :param time: pause between cycles; converted with ``int()`` and
            passed to ``sleep``.
        :param test_dataset_id: id of the dataset fetched from the project
            and used for evaluation.
        :param query: JSON string; decoded and used as the dataset filter.
        :raises Exception: when an execution reports the status 'failed'.
        """
        logger.info('dtlpy version: ' + str(dl.__version__))
        logger.info('dtlpy info: ' + str(dl.info()))
        # NOTE(review): the parameter name `time` would shadow a module-level
        # `time` import inside this method, if there is one.
        time = int(time)
        dl.setenv('prod')
        # Both configs and query arrive as JSON text and are decoded here.
        configs = json.loads(configs)
        query = json.loads(query)
        self.configs_input = dl.FunctionIO(type='Json', name='configs', value=configs)
        self.service = dl.services.get('zazu')
        project_name = configs['dataloop']['project']
        self.project = dl.projects.get(project_name)
        test_dataset = self.project.datasets.get(dataset_id=test_dataset_id)
        maybe_download_pred_data(dataset_obj=test_dataset, val_query=query)

        # Prepare the ground-truth annotations of the test dataset, restricted
        # by the caller's query.
        filters = dl.Filters()
        filters.custom_filter = query
        dataset_name = test_dataset.name
        path_to_dataset = os.path.join(os.getcwd(), dataset_name)
        # Download only if the dataset directory does not exist yet.
        if not os.path.exists(path_to_dataset):
            download_and_organize(path_to_dataset=path_to_dataset, dataset_obj=test_dataset, filters=filters)

        json_file_path = os.path.join(path_to_dataset, 'json')
        self.model_obj = self.project.models.get(model_name='retinanet')
        self.adapter = self.model_obj.build(local_path=os.getcwd())
        logger.info('model built')
        # Endless train / evaluate / deploy cycle; there is no break below.
        while 1:

            # Fresh metrics object per cycle, seeded with the local
            # annotation files.
            self.compute = precision_recall_compute()
            self.compute.add_dataloop_local_annotations(json_file_path)
            logger.info("running new execution")
            # NOTE(review): project_id is a hard-coded literal rather than
            # being taken from self.project - confirm this is intended.
            execution_obj = self.service.execute(function_name='search', execution_input=[self.configs_input],
                                                 project_id='72bb623f-517f-472b-ad69-104fed8ee94a')
            # Poll every 5 seconds until the execution succeeds; a 'failed'
            # status aborts the whole loop with an exception.
            while execution_obj.latest_status['status'] != 'success':
                sleep(5)
                execution_obj = dl.executions.get(execution_id=execution_obj.id)
                if execution_obj.latest_status['status'] == 'failed':
                    raise Exception("plugin execution failed")
            logger.info("execution object status is successful")
            self.project.artifacts.download(package_name='zazuml',
                                            execution_id=execution_obj.id,
                                            local_path=os.getcwd())
            # Per-execution file names so successive cycles do not overwrite
            # each other's outputs.
            logs_file_name = 'timer_logs_' + str(execution_obj.id) + '.conf'
            graph_file_name = 'precision_recall_' + str(execution_obj.id) + '.png'
            self.cycle_logger = init_logging(__name__, filename=logs_file_name)
            logger.info('artifact download finished')
            logger.info(str(os.listdir('.')))

            # Give the downloaded checkpoint a name unique to this execution,
            # then load it.
            new_checkpoint_name = 'checkpoint_' + str(execution_obj.id) + '.pt'
            logger.info(str(os.listdir('.')))

            # presumably 'checkpoint0.pt' is among the downloaded artifacts -
            # confirm against the 'search' function's outputs
            os.rename('checkpoint0.pt', new_checkpoint_name)
            # Model name is the checkpoint file name without the '.pt' suffix.
            new_model_name = new_checkpoint_name[:-3]
            logger.info(str(os.listdir('.')))
            new_checkpoint = torch.load(new_checkpoint_name, map_location=torch.device('cpu'))
            # self.model_obj = self.project.models.get(model_name=new_checkpoint['model_specs']['name'])
            # self.adapter = self.model_obj.build(local_path=os.getcwd())
            # logger.info('model built')
            self.new_home_path = new_checkpoint['model_specs']['data']['home_path']

            self._compute_predictions(checkpoint_path=new_checkpoint_name,
                                      model_name=new_model_name)

            if len(self.compute.by_model_name.keys()) < 2:
                # Fewer than two entries is treated as "the model predicted
                # nothing", so this checkpoint is skipped.
                # NOTE: `continue` also skips the artifact upload and the
                # sleep at the end of the loop body.
                logger.info('''model couldn't make any predictions, trying to train again''')
                continue

            # If no 'check0' checkpoint exists yet there is nothing to compare
            # against: upload the new checkpoint as 'check0' and launch the
            # prediction service for it.

            if 'check0' not in [checkp.name for checkp in self.model_obj.checkpoints.list()]:
                logger.info('there is no check0, will add upload new checkpoint as check0 and '
                            'deploy prediction service')
                new_checkpoint_obj = self.model_obj.checkpoints.upload(checkpoint_name='check0',
                                                                       local_path=new_checkpoint_name)
                logger.info('uploaded this checkpoint as the new check0 : ' + new_checkpoint_name[:-3])

                self._maybe_launch_predict(new_checkpoint_obj)
                # NOTE: this `continue` also skips the artifact upload and the
                # sleep at the end of the loop body.
                continue
            logger.info('i guess check0 does exist')
            best_checkpoint = self.model_obj.checkpoints.get('check0')
            check0_path = best_checkpoint.download(local_path=os.getcwd())
            logger.info('downloading best checkpoint')
            logger.info(str(os.listdir('.')))
            logger.info('check0 path is: ' + str(check0_path))
            self._compute_predictions(checkpoint_path=check0_path, model_name=best_checkpoint.name)

            # Score both checkpoints with the same metric settings.
            new_checkpoint_mAP = self.compute.get_metric(model_name=new_model_name, precision_to_recall_ratio=1.)
            best_checkpoint_mAP = self.compute.get_metric(model_name=best_checkpoint.name, precision_to_recall_ratio=1.)
            logger.info('best checkpoint: ' + str(best_checkpoint_mAP))
            logger.info('new checkpoint: ' + str(new_checkpoint_mAP))

            # If the new checkpoint performs better, make it the deployed one.
            if new_checkpoint_mAP > best_checkpoint_mAP:
                logger.info('new checkpoint is better')
                logger.info('uploading old best checkpoint under new name')
                # The archive name is built from the text after the last '_'
                # in check0_path, minus its final three characters.
                # NOTE(review): this assumes the downloaded path ends in
                # '_<something>.pt' - confirm what download() returns.
                self.model_obj.checkpoints.upload(checkpoint_name='checkpoint_' + check0_path.split('_')[-1][:-3],
                                                  local_path=check0_path)
                logger.info('deleting old best checkpoint')
                best_checkpoint.delete()
                logger.info('uploading new best checkpoint as check0')
                new_best_checkpoint_obj = self.model_obj.checkpoints.upload(checkpoint_name='check0',
                                                                            local_path=new_checkpoint_name)
                # Launch the 'predict' service if it is not listed yet,
                # otherwise update the existing one.
                if 'predict' not in [s.name for s in dl.services.list()]:
                    self._maybe_launch_predict(new_best_checkpoint_obj)
                else:
                    self._update_predict_service(new_best_checkpoint_obj)
                logger.info('switched with new checkpoint')

            self.compute.save_plot_metrics(save_path=graph_file_name)

            # Attach this cycle's log file and metrics plot to the execution.
            self.project.artifacts.upload(filepath=logs_file_name,
                                          package_name='zazuml',
                                          execution_id=execution_obj.id)
            self.project.artifacts.upload(filepath=graph_file_name,
                                          package_name='zazuml',
                                          execution_id=execution_obj.id)
            logger.info('waiting ' + str(time) + ' seconds for next execution . . . .')
            sleep(time)